diff options
Diffstat (limited to 'net')
537 files changed, 26423 insertions, 16994 deletions
diff --git a/net/802/fc.c b/net/802/fc.c index 675d9ba8e59..cb3475ea6fd 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -35,7 +35,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + const void *daddr, const void *saddr, unsigned len) { struct fch_hdr *fch; int hdr_len; @@ -95,11 +95,14 @@ static int fc_rebuild_header(struct sk_buff *skb) #endif } +static const struct header_ops fc_header_ops = { + .create = fc_header, + .rebuild = fc_rebuild_header, +}; + static void fc_setup(struct net_device *dev) { - dev->hard_header = fc_header; - dev->rebuild_header = fc_rebuild_header; - + dev->header_ops = &fc_header_ops; dev->type = ARPHRD_IEEE802; dev->hard_header_len = FC_HLEN; dev->mtu = 2024; diff --git a/net/802/fddi.c b/net/802/fddi.c index 91dde41b548..0549317b935 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -52,7 +52,7 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + const void *daddr, const void *saddr, unsigned len) { int hl = FDDI_K_SNAP_HLEN; struct fddihdr *fddi; @@ -175,11 +175,15 @@ static int fddi_change_mtu(struct net_device *dev, int new_mtu) return(0); } +static const struct header_ops fddi_header_ops = { + .create = fddi_header, + .rebuild = fddi_rebuild_header, +}; + static void fddi_setup(struct net_device *dev) { dev->change_mtu = fddi_change_mtu; - dev->hard_header = fddi_header; - dev->rebuild_header = fddi_rebuild_header; + dev->header_ops = &fddi_header_ops; dev->type = ARPHRD_FDDI; dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ diff --git a/net/802/hippi.c b/net/802/hippi.c index 87ffc12b689..e35dc1e0915 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -45,8 +45,8 @@ */ static int hippi_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, - unsigned len) + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; @@ -182,16 +182,18 @@ static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) return 0; } +static const struct header_ops hippi_header_ops = { + .create = hippi_header, + .rebuild = hippi_rebuild_header, +}; + + static void hippi_setup(struct net_device *dev) { dev->set_multicast_list = NULL; dev->change_mtu = hippi_change_mtu; - dev->hard_header = hippi_header; - dev->rebuild_header = hippi_rebuild_header; + dev->header_ops = &hippi_header_ops; dev->set_mac_address = hippi_mac_addr; - dev->hard_header_parse = NULL; - dev->hard_header_cache = NULL; - dev->header_cache_update = NULL; dev->neigh_setup = hippi_neigh_setup_dev; /* diff --git a/net/802/p8023.c b/net/802/p8023.c index 53cf0570928..6ab1835041a 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c @@ -31,7 +31,7 @@ static int p8023_request(struct datalink_proto *dl, { struct net_device *dev = skb->dev; - dev->hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len); + dev_hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len); return dev_queue_xmit(skb); } diff --git a/net/802/tr.c b/net/802/tr.c index e56e61a7f54..a2bd0f2e3af 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -36,6 +36,7 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <net/arp.h> +#include <net/net_namespace.h> static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev); static void rif_check_expire(unsigned long dummy); @@ -99,7 +100,7 @@ static inline unsigned long rif_hash(const unsigned char *addr) static int tr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + const void *daddr, const void *saddr, unsigned len) { struct trh_hdr *trh; int hdr_len; @@ -141,7 +142,7 @@ static int tr_header(struct sk_buff *skb, struct net_device *dev, if(daddr) { memcpy(trh->daddr,daddr,dev->addr_len); - tr_source_route(skb,trh,dev); + tr_source_route(skb, trh, dev); return(hdr_len); } @@ -246,7 +247,8 @@ __be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev) * We try to do source routing... */ -void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device *dev) +void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh, + struct net_device *dev) { int slack; unsigned int hash; @@ -282,8 +284,10 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device * if(entry) { #if TR_SR_DEBUG -printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0], - trh->daddr[1],trh->daddr[2],trh->daddr[3],trh->daddr[4],trh->daddr[5]); +{ +DECLARE_MAC_BUF(mac); +printk("source routing for %s\n",print_mac(mac, trh->daddr)); +} #endif if(!entry->local_ring && (ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8) { @@ -365,10 +369,9 @@ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) if(entry==NULL) { #if TR_SR_DEBUG -printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", - trh->saddr[0],trh->saddr[1],trh->saddr[2], - trh->saddr[3],trh->saddr[4],trh->saddr[5], - ntohs(trh->rcf)); + DECLARE_MAC_BUF(mac); + printk("adding rif_entry: addr:%s rcf:%04X\n", + print_mac(mac, trh->saddr), ntohs(trh->rcf)); #endif /* * Allocate our new entry. A failure to allocate loses @@ -413,10 +416,11 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", !(trh->rcf & htons(TR_RCF_BROADCAST_MASK))) { #if TR_SR_DEBUG -printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", - trh->saddr[0],trh->saddr[1],trh->saddr[2], - trh->saddr[3],trh->saddr[4],trh->saddr[5], - ntohs(trh->rcf)); +{ +DECLARE_MAC_BUF(mac); +printk("updating rif_entry: addr:%s rcf:%04X\n", + print_mac(mac, trh->saddr), ntohs(trh->rcf)); +} #endif entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK); memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short)); @@ -527,19 +531,19 @@ static int rif_seq_show(struct seq_file *seq, void *v) { int j, rcf_len, segment, brdgnmb; struct rif_cache *entry = v; + DECLARE_MAC_BUF(mac); if (v == SEQ_START_TOKEN) seq_puts(seq, "if TR address TTL rcf routing segments\n"); else { - struct net_device *dev = dev_get_by_index(entry->iface); + struct net_device *dev = dev_get_by_index(&init_net, entry->iface); long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout) - (long) jiffies; - seq_printf(seq, "%s %02X:%02X:%02X:%02X:%02X:%02X %7li ", + seq_printf(seq, "%s %s %7li ", dev?dev->name:"?", - entry->addr[0],entry->addr[1],entry->addr[2], - entry->addr[3],entry->addr[4],entry->addr[5], + print_mac(mac, entry->addr), ttl/HZ); if (entry->local_ring) @@ -589,14 +593,18 @@ static const struct file_operations rif_seq_fops = { #endif +static const struct header_ops tr_header_ops = { + .create = tr_header, + .rebuild= tr_rebuild_header, +}; + static void tr_setup(struct net_device *dev) { /* * Configure and register */ - dev->hard_header = tr_header; - dev->rebuild_header = tr_rebuild_header; + dev->header_ops = &tr_header_ops; dev->type = ARPHRD_IEEE802_TR; dev->hard_header_len = TR_HLEN; @@ -639,7 +647,7 @@ static int __init rif_init(void) rif_timer.function = rif_check_expire; add_timer(&rif_timer); - proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops); + proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops); return 0; } diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2a546919d6f..3fe4fc86055 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -31,6 +31,7 @@ #include <net/arp.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> +#include <net/net_namespace.h> #include <linux/if_vlan.h> #include "vlan.h" @@ -50,7 +51,7 @@ static char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>"; static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>"; static int vlan_device_event(struct notifier_block *, unsigned long, void *); -static int vlan_ioctl_handler(void __user *); +static int vlan_ioctl_handler(struct net *net, void __user *); static int unregister_vlan_dev(struct net_device *, unsigned short ); static struct notifier_block vlan_notifier_block = { @@ -313,6 +314,12 @@ int unregister_vlan_device(struct net_device *dev) */ static struct lock_class_key vlan_netdev_xmit_lock_key; +static const struct header_ops vlan_header_ops = { + .create = vlan_dev_hard_header, + .rebuild = vlan_dev_rebuild_header, + .parse = eth_header_parse, +}; + static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; @@ -324,24 +331,23 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); + /* ipv6 shared card related stuff */ + dev->dev_id = real_dev->dev_id; + if (is_zero_ether_addr(dev->dev_addr)) memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len); if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); if (real_dev->features & NETIF_F_HW_VLAN_TX) { - dev->hard_header = real_dev->hard_header; + dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; - dev->rebuild_header = real_dev->rebuild_header; } else { - dev->hard_header = vlan_dev_hard_header; + dev->header_ops = &vlan_header_ops; dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; dev->hard_start_xmit = vlan_dev_hard_start_xmit; - dev->rebuild_header = vlan_dev_rebuild_header; } - dev->hard_header_parse = real_dev->hard_header_parse; - dev->hard_header_cache = NULL; lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key); return 0; @@ -349,8 +355,6 @@ static int vlan_dev_init(struct net_device *dev) void vlan_setup(struct net_device *new_dev) { - SET_MODULE_OWNER(new_dev); - ether_setup(new_dev); /* new_dev->ifindex = 0; it will be set when added to @@ -603,6 +607,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (!grp) goto out; @@ -693,7 +700,7 @@ out: * o execute requested action or pass command to the device driver * arg is really a struct vlan_ioctl_args __user *. */ -static int vlan_ioctl_handler(void __user *arg) +static int vlan_ioctl_handler(struct net *net, void __user *arg) { int err; unsigned short vid = 0; @@ -722,7 +729,7 @@ static int vlan_ioctl_handler(void __user *arg) case GET_VLAN_REALDEV_NAME_CMD: case GET_VLAN_VID_CMD: err = -ENODEV; - dev = __dev_get_by_name(args.device1); + dev = __dev_get_by_name(&init_net, args.device1); if (!dev) goto out; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 7df5b293557..cf4a80d06b3 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -53,8 +53,8 @@ int vlan_dev_rebuild_header(struct sk_buff *skb); int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, - unsigned len); + unsigned short type, const void *daddr, + const void *saddr, unsigned len); int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); int vlan_dev_change_mtu(struct net_device *dev, int new_mtu); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 328759c32d6..1a1740aa9a8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -122,6 +122,11 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, unsigned short vlan_TCI; __be16 proto; + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return -1; + } + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return -1; @@ -338,8 +343,8 @@ static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev * physical devices. */ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, - unsigned len) + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { struct vlan_hdr *vhdr; unsigned short veth_TCI = 0; @@ -429,21 +434,19 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, if (build_vlan_header) { /* Now make the underlying real hard header */ - rc = dev->hard_header(skb, dev, ETH_P_8021Q, daddr, saddr, len + VLAN_HLEN); - - if (rc > 0) { + rc = dev_hard_header(skb, dev, ETH_P_8021Q, daddr, saddr, + len + VLAN_HLEN); + if (rc > 0) rc += VLAN_HLEN; - } else if (rc < 0) { + else if (rc < 0) rc -= VLAN_HLEN; - } - } else { + } else /* If here, then we'll just make a normal looking ethernet frame, * but, the hard_start_xmit method will insert the tag (it has to * be able to do this for bridged and other skbs that don't come * down the protocol stack in an orderly manner. */ - rc = dev->hard_header(skb, dev, type, daddr, saddr, len); - } + rc = dev_hard_header(skb, dev, type, daddr, saddr, len); return rc; } diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 6cdd1e015e2..0996185e2ed 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/rtnetlink.h> #include "vlan.h" @@ -112,7 +113,7 @@ static int vlan_newlink(struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - real_dev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK])); + real_dev = __dev_get_by_index(&init_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index bd08aa09076..6cefdf8e381 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -33,6 +33,7 @@ #include <linux/fs.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> +#include <net/net_namespace.h> #include "vlanproc.h" #include "vlan.h" @@ -143,7 +144,7 @@ void vlan_proc_cleanup(void) remove_proc_entry(name_conf, proc_vlan_dir); if (proc_vlan_dir) - proc_net_remove(name_root); + proc_net_remove(&init_net, name_root); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... @@ -156,7 +157,7 @@ void vlan_proc_cleanup(void) int __init vlan_proc_init(void) { - proc_vlan_dir = proc_mkdir(name_root, proc_net); + proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net); if (proc_vlan_dir) { proc_vlan_conf = create_proc_entry(name_conf, S_IFREG|S_IRUSR|S_IWUSR, @@ -253,7 +254,7 @@ static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!is_vlan_dev(dev)) continue; @@ -272,9 +273,9 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) dev = (struct net_device *)v; if (v == SEQ_START_TOKEN) - dev = net_device_entry(&dev_base_head); + dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(dev) { + for_each_netdev_continue(&init_net, dev) { if (!is_vlan_dev(dev)) continue; diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 66821cd64a7..bafc50c9e6f 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -13,9 +13,26 @@ menuconfig NET_9P If unsure, say N. +config NET_9P_FD + depends on NET_9P + default y if NET_9P + tristate "9P File Descriptor Transports (Experimental)" + help + This builds support for file descriptor transports for 9p + which includes support for TCP/IP, named pipes, or passed + file descriptors. TCP/IP is the default transport for 9p, + so if you are going to use 9p, you'll likely want this. + +config NET_9P_VIRTIO + depends on NET_9P && EXPERIMENTAL && VIRTIO + tristate "9P Virtio Transport (Experimental)" + help + This builds support for a transports between + guest partitions and a host partition. + config NET_9P_DEBUG bool "Debug information" depends on NET_9P help - Say Y if you want the 9P subsistem to log debug information. + Say Y if you want the 9P subsystem to log debug information. diff --git a/net/9p/Makefile b/net/9p/Makefile index 85b3a7838ac..d3abb246cca 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,8 +1,9 @@ obj-$(CONFIG_NET_9P) := 9pnet.o +obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o +obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o 9pnet-objs := \ mod.o \ - trans_fd.o \ mux.o \ client.o \ conv.o \ @@ -10,4 +11,8 @@ obj-$(CONFIG_NET_9P) := 9pnet.o fcprint.o \ util.o \ -9pnet-$(CONFIG_SYSCTL) += sysctl.o +9pnet_fd-objs := \ + trans_fd.o \ + +9pnet_virtio-objs := \ + trans_virtio.o \ diff --git a/net/9p/client.c b/net/9p/client.c index cb170750337..af919936404 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -30,6 +30,7 @@ #include <linux/sched.h> #include <linux/uaccess.h> #include <net/9p/9p.h> +#include <linux/parser.h> #include <net/9p/transport.h> #include <net/9p/conn.h> #include <net/9p/client.h> @@ -38,7 +39,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt); static void p9_fid_destroy(struct p9_fid *fid); static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu); -struct p9_client *p9_client_create(struct p9_transport *trans, int msize, +struct p9_client *p9_client_create(struct p9_trans *trans, int msize, int dotu) { int err, n; @@ -146,7 +147,7 @@ void p9_client_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, char *aname) + char *uname, u32 n_uname, char *aname) { int err; struct p9_fcall *tc, *rc; @@ -165,7 +166,8 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname); + tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname, + n_uname, clnt->dotu); if (IS_ERR(tc)) { err = PTR_ERR(tc); tc = NULL; @@ -190,7 +192,8 @@ error: } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, char *aname) +struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, + u32 n_uname, char *aname) { int err; struct p9_fcall *tc, *rc; @@ -209,7 +212,7 @@ struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, char *aname) goto error; } - tc = p9_create_tauth(fid->fid, uname, aname); + tc = p9_create_tauth(fid->fid, uname, aname, n_uname, clnt->dotu); if (IS_ERR(tc)) { err = PTR_ERR(tc); tc = NULL; diff --git a/net/9p/conv.c b/net/9p/conv.c index d979d958ea1..aa2aa9884f9 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -547,7 +547,8 @@ error: } EXPORT_SYMBOL(p9_create_tversion); -struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname) +struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname, + u32 n_uname, int dotu) { int size; struct p9_fcall *fc; @@ -555,7 +556,16 @@ struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname) struct cbuf *bufp = &buffer; /* afid[4] uname[s] aname[s] */ - size = 4 + 2 + strlen(uname) + 2 + strlen(aname); + size = 4 + 2 + 2; + if (uname) + size += strlen(uname); + + if (aname) + size += strlen(aname); + + if (dotu) + size += 4; /* n_uname */ + fc = p9_create_common(bufp, size, P9_TAUTH); if (IS_ERR(fc)) goto error; @@ -563,6 +573,8 @@ struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname) p9_put_int32(bufp, afid, &fc->params.tauth.afid); p9_put_str(bufp, uname, &fc->params.tauth.uname); p9_put_str(bufp, aname, &fc->params.tauth.aname); + if (dotu) + p9_put_int32(bufp, n_uname, &fc->params.tauth.n_uname); if (buf_check_overflow(bufp)) { kfree(fc); @@ -574,7 +586,8 @@ error: EXPORT_SYMBOL(p9_create_tauth); struct p9_fcall * -p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname) +p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname, + u32 n_uname, int dotu) { int size; struct p9_fcall *fc; @@ -582,7 +595,16 @@ p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname) struct cbuf *bufp = &buffer; /* fid[4] afid[4] uname[s] aname[s] */ - size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); + size = 4 + 4 + 2 + 2; + if (uname) + size += strlen(uname); + + if (aname) + size += strlen(aname); + + if (dotu) + size += 4; /* n_uname */ + fc = p9_create_common(bufp, size, P9_TATTACH); if (IS_ERR(fc)) goto error; @@ -591,6 +613,8 @@ p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname) p9_put_int32(bufp, afid, &fc->params.tattach.afid); p9_put_str(bufp, uname, &fc->params.tattach.uname); p9_put_str(bufp, aname, &fc->params.tattach.aname); + if (dotu) + p9_put_int32(bufp, n_uname, &fc->params.tattach.n_uname); error: return fc; diff --git a/net/9p/mod.c b/net/9p/mod.c index 4f9e1d2ac25..41d70f47375 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -27,6 +27,10 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <net/9p/9p.h> +#include <linux/fs.h> +#include <linux/parser.h> +#include <net/9p/transport.h> +#include <linux/list.h> #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level = 0; /* feature-rific global debug level */ @@ -37,8 +41,64 @@ MODULE_PARM_DESC(debug, "9P debugging level"); extern int p9_mux_global_init(void); extern void p9_mux_global_exit(void); -extern int p9_sysctl_register(void); -extern void p9_sysctl_unregister(void); + +/* + * Dynamic Transport Registration Routines + * + */ + +static LIST_HEAD(v9fs_trans_list); +static struct p9_trans_module *v9fs_default_transport; + +/** + * v9fs_register_trans - register a new transport with 9p + * @m - structure describing the transport module and entry points + * + */ +void v9fs_register_trans(struct p9_trans_module *m) +{ + list_add_tail(&m->list, &v9fs_trans_list); + if (m->def) + v9fs_default_transport = m; +} +EXPORT_SYMBOL(v9fs_register_trans); + +/** + * v9fs_match_trans - match transport versus registered transports + * @arg: string identifying transport + * + */ +struct p9_trans_module *v9fs_match_trans(const substring_t *name) +{ + struct list_head *p; + struct p9_trans_module *t = NULL; + + list_for_each(p, &v9fs_trans_list) { + t = list_entry(p, struct p9_trans_module, list); + if (strncmp(t->name, name->from, name->to-name->from) == 0) + break; + } + return t; +} +EXPORT_SYMBOL(v9fs_match_trans); + +/** + * v9fs_default_trans - returns pointer to default transport + * + */ + +struct p9_trans_module *v9fs_default_trans(void) +{ + if (v9fs_default_transport) + return v9fs_default_transport; + else if (!list_empty(&v9fs_trans_list)) + return list_first_entry(&v9fs_trans_list, + struct p9_trans_module, list); + else + return NULL; +} +EXPORT_SYMBOL(v9fs_default_trans); + /** * v9fs_init - Initialize module @@ -56,12 +116,6 @@ static int __init init_p9(void) return ret; } - ret = p9_sysctl_register(); - if (ret) { - printk(KERN_WARNING "9p: registering sysctl failed\n"); - return ret; - } - return ret; } @@ -72,7 +126,6 @@ static int __init init_p9(void) static void __exit exit_p9(void) { - p9_sysctl_unregister(); p9_mux_global_exit(); } diff --git a/net/9p/mux.c b/net/9p/mux.c index 5d70558c4c6..c9f0805048e 100644 --- a/net/9p/mux.c +++ b/net/9p/mux.c @@ -31,6 +31,7 @@ #include <linux/idr.h> #include <linux/mutex.h> #include <net/9p/9p.h> +#include <linux/parser.h> #include <net/9p/transport.h> #include <net/9p/conn.h> @@ -71,7 +72,7 @@ struct p9_conn { struct p9_mux_poll_task *poll_task; int msize; unsigned char *extended; - struct p9_transport *trans; + struct p9_trans *trans; struct p9_idpool *tagpool; int err; wait_queue_head_t equeue; @@ -221,8 +222,10 @@ static int p9_mux_poll_start(struct p9_conn *m) } if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) { - if (vptlast == NULL) + if (vptlast == NULL) { + mutex_unlock(&p9_mux_task_lock); return -ENOMEM; + } P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); list_add(&m->mux_list, &vptlast->mux_list); @@ -271,7 +274,7 @@ static void p9_mux_poll_stop(struct p9_conn *m) * @msize - maximum message size * @extended - pointer to the extended flag */ -struct p9_conn *p9_conn_create(struct p9_transport *trans, int msize, +struct p9_conn *p9_conn_create(struct p9_trans *trans, int msize, unsigned char *extended) { int i, n; diff --git a/net/9p/sysctl.c b/net/9p/sysctl.c deleted file mode 100644 index 8b61027a24e..00000000000 --- a/net/9p/sysctl.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * net/9p/sysctl.c - * - * 9P sysctl interface - * - * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/init.h> -#include <net/9p/9p.h> - -static struct ctl_table p9_table[] = { -#ifdef CONFIG_NET_9P_DEBUG - { - .ctl_name = CTL_UNNUMBERED, - .procname = "debug", - .data = &p9_debug_level, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#endif - {}, -}; - -static struct ctl_table p9_net_table[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "9p", - .maxlen = 0, - .mode = 0555, - .child = p9_table, - }, - {}, -}; - -static struct ctl_table p9_ctl_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .maxlen = 0, - .mode = 0555, - .child = p9_net_table, - }, - {}, -}; - -static struct ctl_table_header *p9_table_header; - -int __init p9_sysctl_register(void) -{ - p9_table_header = register_sysctl_table(p9_ctl_table); - if (!p9_table_header) - return -ENOMEM; - - return 0; -} - -void __exit p9_sysctl_unregister(void) -{ - unregister_sysctl_table(p9_table_header); -} diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index fd636e94358..30269a4ff22 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -5,7 +5,7 @@ * * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2004-2007 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> * * This program is free software; you can redistribute it and/or modify @@ -36,160 +36,114 @@ #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> +#include <linux/parser.h> #include <net/9p/9p.h> #include <net/9p/transport.h> #define P9_PORT 564 +#define MAX_SOCK_BUF (64*1024) + + +struct p9_fd_opts { + int rfd; + int wfd; + u16 port; +}; struct p9_trans_fd { struct file *rd; struct file *wr; }; -static int p9_socket_open(struct p9_transport *trans, struct socket *csocket); -static int p9_fd_open(struct p9_transport *trans, int rfd, int wfd); -static int p9_fd_read(struct p9_transport *trans, void *v, int len); -static int p9_fd_write(struct p9_transport *trans, void *v, int len); -static unsigned int p9_fd_poll(struct p9_transport *trans, - struct poll_table_struct *pt); -static void p9_fd_close(struct p9_transport *trans); - -struct p9_transport *p9_trans_create_tcp(const char *addr, int port) -{ - int err; - struct p9_transport *trans; - struct socket *csocket; - struct sockaddr_in sin_server; - - csocket = NULL; - trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->write = p9_fd_write; - trans->read = p9_fd_read; - trans->close = p9_fd_close; - trans->poll = p9_fd_poll; - - sin_server.sin_family = AF_INET; - sin_server.sin_addr.s_addr = in_aton(addr); - sin_server.sin_port = htons(port); - sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - - if (!csocket) { - P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); - err = -EIO; - goto error; - } - - err = csocket->ops->connect(csocket, - (struct sockaddr *)&sin_server, - sizeof(struct sockaddr_in), 0); - if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_tcp: problem connecting socket to %s\n", - addr); - goto error; - } - - err = p9_socket_open(trans, csocket); - if (err < 0) - goto error; +/* + * Option Parsing (code inspired by NFS code) + * - a little lazy - parse all fd-transport options + */ - return trans; +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rfdno, Opt_wfdno, +}; -error: - if (csocket) - sock_release(csocket); +static match_table_t tokens = { + {Opt_port, "port=%u"}, + {Opt_rfdno, "rfdno=%u"}, + {Opt_wfdno, "wfdno=%u"}, +}; - kfree(trans); - return ERR_PTR(err); -} -EXPORT_SYMBOL(p9_trans_create_tcp); +/** + * v9fs_parse_options - parse mount options into session structure + * @options: options string passed from mount + * @v9ses: existing v9fs session information + * + */ -struct p9_transport *p9_trans_create_unix(const char *addr) +static void parse_opts(char *options, struct p9_fd_opts *opts) { - int err; - struct socket *csocket; - struct sockaddr_un sun_server; - struct p9_transport *trans; - - csocket = NULL; - trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + int ret; - trans->write = p9_fd_write; - trans->read = p9_fd_read; - trans->close = p9_fd_close; - trans->poll = p9_fd_poll; + opts->port = P9_PORT; + opts->rfd = ~0; + opts->wfd = ~0; - if (strlen(addr) > UNIX_PATH_MAX) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", - addr); - err = -ENAMETOOLONG; - goto error; - } + if (!options) + return; - sun_server.sun_family = PF_UNIX; - strcpy(sun_server.sun_path, addr); - sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); - err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, - sizeof(struct sockaddr_un) - 1, 0); - if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_unix: problem connecting socket: %s: %d\n", - addr, err); - goto error; + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + token = match_token(p, tokens, args); + ret = match_int(&args[0], &option); + if (ret < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + continue; + } + switch (token) { + case Opt_port: + opts->port = option; + break; + case Opt_rfdno: + opts->rfd = option; + break; + case Opt_wfdno: + opts->wfd = option; + break; + default: + continue; + } } - - err = p9_socket_open(trans, csocket); - if (err < 0) - goto error; - - return trans; - -error: - if (csocket) - sock_release(csocket); - - kfree(trans); - return ERR_PTR(err); } -EXPORT_SYMBOL(p9_trans_create_unix); -struct p9_transport *p9_trans_create_fd(int rfd, int wfd) +static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) { - int err; - struct p9_transport *trans; + struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), + GFP_KERNEL); + if (!ts) + return -ENOMEM; - if (rfd == ~0 || wfd == ~0) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); - return ERR_PTR(-ENOPROTOOPT); + ts->rd = fget(rfd); + ts->wr = fget(wfd); + if (!ts->rd || !ts->wr) { + if (ts->rd) + fput(ts->rd); + if (ts->wr) + fput(ts->wr); + kfree(ts); + return -EIO; } - trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->write = p9_fd_write; - trans->read = p9_fd_read; - trans->close = p9_fd_close; - trans->poll = p9_fd_poll; - - err = p9_fd_open(trans, rfd, wfd); - if (err < 0) - goto error; - - return trans; + trans->priv = ts; + trans->status = Connected; -error: - kfree(trans); - return ERR_PTR(err); + return 0; } -EXPORT_SYMBOL(p9_trans_create_fd); -static int p9_socket_open(struct p9_transport *trans, struct socket *csocket) +static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) { int fd, ret; @@ -212,30 +166,6 @@ static int p9_socket_open(struct p9_transport *trans, struct socket *csocket) return 0; } -static int p9_fd_open(struct p9_transport *trans, int rfd, int wfd) -{ - struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), - GFP_KERNEL); - if (!ts) - return -ENOMEM; - - ts->rd = fget(rfd); - ts->wr = fget(wfd); - if (!ts->rd || !ts->wr) { - if (ts->rd) - fput(ts->rd); - if (ts->wr) - fput(ts->wr); - kfree(ts); - return -EIO; - } - - trans->priv = ts; - trans->status = Connected; - - return 0; -} - /** * p9_fd_read- read from a fd * @v9ses: session information @@ -243,7 +173,7 @@ static int p9_fd_open(struct p9_transport *trans, int rfd, int wfd) * @len: size of receive buffer * */ -static int p9_fd_read(struct p9_transport *trans, void *v, int len) +static int p9_fd_read(struct p9_trans *trans, void *v, int len) { int ret; struct p9_trans_fd *ts = NULL; @@ -270,7 +200,7 @@ static int p9_fd_read(struct p9_transport *trans, void *v, int len) * @len: size of send buffer * */ -static int p9_fd_write(struct p9_transport *trans, void *v, int len) +static int p9_fd_write(struct p9_trans *trans, void *v, int len) { int ret; mm_segment_t oldfs; @@ -297,7 +227,7 @@ static int p9_fd_write(struct p9_transport *trans, void *v, int len) } static unsigned int -p9_fd_poll(struct p9_transport *trans, struct poll_table_struct *pt) +p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) { int ret, n; struct p9_trans_fd *ts = NULL; @@ -341,7 +271,7 @@ end: * @trans: private socket structure * */ -static void p9_fd_close(struct p9_transport *trans) +static void p9_fd_close(struct p9_trans *trans) { struct p9_trans_fd *ts; @@ -361,3 +291,182 @@ static void p9_fd_close(struct p9_transport *trans) kfree(ts); } +static struct p9_trans *p9_trans_create_tcp(const char *addr, char *args) +{ + int err; + struct p9_trans *trans; + struct socket *csocket; + struct sockaddr_in sin_server; + struct p9_fd_opts opts; + + parse_opts(args, &opts); + + csocket = NULL; + trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + sin_server.sin_family = AF_INET; + sin_server.sin_addr.s_addr = in_aton(addr); + sin_server.sin_port = htons(opts.port); + sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + + if (!csocket) { + P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); + err = -EIO; + goto error; + } + + err = csocket->ops->connect(csocket, + (struct sockaddr *)&sin_server, + sizeof(struct sockaddr_in), 0); + if (err < 0) { + P9_EPRINTK(KERN_ERR, + "p9_trans_tcp: problem connecting socket to %s\n", + addr); + goto error; + } + + err = p9_socket_open(trans, csocket); + if (err < 0) + goto error; + + return trans; + +error: + if (csocket) + sock_release(csocket); + + kfree(trans); + return ERR_PTR(err); +} + +static struct p9_trans *p9_trans_create_unix(const char *addr, char *args) +{ + int err; + struct socket *csocket; + struct sockaddr_un sun_server; + struct p9_trans *trans; + + csocket = NULL; + trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + if (strlen(addr) > UNIX_PATH_MAX) { + P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", + addr); + err = -ENAMETOOLONG; + goto error; + } + + sun_server.sun_family = PF_UNIX; + strcpy(sun_server.sun_path, addr); + sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, + sizeof(struct sockaddr_un) - 1, 0); + if (err < 0) { + P9_EPRINTK(KERN_ERR, + "p9_trans_unix: problem connecting socket: %s: %d\n", + addr, err); + goto error; + } + + err = p9_socket_open(trans, csocket); + if (err < 0) + goto error; + + return trans; + +error: + if (csocket) + sock_release(csocket); + + kfree(trans); + return ERR_PTR(err); +} + +static struct p9_trans *p9_trans_create_fd(const char *name, char *args) +{ + int err; + struct p9_trans *trans; + struct p9_fd_opts opts; + + parse_opts(args, &opts); + + if (opts.rfd == ~0 || opts.wfd == ~0) { + printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + return ERR_PTR(-ENOPROTOOPT); + } + + trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + err = p9_fd_open(trans, opts.rfd, opts.wfd); + if (err < 0) + goto error; + + return trans; + +error: + kfree(trans); + return ERR_PTR(err); +} + +static struct p9_trans_module p9_tcp_trans = { + .name = "tcp", + .maxsize = MAX_SOCK_BUF, + .def = 1, + .create = p9_trans_create_tcp, +}; + +static struct p9_trans_module p9_unix_trans = { + .name = "unix", + .maxsize = MAX_SOCK_BUF, + .def = 0, + .create = p9_trans_create_unix, +}; + +static struct p9_trans_module p9_fd_trans = { + .name = "fd", + .maxsize = MAX_SOCK_BUF, + .def = 0, + .create = p9_trans_create_fd, +}; + +static int __init p9_trans_fd_init(void) +{ + v9fs_register_trans(&p9_tcp_trans); + v9fs_register_trans(&p9_unix_trans); + v9fs_register_trans(&p9_fd_trans); + + return 1; +} + +static void __exit p9_trans_fd_exit(void) { + printk(KERN_ERR "Removal of 9p transports not implemented\n"); + BUG(); +} + +module_init(p9_trans_fd_init); +module_exit(p9_trans_fd_exit); + +MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); +MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c new file mode 100644 index 00000000000..40b71a29fc3 --- /dev/null +++ b/net/9p/trans_virtio.c @@ -0,0 +1,353 @@ +/* + * The Guest 9p transport driver + * + * This is a trivial pipe-based transport driver based on the lguest console + * code: we use lguest's DMA mechanism to send bytes out, and register a + * DMA buffer to receive bytes in. It is assumed to be present and available + * from the very beginning of boot. + * + * This may be have been done by just instaniating another HVC console, + * but HVC's blocksize of 16 bytes is annoying and painful to performance. + * + * A more efficient transport could be built based on the virtio block driver + * but it requires some changes in the 9p transport model (which are in + * progress) + * + */ +/* + * Copyright (C) 2007 Eric Van Hensbergen, IBM Corporation + * + * Based on virtio console driver + * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/in.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <linux/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> +#include <net/9p/9p.h> +#include <linux/parser.h> +#include <net/9p/transport.h> +#include <linux/scatterlist.h> +#include <linux/virtio.h> +#include <linux/virtio_9p.h> + +/* a single mutex to manage channel initialization and attachment */ +static DECLARE_MUTEX(virtio_9p_lock); +/* global which tracks highest initialized channel */ +static int chan_index; + +/* We keep all per-channel information in a structure. + * This structure is allocated within the devices dev->mem space. + * A pointer to the structure will get put in the transport private. + */ +static struct virtio_chan { + bool initialized; /* channel is initialized */ + bool inuse; /* channel is in use */ + + struct virtqueue *in_vq, *out_vq; + struct virtio_device *vdev; + + /* This is our input buffer, and how much data is left in it. */ + unsigned int in_len; + char *in, *inbuf; + + wait_queue_head_t wq; /* waitq for buffer */ +} channels[MAX_9P_CHAN]; + +/* How many bytes left in this page. */ +static unsigned int rest_of_page(void *data) +{ + return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); +} + +static int p9_virtio_write(struct p9_trans *trans, void *buf, int count) +{ + struct virtio_chan *chan = (struct virtio_chan *) trans->priv; + struct virtqueue *out_vq = chan->out_vq; + struct scatterlist sg[1]; + unsigned int len; + + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio write (%d)\n", count); + + /* keep it simple - make sure we don't overflow a page */ + if (rest_of_page(buf) < count) + count = rest_of_page(buf); + + sg_init_one(sg, buf, count); + + /* add_buf wants a token to identify this buffer: we hand it any + * non-NULL pointer, since there's only ever one buffer. */ + if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) { + /* Tell Host to go! */ + out_vq->vq_ops->kick(out_vq); + /* Chill out until it's done with the buffer. */ + while (!out_vq->vq_ops->get_buf(out_vq, &len)) + cpu_relax(); + } + + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio wrote (%d)\n", count); + + /* We're expected to return the amount of data we wrote: all of it. */ + return count; +} + +/* Create a scatter-gather list representing our input buffer and put it in the + * queue. */ +static void add_inbuf(struct virtio_chan *chan) +{ + struct scatterlist sg[1]; + + sg_init_one(sg, chan->inbuf, PAGE_SIZE); + + /* We should always be able to add one buffer to an empty queue. */ + if (chan->in_vq->vq_ops->add_buf(chan->in_vq, sg, 0, 1, chan->inbuf)) + BUG(); + chan->in_vq->vq_ops->kick(chan->in_vq); +} + +static int p9_virtio_read(struct p9_trans *trans, void *buf, int count) +{ + struct virtio_chan *chan = (struct virtio_chan *) trans->priv; + struct virtqueue *in_vq = chan->in_vq; + + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio read (%d)\n", count); + + /* If we don't have an input queue yet, we can't get input. */ + BUG_ON(!in_vq); + + /* No buffer? Try to get one. */ + if (!chan->in_len) { + chan->in = in_vq->vq_ops->get_buf(in_vq, &chan->in_len); + if (!chan->in) + return 0; + } + + /* You want more than we have to give? Well, try wanting less! */ + if (chan->in_len < count) + count = chan->in_len; + + /* Copy across to their buffer and increment offset. */ + memcpy(buf, chan->in, count); + chan->in += count; + chan->in_len -= count; + + /* Finished? Re-register buffer so Host will use it again. */ + if (chan->in_len == 0) + add_inbuf(chan); + + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio finished read (%d)\n", + count); + + return count; +} + +/* The poll function is used by 9p transports to determine if there + * is there is activity available on a particular channel. In our case + * we use it to wait for a callback from the input routines. + */ +static unsigned int +p9_virtio_poll(struct p9_trans *trans, struct poll_table_struct *pt) +{ + struct virtio_chan *chan = (struct virtio_chan *)trans->priv; + struct virtqueue *in_vq = chan->in_vq; + int ret = POLLOUT; /* we can always handle more output */ + + poll_wait(NULL, &chan->wq, pt); + + /* No buffer? Try to get one. */ + if (!chan->in_len) + chan->in = in_vq->vq_ops->get_buf(in_vq, &chan->in_len); + + if (chan->in_len) + ret |= POLLIN; + + return ret; +} + +static void p9_virtio_close(struct p9_trans *trans) +{ + struct virtio_chan *chan = trans->priv; + + down(&virtio_9p_lock); + chan->inuse = false; + up(&virtio_9p_lock); + + kfree(trans); +} + +static bool p9_virtio_intr(struct virtqueue *q) +{ + struct virtio_chan *chan = q->vdev->priv; + + P9_DPRINTK(P9_DEBUG_TRANS, "9p poll_wakeup: %p\n", &chan->wq); + wake_up_interruptible(&chan->wq); + + return true; +} + +static int p9_virtio_probe(struct virtio_device *dev) +{ + int err; + struct virtio_chan *chan; + int index; + + down(&virtio_9p_lock); + index = chan_index++; + chan = &channels[index]; + up(&virtio_9p_lock); + + if (chan_index > MAX_9P_CHAN) { + printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); + BUG(); + } + + chan->vdev = dev; + + /* This is the scratch page we use to receive console input */ + chan->inbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!chan->inbuf) { + err = -ENOMEM; + goto fail; + } + + /* Find the input queue. */ + dev->priv = chan; + chan->in_vq = dev->config->find_vq(dev, p9_virtio_intr); + if (IS_ERR(chan->in_vq)) { + err = PTR_ERR(chan->in_vq); + goto free; + } + + chan->out_vq = dev->config->find_vq(dev, NULL); + if (IS_ERR(chan->out_vq)) { + err = PTR_ERR(chan->out_vq); + goto free_in_vq; + } + + init_waitqueue_head(&chan->wq); + + /* Register the input buffer the first time. */ + add_inbuf(chan); + chan->inuse = false; + chan->initialized = true; + + return 0; + +free_in_vq: + dev->config->del_vq(chan->in_vq); +free: + kfree(chan->inbuf); +fail: + down(&virtio_9p_lock); + chan_index--; + up(&virtio_9p_lock); + return err; +} + +/* This sets up a transport channel for 9p communication. Right now + * we only match the first available channel, but eventually we couldlook up + * alternate channels by matching devname versus a virtio_config entry. + * We use a simple reference count mechanism to ensure that only a single + * mount has a channel open at a time. */ +static struct p9_trans *p9_virtio_create(const char *devname, char *args) +{ + struct p9_trans *trans; + int index = 0; + struct virtio_chan *chan = channels; + + down(&virtio_9p_lock); + while (index < MAX_9P_CHAN) { + if (chan->initialized && !chan->inuse) { + chan->inuse = true; + break; + } else { + index++; + chan = &channels[index]; + } + } + up(&virtio_9p_lock); + + if (index >= MAX_9P_CHAN) { + printk(KERN_ERR "9p: virtio: couldn't find a free channel\n"); + return NULL; + } + + trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); + if (!trans) { + printk(KERN_ERR "9p: couldn't allocate transport\n"); + return ERR_PTR(-ENOMEM); + } + + trans->write = p9_virtio_write; + trans->read = p9_virtio_read; + trans->close = p9_virtio_close; + trans->poll = p9_virtio_poll; + trans->priv = chan; + + return trans; +} + +#define VIRTIO_ID_9P 9 + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +/* The standard "struct lguest_driver": */ +static struct virtio_driver p9_virtio_drv = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = p9_virtio_probe, +}; + +static struct p9_trans_module p9_virtio_trans = { + .name = "virtio", + .create = p9_virtio_create, + .maxsize = PAGE_SIZE, + .def = 0, +}; + +/* The standard init function */ +static int __init p9_virtio_init(void) +{ + int count; + + for (count = 0; count < MAX_9P_CHAN; count++) + channels[count].initialized = false; + + v9fs_register_trans(&p9_virtio_trans); + return register_virtio_driver(&p9_virtio_drv); +} + +module_init(p9_virtio_init); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); +MODULE_DESCRIPTION("Virtio 9p Transport"); +MODULE_LICENSE("GPL"); diff --git a/net/Kconfig b/net/Kconfig index cdba08ca2ef..ab4e6da5012 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -27,6 +27,14 @@ if NET menu "Networking options" +config NET_NS + bool "Network namespace support" + default n + depends on EXPERIMENTAL && !SYSFS + help + Allow user space to create what appear to be multiple instances + of the network stack. + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 3d1655f9838..6c5c6dc098e 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -330,15 +330,19 @@ static void aarp_expire_timeout(unsigned long unused) static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; int ct; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { write_lock_bh(&aarp_lock); for (ct = 0; ct < AARP_HASH_SIZE; ct++) { - __aarp_expire_device(&resolved[ct], ptr); - __aarp_expire_device(&unresolved[ct], ptr); - __aarp_expire_device(&proxies[ct], ptr); + __aarp_expire_device(&resolved[ct], dev); + __aarp_expire_device(&unresolved[ct], dev); + __aarp_expire_device(&proxies[ct], dev); } write_unlock_bh(&aarp_lock); @@ -712,6 +716,9 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, struct atalk_addr sa, *ma, da; struct atalk_iface *ifa; + if (dev->nd_net != &init_net) + goto out0; + /* We only do Ethernet SNAP AARP. */ if (dev->type != ARPHRD_ETHER) goto out0; @@ -815,8 +822,6 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, * address. So as a precaution flush any * entries we have for this address. */ - struct aarp_entry *a; - a = __aarp_find_entry(resolved[sa.s_node % (AARP_HASH_SIZE - 1)], skb->dev, &sa); @@ -990,6 +995,7 @@ static int aarp_seq_show(struct seq_file *seq, void *v) struct aarp_iter_state *iter = seq->private; struct aarp_entry *entry = v; unsigned long now = jiffies; + DECLARE_MAC_BUF(mac); if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1000,13 +1006,7 @@ static int aarp_seq_show(struct seq_file *seq, void *v) ntohs(entry->target_addr.s_net), (unsigned int) entry->target_addr.s_node, entry->dev ? entry->dev->name : "????"); - seq_printf(seq, "%02X:%02X:%02X:%02X:%02X:%02X", - entry->hwaddr[0] & 0xFF, - entry->hwaddr[1] & 0xFF, - entry->hwaddr[2] & 0xFF, - entry->hwaddr[3] & 0xFF, - entry->hwaddr[4] & 0xFF, - entry->hwaddr[5] & 0xFF); + seq_printf(seq, "%s", print_mac(mac, entry->hwaddr)); seq_printf(seq, " %8s", dt2str((long)entry->expires_at - (long)now)); if (iter->table == unresolved) diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 87a582cc811..05d9652afcb 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <linux/atalk.h> @@ -271,7 +272,7 @@ int __init atalk_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - atalk_proc_dir = proc_mkdir("atalk", proc_net); + atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net); if (!atalk_proc_dir) goto out; atalk_proc_dir->owner = THIS_MODULE; @@ -306,7 +307,7 @@ out_socket: out_route: remove_proc_entry("interface", atalk_proc_dir); out_interface: - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", init_net.proc_net); goto out; } @@ -316,5 +317,5 @@ void __exit atalk_proc_exit(void) remove_proc_entry("route", atalk_proc_dir); remove_proc_entry("socket", atalk_proc_dir); remove_proc_entry("arp", atalk_proc_dir); - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", init_net.proc_net); } diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index fbdfb1224ae..7c0b5151d52 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -647,9 +647,14 @@ static inline void atalk_dev_down(struct net_device *dev) static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) /* Discard any use of this */ - atalk_dev_down(ptr); + atalk_dev_down(dev); return NOTIFY_DONE; } @@ -672,7 +677,7 @@ static int atif_ioctl(int cmd, void __user *arg) if (copy_from_user(&atreq, arg, sizeof(atreq))) return -EFAULT; - dev = __dev_get_by_name(atreq.ifr_name); + dev = __dev_get_by_name(&init_net, atreq.ifr_name); if (!dev) return -ENODEV; @@ -896,7 +901,7 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg) if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) return -EFAULT; name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(&init_net, name); if (!dev) return -ENODEV; } @@ -1024,11 +1029,14 @@ static struct proto ddp_proto = { * Create a socket. Initialise the socket, blank the addresses * set the state. */ -static int atalk_create(struct socket *sock, int protocol) +static int atalk_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + /* * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do * and gives you the full ELAP frame. Should be handy for CAP 8) @@ -1036,7 +1044,7 @@ static int atalk_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); if (!sk) goto out; rc = 0; @@ -1265,7 +1273,7 @@ static __inline__ int is_ip_over_ddp(struct sk_buff *skb) static int handle_ip_over_ddp(struct sk_buff *skb) { - struct net_device *dev = __dev_get_by_name("ipddp0"); + struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0"); struct net_device_stats *stats; /* This needs to be able to handle ipddp"N" devices */ @@ -1398,6 +1406,9 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, int origlen; __u16 len_hops; + if (dev->nd_net != &init_net) + goto freeit; + /* Don't mangle buffer if shared */ if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; @@ -1483,6 +1494,9 @@ freeit: static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + if (dev->nd_net != &init_net) + goto freeit; + /* Expand any short form frames */ if (skb_mac_header(skb)[2] == 1) { struct ddpehdr *ddp; diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c index 9e4dffc1e42..d856a62ab50 100644 --- a/net/appletalk/dev.c +++ b/net/appletalk/dev.c @@ -24,11 +24,7 @@ static void ltalk_setup(struct net_device *dev) /* Fill in the fields of the device structure with localtalk-generic values. */ dev->change_mtu = ltalk_change_mtu; - dev->hard_header = NULL; - dev->rebuild_header = NULL; dev->set_mac_address = ltalk_mac_addr; - dev->hard_header_cache = NULL; - dev->header_cache_update= NULL; dev->type = ARPHRD_LOCALTLK; dev->hard_header_len = LTALK_HLEN; diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index f094a0879c1..9ef07eda2c4 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -105,10 +105,9 @@ static struct class_device_attribute *atm_attrs[] = { NULL }; -static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) +static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env) { struct atm_dev *adev; - int i = 0, len = 0; if (!cdev) return -ENODEV; @@ -117,11 +116,9 @@ static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char if (!adev) return -ENODEV; - if (add_uevent_var(envp, num_envp, &i, buf, size, &len, - "NAME=%s%d", adev->type, adev->number)) + if (add_uevent_var(env, "NAME=%s%d", adev->type, adev->number)) return -ENOMEM; - envp[i] = NULL; return 0; } diff --git a/net/atm/br2684.c b/net/atm/br2684.c index c0f6861eefe..ba6428f204f 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -24,22 +24,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary #include "common.h" -/* - * Define this to use a version of the code which interacts with the higher - * layers in a more intellegent way, by always reserving enough space for - * our header at the begining of the packet. However, there may still be - * some problems with programs like tcpdump. In 2.5 we'll sort out what - * we need to do to get this perfect. For now we just will copy the packet - * if we need space for the header - */ -/* #define FASTER_VERSION */ - -#ifdef DEBUG -#define DPRINTK(format, args...) printk(KERN_DEBUG "br2684: " format, ##args) -#else -#define DPRINTK(format, args...) -#endif - #ifdef SKB_DEBUG static void skb_debug(const struct sk_buff *skb) { @@ -75,9 +59,7 @@ struct br2684_vcc { #ifdef CONFIG_ATM_BR2684_IPFILTER struct br2684_filter filter; #endif /* CONFIG_ATM_BR2684_IPFILTER */ -#ifndef FASTER_VERSION unsigned copies_needed, copies_failed; -#endif /* FASTER_VERSION */ }; struct br2684_dev { @@ -153,13 +135,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, struct br2684_vcc *brvcc) { struct atm_vcc *atmvcc; -#ifdef FASTER_VERSION - if (brvcc->encaps == e_llc) - memcpy(skb_push(skb, 8), llc_oui_pid_pad, 8); - /* last 2 bytes of llc_oui_pid_pad are managed by header routines; - yes, you got it: 8 + 2 = sizeof(llc_oui_pid_pad) - */ -#else int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2; if (skb_headroom(skb) < minheadroom) { struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom); @@ -176,11 +151,10 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10); else memset(skb->data, 0, 2); -#endif /* FASTER_VERSION */ skb_debug(skb); ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; - DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); if (!atm_may_send(atmvcc, skb->truesize)) { /* we free this here for now, because we cannot know in a higher layer whether the skb point it supplied wasn't freed yet. @@ -209,11 +183,11 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) struct br2684_dev *brdev = BRPRIV(dev); struct br2684_vcc *brvcc; - DPRINTK("br2684_start_xmit, skb->dst=%p\n", skb->dst); + pr_debug("br2684_start_xmit, skb->dst=%p\n", skb->dst); read_lock(&devs_lock); brvcc = pick_outgoing_vcc(skb, brdev); if (brvcc == NULL) { - DPRINTK("no vcc attached to dev %s\n", dev->name); + pr_debug("no vcc attached to dev %s\n", dev->name); brdev->stats.tx_errors++; brdev->stats.tx_carrier_errors++; /* netif_stop_queue(dev); */ @@ -239,91 +213,10 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) static struct net_device_stats *br2684_get_stats(struct net_device *dev) { - DPRINTK("br2684_get_stats\n"); + pr_debug("br2684_get_stats\n"); return &BRPRIV(dev)->stats; } -#ifdef FASTER_VERSION -/* - * These mirror eth_header and eth_header_cache. They are not usually - * exported for use in modules, so we grab them from net_device - * after ether_setup() is done with it. Bit of a hack. - */ -static int (*my_eth_header)(struct sk_buff *, struct net_device *, - unsigned short, void *, void *, unsigned); -static int (*my_eth_header_cache)(struct neighbour *, struct hh_cache *); - -static int -br2684_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, unsigned len) -{ - u16 *pad_before_eth; - int t = my_eth_header(skb, dev, type, daddr, saddr, len); - if (t > 0) { - pad_before_eth = (u16 *) skb_push(skb, 2); - *pad_before_eth = 0; - return dev->hard_header_len; /* or return 16; ? */ - } else - return t; -} - -static int -br2684_header_cache(struct neighbour *neigh, struct hh_cache *hh) -{ -/* hh_data is 16 bytes long. if encaps is ether-llc we need 24, so -xmit will add the additional header part in that case */ - u16 *pad_before_eth = (u16 *)(hh->hh_data); - int t = my_eth_header_cache(neigh, hh); - DPRINTK("br2684_header_cache, neigh=%p, hh_cache=%p\n", neigh, hh); - if (t < 0) - return t; - else { - *pad_before_eth = 0; - hh->hh_len = PADLEN + ETH_HLEN; - } - return 0; -} - -/* - * This is similar to eth_type_trans, which cannot be used because of - * our dev->hard_header_len - */ -static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - struct ethhdr *eth; - unsigned char *rawp; - eth = eth_hdr(skb); - - if (is_multicast_ether_addr(eth->h_dest)) { - if (!compare_ether_addr(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } - - else if (compare_ether_addr(eth->h_dest, dev->dev_addr)) - skb->pkt_type = PACKET_OTHERHOST; - - if (ntohs(eth->h_proto) >= 1536) - return eth->h_proto; - - rawp = skb->data; - - /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. - */ - if (*(unsigned short *) rawp == 0xFFFF) - return htons(ETH_P_802_3); - - /* - * Real 802.2 LLC - */ - return htons(ETH_P_802_2); -} -#endif /* FASTER_VERSION */ /* * We remember when the MAC gets set, so we don't override it later with @@ -390,7 +283,7 @@ packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb) static void br2684_close_vcc(struct br2684_vcc *brvcc) { - DPRINTK("removing VCC %p from dev %p\n", brvcc, brvcc->device); + pr_debug("removing VCC %p from dev %p\n", brvcc, brvcc->device); write_lock_irq(&devs_lock); list_del(&brvcc->brvccs); write_unlock_irq(&devs_lock); @@ -408,7 +301,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) struct br2684_dev *brdev = BRPRIV(net_dev); int plen = sizeof(llc_oui_pid_pad) + ETH_HLEN; - DPRINTK("br2684_push\n"); + pr_debug("br2684_push\n"); if (unlikely(skb == NULL)) { /* skb==NULL means VCC is being destroyed */ @@ -425,7 +318,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb_debug(skb); atm_return(atmvcc, skb->truesize); - DPRINTK("skb from brdev %p\n", brdev); + pr_debug("skb from brdev %p\n", brdev); if (brvcc->encaps == e_llc) { /* let us waste some time for checking the encapsulation. Note, that only 7 char is checked so frames with a valid FCS @@ -454,17 +347,8 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) return; } -#ifdef FASTER_VERSION - /* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier, - than should be. What else should I set? */ - skb_pull(skb, plen); - skb_set_mac_header(skb, -ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = br_type_trans(skb, net_dev); -#else skb_pull(skb, plen - ETH_HLEN); skb->protocol = eth_type_trans(skb, net_dev); -#endif /* FASTER_VERSION */ #ifdef CONFIG_ATM_BR2684_IPFILTER if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) { brdev->stats.rx_dropped++; @@ -474,7 +358,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) #endif /* CONFIG_ATM_BR2684_IPFILTER */ skb->dev = net_dev; ATM_SKB(skb)->vcc = atmvcc; /* needed ? */ - DPRINTK("received packet's protocol: %x\n", ntohs(skb->protocol)); + pr_debug("received packet's protocol: %x\n", ntohs(skb->protocol)); skb_debug(skb); if (unlikely(!(net_dev->flags & IFF_UP))) { /* sigh, interface is down */ @@ -532,7 +416,7 @@ Note: we do not have explicit unassign, but look at _push() err = -EINVAL; goto error; } - DPRINTK("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, + pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, brvcc); if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { unsigned char *esi = atmvcc->dev->esi; @@ -590,13 +474,6 @@ static void br2684_setup(struct net_device *netdev) ether_setup(netdev); brdev->net_dev = netdev; -#ifdef FASTER_VERSION - my_eth_header = netdev->hard_header; - netdev->hard_header = br2684_header; - my_eth_header_cache = netdev->hard_header_cache; - netdev->hard_header_cache = br2684_header_cache; - netdev->hard_header_len = sizeof(llc_oui_pid_pad) + ETH_HLEN; /* 10 + 14 */ -#endif my_eth_mac_addr = netdev->set_mac_address; netdev->set_mac_address = br2684_mac_addr; netdev->hard_start_xmit = br2684_start_xmit; @@ -612,7 +489,7 @@ static int br2684_create(void __user *arg) struct br2684_dev *brdev; struct atm_newif_br2684 ni; - DPRINTK("br2684_create\n"); + pr_debug("br2684_create\n"); if (copy_from_user(&ni, arg, sizeof ni)) { return -EFAULT; @@ -629,7 +506,7 @@ static int br2684_create(void __user *arg) brdev = BRPRIV(netdev); - DPRINTK("registered netdev %s\n", netdev->name); + pr_debug("registered netdev %s\n", netdev->name); /* open, stop, do_ioctl ? */ err = register_netdev(netdev); if (err < 0) { @@ -715,30 +592,22 @@ static int br2684_seq_show(struct seq_file *seq, void *v) br2684_devs); const struct net_device *net_dev = brdev->net_dev; const struct br2684_vcc *brvcc; + DECLARE_MAC_BUF(mac); - seq_printf(seq, "dev %.16s: num=%d, mac=%02X:%02X:" - "%02X:%02X:%02X:%02X (%s)\n", net_dev->name, - brdev->number, - net_dev->dev_addr[0], - net_dev->dev_addr[1], - net_dev->dev_addr[2], - net_dev->dev_addr[3], - net_dev->dev_addr[4], - net_dev->dev_addr[5], - brdev->mac_was_set ? "set" : "auto"); + seq_printf(seq, "dev %.16s: num=%d, mac=%s (%s)\n", + net_dev->name, + brdev->number, + print_mac(mac, net_dev->dev_addr), + brdev->mac_was_set ? "set" : "auto"); list_for_each_entry(brvcc, &brdev->brvccs, brvccs) { seq_printf(seq, " vcc %d.%d.%d: encaps=%s" -#ifndef FASTER_VERSION ", failed copies %u/%u" -#endif /* FASTER_VERSION */ "\n", brvcc->atmvcc->dev->number, brvcc->atmvcc->vpi, brvcc->atmvcc->vci, (brvcc->encaps == e_llc) ? "LLC" : "VC" -#ifndef FASTER_VERSION , brvcc->copies_failed , brvcc->copies_needed -#endif /* FASTER_VERSION */ ); #ifdef CONFIG_ATM_BR2684_IPFILTER #define b1(var, byte) ((u8 *) &brvcc->filter.var)[byte] diff --git a/net/atm/clip.c b/net/atm/clip.c index ecf0f79b94a..741742f0079 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -40,14 +40,6 @@ #include "resources.h" #include <net/atmclip.h> - -#if 0 -#define DPRINTK(format,args...) printk(format,##args) -#else -#define DPRINTK(format,args...) -#endif - - static struct net_device *clip_devs; static struct atm_vcc *atmarpd; static struct neigh_table clip_tbl; @@ -59,7 +51,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) struct atmarp_ctrl *ctrl; struct sk_buff *skb; - DPRINTK("to_atmarpd(%d)\n", type); + pr_debug("to_atmarpd(%d)\n", type); if (!atmarpd) return -EUNATCH; skb = alloc_skb(sizeof(struct atmarp_ctrl),GFP_ATOMIC); @@ -79,7 +71,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) { - DPRINTK("link_vcc %p to entry %p (neigh %p)\n", clip_vcc, entry, + pr_debug("link_vcc %p to entry %p (neigh %p)\n", clip_vcc, entry, entry->neigh); clip_vcc->entry = entry; clip_vcc->xoff = 0; /* @@@ may overrun buffer by one packet */ @@ -134,7 +126,7 @@ static int neigh_check_cb(struct neighbour *n) unsigned long exp = cv->last_use + cv->idle_timeout; if (cv->idle_timeout && time_after(jiffies, exp)) { - DPRINTK("releasing vcc %p->%p of entry %p\n", + pr_debug("releasing vcc %p->%p of entry %p\n", cv, cv->vcc, entry); vcc_release_async(cv->vcc, -ETIMEDOUT); } @@ -146,7 +138,7 @@ static int neigh_check_cb(struct neighbour *n) if (atomic_read(&n->refcnt) > 1) { struct sk_buff *skb; - DPRINTK("destruction postponed with ref %d\n", + pr_debug("destruction postponed with ref %d\n", atomic_read(&n->refcnt)); while ((skb = skb_dequeue(&n->arp_queue)) != NULL) @@ -155,7 +147,7 @@ static int neigh_check_cb(struct neighbour *n) return 0; } - DPRINTK("expired neigh %p\n", n); + pr_debug("expired neigh %p\n", n); return 1; } @@ -171,14 +163,14 @@ static int clip_arp_rcv(struct sk_buff *skb) { struct atm_vcc *vcc; - DPRINTK("clip_arp_rcv\n"); + pr_debug("clip_arp_rcv\n"); vcc = ATM_SKB(skb)->vcc; if (!vcc || !atm_charge(vcc, skb->truesize)) { dev_kfree_skb_any(skb); return 0; } - DPRINTK("pushing to %p\n", vcc); - DPRINTK("using %p\n", CLIP_VCC(vcc)->old_push); + pr_debug("pushing to %p\n", vcc); + pr_debug("using %p\n", CLIP_VCC(vcc)->old_push); CLIP_VCC(vcc)->old_push(vcc, skb); return 0; } @@ -196,9 +188,9 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) { struct clip_vcc *clip_vcc = CLIP_VCC(vcc); - DPRINTK("clip push\n"); + pr_debug("clip push\n"); if (!skb) { - DPRINTK("removing VCC %p\n", clip_vcc); + pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) unlink_clip_vcc(clip_vcc); clip_vcc->old_push(vcc, NULL); /* pass on the bad news */ @@ -247,7 +239,7 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) int old; unsigned long flags; - DPRINTK("clip_pop(vcc %p)\n", vcc); + pr_debug("clip_pop(vcc %p)\n", vcc); clip_vcc->old_pop(vcc, skb); /* skb->dev == NULL in outbound ARP packets */ if (!dev) @@ -263,7 +255,7 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) { - DPRINTK("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb); + pr_debug("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb); to_atmarpd(act_need, PRIV(neigh->dev)->number, NEIGH2ENTRY(neigh)->ip); } @@ -292,7 +284,7 @@ static int clip_constructor(struct neighbour *neigh) struct in_device *in_dev; struct neigh_parms *parms; - DPRINTK("clip_constructor (neigh %p, entry %p)\n", neigh, entry); + pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry); neigh->type = inet_addr_type(entry->ip); if (neigh->type != RTN_UNICAST) return -EINVAL; @@ -376,7 +368,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) int old; unsigned long flags; - DPRINTK("clip_start_xmit (skb %p)\n", skb); + pr_debug("clip_start_xmit (skb %p)\n", skb); if (!skb->dst) { printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n"); dev_kfree_skb(skb); @@ -412,9 +404,9 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) } return 0; } - DPRINTK("neigh %p, vccs %p\n", entry, entry->vccs); + pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - DPRINTK("using neighbour %p, vcc %p\n", skb->dst->neighbour, vcc); + pr_debug("using neighbour %p, vcc %p\n", skb->dst->neighbour, vcc); if (entry->vccs->encap) { void *here; @@ -425,7 +417,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = vcc->atm_options; entry->vccs->last_use = jiffies; - DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */ if (old) { printk(KERN_WARNING "clip_start_xmit: XOFF->XOFF transition\n"); @@ -468,7 +460,7 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; - DPRINTK("mkip clip_vcc %p vcc %p\n", clip_vcc, vcc); + pr_debug("mkip clip_vcc %p vcc %p\n", clip_vcc, vcc); clip_vcc->vcc = vcc; vcc->user_back = clip_vcc; set_bit(ATM_VF_IS_CLIP, &vcc->flags); @@ -538,7 +530,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) printk(KERN_ERR "hiding hidden ATMARP entry\n"); return 0; } - DPRINTK("setentry: remove\n"); + pr_debug("setentry: remove\n"); unlink_clip_vcc(clip_vcc); return 0; } @@ -552,9 +544,9 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) entry = NEIGH2ENTRY(neigh); if (entry != clip_vcc->entry) { if (!clip_vcc->entry) - DPRINTK("setentry: add\n"); + pr_debug("setentry: add\n"); else { - DPRINTK("setentry: update\n"); + pr_debug("setentry: update\n"); unlink_clip_vcc(clip_vcc); } link_vcc(clip_vcc, entry); @@ -611,7 +603,7 @@ static int clip_create(int number) } clip_priv->next = clip_devs; clip_devs = dev; - DPRINTK("registered (net:%s)\n", dev->name); + pr_debug("registered (net:%s)\n", dev->name); return number; } @@ -620,6 +612,9 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = arg; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { neigh_ifdown(&clip_tbl, dev); return NOTIFY_DONE; @@ -631,16 +626,16 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_UP: - DPRINTK("clip_device_event NETDEV_UP\n"); + pr_debug("clip_device_event NETDEV_UP\n"); to_atmarpd(act_up, PRIV(dev)->number, 0); break; case NETDEV_GOING_DOWN: - DPRINTK("clip_device_event NETDEV_DOWN\n"); + pr_debug("clip_device_event NETDEV_DOWN\n"); to_atmarpd(act_down, PRIV(dev)->number, 0); break; case NETDEV_CHANGE: case NETDEV_CHANGEMTU: - DPRINTK("clip_device_event NETDEV_CHANGE*\n"); + pr_debug("clip_device_event NETDEV_CHANGE*\n"); to_atmarpd(act_change, PRIV(dev)->number, 0); break; } @@ -681,14 +676,14 @@ static struct notifier_block clip_inet_notifier = { static void atmarpd_close(struct atm_vcc *vcc) { - DPRINTK("atmarpd_close\n"); + pr_debug("atmarpd_close\n"); rtnl_lock(); atmarpd = NULL; skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); rtnl_unlock(); - DPRINTK("(done)\n"); + pr_debug("(done)\n"); module_put(THIS_MODULE); } diff --git a/net/atm/common.c b/net/atm/common.c index 282d761454b..e166d9e0ffd 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -30,13 +30,6 @@ #include "addr.h" /* address registry */ #include "signaling.h" /* for WAITING and sigd_attach */ - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - struct hlist_head vcc_hash[VCC_HTABLE_SIZE]; DEFINE_RWLOCK(vcc_sklist_lock); @@ -70,13 +63,13 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) struct sock *sk = sk_atm(vcc); if (atomic_read(&sk->sk_wmem_alloc) && !atm_may_send(vcc, size)) { - DPRINTK("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", + pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", atomic_read(&sk->sk_wmem_alloc), size, sk->sk_sndbuf); return NULL; } while (!(skb = alloc_skb(size,GFP_KERNEL))) schedule(); - DPRINTK("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), + pr_debug("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc); return skb; @@ -132,7 +125,7 @@ static struct proto vcc_proto = { .obj_size = sizeof(struct atm_vcc), }; -int vcc_create(struct socket *sock, int protocol, int family) +int vcc_create(struct net *net, struct socket *sock, int protocol, int family) { struct sock *sk; struct atm_vcc *vcc; @@ -140,7 +133,7 @@ int vcc_create(struct socket *sock, int protocol, int family) sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1); if (!sk) return -ENOMEM; sock_init_data(sock, sk); @@ -392,10 +385,10 @@ static int __vcc_connect(struct atm_vcc *vcc, struct atm_dev *dev, short vpi, if (!error) error = adjust_tp(&vcc->qos.rxtp,vcc->qos.aal); if (error) goto fail; - DPRINTK("VCC %d.%d, AAL %d\n",vpi,vci,vcc->qos.aal); - DPRINTK(" TX: %d, PCR %d..%d, SDU %d\n",vcc->qos.txtp.traffic_class, + pr_debug("VCC %d.%d, AAL %d\n",vpi,vci,vcc->qos.aal); + pr_debug(" TX: %d, PCR %d..%d, SDU %d\n",vcc->qos.txtp.traffic_class, vcc->qos.txtp.min_pcr,vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu); - DPRINTK(" RX: %d, PCR %d..%d, SDU %d\n",vcc->qos.rxtp.traffic_class, + pr_debug(" RX: %d, PCR %d..%d, SDU %d\n",vcc->qos.rxtp.traffic_class, vcc->qos.rxtp.min_pcr,vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu); if (dev->ops->open) { @@ -420,7 +413,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) struct atm_vcc *vcc = ATM_SD(sock); int error; - DPRINTK("vcc_connect (vpi %d, vci %d)\n",vpi,vci); + pr_debug("vcc_connect (vpi %d, vci %d)\n",vpi,vci); if (sock->state == SS_CONNECTED) return -EISCONN; if (sock->state != SS_UNCONNECTED) @@ -433,7 +426,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) else if (test_bit(ATM_VF_PARTIAL,&vcc->flags)) return -EINVAL; - DPRINTK("vcc_connect (TX: cl %d,bw %d-%d,sdu %d; " + pr_debug("vcc_connect (TX: cl %d,bw %d-%d,sdu %d; " "RX: cl %d,bw %d-%d,sdu %d,AAL %s%d)\n", vcc->qos.txtp.traffic_class,vcc->qos.txtp.min_pcr, vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu, @@ -504,7 +497,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (error) return error; sock_recv_timestamp(msg, sk, skb); - DPRINTK("RcvM %d -= %d\n", atomic_read(&sk->rmem_alloc), skb->truesize); + pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); atm_return(vcc, skb->truesize); skb_free_datagram(sk, skb); return copied; diff --git a/net/atm/common.h b/net/atm/common.h index ad78c9e1117..16f32c1fa1c 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -10,7 +10,7 @@ #include <linux/poll.h> /* for poll_table */ -int vcc_create(struct socket *sock, int protocol, int family); +int vcc_create(struct net *net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, diff --git a/net/atm/lec.c b/net/atm/lec.c index 59d5aa3366f..7eb1b21a0e9 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -48,12 +48,6 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; #include "lec_arpc.h" #include "resources.h" -#if 0 -#define DPRINTK printk -#else -#define DPRINTK(format,args...) -#endif - #define DUMP_PACKETS 0 /* * 0 = None, * 1 = 30 first bytes @@ -272,8 +266,9 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) char buf[300]; int i = 0; #endif /* DUMP_PACKETS >0 */ + DECLARE_MAC_BUF(mac); - DPRINTK("lec_start_xmit called\n"); + pr_debug("lec_start_xmit called\n"); if (!priv->lecd) { printk("%s:No lecd attached\n", dev->name); priv->stats.tx_errors++; @@ -281,7 +276,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) return -EUNATCH; } - DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", + pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n", (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), (long)skb_end_pointer(skb)); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) @@ -292,7 +287,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Make sure we have room for lec_id */ if (skb_headroom(skb) < 2) { - DPRINTK("lec_start_xmit: reallocating skb\n"); + pr_debug("lec_start_xmit: reallocating skb\n"); skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); kfree_skb(skb); if (skb2 == NULL) @@ -373,25 +368,21 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) #endif entry = NULL; vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); - DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name, + pr_debug("%s:vcc:%p vcc_flags:%lx, entry:%p\n", dev->name, vcc, vcc ? vcc->flags : 0, entry); if (!vcc || !test_bit(ATM_VF_READY, &vcc->flags)) { if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { - DPRINTK("%s:lec_start_xmit: queuing packet, ", + pr_debug("%s:lec_start_xmit: queuing packet, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], - lec_h->h_dest[2], lec_h->h_dest[3], - lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("MAC address %s\n", + print_mac(mac, lec_h->h_dest)); skb_queue_tail(&entry->tx_wait, skb); } else { - DPRINTK + pr_debug ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], - lec_h->h_dest[2], lec_h->h_dest[3], - lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("MAC address %s\n", + print_mac(mac, lec_h->h_dest)); priv->stats.tx_dropped++; dev_kfree_skb(skb); } @@ -402,10 +393,9 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) #endif /* DUMP_PACKETS > 0 */ while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { - DPRINTK("lec.c: emptying tx queue, "); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], - lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("lec.c: emptying tx queue, "); + pr_debug("MAC address %s\n", + print_mac(mac, lec_h->h_dest)); lec_send(vcc, skb2, priv); } @@ -459,12 +449,13 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) struct lec_arp_table *entry; int i; char *tmp; /* FIXME */ + DECLARE_MAC_BUF(mac); atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); mesg = (struct atmlec_msg *)skb->data; tmp = skb->data; tmp += sizeof(struct atmlec_msg); - DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type); + pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type); switch (mesg->type) { case l_set_mac_addr: for (i = 0; i < 6; i++) { @@ -500,9 +491,9 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) mesg->content.normal.atm_addr, mesg->content.normal.flag, mesg->content.normal.targetless_le_arp); - DPRINTK("lec: in l_arp_update\n"); + pr_debug("lec: in l_arp_update\n"); if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ - DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n", + pr_debug("lec: LANE2 3.1.5, got tlvs, size %d\n", mesg->sizeoftlvs); lane2_associate_ind(dev, mesg->content.normal.mac_addr, tmp, mesg->sizeoftlvs); @@ -544,14 +535,10 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { struct net_bridge_fdb_entry *f; - DPRINTK - ("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - dev->name, mesg->content.proxy.mac_addr[0], - mesg->content.proxy.mac_addr[1], - mesg->content.proxy.mac_addr[2], - mesg->content.proxy.mac_addr[3], - mesg->content.proxy.mac_addr[4], - mesg->content.proxy.mac_addr[5]); + pr_debug + ("%s: bridge zeppelin asks about %s\n", + dev->name, + print_mac(mac, mesg->content.proxy.mac_addr)); if (br_fdb_get_hook == NULL || dev->br_port == NULL) break; @@ -564,7 +551,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) struct sk_buff *skb2; struct sock *sk; - DPRINTK + pr_debug ("%s: entry found, responding to zeppelin\n", dev->name); skb2 = @@ -670,7 +657,7 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, sk->sk_data_ready(sk, skb->len); if (data != NULL) { - DPRINTK("lec: about to send %d bytes of data\n", data->len); + pr_debug("lec: about to send %d bytes of data\n", data->len); atm_force_charge(priv->lecd, data->truesize); skb_queue_tail(&sk->sk_receive_queue, data); sk->sk_data_ready(sk, skb->len); @@ -742,7 +729,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) vcc->vpi, vcc->vci); #endif if (!skb) { - DPRINTK("%s: null skb\n", dev->name); + pr_debug("%s: null skb\n", dev->name); lec_vcc_close(priv, vcc); return; } @@ -766,7 +753,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) if (memcmp(skb->data, lec_ctrl_magic, 4) == 0) { /* Control frame, to daemon */ struct sock *sk = sk_atm(vcc); - DPRINTK("%s: To daemon\n", dev->name); + pr_debug("%s: To daemon\n", dev->name); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); } else { /* Data frame, queue to protocol handlers */ @@ -780,7 +767,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) * Probably looping back, or if lecd is missing, * lecd has gone down */ - DPRINTK("Ignoring frame...\n"); + pr_debug("Ignoring frame...\n"); dev_kfree_skb(skb); return; } @@ -1442,9 +1429,9 @@ static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr, #include <net/route.h> #if 0 -#define DPRINTK(format,args...) +#define pr_debug(format,args...) /* -#define DPRINTK printk +#define pr_debug printk */ #endif #define DEBUG_ARP_TABLE 0 @@ -1513,7 +1500,7 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) tmp = &priv->lec_arp_tables[HASH(entry->mac_addr[ETH_ALEN - 1])]; hlist_add_head(&entry->next, tmp); - DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff & entry->mac_addr[0], 0xff & entry->mac_addr[1], 0xff & entry->mac_addr[2], 0xff & entry->mac_addr[3], 0xff & entry->mac_addr[4], 0xff & entry->mac_addr[5]); @@ -1555,7 +1542,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) } skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff & to_remove->mac_addr[0], 0xff & to_remove->mac_addr[1], 0xff & to_remove->mac_addr[2], 0xff & to_remove->mac_addr[3], 0xff & to_remove->mac_addr[4], 0xff & to_remove->mac_addr[5]); @@ -1777,7 +1764,7 @@ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, struct hlist_head *head; struct lec_arp_table *entry; - DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", mac_addr[0] & 0xff, mac_addr[1] & 0xff, mac_addr[2] & 0xff, mac_addr[3] & 0xff, mac_addr[4] & 0xff, mac_addr[5] & 0xff); @@ -1819,7 +1806,7 @@ static void lec_arp_expire_arp(unsigned long data) entry = (struct lec_arp_table *)data; - DPRINTK("lec_arp_expire_arp\n"); + pr_debug("lec_arp_expire_arp\n"); if (entry->status == ESI_ARP_PENDING) { if (entry->no_tries <= entry->priv->max_retry_count) { if (entry->is_rdesc) @@ -1843,7 +1830,7 @@ static void lec_arp_expire_vcc(unsigned long data) del_timer(&to_remove->timer); - DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", + pr_debug("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", to_remove, priv, to_remove->vcc ? to_remove->recv_vcc->vpi : 0, to_remove->vcc ? to_remove->recv_vcc->vci : 0); @@ -1883,7 +1870,7 @@ static void lec_arp_check_expire(struct work_struct *work) unsigned long time_to_check; int i; - DPRINTK("lec_arp_check_expire %p\n", priv); + pr_debug("lec_arp_check_expire %p\n", priv); now = jiffies; restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); @@ -1895,13 +1882,13 @@ restart: else time_to_check = priv->aging_time; - DPRINTK("About to expire: %lx - %lx > %lx\n", + pr_debug("About to expire: %lx - %lx > %lx\n", now, entry->last_used, time_to_check); if (time_after(now, entry->last_used + time_to_check) && !(entry->flags & LEC_PERMANENT_FLAG) && !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ /* Remove entry */ - DPRINTK("LEC:Entry timed out\n"); + pr_debug("LEC:Entry timed out\n"); lec_arp_remove(priv, entry); lec_arp_put(entry); } else { @@ -1999,7 +1986,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, entry->packets_flooded < priv->maximum_unknown_frame_count) { entry->packets_flooded++; - DPRINTK("LEC_ARP: Flooding..\n"); + pr_debug("LEC_ARP: Flooding..\n"); found = priv->mcast_vcc; goto out; } @@ -2010,13 +1997,13 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, */ lec_arp_hold(entry); *ret_entry = entry; - DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, + pr_debug("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc); found = NULL; } else { /* No matching entry was found */ entry = make_entry(priv, mac_to_find); - DPRINTK("LEC_ARP: Making entry\n"); + pr_debug("LEC_ARP: Making entry\n"); if (!entry) { found = priv->mcast_vcc; goto out; @@ -2053,7 +2040,7 @@ lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, struct lec_arp_table *entry; int i; - DPRINTK("lec_addr_delete\n"); + pr_debug("lec_addr_delete\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { @@ -2084,8 +2071,8 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, struct lec_arp_table *entry, *tmp; int i; - DPRINTK("lec:%s", (targetless_le_arp) ? "targetless " : " "); - DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + pr_debug("lec:%s", (targetless_le_arp) ? "targetless " : " "); + pr_debug("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], mac_addr[4], mac_addr[5]); @@ -2122,7 +2109,7 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, entry->flags |= LEC_REMOTE_FLAG; else entry->flags &= ~LEC_REMOTE_FLAG; - DPRINTK("After update\n"); + pr_debug("After update\n"); dump_arp_table(priv); goto out; } @@ -2166,7 +2153,7 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, entry->status = ESI_VC_PENDING; send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL); } - DPRINTK("After update2\n"); + pr_debug("After update2\n"); dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); @@ -2189,7 +2176,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, if (ioc_data->receive == 2) { /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ - DPRINTK("LEC_ARP: Attaching mcast forward\n"); + pr_debug("LEC_ARP: Attaching mcast forward\n"); #if 0 entry = lec_arp_find(priv, bus_mac); if (!entry) { @@ -2214,7 +2201,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, * Vcc which we don't want to make default vcc, * attach it anyway. */ - DPRINTK + pr_debug ("LEC_ARP:Attaching data direct, not default: " "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", ioc_data->atm_addr[0], ioc_data->atm_addr[1], @@ -2242,7 +2229,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, dump_arp_table(priv); goto out; } - DPRINTK + pr_debug ("LEC_ARP:Attaching data direct, default: " "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", ioc_data->atm_addr[0], ioc_data->atm_addr[1], @@ -2260,8 +2247,8 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, if (memcmp (ioc_data->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { - DPRINTK("LEC_ARP: Attaching data direct\n"); - DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n", + pr_debug("LEC_ARP: Attaching data direct\n"); + pr_debug("Currently -> Vcc: %d, Rvcc:%d\n", entry->vcc ? entry->vcc->vci : 0, entry->recv_vcc ? entry->recv_vcc-> vci : 0); @@ -2303,7 +2290,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, } } if (found_entry) { - DPRINTK("After vcc was added\n"); + pr_debug("After vcc was added\n"); dump_arp_table(priv); goto out; } @@ -2323,7 +2310,7 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, entry->timer.expires = jiffies + priv->vcc_timeout_period; entry->timer.function = lec_arp_expire_vcc; add_timer(&entry->timer); - DPRINTK("After vcc was added\n"); + pr_debug("After vcc was added\n"); dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); @@ -2336,7 +2323,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) struct lec_arp_table *entry; int i; - DPRINTK("LEC:lec_flush_complete %lx\n", tran_id); + pr_debug("LEC:lec_flush_complete %lx\n", tran_id); restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { @@ -2353,7 +2340,7 @@ restart: entry->last_used = jiffies; entry->status = ESI_FORWARD_DIRECT; lec_arp_put(entry); - DPRINTK("LEC_ARP: Flushed\n"); + pr_debug("LEC_ARP: Flushed\n"); goto restart; } } @@ -2376,7 +2363,7 @@ lec_set_flush_tran_id(struct lec_priv *priv, hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; - DPRINTK("Set flush transaction id to %lx for %p\n", + pr_debug("Set flush transaction id to %lx for %p\n", tran_id, entry); } } @@ -2427,7 +2414,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) struct lec_arp_table *entry; int i; - DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); + pr_debug("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); dump_arp_table(priv); spin_lock_irqsave(&priv->lec_arp_lock, flags); @@ -2510,7 +2497,7 @@ lec_arp_check_empties(struct lec_priv *priv, goto out; } } - DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); + pr_debug("LEC_ARP: Arp_check_empties: entry not found!\n"); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 7c85aa551d5..2086396de17 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -244,7 +244,7 @@ static struct net_device *find_lec_by_itfnum(int itf) char name[IFNAMSIZ]; sprintf(name, "lec%d", itf); - dev = dev_get_by_name(name); + dev = dev_get_by_name(&init_net, name); return dev; } @@ -956,6 +956,10 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo struct lec_priv *priv; dev = (struct net_device *)dev_ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->name == NULL || strncmp(dev->name, "lec", 3)) return NOTIFY_DONE; /* we are only interested in lec:s */ diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 19d5dfc0702..0af84cd4f65 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -46,13 +46,6 @@ #include "common.h" -#if 0 -#define DPRINTK(format, args...) \ - printk(KERN_DEBUG "pppoatm: " format, ##args) -#else -#define DPRINTK(format, args...) -#endif - enum pppoatm_encaps { e_autodetect = PPPOATM_ENCAPS_AUTODETECT, e_vc = PPPOATM_ENCAPS_VC, @@ -139,9 +132,9 @@ static void pppoatm_unassign_vcc(struct atm_vcc *atmvcc) static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) { struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc); - DPRINTK("pppoatm push\n"); + pr_debug("pppoatm push\n"); if (skb == NULL) { /* VCC was closed */ - DPRINTK("removing ATMPPP VCC %p\n", pvcc); + pr_debug("removing ATMPPP VCC %p\n", pvcc); pppoatm_unassign_vcc(atmvcc); atmvcc->push(atmvcc, NULL); /* Pass along bad news */ return; @@ -172,9 +165,8 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) pvcc->chan.mtu += LLC_LEN; break; } - DPRINTK("(unit %d): Couldn't autodetect yet " + pr_debug("Couldn't autodetect yet " "(skb: %02X %02X %02X %02X %02X %02X)\n", - pvcc->chan.unit, skb->data[0], skb->data[1], skb->data[2], skb->data[3], skb->data[4], skb->data[5]); goto error; @@ -202,8 +194,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) { struct pppoatm_vcc *pvcc = chan_to_pvcc(chan); ATM_SKB(skb)->vcc = pvcc->atmvcc; - DPRINTK("(unit %d): pppoatm_send (skb=0x%p, vcc=0x%p)\n", - pvcc->chan.unit, skb, pvcc->atmvcc); + pr_debug("pppoatm_send (skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc); if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT)) (void) skb_pull(skb, 1); switch (pvcc->encaps) { /* LLC encapsulation needed */ @@ -228,16 +219,14 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) goto nospace; break; case e_autodetect: - DPRINTK("(unit %d): Trying to send without setting encaps!\n", - pvcc->chan.unit); + pr_debug("Trying to send without setting encaps!\n"); kfree_skb(skb); return 1; } atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; - DPRINTK("(unit %d): atm_skb(%p)->vcc(%p)->dev(%p)\n", - pvcc->chan.unit, skb, ATM_SKB(skb)->vcc, + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) ? DROP_PACKET : 1; diff --git a/net/atm/proc.c b/net/atm/proc.c index 99fc1fe950e..5d9d5ffba14 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -22,6 +22,7 @@ #include <linux/netdevice.h> #include <linux/atmclip.h> #include <linux/init.h> /* for __init */ +#include <net/net_namespace.h> #include <net/atmclip.h> #include <asm/uaccess.h> #include <asm/atomic.h> @@ -175,7 +176,7 @@ static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) seq_printf(seq, "%3d %3d %5d %-3s %7d %-5s %7d %-6s", vcc->dev->number,vcc->vpi,vcc->vci, - vcc->qos.aal >= sizeof(aal_name)/sizeof(aal_name[0]) ? "err" : + vcc->qos.aal >= ARRAY_SIZE(aal_name) ? "err" : aal_name[vcc->qos.aal],vcc->qos.rxtp.min_pcr, class_name[vcc->qos.rxtp.traffic_class],vcc->qos.txtp.min_pcr, class_name[vcc->qos.txtp.traffic_class]); @@ -475,7 +476,7 @@ static void atm_proc_dirs_remove(void) if (e->dirent) remove_proc_entry(e->name, atm_proc_root); } - remove_proc_entry("net/atm", NULL); + remove_proc_entry("atm", init_net.proc_net); } int __init atm_proc_init(void) @@ -483,7 +484,7 @@ int __init atm_proc_init(void) static struct atm_proc_entry *e; int ret; - atm_proc_root = proc_mkdir("net/atm",NULL); + atm_proc_root = proc_mkdir("atm", init_net.proc_net); if (!atm_proc_root) goto err_out; for (e = atm_proc_ents; e->name; e++) { diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 848e6e191cc..43e8bf5ed00 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -124,10 +124,13 @@ static const struct proto_ops pvc_proto_ops = { }; -static int pvc_create(struct socket *sock,int protocol) +static int pvc_create(struct net *net, struct socket *sock,int protocol) { + if (net != &init_net) + return -EAFNOSUPPORT; + sock->ops = &pvc_proto_ops; - return vcc_create(sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC); } diff --git a/net/atm/raw.c b/net/atm/raw.c index 1378f61c5c3..b0a2d8cb674 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -13,14 +13,6 @@ #include "common.h" #include "protocols.h" - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - /* * SKB == NULL indicates that the link is being closed */ @@ -40,8 +32,8 @@ static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) { struct sock *sk = sk_atm(vcc); - DPRINTK("APopR (%d) %d -= %d\n", vcc->vci, sk->sk_wmem_alloc, - skb->truesize); + pr_debug("APopR (%d) %d -= %d\n", vcc->vci, + atomic_read(&sk->sk_wmem_alloc), skb->truesize); atomic_sub(skb->truesize, &sk->sk_wmem_alloc); dev_kfree_skb_any(skb); sk->sk_write_space(sk); diff --git a/net/atm/signaling.c b/net/atm/signaling.c index d14baaf1f4c..22992140052 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -23,13 +23,6 @@ Danger: may cause nasty hangs if the demon crashes. */ -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - struct atm_vcc *sigd = NULL; #ifdef WAIT_FOR_DEMON static DECLARE_WAIT_QUEUE_HEAD(sigd_sleep); @@ -44,14 +37,14 @@ static void sigd_put_skb(struct sk_buff *skb) add_wait_queue(&sigd_sleep,&wait); while (!sigd) { set_current_state(TASK_UNINTERRUPTIBLE); - DPRINTK("atmsvc: waiting for signaling demon...\n"); + pr_debug("atmsvc: waiting for signaling demon...\n"); schedule(); } current->state = TASK_RUNNING; remove_wait_queue(&sigd_sleep,&wait); #else if (!sigd) { - DPRINTK("atmsvc: no signaling demon\n"); + pr_debug("atmsvc: no signaling demon\n"); kfree_skb(skb); return; } @@ -96,9 +89,9 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) msg = (struct atmsvc_msg *) skb->data; atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); - DPRINTK("sigd_send %d (0x%lx)\n",(int) msg->type, - (unsigned long) msg->vcc); vcc = *(struct atm_vcc **) &msg->vcc; + pr_debug("sigd_send %d (0x%lx)\n",(int) msg->type, + (unsigned long) vcc); sk = sk_atm(vcc); switch (msg->type) { @@ -130,7 +123,7 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) case as_indicate: vcc = *(struct atm_vcc **) &msg->listen_vcc; sk = sk_atm(vcc); - DPRINTK("as_indicate!!!\n"); + pr_debug("as_indicate!!!\n"); lock_sock(sk); if (sk_acceptq_is_full(sk)) { sigd_enq(NULL,as_reject,vcc,NULL,NULL); @@ -139,7 +132,7 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) } sk->sk_ack_backlog++; skb_queue_tail(&sk->sk_receive_queue, skb); - DPRINTK("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); + pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); sk->sk_state_change(sk); as_indicate_complete: release_sock(sk); @@ -176,7 +169,7 @@ void sigd_enq2(struct atm_vcc *vcc,enum atmsvc_msg_type type, struct atmsvc_msg *msg; static unsigned session = 0; - DPRINTK("sigd_enq %d (0x%p)\n",(int) type,vcc); + pr_debug("sigd_enq %d (0x%p)\n",(int) type,vcc); while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL))) schedule(); msg = (struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg)); @@ -226,7 +219,7 @@ static void sigd_close(struct atm_vcc *vcc) struct sock *s; int i; - DPRINTK("sigd_close\n"); + pr_debug("sigd_close\n"); sigd = NULL; if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) printk(KERN_ERR "sigd_close: closing with requests pending\n"); @@ -237,7 +230,7 @@ static void sigd_close(struct atm_vcc *vcc) struct hlist_head *head = &vcc_hash[i]; sk_for_each(s, node, head) { - struct atm_vcc *vcc = atm_sk(s); + vcc = atm_sk(s); purge_vcc(vcc); } @@ -263,7 +256,7 @@ static struct atm_dev sigd_dev = { int sigd_attach(struct atm_vcc *vcc) { if (sigd) return -EADDRINUSE; - DPRINTK("sigd_attach\n"); + pr_debug("sigd_attach\n"); sigd = vcc; vcc->dev = &sigd_dev; vcc_insert_socket(sk_atm(vcc)); diff --git a/net/atm/svc.c b/net/atm/svc.c index 876ec7b47a2..daf9a48a7db 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -25,16 +25,7 @@ #include "signaling.h" #include "addr.h" - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - -static int svc_create(struct socket *sock,int protocol); - +static int svc_create(struct net *net, struct socket *sock,int protocol); /* * Note: since all this is still nicely synchronized with the signaling demon, @@ -55,7 +46,7 @@ static void svc_disconnect(struct atm_vcc *vcc) struct sk_buff *skb; struct sock *sk = sk_atm(vcc); - DPRINTK("svc_disconnect %p\n",vcc); + pr_debug("svc_disconnect %p\n",vcc); if (test_bit(ATM_VF_REGIS,&vcc->flags)) { prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_close,NULL,NULL,NULL); @@ -69,7 +60,7 @@ static void svc_disconnect(struct atm_vcc *vcc) as_indicate has been answered */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { atm_return(vcc, skb->truesize); - DPRINTK("LISTEN REL\n"); + pr_debug("LISTEN REL\n"); sigd_enq2(NULL,as_reject,vcc,NULL,NULL,&vcc->qos,0); dev_kfree_skb(skb); } @@ -85,7 +76,7 @@ static int svc_release(struct socket *sock) if (sk) { vcc = ATM_SD(sock); - DPRINTK("svc_release %p\n", vcc); + pr_debug("svc_release %p\n", vcc); clear_bit(ATM_VF_READY, &vcc->flags); /* VCC pointer is used as a reference, so we must not free it (thereby subjecting it to re-use) before all pending connections @@ -162,7 +153,7 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, struct atm_vcc *vcc = ATM_SD(sock); int error; - DPRINTK("svc_connect %p\n",vcc); + pr_debug("svc_connect %p\n",vcc); lock_sock(sk); if (sockaddr_len != sizeof(struct sockaddr_atmsvc)) { error = -EINVAL; @@ -224,7 +215,7 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); continue; } - DPRINTK("*ABORT*\n"); + pr_debug("*ABORT*\n"); /* * This is tricky: * Kernel ---close--> Demon @@ -295,7 +286,7 @@ static int svc_listen(struct socket *sock,int backlog) struct atm_vcc *vcc = ATM_SD(sock); int error; - DPRINTK("svc_listen %p\n",vcc); + pr_debug("svc_listen %p\n",vcc); lock_sock(sk); /* let server handle listen on unbound sockets */ if (test_bit(ATM_VF_SESSION,&vcc->flags)) { @@ -335,13 +326,13 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) lock_sock(sk); - error = svc_create(newsock,0); + error = svc_create(sk->sk_net, newsock,0); if (error) goto out; new_vcc = ATM_SD(newsock); - DPRINTK("svc_accept %p -> %p\n",old_vcc,new_vcc); + pr_debug("svc_accept %p -> %p\n",old_vcc,new_vcc); while (1) { DEFINE_WAIT(wait); @@ -545,7 +536,7 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, error = -EINPROGRESS; goto out; } - DPRINTK("svc_addparty added wait queue\n"); + pr_debug("svc_addparty added wait queue\n"); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); @@ -636,12 +627,15 @@ static const struct proto_ops svc_proto_ops = { }; -static int svc_create(struct socket *sock,int protocol) +static int svc_create(struct net *net, struct socket *sock,int protocol) { int error; + if (net != &init_net) + return -EAFNOSUPPORT; + sock->ops = &svc_proto_ops; - error = vcc_create(sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; ATM_SD(sock)->remote.sas_family = AF_ATMSVC; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index dae2a42d3d8..993e5c75e90 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -44,6 +44,7 @@ #include <linux/sysctl.h> #include <linux/init.h> #include <linux/spinlock.h> +#include <net/net_namespace.h> #include <net/tcp_states.h> #include <net/ip.h> #include <net/arp.h> @@ -103,6 +104,9 @@ static int ax25_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Reject non AX.25 devices */ if (dev->type != ARPHRD_AX25) return NOTIFY_DONE; @@ -627,7 +631,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) { res = -ENODEV; break; @@ -779,11 +783,14 @@ static struct proto ax25_proto = { .obj_size = sizeof(struct sock), }; -static int ax25_create(struct socket *sock, int protocol) +static int ax25_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; ax25_cb *ax25; + if (net != &init_net) + return -EAFNOSUPPORT; + switch (sock->type) { case SOCK_DGRAM: if (protocol == 0 || protocol == PF_AX25) @@ -829,7 +836,7 @@ static int ax25_create(struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) return -ENOMEM; ax25 = sk->sk_protinfo = ax25_create_cb(); @@ -854,7 +861,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; if ((ax25 = ax25_create_cb()) == NULL) { @@ -1998,9 +2005,9 @@ static int __init ax25_init(void) register_netdevice_notifier(&ax25_dev_notifier); ax25_register_sysctl(); - proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops); - proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops); - proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops); + proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops); + proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops); + proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops); out: return rc; } @@ -2014,9 +2021,9 @@ MODULE_ALIAS_NETPROTO(PF_AX25); static void __exit ax25_exit(void) { - proc_net_remove("ax25_route"); - proc_net_remove("ax25"); - proc_net_remove("ax25_calls"); + proc_net_remove(&init_net, "ax25_route"); + proc_net_remove(&init_net, "ax25"); + proc_net_remove(&init_net, "ax25_calls"); ax25_rt_free(); ax25_uid_free(); ax25_dev_free(); diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c index e37d217a986..8273b1200ee 100644 --- a/net/ax25/ax25_ds_in.c +++ b/net/ax25/ax25_ds_in.c @@ -75,7 +75,7 @@ static int ax25_ds_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int framet } ax25_dama_on(ax25); - /* according to DK4EG´s spec we are required to + /* according to DK4EG's spec we are required to * send a RR RESPONSE FINAL NR=0. */ diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index a49773ff2b9..b5e59787be2 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -41,7 +41,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) ax25_cb *ax25o; struct hlist_node *node; - /* Please note that neither DK4EG´s nor DG2FEF´s + /* Please note that neither DK4EG's nor DG2FEF's * DAMA spec mention the following behaviour as seen * with TheFirmware: * diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 0ddaff0df21..3b7d1720c2e 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -451,6 +451,11 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if ((*skb->data & 0x0F) != 0) { kfree_skb(skb); /* Not a KISS data frame */ return 0; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 930e4918037..f047a57aa95 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -46,7 +46,9 @@ #ifdef CONFIG_INET -int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) +int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) { unsigned char *buff; @@ -215,7 +217,9 @@ put: #else /* INET */ -int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) +int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) { return -AX25_HEADER_LEN; } @@ -227,5 +231,12 @@ int ax25_rebuild_header(struct sk_buff *skb) #endif +const struct header_ops ax25_header_ops = { + .create = ax25_hard_header, + .rebuild = ax25_rebuild_header, +}; + EXPORT_SYMBOL(ax25_hard_header); EXPORT_SYMBOL(ax25_rebuild_header); +EXPORT_SYMBOL(ax25_header_ops); + diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d942b946ba0..1220d8a41eb 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -95,10 +95,13 @@ int bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); -static int bt_sock_create(struct socket *sock, int proto) +static int bt_sock_create(struct net *net, struct socket *sock, int proto) { int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; @@ -113,7 +116,7 @@ static int bt_sock_create(struct socket *sock, int proto) read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { - err = bt_proto[proto]->create(sock, proto); + err = bt_proto[proto]->create(net, sock, proto); module_put(bt_proto[proto]->owner); } diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 1f78c3e336d..347e935faaf 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -2,7 +2,7 @@ BNEP implementation for Linux Bluetooth stack (BlueZ). Copyright (C) 2001-2002 Inventel Systemes Written 2001-2002 by - Clément Moreau <clement.moreau@inventel.fr> + Clément Moreau <clement.moreau@inventel.fr> David Libault <david.libault@inventel.fr> Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com> diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 9092816f58d..95e3837e431 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -2,7 +2,7 @@ BNEP implementation for Linux Bluetooth stack (BlueZ). Copyright (C) 2001-2002 Inventel Systemes Written 2001-2002 by - Clément Moreau <clement.moreau@inventel.fr> + Clément Moreau <clement.moreau@inventel.fr> David Libault <david.libault@inventel.fr> Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com> diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 10292e77604..f718965f296 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -204,7 +204,7 @@ static struct proto bnep_proto = { .obj_size = sizeof(struct bt_sock) }; -static int bnep_sock_create(struct socket *sock, int protocol) +static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -213,7 +213,7 @@ static int bnep_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 19be7861e51..cf700c20d11 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -195,7 +195,7 @@ static struct proto cmtp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int cmtp_sock_create(struct socket *sock, int protocol) +static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -204,7 +204,7 @@ static int cmtp_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 5fdfc9a67d3..9483320f6da 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -78,11 +78,11 @@ void hci_acl_connect(struct hci_conn *conn) cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK); if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) - cp.role_switch = 0x01; + cp.role_switch = 0x01; else - cp.role_switch = 0x00; + cp.role_switch = 0x00; - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CREATE_CONN, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp); } static void hci_acl_connect_cancel(struct hci_conn *conn) @@ -95,8 +95,7 @@ static void hci_acl_connect_cancel(struct hci_conn *conn) return; bacpy(&cp.bdaddr, &conn->dst); - hci_send_cmd(conn->hdev, OGF_LINK_CTL, - OCF_CREATE_CONN_CANCEL, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); } void hci_acl_disconn(struct hci_conn *conn, __u8 reason) @@ -109,8 +108,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason) cp.handle = cpu_to_le16(conn->handle); cp.reason = reason; - hci_send_cmd(conn->hdev, OGF_LINK_CTL, - OCF_DISCONNECT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp); } void hci_add_sco(struct hci_conn *conn, __u16 handle) @@ -126,7 +124,29 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) cp.handle = cpu_to_le16(handle); cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp); +} + +void hci_setup_sync(struct hci_conn *conn, __u16 handle) +{ + struct hci_dev *hdev = conn->hdev; + struct hci_cp_setup_sync_conn cp; + + BT_DBG("%p", conn); + + conn->state = BT_CONNECT; + conn->out = 1; + + cp.handle = cpu_to_le16(handle); + cp.pkt_type = cpu_to_le16(hdev->esco_type); + + cp.tx_bandwidth = cpu_to_le32(0x00001f40); + cp.rx_bandwidth = cpu_to_le32(0x00001f40); + cp.max_latency = cpu_to_le16(0xffff); + cp.voice_setting = cpu_to_le16(hdev->voice_setting); + cp.retrans_effort = 0xff; + + hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); } static void hci_conn_timeout(unsigned long arg) @@ -143,7 +163,10 @@ static void hci_conn_timeout(unsigned long arg) switch (conn->state) { case BT_CONNECT: - hci_acl_connect_cancel(conn); + if (conn->type == ACL_LINK) + hci_acl_connect_cancel(conn); + else + hci_acl_disconn(conn, 0x13); break; case BT_CONNECTED: hci_acl_disconn(conn, 0x13); @@ -330,8 +353,12 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) hci_conn_hold(sco); if (acl->state == BT_CONNECTED && - (sco->state == BT_OPEN || sco->state == BT_CLOSED)) - hci_add_sco(sco, acl->handle); + (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { + if (lmp_esco_capable(hdev)) + hci_setup_sync(sco, acl->handle); + else + hci_add_sco(sco, acl->handle); + } return sco; } @@ -348,7 +375,7 @@ int hci_conn_auth(struct hci_conn *conn) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_auth_requested cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); } return 0; } @@ -369,7 +396,7 @@ int hci_conn_encrypt(struct hci_conn *conn) struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 1; - hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); } return 0; } @@ -383,7 +410,7 @@ int hci_conn_change_link_key(struct hci_conn *conn) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_change_conn_link_key cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp); } return 0; } @@ -401,7 +428,7 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role) struct hci_cp_switch_role cp; bacpy(&cp.bdaddr, &conn->dst); cp.role = role; - hci_send_cmd(conn->hdev, OGF_LINK_POLICY, OCF_SWITCH_ROLE, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_SWITCH_ROLE, sizeof(cp), &cp); } return 0; } @@ -423,8 +450,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn) if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { struct hci_cp_exit_sniff_mode cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(hdev, OGF_LINK_POLICY, - OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_EXIT_SNIFF_MODE, sizeof(cp), &cp); } timer: @@ -455,8 +481,7 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn) cp.max_latency = cpu_to_le16(0); cp.min_remote_timeout = cpu_to_le16(0); cp.min_local_timeout = cpu_to_le16(0); - hci_send_cmd(hdev, OGF_LINK_POLICY, - OCF_SNIFF_SUBRATE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_SNIFF_SUBRATE, sizeof(cp), &cp); } if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { @@ -466,8 +491,7 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn) cp.min_interval = cpu_to_le16(hdev->sniff_min_interval); cp.attempt = cpu_to_le16(4); cp.timeout = cpu_to_le16(1); - hci_send_cmd(hdev, OGF_LINK_POLICY, - OCF_SNIFF_MODE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_SNIFF_MODE, sizeof(cp), &cp); } } @@ -493,6 +517,22 @@ void hci_conn_hash_flush(struct hci_dev *hdev) } } +/* Check pending connect attempts */ +void hci_conn_check_pending(struct hci_dev *hdev) +{ + struct hci_conn *conn; + + BT_DBG("hdev %s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); + if (conn) + hci_acl_connect(conn); + + hci_dev_unlock(hdev); +} + int hci_get_conn_list(void __user *arg) { struct hci_conn_list_req req, *cl; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 18e3afc964d..372b0d3b75a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -176,7 +176,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) BT_DBG("%s %ld", hdev->name, opt); /* Reset device */ - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_RESET, 0, NULL); + hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); } static void hci_init_req(struct hci_dev *hdev, unsigned long opt) @@ -202,16 +202,16 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Reset */ if (test_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks)) - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_RESET, 0, NULL); + hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); /* Read Local Supported Features */ - hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_FEATURES, 0, NULL); + hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); /* Read Local Version */ - hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_VERSION, 0, NULL); + hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); /* Read Buffer Size (ACL mtu, max pkt, etc.) */ - hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BUFFER_SIZE, 0, NULL); + hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL); #if 0 /* Host buffer size */ @@ -221,29 +221,35 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) cp.sco_mtu = HCI_MAX_SCO_SIZE; cp.acl_max_pkt = cpu_to_le16(0xffff); cp.sco_max_pkt = cpu_to_le16(0xffff); - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_HOST_BUFFER_SIZE, sizeof(cp), &cp); } #endif /* Read BD Address */ - hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BD_ADDR, 0, NULL); + hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL); + + /* Read Class of Device */ + hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); + + /* Read Local Name */ + hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL); /* Read Voice Setting */ - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_READ_VOICE_SETTING, 0, NULL); + hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL); /* Optional initialization */ /* Clear Event Filters */ flt_type = HCI_FLT_CLEAR_ALL; - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, 1, &flt_type); + hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); /* Page timeout ~20 secs */ param = cpu_to_le16(0x8000); - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, ¶m); + hci_send_cmd(hdev, HCI_OP_WRITE_PG_TIMEOUT, 2, ¶m); /* Connection accept timeout ~20 secs */ param = cpu_to_le16(0x7d00); - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, ¶m); + hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); } static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) @@ -253,7 +259,7 @@ static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) BT_DBG("%s %x", hdev->name, scan); /* Inquiry and Page scans */ - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_SCAN_ENABLE, 1, &scan); + hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) @@ -263,7 +269,7 @@ static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) BT_DBG("%s %x", hdev->name, auth); /* Authentication */ - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_AUTH_ENABLE, 1, &auth); + hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); } static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) @@ -273,7 +279,7 @@ static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) BT_DBG("%s %x", hdev->name, encrypt); /* Authentication */ - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_ENCRYPT_MODE, 1, &encrypt); + hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); } /* Get HCI device by index. @@ -384,7 +390,7 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) memcpy(&cp.lap, &ir->lap, 3); cp.length = ir->length; cp.num_rsp = ir->num_rsp; - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_INQUIRY, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); } int hci_inquiry(void __user *arg) @@ -1111,13 +1117,13 @@ static int hci_send_frame(struct sk_buff *skb) } /* Send HCI command */ -int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *param) +int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; struct sk_buff *skb; - BT_DBG("%s ogf 0x%x ocf 0x%x plen %d", hdev->name, ogf, ocf, plen); + BT_DBG("%s opcode 0x%x plen %d", hdev->name, opcode, plen); skb = bt_skb_alloc(len, GFP_ATOMIC); if (!skb) { @@ -1126,7 +1132,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p } hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); - hdr->opcode = cpu_to_le16(hci_opcode_pack(ogf, ocf)); + hdr->opcode = cpu_to_le16(opcode); hdr->plen = plen; if (plen) @@ -1143,7 +1149,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p } /* Get data from the previously sent command */ -void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf) +void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { struct hci_command_hdr *hdr; @@ -1152,10 +1158,10 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf) hdr = (void *) hdev->sent_cmd->data; - if (hdr->opcode != cpu_to_le16(hci_opcode_pack(ogf, ocf))) + if (hdr->opcode != cpu_to_le16(opcode)) return NULL; - BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf); + BT_DBG("%s opcode 0x%x", hdev->name, opcode); return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; } @@ -1355,6 +1361,26 @@ static inline void hci_sched_sco(struct hci_dev *hdev) } } +static inline void hci_sched_esco(struct hci_dev *hdev) +{ + struct hci_conn *conn; + struct sk_buff *skb; + int quote; + + BT_DBG("%s", hdev->name); + + while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK, "e))) { + while (quote-- && (skb = skb_dequeue(&conn->data_q))) { + BT_DBG("skb %p len %d", skb, skb->len); + hci_send_frame(skb); + + conn->sent++; + if (conn->sent == ~0) + conn->sent = 0; + } + } +} + static void hci_tx_task(unsigned long arg) { struct hci_dev *hdev = (struct hci_dev *) arg; @@ -1370,6 +1396,8 @@ static void hci_tx_task(unsigned long arg) hci_sched_sco(hdev); + hci_sched_esco(hdev); + /* Send next queued raw (unknown type) packet */ while ((skb = skb_dequeue(&hdev->raw_q))) hci_send_frame(skb); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4baea1e3865..46df2e403df 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -52,234 +52,273 @@ /* Handle HCI Event packets */ -/* Command Complete OGF LINK_CTL */ -static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) +static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) { - __u8 status; - struct hci_conn *pend; + __u8 status = *((__u8 *) skb->data); - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + BT_DBG("%s status 0x%x", hdev->name, status); - switch (ocf) { - case OCF_INQUIRY_CANCEL: - case OCF_EXIT_PERIODIC_INQ: - status = *((__u8 *) skb->data); + if (status) + return; - if (status) { - BT_DBG("%s Inquiry cancel error: status 0x%x", hdev->name, status); - } else { - clear_bit(HCI_INQUIRY, &hdev->flags); - hci_req_complete(hdev, status); - } + clear_bit(HCI_INQUIRY, &hdev->flags); - hci_dev_lock(hdev); + hci_req_complete(hdev, status); - pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); - if (pend) - hci_acl_connect(pend); + hci_conn_check_pending(hdev); +} - hci_dev_unlock(hdev); +static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); - break; + BT_DBG("%s status 0x%x", hdev->name, status); - default: - BT_DBG("%s Command complete: ogf LINK_CTL ocf %x", hdev->name, ocf); - break; + if (status) + return; + + clear_bit(HCI_INQUIRY, &hdev->flags); + + hci_conn_check_pending(hdev); +} + +static void hci_cc_remote_name_req_cancel(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + +static void hci_cc_role_discovery(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_role_discovery *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) { + if (rp->role) + conn->link_mode &= ~HCI_LM_MASTER; + else + conn->link_mode |= HCI_LM_MASTER; } + + hci_dev_unlock(hdev); } -/* Command Complete OGF LINK_POLICY */ -static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) +static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_rp_write_link_policy *rp = (void *) skb->data; struct hci_conn *conn; - struct hci_rp_role_discovery *rd; - struct hci_rp_write_link_policy *lp; void *sent; - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + BT_DBG("%s status 0x%x", hdev->name, rp->status); - switch (ocf) { - case OCF_ROLE_DISCOVERY: - rd = (void *) skb->data; + if (rp->status) + return; - if (rd->status) - break; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LINK_POLICY); + if (!sent) + return; - hci_dev_lock(hdev); + hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rd->handle)); - if (conn) { - if (rd->role) - conn->link_mode &= ~HCI_LM_MASTER; - else - conn->link_mode |= HCI_LM_MASTER; - } + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) { + __le16 policy = get_unaligned((__le16 *) (sent + 2)); + conn->link_policy = __le16_to_cpu(policy); + } - hci_dev_unlock(hdev); - break; + hci_dev_unlock(hdev); +} - case OCF_WRITE_LINK_POLICY: - sent = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY); - if (!sent) - break; +static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); - lp = (struct hci_rp_write_link_policy *) skb->data; + BT_DBG("%s status 0x%x", hdev->name, status); - if (lp->status) - break; + hci_req_complete(hdev, status); +} - hci_dev_lock(hdev); +static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(lp->handle)); - if (conn) { - __le16 policy = get_unaligned((__le16 *) (sent + 2)); - conn->link_policy = __le16_to_cpu(policy); - } + BT_DBG("%s status 0x%x", hdev->name, status); - hci_dev_unlock(hdev); - break; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LOCAL_NAME); + if (!sent) + return; - default: - BT_DBG("%s: Command complete: ogf LINK_POLICY ocf %x", - hdev->name, ocf); - break; + if (!status) + memcpy(hdev->dev_name, sent, 248); +} + +static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_local_name *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + memcpy(hdev->dev_name, rp->name, 248); +} + +static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; + + BT_DBG("%s status 0x%x", hdev->name, status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_AUTH_ENABLE); + if (!sent) + return; + + if (!status) { + __u8 param = *((__u8 *) sent); + + if (param == AUTH_ENABLED) + set_bit(HCI_AUTH, &hdev->flags); + else + clear_bit(HCI_AUTH, &hdev->flags); } + + hci_req_complete(hdev, status); } -/* Command Complete OGF HOST_CTL */ -static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) +static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) { - __u8 status, param; - __u16 setting; - struct hci_rp_read_voice_setting *vs; + __u8 status = *((__u8 *) skb->data); void *sent; - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + BT_DBG("%s status 0x%x", hdev->name, status); - switch (ocf) { - case OCF_RESET: - status = *((__u8 *) skb->data); - hci_req_complete(hdev, status); - break; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_ENCRYPT_MODE); + if (!sent) + return; - case OCF_SET_EVENT_FLT: - status = *((__u8 *) skb->data); - if (status) { - BT_DBG("%s SET_EVENT_FLT failed %d", hdev->name, status); - } else { - BT_DBG("%s SET_EVENT_FLT succeseful", hdev->name); - } - break; + if (!status) { + __u8 param = *((__u8 *) sent); - case OCF_WRITE_AUTH_ENABLE: - sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_AUTH_ENABLE); - if (!sent) - break; + if (param) + set_bit(HCI_ENCRYPT, &hdev->flags); + else + clear_bit(HCI_ENCRYPT, &hdev->flags); + } - status = *((__u8 *) skb->data); - param = *((__u8 *) sent); + hci_req_complete(hdev, status); +} - if (!status) { - if (param == AUTH_ENABLED) - set_bit(HCI_AUTH, &hdev->flags); - else - clear_bit(HCI_AUTH, &hdev->flags); - } - hci_req_complete(hdev, status); - break; +static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; - case OCF_WRITE_ENCRYPT_MODE: - sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_ENCRYPT_MODE); - if (!sent) - break; + BT_DBG("%s status 0x%x", hdev->name, status); - status = *((__u8 *) skb->data); - param = *((__u8 *) sent); + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SCAN_ENABLE); + if (!sent) + return; - if (!status) { - if (param) - set_bit(HCI_ENCRYPT, &hdev->flags); - else - clear_bit(HCI_ENCRYPT, &hdev->flags); - } - hci_req_complete(hdev, status); - break; + if (!status) { + __u8 param = *((__u8 *) sent); - case OCF_WRITE_CA_TIMEOUT: - status = *((__u8 *) skb->data); - if (status) { - BT_DBG("%s OCF_WRITE_CA_TIMEOUT failed %d", hdev->name, status); - } else { - BT_DBG("%s OCF_WRITE_CA_TIMEOUT succeseful", hdev->name); - } - break; + clear_bit(HCI_PSCAN, &hdev->flags); + clear_bit(HCI_ISCAN, &hdev->flags); - case OCF_WRITE_PG_TIMEOUT: - status = *((__u8 *) skb->data); - if (status) { - BT_DBG("%s OCF_WRITE_PG_TIMEOUT failed %d", hdev->name, status); - } else { - BT_DBG("%s: OCF_WRITE_PG_TIMEOUT succeseful", hdev->name); - } - break; + if (param & SCAN_INQUIRY) + set_bit(HCI_ISCAN, &hdev->flags); - case OCF_WRITE_SCAN_ENABLE: - sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_SCAN_ENABLE); - if (!sent) - break; + if (param & SCAN_PAGE) + set_bit(HCI_PSCAN, &hdev->flags); + } - status = *((__u8 *) skb->data); - param = *((__u8 *) sent); + hci_req_complete(hdev, status); +} - BT_DBG("param 0x%x", param); +static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_class_of_dev *rp = (void *) skb->data; - if (!status) { - clear_bit(HCI_PSCAN, &hdev->flags); - clear_bit(HCI_ISCAN, &hdev->flags); - if (param & SCAN_INQUIRY) - set_bit(HCI_ISCAN, &hdev->flags); + BT_DBG("%s status 0x%x", hdev->name, rp->status); - if (param & SCAN_PAGE) - set_bit(HCI_PSCAN, &hdev->flags); - } - hci_req_complete(hdev, status); - break; + if (rp->status) + return; - case OCF_READ_VOICE_SETTING: - vs = (struct hci_rp_read_voice_setting *) skb->data; + memcpy(hdev->dev_class, rp->dev_class, 3); - if (vs->status) { - BT_DBG("%s READ_VOICE_SETTING failed %d", hdev->name, vs->status); - break; - } + BT_DBG("%s class 0x%.2x%.2x%.2x", hdev->name, + hdev->dev_class[2], hdev->dev_class[1], hdev->dev_class[0]); +} - setting = __le16_to_cpu(vs->voice_setting); +static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; - if (hdev->voice_setting != setting ) { - hdev->voice_setting = setting; + BT_DBG("%s status 0x%x", hdev->name, status); - BT_DBG("%s: voice setting 0x%04x", hdev->name, setting); + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_CLASS_OF_DEV); + if (!sent) + return; - if (hdev->notify) { - tasklet_disable(&hdev->tx_task); - hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); - tasklet_enable(&hdev->tx_task); - } - } - break; + if (!status) + memcpy(hdev->dev_class, sent, 3); +} - case OCF_WRITE_VOICE_SETTING: - sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_VOICE_SETTING); - if (!sent) - break; +static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_voice_setting *rp = (void *) skb->data; + __u16 setting; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + setting = __le16_to_cpu(rp->voice_setting); + + if (hdev->voice_setting == setting ) + return; + + hdev->voice_setting = setting; - status = *((__u8 *) skb->data); - setting = __le16_to_cpu(get_unaligned((__le16 *) sent)); + BT_DBG("%s voice setting 0x%04x", hdev->name, setting); - if (!status && hdev->voice_setting != setting) { + if (hdev->notify) { + tasklet_disable(&hdev->tx_task); + hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); + tasklet_enable(&hdev->tx_task); + } +} + +static void hci_cc_write_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; + + BT_DBG("%s status 0x%x", hdev->name, status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_VOICE_SETTING); + if (!sent) + return; + + if (!status) { + __u16 setting = __le16_to_cpu(get_unaligned((__le16 *) sent)); + + if (hdev->voice_setting != setting) { hdev->voice_setting = setting; - BT_DBG("%s: voice setting 0x%04x", hdev->name, setting); + BT_DBG("%s voice setting 0x%04x", hdev->name, setting); if (hdev->notify) { tasklet_disable(&hdev->tx_task); @@ -287,143 +326,153 @@ static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb tasklet_enable(&hdev->tx_task); } } - hci_req_complete(hdev, status); - break; - - case OCF_HOST_BUFFER_SIZE: - status = *((__u8 *) skb->data); - if (status) { - BT_DBG("%s OCF_BUFFER_SIZE failed %d", hdev->name, status); - hci_req_complete(hdev, status); - } - break; - - default: - BT_DBG("%s Command complete: ogf HOST_CTL ocf %x", hdev->name, ocf); - break; } } -/* Command Complete OGF INFO_PARAM */ -static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) +static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_loc_version *lv; - struct hci_rp_read_local_features *lf; - struct hci_rp_read_buffer_size *bs; - struct hci_rp_read_bd_addr *ba; + __u8 status = *((__u8 *) skb->data); - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + BT_DBG("%s status 0x%x", hdev->name, status); - switch (ocf) { - case OCF_READ_LOCAL_VERSION: - lv = (struct hci_rp_read_loc_version *) skb->data; + hci_req_complete(hdev, status); +} - if (lv->status) { - BT_DBG("%s READ_LOCAL_VERSION failed %d", hdev->name, lf->status); - break; - } +static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_local_version *rp = (void *) skb->data; - hdev->hci_ver = lv->hci_ver; - hdev->hci_rev = btohs(lv->hci_rev); - hdev->manufacturer = btohs(lv->manufacturer); + BT_DBG("%s status 0x%x", hdev->name, rp->status); - BT_DBG("%s: manufacturer %d hci_ver %d hci_rev %d", hdev->name, - hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); + if (rp->status) + return; - break; + hdev->hci_ver = rp->hci_ver; + hdev->hci_rev = btohs(rp->hci_rev); + hdev->manufacturer = btohs(rp->manufacturer); - case OCF_READ_LOCAL_FEATURES: - lf = (struct hci_rp_read_local_features *) skb->data; + BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name, + hdev->manufacturer, + hdev->hci_ver, hdev->hci_rev); +} - if (lf->status) { - BT_DBG("%s READ_LOCAL_FEATURES failed %d", hdev->name, lf->status); - break; - } +static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_local_commands *rp = (void *) skb->data; - memcpy(hdev->features, lf->features, sizeof(hdev->features)); + BT_DBG("%s status 0x%x", hdev->name, rp->status); - /* Adjust default settings according to features - * supported by device. */ - if (hdev->features[0] & LMP_3SLOT) - hdev->pkt_type |= (HCI_DM3 | HCI_DH3); + if (rp->status) + return; - if (hdev->features[0] & LMP_5SLOT) - hdev->pkt_type |= (HCI_DM5 | HCI_DH5); + memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); +} - if (hdev->features[1] & LMP_HV2) { - hdev->pkt_type |= (HCI_HV2); - hdev->esco_type |= (ESCO_HV2); - } +static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_local_features *rp = (void *) skb->data; - if (hdev->features[1] & LMP_HV3) { - hdev->pkt_type |= (HCI_HV3); - hdev->esco_type |= (ESCO_HV3); - } + BT_DBG("%s status 0x%x", hdev->name, rp->status); - if (hdev->features[3] & LMP_ESCO) - hdev->esco_type |= (ESCO_EV3); + if (rp->status) + return; - if (hdev->features[4] & LMP_EV4) - hdev->esco_type |= (ESCO_EV4); + memcpy(hdev->features, rp->features, 8); - if (hdev->features[4] & LMP_EV5) - hdev->esco_type |= (ESCO_EV5); + /* Adjust default settings according to features + * supported by device. */ - BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, - lf->features[0], lf->features[1], lf->features[2]); + if (hdev->features[0] & LMP_3SLOT) + hdev->pkt_type |= (HCI_DM3 | HCI_DH3); - break; + if (hdev->features[0] & LMP_5SLOT) + hdev->pkt_type |= (HCI_DM5 | HCI_DH5); - case OCF_READ_BUFFER_SIZE: - bs = (struct hci_rp_read_buffer_size *) skb->data; + if (hdev->features[1] & LMP_HV2) { + hdev->pkt_type |= (HCI_HV2); + hdev->esco_type |= (ESCO_HV2); + } - if (bs->status) { - BT_DBG("%s READ_BUFFER_SIZE failed %d", hdev->name, bs->status); - hci_req_complete(hdev, bs->status); - break; - } + if (hdev->features[1] & LMP_HV3) { + hdev->pkt_type |= (HCI_HV3); + hdev->esco_type |= (ESCO_HV3); + } - hdev->acl_mtu = __le16_to_cpu(bs->acl_mtu); - hdev->sco_mtu = bs->sco_mtu; - hdev->acl_pkts = __le16_to_cpu(bs->acl_max_pkt); - hdev->sco_pkts = __le16_to_cpu(bs->sco_max_pkt); + if (hdev->features[3] & LMP_ESCO) + hdev->esco_type |= (ESCO_EV3); - if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) { - hdev->sco_mtu = 64; - hdev->sco_pkts = 8; - } + if (hdev->features[4] & LMP_EV4) + hdev->esco_type |= (ESCO_EV4); - hdev->acl_cnt = hdev->acl_pkts; - hdev->sco_cnt = hdev->sco_pkts; + if (hdev->features[4] & LMP_EV5) + hdev->esco_type |= (ESCO_EV5); - BT_DBG("%s mtu: acl %d, sco %d max_pkt: acl %d, sco %d", hdev->name, - hdev->acl_mtu, hdev->sco_mtu, hdev->acl_pkts, hdev->sco_pkts); - break; + BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, + hdev->features[0], hdev->features[1], + hdev->features[2], hdev->features[3], + hdev->features[4], hdev->features[5], + hdev->features[6], hdev->features[7]); +} - case OCF_READ_BD_ADDR: - ba = (struct hci_rp_read_bd_addr *) skb->data; +static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_buffer_size *rp = (void *) skb->data; - if (!ba->status) { - bacpy(&hdev->bdaddr, &ba->bdaddr); - } else { - BT_DBG("%s: READ_BD_ADDR failed %d", hdev->name, ba->status); - } + BT_DBG("%s status 0x%x", hdev->name, rp->status); - hci_req_complete(hdev, ba->status); - break; + if (rp->status) + return; - default: - BT_DBG("%s Command complete: ogf INFO_PARAM ocf %x", hdev->name, ocf); - break; + hdev->acl_mtu = __le16_to_cpu(rp->acl_mtu); + hdev->sco_mtu = rp->sco_mtu; + hdev->acl_pkts = __le16_to_cpu(rp->acl_max_pkt); + hdev->sco_pkts = __le16_to_cpu(rp->sco_max_pkt); + + if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) { + hdev->sco_mtu = 64; + hdev->sco_pkts = 8; } + + hdev->acl_cnt = hdev->acl_pkts; + hdev->sco_cnt = hdev->sco_pkts; + + BT_DBG("%s acl mtu %d:%d sco mtu %d:%d", hdev->name, + hdev->acl_mtu, hdev->acl_pkts, + hdev->sco_mtu, hdev->sco_pkts); +} + +static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_bd_addr *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (!rp->status) + bacpy(&hdev->bdaddr, &rp->bdaddr); + + hci_req_complete(hdev, rp->status); +} + +static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) +{ + BT_DBG("%s status 0x%x", hdev->name, status); + + if (status) { + hci_req_complete(hdev, status); + + hci_conn_check_pending(hdev); + } else + set_bit(HCI_INQUIRY, &hdev->flags); } -/* Command Status OGF LINK_CTL */ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) { + struct hci_cp_create_conn *cp; struct hci_conn *conn; - struct hci_cp_create_conn *cp = hci_sent_cmd_data(hdev, OGF_LINK_CTL, OCF_CREATE_CONN); + BT_DBG("%s status 0x%x", hdev->name, status); + + cp = hci_sent_cmd_data(hdev, HCI_OP_CREATE_CONN); if (!cp) return; @@ -431,8 +480,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); - BT_DBG("%s status 0x%x bdaddr %s conn %p", hdev->name, - status, batostr(&cp->bdaddr), conn); + BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->bdaddr), conn); if (status) { if (conn && conn->state == BT_CONNECT) { @@ -457,234 +505,138 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) hci_dev_unlock(hdev); } -static void hci_cs_link_ctl(struct hci_dev *hdev, __u16 ocf, __u8 status) +static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) { - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + struct hci_cp_add_sco *cp; + struct hci_conn *acl, *sco; + __u16 handle; - switch (ocf) { - case OCF_CREATE_CONN: - hci_cs_create_conn(hdev, status); - break; - - case OCF_ADD_SCO: - if (status) { - struct hci_conn *acl, *sco; - struct hci_cp_add_sco *cp = hci_sent_cmd_data(hdev, OGF_LINK_CTL, OCF_ADD_SCO); - __u16 handle; - - if (!cp) - break; + BT_DBG("%s status 0x%x", hdev->name, status); - handle = __le16_to_cpu(cp->handle); - - BT_DBG("%s Add SCO error: handle %d status 0x%x", hdev->name, handle, status); + if (!status) + return; - hci_dev_lock(hdev); + cp = hci_sent_cmd_data(hdev, HCI_OP_ADD_SCO); + if (!cp) + return; - acl = hci_conn_hash_lookup_handle(hdev, handle); - if (acl && (sco = acl->link)) { - sco->state = BT_CLOSED; + handle = __le16_to_cpu(cp->handle); - hci_proto_connect_cfm(sco, status); - hci_conn_del(sco); - } + BT_DBG("%s handle %d", hdev->name, handle); - hci_dev_unlock(hdev); - } - break; + hci_dev_lock(hdev); - case OCF_INQUIRY: - if (status) { - BT_DBG("%s Inquiry error: status 0x%x", hdev->name, status); - hci_req_complete(hdev, status); - } else { - set_bit(HCI_INQUIRY, &hdev->flags); - } - break; + acl = hci_conn_hash_lookup_handle(hdev, handle); + if (acl && (sco = acl->link)) { + sco->state = BT_CLOSED; - default: - BT_DBG("%s Command status: ogf LINK_CTL ocf %x status %d", - hdev->name, ocf, status); - break; + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); } + + hci_dev_unlock(hdev); } -/* Command Status OGF LINK_POLICY */ -static void hci_cs_link_policy(struct hci_dev *hdev, __u16 ocf, __u8 status) +static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) { - BT_DBG("%s ocf 0x%x", hdev->name, ocf); - - switch (ocf) { - case OCF_SNIFF_MODE: - if (status) { - struct hci_conn *conn; - struct hci_cp_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_SNIFF_MODE); + BT_DBG("%s status 0x%x", hdev->name, status); +} - if (!cp) - break; +static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_setup_sync_conn *cp; + struct hci_conn *acl, *sco; + __u16 handle; - hci_dev_lock(hdev); + BT_DBG("%s status 0x%x", hdev->name, status); - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) { - clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); - } - - hci_dev_unlock(hdev); - } - break; + if (!status) + return; - case OCF_EXIT_SNIFF_MODE: - if (status) { - struct hci_conn *conn; - struct hci_cp_exit_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_EXIT_SNIFF_MODE); + cp = hci_sent_cmd_data(hdev, HCI_OP_SETUP_SYNC_CONN); + if (!cp) + return; - if (!cp) - break; + handle = __le16_to_cpu(cp->handle); - hci_dev_lock(hdev); + BT_DBG("%s handle %d", hdev->name, handle); - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) { - clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); - } + hci_dev_lock(hdev); - hci_dev_unlock(hdev); - } - break; + acl = hci_conn_hash_lookup_handle(hdev, handle); + if (acl && (sco = acl->link)) { + sco->state = BT_CLOSED; - default: - BT_DBG("%s Command status: ogf LINK_POLICY ocf %x", hdev->name, ocf); - break; + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); } -} -/* Command Status OGF HOST_CTL */ -static void hci_cs_host_ctl(struct hci_dev *hdev, __u16 ocf, __u8 status) -{ - BT_DBG("%s ocf 0x%x", hdev->name, ocf); - - switch (ocf) { - default: - BT_DBG("%s Command status: ogf HOST_CTL ocf %x", hdev->name, ocf); - break; - } + hci_dev_unlock(hdev); } -/* Command Status OGF INFO_PARAM */ -static void hci_cs_info_param(struct hci_dev *hdev, __u16 ocf, __u8 status) +static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status) { - BT_DBG("%s: hci_cs_info_param: ocf 0x%x", hdev->name, ocf); - - switch (ocf) { - default: - BT_DBG("%s Command status: ogf INFO_PARAM ocf %x", hdev->name, ocf); - break; - } -} + struct hci_cp_sniff_mode *cp; + struct hci_conn *conn; -/* Inquiry Complete */ -static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - struct hci_conn *pend; + BT_DBG("%s status 0x%x", hdev->name, status); - BT_DBG("%s status %d", hdev->name, status); + if (!status) + return; - clear_bit(HCI_INQUIRY, &hdev->flags); - hci_req_complete(hdev, status); + cp = hci_sent_cmd_data(hdev, HCI_OP_SNIFF_MODE); + if (!cp) + return; hci_dev_lock(hdev); - pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); - if (pend) - hci_acl_connect(pend); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) + clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); hci_dev_unlock(hdev); } -/* Inquiry Result */ -static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status) { - struct inquiry_data data; - struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1); - int num_rsp = *((__u8 *) skb->data); + struct hci_cp_exit_sniff_mode *cp; + struct hci_conn *conn; - BT_DBG("%s num_rsp %d", hdev->name, num_rsp); + BT_DBG("%s status 0x%x", hdev->name, status); - if (!num_rsp) + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_EXIT_SNIFF_MODE); + if (!cp) return; hci_dev_lock(hdev); - for (; num_rsp; num_rsp--) { - bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = info->pscan_mode; - memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = 0x00; - info++; - hci_inquiry_cache_update(hdev, &data); - } + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) + clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); hci_dev_unlock(hdev); } -/* Inquiry Result With RSSI */ -static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct inquiry_data data; - int num_rsp = *((__u8 *) skb->data); - - BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - - if (!num_rsp) - return; - - hci_dev_lock(hdev); + __u8 status = *((__u8 *) skb->data); - if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { - struct inquiry_info_with_rssi_and_pscan_mode *info = - (struct inquiry_info_with_rssi_and_pscan_mode *) (skb->data + 1); + BT_DBG("%s status %d", hdev->name, status); - for (; num_rsp; num_rsp--) { - bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = info->pscan_mode; - memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; - info++; - hci_inquiry_cache_update(hdev, &data); - } - } else { - struct inquiry_info_with_rssi *info = - (struct inquiry_info_with_rssi *) (skb->data + 1); + clear_bit(HCI_INQUIRY, &hdev->flags); - for (; num_rsp; num_rsp--) { - bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = 0x00; - memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; - info++; - hci_inquiry_cache_update(hdev, &data); - } - } + hci_req_complete(hdev, status); - hci_dev_unlock(hdev); + hci_conn_check_pending(hdev); } -/* Extended Inquiry Result */ -static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct inquiry_data data; - struct extended_inquiry_info *info = (struct extended_inquiry_info *) (skb->data + 1); + struct inquiry_info *info = (void *) (skb->data + 1); int num_rsp = *((__u8 *) skb->data); BT_DBG("%s num_rsp %d", hdev->name, num_rsp); @@ -696,12 +648,12 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct for (; num_rsp; num_rsp--) { bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = 0x00; + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = info->pscan_mode; memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; + data.clock_offset = info->clock_offset; + data.rssi = 0x00; info++; hci_inquiry_cache_update(hdev, &data); } @@ -709,70 +661,18 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } -/* Connect Request */ -static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_ev_conn_request *ev = (struct hci_ev_conn_request *) skb->data; - int mask = hdev->link_mode; - - BT_DBG("%s Connection request: %s type 0x%x", hdev->name, - batostr(&ev->bdaddr), ev->link_type); - - mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); - - if (mask & HCI_LM_ACCEPT) { - /* Connection accepted */ - struct hci_conn *conn; - struct hci_cp_accept_conn_req cp; - - hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) { - if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { - BT_ERR("No memmory for new connection"); - hci_dev_unlock(hdev); - return; - } - } - memcpy(conn->dev_class, ev->dev_class, 3); - conn->state = BT_CONNECT; - hci_dev_unlock(hdev); - - bacpy(&cp.bdaddr, &ev->bdaddr); - - if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) - cp.role = 0x00; /* Become master */ - else - cp.role = 0x01; /* Remain slave */ - - hci_send_cmd(hdev, OGF_LINK_CTL, - OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp); - } else { - /* Connection rejected */ - struct hci_cp_reject_conn_req cp; - - bacpy(&cp.bdaddr, &ev->bdaddr); - cp.reason = 0x0f; - hci_send_cmd(hdev, OGF_LINK_CTL, - OCF_REJECT_CONN_REQ, sizeof(cp), &cp); - } -} - -/* Connect Complete */ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_conn_complete *ev = (struct hci_ev_conn_complete *) skb->data; - struct hci_conn *conn, *pend; + struct hci_ev_conn_complete *ev = (void *) skb->data; + struct hci_conn *conn; BT_DBG("%s", hdev->name); hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) { - hci_dev_unlock(hdev); - return; - } + if (!conn) + goto unlock; if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); @@ -788,8 +688,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; cp.handle = ev->handle; - hci_send_cmd(hdev, OGF_LINK_CTL, - OCF_READ_REMOTE_FEATURES, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); } /* Set link policy */ @@ -797,8 +696,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s struct hci_cp_write_link_policy cp; cp.handle = ev->handle; cp.policy = cpu_to_le16(hdev->link_policy); - hci_send_cmd(hdev, OGF_LINK_POLICY, - OCF_WRITE_LINK_POLICY, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_WRITE_LINK_POLICY, sizeof(cp), &cp); } /* Set packet type for incoming connection */ @@ -809,8 +707,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK): cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); - hci_send_cmd(hdev, OGF_LINK_CTL, - OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } else { /* Update disconnect timer */ hci_conn_hold(conn); @@ -822,9 +719,12 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { struct hci_conn *sco = conn->link; if (sco) { - if (!ev->status) - hci_add_sco(sco, conn->handle); - else { + if (!ev->status) { + if (lmp_esco_capable(hdev)) + hci_setup_sync(sco, conn->handle); + else + hci_add_sco(sco, conn->handle); + } else { hci_proto_connect_cfm(sco, ev->status); hci_conn_del(sco); } @@ -835,136 +735,104 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (ev->status) hci_conn_del(conn); - pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); - if (pend) - hci_acl_connect(pend); - +unlock: hci_dev_unlock(hdev); -} - -/* Disconnect Complete */ -static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_ev_disconn_complete *ev = (struct hci_ev_disconn_complete *) skb->data; - struct hci_conn *conn; - - BT_DBG("%s status %d", hdev->name, ev->status); - - if (ev->status) - return; - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) { - conn->state = BT_CLOSED; - hci_proto_disconn_ind(conn, ev->reason); - hci_conn_del(conn); - } - - hci_dev_unlock(hdev); + hci_conn_check_pending(hdev); } -/* Number of completed packets */ -static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_num_comp_pkts *ev = (struct hci_ev_num_comp_pkts *) skb->data; - __le16 *ptr; - int i; - - skb_pull(skb, sizeof(*ev)); - - BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); + struct hci_ev_conn_request *ev = (void *) skb->data; + int mask = hdev->link_mode; - if (skb->len < ev->num_hndl * 4) { - BT_DBG("%s bad parameters", hdev->name); - return; - } + BT_DBG("%s bdaddr %s type 0x%x", hdev->name, + batostr(&ev->bdaddr), ev->link_type); - tasklet_disable(&hdev->tx_task); + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); - for (i = 0, ptr = (__le16 *) skb->data; i < ev->num_hndl; i++) { + if (mask & HCI_LM_ACCEPT) { + /* Connection accepted */ struct hci_conn *conn; - __u16 handle, count; - - handle = __le16_to_cpu(get_unaligned(ptr++)); - count = __le16_to_cpu(get_unaligned(ptr++)); - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (conn) { - conn->sent -= count; + hci_dev_lock(hdev); - if (conn->type == ACL_LINK) { - if ((hdev->acl_cnt += count) > hdev->acl_pkts) - hdev->acl_cnt = hdev->acl_pkts; - } else { - if ((hdev->sco_cnt += count) > hdev->sco_pkts) - hdev->sco_cnt = hdev->sco_pkts; + conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); + if (!conn) { + if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { + BT_ERR("No memmory for new connection"); + hci_dev_unlock(hdev); + return; } } - } - hci_sched_tx(hdev); - tasklet_enable(&hdev->tx_task); -} + memcpy(conn->dev_class, ev->dev_class, 3); + conn->state = BT_CONNECT; -/* Role Change */ -static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_ev_role_change *ev = (struct hci_ev_role_change *) skb->data; - struct hci_conn *conn; + hci_dev_unlock(hdev); - BT_DBG("%s status %d", hdev->name, ev->status); + if (ev->link_type == ACL_LINK || !lmp_esco_capable(hdev)) { + struct hci_cp_accept_conn_req cp; - hci_dev_lock(hdev); + bacpy(&cp.bdaddr, &ev->bdaddr); - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn) { - if (!ev->status) { - if (ev->role) - conn->link_mode &= ~HCI_LM_MASTER; + if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) + cp.role = 0x00; /* Become master */ else - conn->link_mode |= HCI_LM_MASTER; - } + cp.role = 0x01; /* Remain slave */ - clear_bit(HCI_CONN_RSWITCH_PEND, &conn->pend); + hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, + sizeof(cp), &cp); + } else { + struct hci_cp_accept_sync_conn_req cp; - hci_role_switch_cfm(conn, ev->status, ev->role); - } + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.pkt_type = cpu_to_le16(hdev->esco_type); - hci_dev_unlock(hdev); + cp.tx_bandwidth = cpu_to_le32(0x00001f40); + cp.rx_bandwidth = cpu_to_le32(0x00001f40); + cp.max_latency = cpu_to_le16(0xffff); + cp.content_format = cpu_to_le16(hdev->voice_setting); + cp.retrans_effort = 0xff; + + hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, + sizeof(cp), &cp); + } + } else { + /* Connection rejected */ + struct hci_cp_reject_conn_req cp; + + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.reason = 0x0f; + hci_send_cmd(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp); + } } -/* Mode Change */ -static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_mode_change *ev = (struct hci_ev_mode_change *) skb->data; + struct hci_ev_disconn_complete *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); + if (ev->status) + return; + hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { - conn->mode = ev->mode; - conn->interval = __le16_to_cpu(ev->interval); - - if (!test_and_clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { - if (conn->mode == HCI_CM_ACTIVE) - conn->power_save = 1; - else - conn->power_save = 0; - } + conn->state = BT_CLOSED; + hci_proto_disconn_ind(conn, ev->reason); + hci_conn_del(conn); } hci_dev_unlock(hdev); } -/* Authentication Complete */ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_auth_complete *ev = (struct hci_ev_auth_complete *) skb->data; + struct hci_ev_auth_complete *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -985,8 +853,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 1; - hci_send_cmd(conn->hdev, OGF_LINK_CTL, - OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, + HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); } else { clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); hci_encrypt_cfm(conn, ev->status, 0x00); @@ -997,10 +865,16 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_dev_unlock(hdev); } -/* Encryption Change */ +static inline void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); + + hci_conn_check_pending(hdev); +} + static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_encrypt_change *ev = (struct hci_ev_encrypt_change *) skb->data; + struct hci_ev_encrypt_change *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1024,10 +898,9 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff * hci_dev_unlock(hdev); } -/* Change Connection Link Key Complete */ -static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_change_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_change_conn_link_key_complete *ev = (struct hci_ev_change_conn_link_key_complete *) skb->data; + struct hci_ev_change_link_key_complete *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1047,25 +920,263 @@ static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, s hci_dev_unlock(hdev); } -/* Pin Code Request*/ -static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_remote_features *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + if (ev->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) + memcpy(conn->features, ev->features, 8); + + hci_dev_unlock(hdev); } -/* Link Key Request */ -static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb) { + BT_DBG("%s", hdev->name); } -/* Link Key Notification */ -static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_qos_setup_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { + BT_DBG("%s", hdev->name); } -/* Remote Features */ -static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_remote_features *ev = (struct hci_ev_remote_features *) skb->data; + struct hci_ev_cmd_complete *ev = (void *) skb->data; + __u16 opcode; + + skb_pull(skb, sizeof(*ev)); + + opcode = __le16_to_cpu(ev->opcode); + + switch (opcode) { + case HCI_OP_INQUIRY_CANCEL: + hci_cc_inquiry_cancel(hdev, skb); + break; + + case HCI_OP_EXIT_PERIODIC_INQ: + hci_cc_exit_periodic_inq(hdev, skb); + break; + + case HCI_OP_REMOTE_NAME_REQ_CANCEL: + hci_cc_remote_name_req_cancel(hdev, skb); + break; + + case HCI_OP_ROLE_DISCOVERY: + hci_cc_role_discovery(hdev, skb); + break; + + case HCI_OP_WRITE_LINK_POLICY: + hci_cc_write_link_policy(hdev, skb); + break; + + case HCI_OP_RESET: + hci_cc_reset(hdev, skb); + break; + + case HCI_OP_WRITE_LOCAL_NAME: + hci_cc_write_local_name(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_NAME: + hci_cc_read_local_name(hdev, skb); + break; + + case HCI_OP_WRITE_AUTH_ENABLE: + hci_cc_write_auth_enable(hdev, skb); + break; + + case HCI_OP_WRITE_ENCRYPT_MODE: + hci_cc_write_encrypt_mode(hdev, skb); + break; + + case HCI_OP_WRITE_SCAN_ENABLE: + hci_cc_write_scan_enable(hdev, skb); + break; + + case HCI_OP_READ_CLASS_OF_DEV: + hci_cc_read_class_of_dev(hdev, skb); + break; + + case HCI_OP_WRITE_CLASS_OF_DEV: + hci_cc_write_class_of_dev(hdev, skb); + break; + + case HCI_OP_READ_VOICE_SETTING: + hci_cc_read_voice_setting(hdev, skb); + break; + + case HCI_OP_WRITE_VOICE_SETTING: + hci_cc_write_voice_setting(hdev, skb); + break; + + case HCI_OP_HOST_BUFFER_SIZE: + hci_cc_host_buffer_size(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_VERSION: + hci_cc_read_local_version(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_COMMANDS: + hci_cc_read_local_commands(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_FEATURES: + hci_cc_read_local_features(hdev, skb); + break; + + case HCI_OP_READ_BUFFER_SIZE: + hci_cc_read_buffer_size(hdev, skb); + break; + + case HCI_OP_READ_BD_ADDR: + hci_cc_read_bd_addr(hdev, skb); + break; + + default: + BT_DBG("%s opcode 0x%x", hdev->name, opcode); + break; + } + + if (ev->ncmd) { + atomic_set(&hdev->cmd_cnt, 1); + if (!skb_queue_empty(&hdev->cmd_q)) + hci_sched_cmd(hdev); + } +} + +static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_cmd_status *ev = (void *) skb->data; + __u16 opcode; + + skb_pull(skb, sizeof(*ev)); + + opcode = __le16_to_cpu(ev->opcode); + + switch (opcode) { + case HCI_OP_INQUIRY: + hci_cs_inquiry(hdev, ev->status); + break; + + case HCI_OP_CREATE_CONN: + hci_cs_create_conn(hdev, ev->status); + break; + + case HCI_OP_ADD_SCO: + hci_cs_add_sco(hdev, ev->status); + break; + + case HCI_OP_REMOTE_NAME_REQ: + hci_cs_remote_name_req(hdev, ev->status); + break; + + case HCI_OP_SETUP_SYNC_CONN: + hci_cs_setup_sync_conn(hdev, ev->status); + break; + + case HCI_OP_SNIFF_MODE: + hci_cs_sniff_mode(hdev, ev->status); + break; + + case HCI_OP_EXIT_SNIFF_MODE: + hci_cs_exit_sniff_mode(hdev, ev->status); + break; + + default: + BT_DBG("%s opcode 0x%x", hdev->name, opcode); + break; + } + + if (ev->ncmd) { + atomic_set(&hdev->cmd_cnt, 1); + if (!skb_queue_empty(&hdev->cmd_q)) + hci_sched_cmd(hdev); + } +} + +static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_role_change *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + if (!ev->status) { + if (ev->role) + conn->link_mode &= ~HCI_LM_MASTER; + else + conn->link_mode |= HCI_LM_MASTER; + } + + clear_bit(HCI_CONN_RSWITCH_PEND, &conn->pend); + + hci_role_switch_cfm(conn, ev->status, ev->role); + } + + hci_dev_unlock(hdev); +} + +static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_num_comp_pkts *ev = (void *) skb->data; + __le16 *ptr; + int i; + + skb_pull(skb, sizeof(*ev)); + + BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); + + if (skb->len < ev->num_hndl * 4) { + BT_DBG("%s bad parameters", hdev->name); + return; + } + + tasklet_disable(&hdev->tx_task); + + for (i = 0, ptr = (__le16 *) skb->data; i < ev->num_hndl; i++) { + struct hci_conn *conn; + __u16 handle, count; + + handle = __le16_to_cpu(get_unaligned(ptr++)); + count = __le16_to_cpu(get_unaligned(ptr++)); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (conn) { + conn->sent -= count; + + if (conn->type == ACL_LINK) { + if ((hdev->acl_cnt += count) > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; + } else { + if ((hdev->sco_cnt += count) > hdev->sco_pkts) + hdev->sco_cnt = hdev->sco_pkts; + } + } + } + + hci_sched_tx(hdev); + + tasklet_enable(&hdev->tx_task); +} + +static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_mode_change *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1073,17 +1184,39 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn && !ev->status) { - memcpy(conn->features, ev->features, sizeof(conn->features)); + if (conn) { + conn->mode = ev->mode; + conn->interval = __le16_to_cpu(ev->interval); + + if (!test_and_clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + if (conn->mode == HCI_CM_ACTIVE) + conn->power_save = 1; + else + conn->power_save = 0; + } } hci_dev_unlock(hdev); } -/* Clock Offset */ +static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + +static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + +static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_clock_offset *ev = (struct hci_ev_clock_offset *) skb->data; + struct hci_ev_clock_offset *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1103,10 +1236,9 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk hci_dev_unlock(hdev); } -/* Page Scan Repetition Mode */ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_pscan_rep_mode *ev = (struct hci_ev_pscan_rep_mode *) skb->data; + struct hci_ev_pscan_rep_mode *ev = (void *) skb->data; struct inquiry_entry *ie; BT_DBG("%s", hdev->name); @@ -1121,10 +1253,91 @@ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff * hci_dev_unlock(hdev); } -/* Sniff Subrate */ +static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct inquiry_data data; + int num_rsp = *((__u8 *) skb->data); + + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); + + if (!num_rsp) + return; + + hci_dev_lock(hdev); + + if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { + struct inquiry_info_with_rssi_and_pscan_mode *info = (void *) (skb->data + 1); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = info->pscan_mode; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + } else { + struct inquiry_info_with_rssi *info = (void *) (skb->data + 1); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + } + + hci_dev_unlock(hdev); +} + +static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + +static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_sync_conn_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); + if (!conn) + goto unlock; + + if (!ev->status) { + conn->handle = __le16_to_cpu(ev->handle); + conn->state = BT_CONNECTED; + } else + conn->state = BT_CLOSED; + + hci_proto_connect_cfm(conn, ev->status); + if (ev->status) + hci_conn_del(conn); + +unlock: + hci_dev_unlock(hdev); +} + +static inline void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_sniff_subrate *ev = (struct hci_ev_sniff_subrate *) skb->data; + struct hci_ev_sniff_subrate *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1138,22 +1351,42 @@ static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *s hci_dev_unlock(hdev); } -void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data; - struct hci_ev_cmd_complete *ec; - struct hci_ev_cmd_status *cs; - u16 opcode, ocf, ogf; + struct inquiry_data data; + struct extended_inquiry_info *info = (void *) (skb->data + 1); + int num_rsp = *((__u8 *) skb->data); - skb_pull(skb, HCI_EVENT_HDR_SIZE); + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - BT_DBG("%s evt 0x%x", hdev->name, hdr->evt); + if (!num_rsp) + return; - switch (hdr->evt) { - case HCI_EV_NUM_COMP_PKTS: - hci_num_comp_pkts_evt(hdev, skb); - break; + hci_dev_lock(hdev); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + hci_dev_unlock(hdev); +} + +void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_event_hdr *hdr = (void *) skb->data; + __u8 event = hdr->evt; + + skb_pull(skb, HCI_EVENT_HDR_SIZE); + + switch (event) { case HCI_EV_INQUIRY_COMPLETE: hci_inquiry_complete_evt(hdev, skb); break; @@ -1162,44 +1395,64 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_inquiry_result_evt(hdev, skb); break; - case HCI_EV_INQUIRY_RESULT_WITH_RSSI: - hci_inquiry_result_with_rssi_evt(hdev, skb); - break; - - case HCI_EV_EXTENDED_INQUIRY_RESULT: - hci_extended_inquiry_result_evt(hdev, skb); + case HCI_EV_CONN_COMPLETE: + hci_conn_complete_evt(hdev, skb); break; case HCI_EV_CONN_REQUEST: hci_conn_request_evt(hdev, skb); break; - case HCI_EV_CONN_COMPLETE: - hci_conn_complete_evt(hdev, skb); - break; - case HCI_EV_DISCONN_COMPLETE: hci_disconn_complete_evt(hdev, skb); break; - case HCI_EV_ROLE_CHANGE: - hci_role_change_evt(hdev, skb); - break; - - case HCI_EV_MODE_CHANGE: - hci_mode_change_evt(hdev, skb); - break; - case HCI_EV_AUTH_COMPLETE: hci_auth_complete_evt(hdev, skb); break; + case HCI_EV_REMOTE_NAME: + hci_remote_name_evt(hdev, skb); + break; + case HCI_EV_ENCRYPT_CHANGE: hci_encrypt_change_evt(hdev, skb); break; - case HCI_EV_CHANGE_CONN_LINK_KEY_COMPLETE: - hci_change_conn_link_key_complete_evt(hdev, skb); + case HCI_EV_CHANGE_LINK_KEY_COMPLETE: + hci_change_link_key_complete_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_FEATURES: + hci_remote_features_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_VERSION: + hci_remote_version_evt(hdev, skb); + break; + + case HCI_EV_QOS_SETUP_COMPLETE: + hci_qos_setup_complete_evt(hdev, skb); + break; + + case HCI_EV_CMD_COMPLETE: + hci_cmd_complete_evt(hdev, skb); + break; + + case HCI_EV_CMD_STATUS: + hci_cmd_status_evt(hdev, skb); + break; + + case HCI_EV_ROLE_CHANGE: + hci_role_change_evt(hdev, skb); + break; + + case HCI_EV_NUM_COMP_PKTS: + hci_num_comp_pkts_evt(hdev, skb); + break; + + case HCI_EV_MODE_CHANGE: + hci_mode_change_evt(hdev, skb); break; case HCI_EV_PIN_CODE_REQ: @@ -1214,10 +1467,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_link_key_notify_evt(hdev, skb); break; - case HCI_EV_REMOTE_FEATURES: - hci_remote_features_evt(hdev, skb); - break; - case HCI_EV_CLOCK_OFFSET: hci_clock_offset_evt(hdev, skb); break; @@ -1226,82 +1475,32 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_pscan_rep_mode_evt(hdev, skb); break; - case HCI_EV_SNIFF_SUBRATE: - hci_sniff_subrate_evt(hdev, skb); + case HCI_EV_INQUIRY_RESULT_WITH_RSSI: + hci_inquiry_result_with_rssi_evt(hdev, skb); break; - case HCI_EV_CMD_STATUS: - cs = (struct hci_ev_cmd_status *) skb->data; - skb_pull(skb, sizeof(cs)); - - opcode = __le16_to_cpu(cs->opcode); - ogf = hci_opcode_ogf(opcode); - ocf = hci_opcode_ocf(opcode); - - switch (ogf) { - case OGF_INFO_PARAM: - hci_cs_info_param(hdev, ocf, cs->status); - break; - - case OGF_HOST_CTL: - hci_cs_host_ctl(hdev, ocf, cs->status); - break; - - case OGF_LINK_CTL: - hci_cs_link_ctl(hdev, ocf, cs->status); - break; - - case OGF_LINK_POLICY: - hci_cs_link_policy(hdev, ocf, cs->status); - break; - - default: - BT_DBG("%s Command Status OGF %x", hdev->name, ogf); - break; - } - - if (cs->ncmd) { - atomic_set(&hdev->cmd_cnt, 1); - if (!skb_queue_empty(&hdev->cmd_q)) - hci_sched_cmd(hdev); - } + case HCI_EV_REMOTE_EXT_FEATURES: + hci_remote_ext_features_evt(hdev, skb); break; - case HCI_EV_CMD_COMPLETE: - ec = (struct hci_ev_cmd_complete *) skb->data; - skb_pull(skb, sizeof(*ec)); - - opcode = __le16_to_cpu(ec->opcode); - ogf = hci_opcode_ogf(opcode); - ocf = hci_opcode_ocf(opcode); - - switch (ogf) { - case OGF_INFO_PARAM: - hci_cc_info_param(hdev, ocf, skb); - break; - - case OGF_HOST_CTL: - hci_cc_host_ctl(hdev, ocf, skb); - break; + case HCI_EV_SYNC_CONN_COMPLETE: + hci_sync_conn_complete_evt(hdev, skb); + break; - case OGF_LINK_CTL: - hci_cc_link_ctl(hdev, ocf, skb); - break; + case HCI_EV_SYNC_CONN_CHANGED: + hci_sync_conn_changed_evt(hdev, skb); + break; - case OGF_LINK_POLICY: - hci_cc_link_policy(hdev, ocf, skb); - break; + case HCI_EV_SNIFF_SUBRATE: + hci_sniff_subrate_evt(hdev, skb); + break; - default: - BT_DBG("%s Command Completed OGF %x", hdev->name, ogf); - break; - } + case HCI_EV_EXTENDED_INQUIRY_RESULT: + hci_extended_inquiry_result_evt(hdev, skb); + break; - if (ec->ncmd) { - atomic_set(&hdev->cmd_cnt, 1); - if (!skb_queue_empty(&hdev->cmd_q)) - hci_sched_cmd(hdev); - } + default: + BT_DBG("%s event 0x%x", hdev->name, event); break; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 5ccea5fbd23..8825102c517 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -451,7 +451,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } - if (test_bit(HCI_RAW, &hdev->flags) || (ogf == OGF_VENDOR_CMD)) { + if (test_bit(HCI_RAW, &hdev->flags) || (ogf == 0x3f)) { skb_queue_tail(&hdev->raw_q, skb); hci_sched_tx(hdev); } else { @@ -634,7 +634,7 @@ static struct proto hci_sk_proto = { .obj_size = sizeof(struct hci_pinfo) }; -static int hci_sock_create(struct socket *sock, int protocol) +static int hci_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -645,7 +645,7 @@ static int hci_sock_create(struct socket *sock, int protocol) sock->ops = &hci_sock_ops; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 25835403d65..cef1e3e1881 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -41,6 +41,26 @@ static ssize_t show_type(struct device *dev, struct device_attribute *attr, char return sprintf(buf, "%s\n", typetostr(hdev->type)); } +static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + char name[249]; + int i; + + for (i = 0; i < 248; i++) + name[i] = hdev->dev_name[i]; + + name[248] = '\0'; + return sprintf(buf, "%s\n", name); +} + +static ssize_t show_class(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "0x%.2x%.2x%.2x\n", + hdev->dev_class[2], hdev->dev_class[1], hdev->dev_class[0]); +} + static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -49,6 +69,17 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr, c return sprintf(buf, "%s\n", batostr(&bdaddr)); } +static ssize_t show_features(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", + hdev->features[0], hdev->features[1], + hdev->features[2], hdev->features[3], + hdev->features[4], hdev->features[5], + hdev->features[6], hdev->features[7]); +} + static ssize_t show_manufacturer(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -170,7 +201,10 @@ static ssize_t store_sniff_min_interval(struct device *dev, struct device_attrib } static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); +static DEVICE_ATTR(class, S_IRUGO, show_class, NULL); static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static DEVICE_ATTR(features, S_IRUGO, show_features, NULL); static DEVICE_ATTR(manufacturer, S_IRUGO, show_manufacturer, NULL); static DEVICE_ATTR(hci_version, S_IRUGO, show_hci_version, NULL); static DEVICE_ATTR(hci_revision, S_IRUGO, show_hci_revision, NULL); @@ -185,7 +219,10 @@ static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, static struct device_attribute *bt_attrs[] = { &dev_attr_type, + &dev_attr_name, + &dev_attr_class, &dev_attr_address, + &dev_attr_features, &dev_attr_manufacturer, &dev_attr_hci_version, &dev_attr_hci_revision, diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 64d89ca2884..4bbacddeb49 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -247,7 +247,7 @@ static inline int hidp_queue_report(struct hidp_session *session, unsigned char { struct sk_buff *skb; - BT_DBG("session %p hid %p data %p size %d", session, device, data, size); + BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size); if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { BT_ERR("Can't allocate memory for new frame"); @@ -625,7 +625,7 @@ static struct device *hidp_get_device(struct hidp_session *session) return conn ? &conn->dev : NULL; } -static inline void hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) +static inline int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) { struct input_dev *input = session->input; int i; @@ -656,18 +656,20 @@ static inline void hidp_setup_input(struct hidp_session *session, struct hidp_co } if (req->subclass & 0x80) { - input->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); - input->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE); - input->relbit[0] = BIT(REL_X) | BIT(REL_Y); - input->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA); - input->relbit[0] |= BIT(REL_WHEEL); + input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL); + input->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | + BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE); + input->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y); + input->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) | + BIT_MASK(BTN_EXTRA); + input->relbit[0] |= BIT_MASK(REL_WHEEL); } input->dev.parent = hidp_get_device(session); input->event = hidp_input_event; - input_register_device(input); + return input_register_device(input); } static int hidp_open(struct hid_device *hid) @@ -820,8 +822,11 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; - if (session->input) - hidp_setup_input(session, req); + if (session->input) { + err = hidp_setup_input(session, req); + if (err < 0) + goto failed; + } if (session->hid) hidp_setup_hid(session, req); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 0c185257e55..1de2b6fbcac 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -246,7 +246,7 @@ static struct proto hidp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int hidp_sock_create(struct socket *sock, int protocol) +static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -255,7 +255,7 @@ static int hidp_sock_create(struct socket *sock, int protocol) if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index c4e4ce4ebb2..6fbbae78b30 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -55,7 +55,9 @@ #define BT_DBG(D...) #endif -#define VERSION "2.8" +#define VERSION "2.9" + +static u32 l2cap_feat_mask = 0x0000; static const struct proto_ops l2cap_sock_ops; @@ -258,7 +260,119 @@ static void l2cap_chan_del(struct sock *sk, int err) sk->sk_state_change(sk); } +static inline u8 l2cap_get_ident(struct l2cap_conn *conn) +{ + u8 id; + + /* Get next available identificator. + * 1 - 128 are used by kernel. + * 129 - 199 are reserved. + * 200 - 254 are used by utilities like l2ping, etc. + */ + + spin_lock_bh(&conn->lock); + + if (++conn->tx_ident > 128) + conn->tx_ident = 1; + + id = conn->tx_ident; + + spin_unlock_bh(&conn->lock); + + return id; +} + +static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) +{ + struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); + + BT_DBG("code 0x%2.2x", code); + + if (!skb) + return -ENOMEM; + + return hci_send_acl(conn->hcon, skb, 0); +} + /* ---- L2CAP connections ---- */ +static void l2cap_conn_start(struct l2cap_conn *conn) +{ + struct l2cap_chan_list *l = &conn->chan_list; + struct sock *sk; + + BT_DBG("conn %p", conn); + + read_lock(&l->lock); + + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + bh_lock_sock(sk); + + if (sk->sk_type != SOCK_SEQPACKET) { + l2cap_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } else if (sk->sk_state == BT_CONNECT) { + struct l2cap_conn_req req; + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } + + bh_unlock_sock(sk); + } + + read_unlock(&l->lock); +} + +static void l2cap_conn_ready(struct l2cap_conn *conn) +{ + BT_DBG("conn %p", conn); + + if (conn->chan_list.head || !hlist_empty(&l2cap_sk_list.head)) { + struct l2cap_info_req req; + + req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; + conn->info_ident = l2cap_get_ident(conn); + + mod_timer(&conn->info_timer, + jiffies + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(req), &req); + } +} + +/* Notify sockets that we cannot guaranty reliability anymore */ +static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) +{ + struct l2cap_chan_list *l = &conn->chan_list; + struct sock *sk; + + BT_DBG("conn %p", conn); + + read_lock(&l->lock); + + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE) + sk->sk_err = err; + } + + read_unlock(&l->lock); +} + +static void l2cap_info_timeout(unsigned long arg) +{ + struct l2cap_conn *conn = (void *) arg; + + conn->info_ident = 0; + + l2cap_conn_start(conn); +} + static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -279,6 +393,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) conn->src = &hcon->hdev->bdaddr; conn->dst = &hcon->dst; + conn->feat_mask = 0; + + init_timer(&conn->info_timer); + conn->info_timer.function = l2cap_info_timeout; + conn->info_timer.data = (unsigned long) conn; + spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); @@ -318,40 +438,6 @@ static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, stru write_unlock_bh(&l->lock); } -static inline u8 l2cap_get_ident(struct l2cap_conn *conn) -{ - u8 id; - - /* Get next available identificator. - * 1 - 128 are used by kernel. - * 129 - 199 are reserved. - * 200 - 254 are used by utilities like l2ping, etc. - */ - - spin_lock_bh(&conn->lock); - - if (++conn->tx_ident > 128) - conn->tx_ident = 1; - - id = conn->tx_ident; - - spin_unlock_bh(&conn->lock); - - return id; -} - -static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) -{ - struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); - - BT_DBG("code 0x%2.2x", code); - - if (!skb) - return -ENOMEM; - - return hci_send_acl(conn->hcon, skb, 0); -} - /* ---- Socket interface ---- */ static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) { @@ -508,7 +594,6 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) /* Default config options */ pi->conf_len = 0; - pi->conf_mtu = L2CAP_DEFAULT_MTU; pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; } @@ -518,11 +603,11 @@ static struct proto l2cap_proto = { .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1); if (!sk) return NULL; @@ -530,7 +615,7 @@ static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) INIT_LIST_HEAD(&bt_sk(sk)->accept_q); sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT; + sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); sock_reset_flag(sk, SOCK_ZAPPED); @@ -543,7 +628,7 @@ static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) return sk; } -static int l2cap_sock_create(struct socket *sock, int protocol) +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -560,7 +645,7 @@ static int l2cap_sock_create(struct socket *sock, int protocol) sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -650,6 +735,11 @@ static int l2cap_do_connect(struct sock *sk) l2cap_sock_set_timer(sk, sk->sk_sndtimeo); if (hcon->state == BT_CONNECTED) { + if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)) { + l2cap_conn_ready(conn); + goto done; + } + if (sk->sk_type == SOCK_SEQPACKET) { struct l2cap_conn_req req; l2cap_pi(sk)->ident = l2cap_get_ident(conn); @@ -958,7 +1048,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch opts.imtu = l2cap_pi(sk)->imtu; opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = 0x00; + opts.mode = L2CAP_MODE_BASIC; len = min_t(unsigned int, sizeof(opts), optlen); if (copy_from_user((char *) &opts, optval, len)) { @@ -1007,7 +1097,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch opts.imtu = l2cap_pi(sk)->imtu; opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = 0x00; + opts.mode = L2CAP_MODE_BASIC; len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) @@ -1084,52 +1174,6 @@ static int l2cap_sock_release(struct socket *sock) return err; } -static void l2cap_conn_ready(struct l2cap_conn *conn) -{ - struct l2cap_chan_list *l = &conn->chan_list; - struct sock *sk; - - BT_DBG("conn %p", conn); - - read_lock(&l->lock); - - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - bh_lock_sock(sk); - - if (sk->sk_type != SOCK_SEQPACKET) { - l2cap_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); - } else if (sk->sk_state == BT_CONNECT) { - struct l2cap_conn_req req; - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); - } - - bh_unlock_sock(sk); - } - - read_unlock(&l->lock); -} - -/* Notify sockets that we cannot guaranty reliability anymore */ -static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) -{ - struct l2cap_chan_list *l = &conn->chan_list; - struct sock *sk; - - BT_DBG("conn %p", conn); - - read_lock(&l->lock); - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE) - sk->sk_err = err; - } - read_unlock(&l->lock); -} - static void l2cap_chan_ready(struct sock *sk) { struct sock *parent = bt_sk(sk)->parent; @@ -1256,11 +1300,11 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned break; case 2: - *val = __le16_to_cpu(*((__le16 *)opt->val)); + *val = __le16_to_cpu(*((__le16 *) opt->val)); break; case 4: - *val = __le32_to_cpu(*((__le32 *)opt->val)); + *val = __le32_to_cpu(*((__le32 *) opt->val)); break; default: @@ -1332,6 +1376,8 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) int len = pi->conf_len; int type, hint, olen; unsigned long val; + struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; + u16 mtu = L2CAP_DEFAULT_MTU; u16 result = L2CAP_CONF_SUCCESS; BT_DBG("sk %p", sk); @@ -1344,7 +1390,7 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) switch (type) { case L2CAP_CONF_MTU: - pi->conf_mtu = val; + mtu = val; break; case L2CAP_CONF_FLUSH_TO: @@ -1354,6 +1400,11 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) case L2CAP_CONF_QOS: break; + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *) val, olen); + break; + default: if (hint) break; @@ -1368,12 +1419,24 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) /* Configure output options and let the other side know * which ones we don't like. */ - if (pi->conf_mtu < pi->omtu) + if (rfc.mode == L2CAP_MODE_BASIC) { + if (mtu < pi->omtu) + result = L2CAP_CONF_UNACCEPT; + else { + pi->omtu = mtu; + pi->conf_state |= L2CAP_CONF_OUTPUT_DONE; + } + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); + } else { result = L2CAP_CONF_UNACCEPT; - else - pi->omtu = pi->conf_mtu; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); + memset(&rfc, 0, sizeof(rfc)); + rfc.mode = L2CAP_MODE_BASIC; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + } } rsp->scid = cpu_to_le16(pi->dcid); @@ -1397,6 +1460,23 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 fla return ptr - data; } +static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data; + + if (rej->reason != 0x0000) + return 0; + + if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && + cmd->ident == conn->info_ident) { + conn->info_ident = 0; + del_timer(&conn->info_timer); + l2cap_conn_start(conn); + } + + return 0; +} + static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) { struct l2cap_chan_list *list = &conn->chan_list; @@ -1425,7 +1505,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto response; } - sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC); + sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC); if (!sk) goto response; @@ -1577,16 +1657,19 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - /* Output config done. */ - l2cap_pi(sk)->conf_state |= L2CAP_CONF_OUTPUT_DONE; - /* Reset config buffer. */ l2cap_pi(sk)->conf_len = 0; + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE)) + goto unlock; + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { sk->sk_state = BT_CONNECTED; l2cap_chan_ready(sk); - } else if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { + goto unlock; + } + + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { u8 req[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(sk, req), req); @@ -1646,7 +1729,6 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (flags & 0x01) goto done; - /* Input config done */ l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE; if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) { @@ -1711,16 +1793,27 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) { struct l2cap_info_req *req = (struct l2cap_info_req *) data; - struct l2cap_info_rsp rsp; u16 type; type = __le16_to_cpu(req->type); BT_DBG("type 0x%4.4x", type); - rsp.type = cpu_to_le16(type); - rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP); - l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp); + if (type == L2CAP_IT_FEAT_MASK) { + u8 buf[8]; + struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; + rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); + put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data); + l2cap_send_cmd(conn, cmd->ident, + L2CAP_INFO_RSP, sizeof(buf), buf); + } else { + struct l2cap_info_rsp rsp; + rsp.type = cpu_to_le16(type); + rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP); + l2cap_send_cmd(conn, cmd->ident, + L2CAP_INFO_RSP, sizeof(rsp), &rsp); + } return 0; } @@ -1735,6 +1828,15 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); + conn->info_ident = 0; + + del_timer(&conn->info_timer); + + if (type == L2CAP_IT_FEAT_MASK) + conn->feat_mask = __le32_to_cpu(get_unaligned((__le32 *) rsp->data)); + + l2cap_conn_start(conn); + return 0; } @@ -1764,7 +1866,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk switch (cmd.code) { case L2CAP_COMMAND_REJ: - /* FIXME: We should process this */ + l2cap_command_rej(conn, &cmd, data); break; case L2CAP_CONN_REQ: diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index bb7220770f2..e7ac6ba7eca 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -33,11 +33,11 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/init.h> -#include <linux/freezer.h> #include <linux/wait.h> #include <linux/device.h> #include <linux/net.h> #include <linux/mutex.h> +#include <linux/kthread.h> #include <net/sock.h> #include <asm/uaccess.h> @@ -68,7 +68,6 @@ static DEFINE_MUTEX(rfcomm_mutex); static unsigned long rfcomm_event; static LIST_HEAD(session_list); -static atomic_t terminate, running; static int rfcomm_send_frame(struct rfcomm_session *s, u8 *data, int len); static int rfcomm_send_sabm(struct rfcomm_session *s, u8 dlci); @@ -1850,26 +1849,6 @@ static inline void rfcomm_process_sessions(void) rfcomm_unlock(); } -static void rfcomm_worker(void) -{ - BT_DBG(""); - - while (!atomic_read(&terminate)) { - set_current_state(TASK_INTERRUPTIBLE); - if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) { - /* No pending events. Let's sleep. - * Incoming connections and data will wake us up. */ - schedule(); - } - set_current_state(TASK_RUNNING); - - /* Process stuff */ - clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); - rfcomm_process_sessions(); - } - return; -} - static int rfcomm_add_listener(bdaddr_t *ba) { struct sockaddr_l2 addr; @@ -1935,22 +1914,28 @@ static void rfcomm_kill_listener(void) static int rfcomm_run(void *unused) { - rfcomm_thread = current; - - atomic_inc(&running); + BT_DBG(""); - daemonize("krfcommd"); set_user_nice(current, -10); - BT_DBG(""); - rfcomm_add_listener(BDADDR_ANY); - rfcomm_worker(); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) { + /* No pending events. Let's sleep. + * Incoming connections and data will wake us up. */ + schedule(); + } + set_current_state(TASK_RUNNING); + + /* Process stuff */ + clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); + rfcomm_process_sessions(); + } rfcomm_kill_listener(); - atomic_dec(&running); return 0; } @@ -2059,7 +2044,11 @@ static int __init rfcomm_init(void) hci_register_cb(&rfcomm_cb); - kernel_thread(rfcomm_run, NULL, CLONE_KERNEL); + rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd"); + if (IS_ERR(rfcomm_thread)) { + hci_unregister_cb(&rfcomm_cb); + return PTR_ERR(rfcomm_thread); + } if (class_create_file(bt_class, &class_attr_rfcomm_dlc) < 0) BT_ERR("Failed to create RFCOMM info file"); @@ -2081,14 +2070,7 @@ static void __exit rfcomm_exit(void) hci_unregister_cb(&rfcomm_cb); - /* Terminate working thread. - * ie. Set terminate flag and wake it up */ - atomic_inc(&terminate); - rfcomm_schedule(RFCOMM_SCHED_STATE); - - /* Wait until thread is running */ - while (atomic_read(&running)) - schedule(); + kthread_stop(rfcomm_thread); #ifdef CONFIG_BT_RFCOMM_TTY rfcomm_cleanup_ttys(); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 30586ab9e87..266b6972667 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -282,12 +282,12 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1); if (!sk) return NULL; @@ -323,7 +323,7 @@ static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio return sk; } -static int rfcomm_sock_create(struct socket *sock, int protocol) +static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -336,7 +336,7 @@ static int rfcomm_sock_create(struct socket *sock, int protocol) sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -868,7 +868,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * goto done; } - sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC); if (!sk) goto done; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 22a832098d4..e447651a2db 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -189,6 +189,23 @@ static struct device *rfcomm_get_device(struct rfcomm_dev *dev) return conn ? &conn->dev : NULL; } +static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf) +{ + struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); + bdaddr_t bdaddr; + baswap(&bdaddr, &dev->dst); + return sprintf(buf, "%s\n", batostr(&bdaddr)); +} + +static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr, char *buf) +{ + struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); + return sprintf(buf, "%d\n", dev->channel); +} + +static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL); + static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) { struct rfcomm_dev *dev; @@ -281,6 +298,14 @@ out: return err; } + dev_set_drvdata(dev->tty_dev, dev); + + if (device_create_file(dev->tty_dev, &dev_attr_address) < 0) + BT_ERR("Failed to create address attribute"); + + if (device_create_file(dev->tty_dev, &dev_attr_channel) < 0) + BT_ERR("Failed to create channel attribute"); + return dev->id; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3f5163e725e..82d0dfdfa7e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -189,7 +189,7 @@ static int sco_connect(struct sock *sk) struct sco_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; - int err = 0; + int err, type; BT_DBG("%s -> %s", batostr(src), batostr(dst)); @@ -200,7 +200,9 @@ static int sco_connect(struct sock *sk) err = -ENOMEM; - hcon = hci_connect(hdev, SCO_LINK, dst); + type = lmp_esco_capable(hdev) ? ESCO_LINK : SCO_LINK; + + hcon = hci_connect(hdev, type, dst); if (!hcon) goto done; @@ -224,6 +226,7 @@ static int sco_connect(struct sock *sk) sk->sk_state = BT_CONNECT; sco_sock_set_timer(sk, sk->sk_sndtimeo); } + done: hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -414,11 +417,11 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1); if (!sk) return NULL; @@ -439,7 +442,7 @@ static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) return sk; } -static int sco_sock_create(struct socket *sock, int protocol) +static int sco_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -452,7 +455,7 @@ static int sco_sock_create(struct socket *sock, int protocol) sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -807,7 +810,7 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); - sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC); + sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); goto done; @@ -846,7 +849,7 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) { BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); - if (hcon->type != SCO_LINK) + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) return 0; if (!status) { @@ -865,10 +868,11 @@ static int sco_disconn_ind(struct hci_conn *hcon, __u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); - if (hcon->type != SCO_LINK) + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) return 0; sco_conn_del(hcon, bt_err(reason)); + return 0; } diff --git a/net/bridge/br.c b/net/bridge/br.c index 848b8fa8bed..93867bb6cc9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -23,7 +23,7 @@ #include "br_private.h" -int (*br_should_route_hook) (struct sk_buff **pskb) = NULL; +int (*br_should_route_hook)(struct sk_buff *skb); static struct llc_sap *br_stp_sap; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 99292e8e1d0..c07bac5e3e1 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -150,11 +150,8 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) static struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, - .get_sg = ethtool_op_get_sg, .set_sg = br_set_sg, - .get_tx_csum = ethtool_op_get_tx_csum, .set_tx_csum = br_set_tx_csum, - .get_tso = ethtool_op_get_tso, .set_tso = br_set_tso, }; @@ -171,7 +168,6 @@ void br_dev_setup(struct net_device *dev) dev->set_multicast_list = br_dev_set_multicast_list; dev->change_mtu = br_change_mtu; dev->destructor = free_netdev; - SET_MODULE_OWNER(dev); SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->stop = br_dev_stop; dev->tx_queue_len = 0; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 9272f12f664..935784f736b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -303,7 +303,7 @@ int br_del_bridge(const char *name) int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(&init_net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -444,7 +444,7 @@ void __exit br_cleanup_bridges(void) struct net_device *dev, *nxt; rtnl_lock(); - for_each_netdev_safe(dev, nxt) + for_each_netdev_safe(&init_net, dev, nxt) if (dev->priv_flags & IFF_EBRIDGE) del_br(dev->priv); rtnl_unlock(); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 3a8a015c92e..3cedd4eeeed 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -126,6 +126,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NULL; + if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) @@ -145,7 +149,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) case BR_STATE_FORWARDING: if (br_should_route_hook) { - if (br_should_route_hook(&skb)) + if (br_should_route_hook(skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index bb15e9e259b..0655a5f07f5 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -18,6 +18,7 @@ #include <linux/if_bridge.h> #include <linux/netdevice.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <asm/uaccess.h> #include "br_private.h" @@ -27,7 +28,7 @@ static int get_bridge_ifindices(int *indices, int num) struct net_device *dev; int i = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) @@ -90,7 +91,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) return -EINVAL; @@ -364,7 +365,7 @@ static int old_deviceless(void __user *uarg) return -EOPNOTSUPP; } -int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) { switch (cmd) { case SIOCGIFBR: diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fc13130035e..da22f900e89 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -503,18 +503,14 @@ inhdr_error: * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct iphdr *iph; - struct sk_buff *skb = *pskb; __u32 len = nf_bridge_encap_header_len(skb); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) - return NF_STOLEN; - if (unlikely(!pskb_may_pull(skb, len))) goto out; @@ -584,13 +580,11 @@ out: * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - if (skb->dst == (struct dst_entry *)&__fake_rtable) { dst_release(skb->dst); skb->dst = NULL; @@ -625,12 +619,11 @@ static int br_nf_forward_finish(struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; struct net_device *parent; int pf; @@ -648,7 +641,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, else pf = PF_INET6; - nf_bridge_pull_encap_header(*pskb); + nf_bridge_pull_encap_header(skb); nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { @@ -666,12 +659,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct net_device **d = (struct net_device **)(skb->cb); #ifdef CONFIG_SYSCTL @@ -682,12 +674,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) return NF_ACCEPT; - nf_bridge_pull_encap_header(*pskb); + nf_bridge_pull_encap_header(skb); } if (arp_hdr(skb)->ar_pln != 4) { if (IS_VLAN_ARP(skb)) - nf_bridge_push_encap_header(*pskb); + nf_bridge_push_encap_header(skb); return NF_ACCEPT; } *d = (struct net_device *)in; @@ -709,13 +701,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. */ -static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct net_device *realindev; - struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; if (!skb->nf_bridge) @@ -752,13 +743,12 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) } /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge; + struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); int pf; @@ -828,13 +818,13 @@ print_error: /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, +static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if ((*pskb)->nf_bridge && - !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + if (skb->nf_bridge && + !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { return NF_STOP; } @@ -904,7 +894,6 @@ int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp, static ctl_table brnf_table[] = { { - .ctl_name = NET_BRIDGE_NF_CALL_ARPTABLES, .procname = "bridge-nf-call-arptables", .data = &brnf_call_arptables, .maxlen = sizeof(int), @@ -912,7 +901,6 @@ static ctl_table brnf_table[] = { .proc_handler = &brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_CALL_IPTABLES, .procname = "bridge-nf-call-iptables", .data = &brnf_call_iptables, .maxlen = sizeof(int), @@ -920,7 +908,6 @@ static ctl_table brnf_table[] = { .proc_handler = &brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_CALL_IP6TABLES, .procname = "bridge-nf-call-ip6tables", .data = &brnf_call_ip6tables, .maxlen = sizeof(int), @@ -928,7 +915,6 @@ static ctl_table brnf_table[] = { .proc_handler = &brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_FILTER_VLAN_TAGGED, .procname = "bridge-nf-filter-vlan-tagged", .data = &brnf_filter_vlan_tagged, .maxlen = sizeof(int), @@ -936,7 +922,6 @@ static ctl_table brnf_table[] = { .proc_handler = &brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_FILTER_PPPOE_TAGGED, .procname = "bridge-nf-filter-pppoe-tagged", .data = &brnf_filter_pppoe_tagged, .maxlen = sizeof(int), diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0fcf6f07306..53ab8e0cb51 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <net/rtnetlink.h> +#include <net/net_namespace.h> #include "br_private.h" static inline size_t br_nlmsg_size(void) @@ -110,7 +111,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int idx; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; @@ -155,7 +156,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(&init_net, ifm->ifi_index); if (!dev) return -ENODEV; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index c8451d3a070..07ac3ae68d8 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/rtnetlink.h> +#include <net/net_namespace.h> #include "br_private.h" @@ -36,6 +37,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge_port *p = dev->br_port; struct net_bridge *br; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* not a port of a bridge */ if (p == NULL) return NOTIFY_DONE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e6dc6f52990..f666f7b28ff 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -192,7 +192,7 @@ extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); +extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg); /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 60112bce669..0edbd2a1c3f 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -17,6 +17,7 @@ #include <linux/netfilter_bridge.h> #include <linux/etherdevice.h> #include <linux/llc.h> +#include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> #include <asm/unaligned.h> @@ -64,7 +65,7 @@ static inline int br_get_ticks(const unsigned char *src) { unsigned long ticks = ntohs(get_unaligned((__be16 *)src)); - return (ticks * HZ + STP_HZ - 1) / STP_HZ; + return DIV_ROUND_UP(ticks * HZ, STP_HZ); } /* called under bridge lock */ @@ -141,6 +142,9 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, struct net_bridge *br; const unsigned char *buf; + if (dev->nd_net != &init_net) + goto err; + if (!p) goto err; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index c65f54e0e27..3312e8f2abe 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -435,7 +435,7 @@ int br_sysfs_addbr(struct net_device *dev) err = kobject_register(&br->ifobj); if (err) { pr_info("%s: can't add kobject (directory) %s/%s\n", - __FUNCTION__, dev->name, br->ifobj.name); + __FUNCTION__, dev->name, kobject_name(&br->ifobj)); goto out3; } return 0; diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index ffe468a632e..48a80e42328 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -15,7 +15,7 @@ #include <net/arp.h> #include <linux/module.h> -static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { @@ -23,7 +23,6 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, __be32 _sip, *siptr, _dip, *diptr; struct arphdr _ah, *ap; unsigned char _sha[ETH_ALEN], *shp; - struct sk_buff *skb = *pskb; ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 4582659dff0..74262e9a566 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -8,29 +8,22 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> -static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *)data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN); return info->target; } diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 62d23c7b25e..6cba54309c0 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -17,7 +17,7 @@ #include <linux/netfilter_bridge/ebt_mark_t.h> #include <linux/module.h> -static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { @@ -25,13 +25,13 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, int action = info->target & -16; if (action == MARK_SET_VALUE) - (*pskb)->mark = info->mark; + skb->mark = info->mark; else if (action == MARK_OR_VALUE) - (*pskb)->mark |= info->mark; + skb->mark |= info->mark; else if (action == MARK_AND_VALUE) - (*pskb)->mark &= info->mark; + skb->mark &= info->mark; else - (*pskb)->mark ^= info->mark; + skb->mark ^= info->mark; return info->target | ~EBT_VERDICT_BITS; } diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 9f378eab72d..422cb834cff 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -8,35 +8,28 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_redirect.h> #include <linux/module.h> #include <net/sock.h> #include "../br_private.h" -static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } if (hooknr != NF_BR_BROUTING) - memcpy(eth_hdr(*pskb)->h_dest, + memcpy(eth_hdr(skb)->h_dest, in->br_port->br->dev->dev_addr, ETH_ALEN); else - memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN); - (*pskb)->pkt_type = PACKET_HOST; + memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN); + skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index a50722182bf..425ac920904 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -8,6 +8,7 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> @@ -15,34 +16,26 @@ #include <linux/if_arp.h> #include <net/arp.h> -static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *) data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN); + memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN); if (!(info->target & NAT_ARP_BIT) && - eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) { + eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { struct arphdr _ah, *ap; - ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah); + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) return EBT_DROP; if (ap->ar_hln != ETH_ALEN) goto out; - if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN)) + if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN)) return EBT_DROP; } out: diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 204c968fa86..e7cfd30bac7 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -300,8 +300,9 @@ static int __init ebt_ulog_init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, - NULL, NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, + EBT_ULOG_MAXNLGROUPS, NULL, NULL, + THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d37ce047893..e44519ebf1d 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -51,11 +51,11 @@ static struct ebt_table broute_table = .me = THIS_MODULE, }; -static int ebt_broute(struct sk_buff **pskb) +static int ebt_broute(struct sk_buff *skb) { int ret; - ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL, + ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, &broute_table); if (ret == NF_DROP) return 1; /* route it */ diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 81d84145c41..210493f99bc 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -61,10 +61,10 @@ static struct ebt_table frame_filter = }; static unsigned int -ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, +ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_filter); + return ebt_do_table(hook, skb, in, out, &frame_filter); } static struct nf_hook_ops ebt_ops_filter[] = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 9c50488b62e..3e58c2e5ee2 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -61,17 +61,17 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in +ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, &frame_nat); } static unsigned int -ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in +ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, &frame_nat); } static struct nf_hook_ops ebt_ops_nat[] = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 6018d0e5193..817169e718c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -142,7 +142,7 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, } /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, +unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table) { @@ -172,19 +172,19 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, eth_hdr(*pskb), in, out)) + if (ebt_basic_match(point, eth_hdr(skb), in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0) goto letscontinue; /* increase counter */ (*(counter_base + i)).pcnt++; - (*(counter_base + i)).bcnt+=(**pskb).len; + (*(counter_base + i)).bcnt += skb->len; /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in, + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in, out); t = (struct ebt_entry_target *) @@ -193,7 +193,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; else - verdict = t->u.target->target(pskb, hook, + verdict = t->u.target->target(skb, hook, in, out, t->data, t->target_size); if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); @@ -871,7 +871,7 @@ static int translate_table(char *name, struct ebt_table_info *newinfo) return -EINVAL; } - /* we now know the following (along with E=mc²): + /* we now know the following (along with E=mc²): - the nr of entries in each chain is right - the size of the allocated space is right - all valid hooks have a corresponding chain diff --git a/net/core/Makefile b/net/core/Makefile index 4751613e1b5..b1332f6d004 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o + gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o @@ -11,7 +11,7 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o -obj-$(CONFIG_SYSFS) += net-sysfs.o +obj-y += net-sysfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/dev.c b/net/core/dev.c index a76021c7120..853c8b575f1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -92,6 +92,7 @@ #include <linux/etherdevice.h> #include <linux/notifier.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> @@ -119,6 +120,8 @@ #include <linux/ctype.h> #include <linux/if_arp.h> +#include "net-sysfs.h" + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -189,25 +192,50 @@ static struct net_dma net_dma = { * unregister_netdevice(), which must be called with the rtnl * semaphore held. */ -LIST_HEAD(dev_base_head); DEFINE_RWLOCK(dev_base_lock); -EXPORT_SYMBOL(dev_base_head); EXPORT_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 -static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; -static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; +#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) -static inline struct hlist_head *dev_name_hash(const char *name) +static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; + return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; } -static inline struct hlist_head *dev_index_hash(int ifindex) +static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) { - return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; + return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; +} + +/* Device list insertion */ +static int list_netdevice(struct net_device *dev) +{ + struct net *net = dev->nd_net; + + ASSERT_RTNL(); + + write_lock_bh(&dev_base_lock); + list_add_tail(&dev->dev_list, &net->dev_base_head); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + write_unlock_bh(&dev_base_lock); + return 0; +} + +/* Device list removal */ +static void unlist_netdevice(struct net_device *dev) +{ + ASSERT_RTNL(); + + /* Unlink dev from the device chain */ + write_lock_bh(&dev_base_lock); + list_del(&dev->dev_list); + hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); + write_unlock_bh(&dev_base_lock); } /* @@ -220,17 +248,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain); * Device drivers call our routines to queue packets here. We empty the * queue in the local softnet handler. */ -DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; -#ifdef CONFIG_SYSFS -extern int netdev_sysfs_init(void); -extern int netdev_register_sysfs(struct net_device *); -extern void netdev_unregister_sysfs(struct net_device *); -#else -#define netdev_sysfs_init() (0) -#define netdev_register_sysfs(dev) (0) -#define netdev_unregister_sysfs(dev) do { } while(0) -#endif +DEFINE_PER_CPU(struct softnet_data, softnet_data); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -490,7 +509,7 @@ unsigned long netdev_boot_base(const char *prefix, int unit) * If device already registered then return base of 1 * to indicate not to probe for this interface */ - if (__dev_get_by_name(name)) + if (__dev_get_by_name(&init_net, name)) return 1; for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) @@ -536,6 +555,7 @@ __setup("netdev=", netdev_boot_setup); /** * __dev_get_by_name - find a device by its name + * @net: the applicable net namespace * @name: name to find * * Find an interface by name. Must be called under RTNL semaphore @@ -545,11 +565,11 @@ __setup("netdev=", netdev_boot_setup); * careful with locks. */ -struct net_device *__dev_get_by_name(const char *name) +struct net_device *__dev_get_by_name(struct net *net, const char *name) { struct hlist_node *p; - hlist_for_each(p, dev_name_hash(name)) { + hlist_for_each(p, dev_name_hash(net, name)) { struct net_device *dev = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(dev->name, name, IFNAMSIZ)) @@ -560,6 +580,7 @@ struct net_device *__dev_get_by_name(const char *name) /** * dev_get_by_name - find a device by its name + * @net: the applicable net namespace * @name: name to find * * Find an interface by name. This can be called from any @@ -569,12 +590,12 @@ struct net_device *__dev_get_by_name(const char *name) * matching device is found. */ -struct net_device *dev_get_by_name(const char *name) +struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -583,6 +604,7 @@ struct net_device *dev_get_by_name(const char *name) /** * __dev_get_by_index - find a device by its ifindex + * @net: the applicable net namespace * @ifindex: index of device * * Search for an interface by index. Returns %NULL if the device @@ -592,11 +614,11 @@ struct net_device *dev_get_by_name(const char *name) * or @dev_base_lock. */ -struct net_device *__dev_get_by_index(int ifindex) +struct net_device *__dev_get_by_index(struct net *net, int ifindex) { struct hlist_node *p; - hlist_for_each(p, dev_index_hash(ifindex)) { + hlist_for_each(p, dev_index_hash(net, ifindex)) { struct net_device *dev = hlist_entry(p, struct net_device, index_hlist); if (dev->ifindex == ifindex) @@ -608,6 +630,7 @@ struct net_device *__dev_get_by_index(int ifindex) /** * dev_get_by_index - find a device by its ifindex + * @net: the applicable net namespace * @ifindex: index of device * * Search for an interface by index. Returns NULL if the device @@ -616,12 +639,12 @@ struct net_device *__dev_get_by_index(int ifindex) * dev_put to indicate they have finished with it. */ -struct net_device *dev_get_by_index(int ifindex) +struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -630,6 +653,7 @@ struct net_device *dev_get_by_index(int ifindex) /** * dev_getbyhwaddr - find a device by its hardware address + * @net: the applicable net namespace * @type: media type of device * @ha: hardware address * @@ -642,13 +666,13 @@ struct net_device *dev_get_by_index(int ifindex) * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; @@ -658,12 +682,12 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) EXPORT_SYMBOL(dev_getbyhwaddr); -struct net_device *__dev_getfirstbyhwtype(unsigned short type) +struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(net, dev) if (dev->type == type) return dev; @@ -672,12 +696,12 @@ struct net_device *__dev_getfirstbyhwtype(unsigned short type) EXPORT_SYMBOL(__dev_getfirstbyhwtype); -struct net_device *dev_getfirstbyhwtype(unsigned short type) +struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev; rtnl_lock(); - dev = __dev_getfirstbyhwtype(type); + dev = __dev_getfirstbyhwtype(net, type); if (dev) dev_hold(dev); rtnl_unlock(); @@ -688,6 +712,7 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype); /** * dev_get_by_flags - find any device with given flags + * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * @@ -697,13 +722,13 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype); * dev_put to indicate they have finished with it. */ -struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) +struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; @@ -740,9 +765,10 @@ int dev_valid_name(const char *name) } /** - * dev_alloc_name - allocate a name for a device - * @dev: device + * __dev_alloc_name - allocate a name for a device + * @net: network namespace to allocate the device name in * @name: name format string + * @buf: scratch buffer and result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses @@ -753,13 +779,12 @@ int dev_valid_name(const char *name) * Returns the number of the unit assigned or a negative errno code. */ -int dev_alloc_name(struct net_device *dev, const char *name) +static int __dev_alloc_name(struct net *net, const char *name, char *buf) { int i = 0; - char buf[IFNAMSIZ]; const char *p; const int max_netdevices = 8*PAGE_SIZE; - long *inuse; + unsigned long *inuse; struct net_device *d; p = strnchr(name, IFNAMSIZ-1, '%'); @@ -773,18 +798,18 @@ int dev_alloc_name(struct net_device *dev, const char *name) return -EINVAL; /* Use one page as a bit array of possible slots */ - inuse = (long *) get_zeroed_page(GFP_ATOMIC); + inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); if (!inuse) return -ENOMEM; - for_each_netdev(d) { + for_each_netdev(net, d) { if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, sizeof(buf), name, i); + snprintf(buf, IFNAMSIZ, name, i); if (!strncmp(buf, d->name, IFNAMSIZ)) set_bit(i, inuse); } @@ -793,11 +818,9 @@ int dev_alloc_name(struct net_device *dev, const char *name) free_page((unsigned long) inuse); } - snprintf(buf, sizeof(buf), name, i); - if (!__dev_get_by_name(buf)) { - strlcpy(dev->name, buf, IFNAMSIZ); + snprintf(buf, IFNAMSIZ, name, i); + if (!__dev_get_by_name(net, buf)) return i; - } /* It is possible to run out of possible slots * when the name is long and there isn't enough space left @@ -806,6 +829,34 @@ int dev_alloc_name(struct net_device *dev, const char *name) return -ENFILE; } +/** + * dev_alloc_name - allocate a name for a device + * @dev: device + * @name: name format string + * + * Passed a format string - eg "lt%d" it will try and find a suitable + * id. It scans list of devices to build up a free map, then chooses + * the first empty slot. The caller must hold the dev_base or rtnl lock + * while allocating the name and adding the device in order to avoid + * duplicates. + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). + * Returns the number of the unit assigned or a negative errno code. + */ + +int dev_alloc_name(struct net_device *dev, const char *name) +{ + char buf[IFNAMSIZ]; + struct net *net; + int ret; + + BUG_ON(!dev->nd_net); + net = dev->nd_net; + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + /** * dev_change_name - change name of a device @@ -820,15 +871,21 @@ int dev_change_name(struct net_device *dev, char *newname) char oldname[IFNAMSIZ]; int err = 0; int ret; + struct net *net; ASSERT_RTNL(); + BUG_ON(!dev->nd_net); + net = dev->nd_net; if (dev->flags & IFF_UP) return -EBUSY; if (!dev_valid_name(newname)) return -EINVAL; + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) + return 0; + memcpy(oldname, dev->name, IFNAMSIZ); if (strchr(newname, '%')) { @@ -837,7 +894,7 @@ int dev_change_name(struct net_device *dev, char *newname) return err; strcpy(newname, dev->name); } - else if (__dev_get_by_name(newname)) + else if (__dev_get_by_name(net, newname)) return -EEXIST; else strlcpy(dev->name, newname, IFNAMSIZ); @@ -847,10 +904,10 @@ rollback: write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); write_unlock_bh(&dev_base_lock); - ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); ret = notifier_to_errno(ret); if (ret) { @@ -876,7 +933,7 @@ rollback: */ void netdev_features_change(struct net_device *dev) { - raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); + call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); } EXPORT_SYMBOL(netdev_features_change); @@ -891,14 +948,14 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGE, dev); + call_netdevice_notifiers(NETDEV_CHANGE, dev); rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } } /** * dev_load - load a network module + * @net: the applicable net namespace * @name: name of interface * * If a network interface is not present and the process has suitable @@ -906,26 +963,18 @@ void netdev_state_change(struct net_device *dev) * available in this kernel then it becomes a nop. */ -void dev_load(const char *name) +void dev_load(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); read_unlock(&dev_base_lock); if (!dev && capable(CAP_SYS_MODULE)) request_module("%s", name); } -static int default_rebuild_header(struct sk_buff *skb) -{ - printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", - skb->dev ? skb->dev->name : "NULL!!!"); - kfree_skb(skb); - return 1; -} - /** * dev_open - prepare an interface for use. * @dev: device to open @@ -959,17 +1008,20 @@ int dev_open(struct net_device *dev) * Call device private open method */ set_bit(__LINK_STATE_START, &dev->state); - if (dev->open) { + + if (dev->validate_addr) + ret = dev->validate_addr(dev); + + if (!ret && dev->open) ret = dev->open(dev); - if (ret) - clear_bit(__LINK_STATE_START, &dev->state); - } /* * If it went open OK then: */ - if (!ret) { + if (ret) + clear_bit(__LINK_STATE_START, &dev->state); + else { /* * Set the flags. */ @@ -988,8 +1040,9 @@ int dev_open(struct net_device *dev) /* * ... and announce new interface. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + call_netdevice_notifiers(NETDEV_UP, dev); } + return ret; } @@ -1004,6 +1057,8 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + might_sleep(); + if (!(dev->flags & IFF_UP)) return 0; @@ -1011,23 +1066,19 @@ int dev_close(struct net_device *dev) * Tell people we are going down, so that they can * prepare to death, when device is still operating. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); dev_deactivate(dev); clear_bit(__LINK_STATE_START, &dev->state); /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(), - * and wait when poll really will happen. Actually, the best place - * for this is inside dev->stop() after device stopped its irq - * engine, but this requires more changes in devices. */ - + * it can be even on different cpu. So just clear netif_running(). + * + * dev->stop() will invoke napi_disable() on all of it's + * napi_struct instances on this device. + */ smp_mb__after_clear_bit(); /* Commit netif_running(). */ - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { - /* No hurry. */ - msleep(1); - } /* * Call the device specific close. This cannot fail. @@ -1048,12 +1099,14 @@ int dev_close(struct net_device *dev) /* * Tell people we are down */ - raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); + call_netdevice_notifiers(NETDEV_DOWN, dev); return 0; } +static int dev_boot_phase = 1; + /* * Device change register/unregister. These are not inline or static * as we export them to the world. @@ -1077,23 +1130,27 @@ int register_netdevice_notifier(struct notifier_block *nb) { struct net_device *dev; struct net_device *last; + struct net *net; int err; rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); if (err) goto unlock; + if (dev_boot_phase) + goto unlock; + for_each_net(net) { + for_each_netdev(net, dev) { + err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = notifier_to_errno(err); + if (err) + goto rollback; + + if (!(dev->flags & IFF_UP)) + continue; - for_each_netdev(dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); - err = notifier_to_errno(err); - if (err) - goto rollback; - - if (!(dev->flags & IFF_UP)) - continue; - - nb->notifier_call(nb, NETDEV_UP, dev); + nb->notifier_call(nb, NETDEV_UP, dev); + } } unlock: @@ -1102,15 +1159,17 @@ unlock: rollback: last = dev; - for_each_netdev(dev) { - if (dev == last) - break; + for_each_net(net) { + for_each_netdev(net, dev) { + if (dev == last) + break; - if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + if (dev->flags & IFF_UP) { + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); + nb->notifier_call(nb, NETDEV_DOWN, dev); + } + nb->notifier_call(nb, NETDEV_UNREGISTER, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); } goto unlock; } @@ -1138,15 +1197,15 @@ int unregister_netdevice_notifier(struct notifier_block *nb) /** * call_netdevice_notifiers - call all network notifier blocks * @val: value passed unmodified to notifier function - * @v: pointer passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function * * Call all network notifier blocks. Parameters and return value * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers(unsigned long val, void *v) +int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - return raw_notifier_call_chain(&netdev_chain, val, v); + return raw_notifier_call_chain(&netdev_chain, val, dev); } /* When > 0 there are consumers of rx skb time stamps */ @@ -1233,21 +1292,21 @@ void __netif_schedule(struct net_device *dev) } EXPORT_SYMBOL(__netif_schedule); -void __netif_rx_schedule(struct net_device *dev) +void dev_kfree_skb_irq(struct sk_buff *skb) { - unsigned long flags; + if (atomic_dec_and_test(&skb->users)) { + struct softnet_data *sd; + unsigned long flags; - local_irq_save(flags); - dev_hold(dev); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + skb->next = sd->completion_queue; + sd->completion_queue = skb; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); + } } -EXPORT_SYMBOL(__netif_rx_schedule); +EXPORT_SYMBOL(dev_kfree_skb_irq); void dev_kfree_skb_any(struct sk_buff *skb) { @@ -1259,7 +1318,12 @@ void dev_kfree_skb_any(struct sk_buff *skb) EXPORT_SYMBOL(dev_kfree_skb_any); -/* Hot-plugging. */ +/** + * netif_device_detach - mark device as removed + * @dev: network device + * + * Mark device as removed from system and therefore no longer available. + */ void netif_device_detach(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && @@ -1269,6 +1333,12 @@ void netif_device_detach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_detach); +/** + * netif_device_attach - mark device as attached + * @dev: network device + * + * Mark device as attached from system and restart if needed. + */ void netif_device_attach(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && @@ -1297,22 +1367,21 @@ int skb_checksum_help(struct sk_buff *skb) goto out_set_summed; } - if (skb_cloned(skb)) { + offset = skb->csum_start - skb_headroom(skb); + BUG_ON(offset >= skb_headlen(skb)); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + + offset += skb->csum_offset; + BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); + + if (skb_cloned(skb) && + !skb_clone_writable(skb, offset + sizeof(__sum16))) { ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (ret) goto out; } - offset = skb->csum_start - skb_headroom(skb); - BUG_ON(offset > (int)skb->len); - csum = skb_checksum(skb, offset, skb->len-offset, 0); - - offset = skb_headlen(skb) - offset; - BUG_ON(offset <= 0); - BUG_ON(skb->csum_offset + 2 > offset); - - *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = - csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: @@ -1489,7 +1558,7 @@ gso: return rc; } if (unlikely((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb->queue_mapping)) && + netif_subqueue_stopped(dev, skb)) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1501,18 +1570,6 @@ out_kfree_skb: return 0; } -#define HARD_TX_LOCK(dev, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - netif_tx_lock(dev); \ - } \ -} - -#define HARD_TX_UNLOCK(dev) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - netif_tx_unlock(dev); \ - } \ -} - /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1609,7 +1666,7 @@ gso: q = dev->qdisc; if (q->enqueue) { /* reset queue_mapping to zero */ - skb->queue_mapping = 0; + skb_set_queue_mapping(skb, 0); rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1640,7 +1697,7 @@ gso: HARD_TX_LOCK(dev, cpu); if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb->queue_mapping)) { + !netif_subqueue_stopped(dev, skb)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -1730,7 +1787,7 @@ enqueue: return NET_RX_SUCCESS; } - netif_rx_schedule(&queue->backlog_dev); + napi_schedule(&queue->backlog); goto enqueue; } @@ -1771,6 +1828,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb) return dev; } + static void net_tx_action(struct softirq_action *h) { struct softnet_data *sd = &__get_cpu_var(softnet_data); @@ -1895,27 +1953,51 @@ static int ing_filter(struct sk_buff *skb) struct Qdisc *q; struct net_device *dev = skb->dev; int result = TC_ACT_OK; + u32 ttl = G_TC_RTTL(skb->tc_verd); - if (dev->qdisc_ingress) { - __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, skb->dev->ifindex); - return TC_ACT_SHOT; - } + if (MAX_RED_LOOP < ttl++) { + printk(KERN_WARNING + "Redir loop detected Dropping packet (%d->%d)\n", + skb->iif, dev->ifindex); + return TC_ACT_SHOT; + } + + skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); + spin_lock(&dev->ingress_lock); + if ((q = dev->qdisc_ingress) != NULL) + result = q->enqueue(skb, q); + spin_unlock(&dev->ingress_lock); - skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); + return result; +} - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) - result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); +static inline struct sk_buff *handle_ing(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + if (!skb->dev->qdisc_ingress) + goto out; + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } else { + /* Huh? Why does turning on AF_PACKET affect this? */ + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); } - return result; + switch (ing_filter(skb)) { + case TC_ACT_SHOT: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NULL; + } + +out: + skb->tc_verd = 0; + return skb; } #endif @@ -1927,7 +2009,7 @@ int netif_receive_skb(struct sk_buff *skb) __be16 type; /* if we've gotten here through NAPI, check netpoll */ - if (skb->dev->poll && netpoll_rx(skb)) + if (netpoll_receive_skb(skb)) return NET_RX_DROP; if (!skb->tstamp.tv64) @@ -1967,21 +2049,9 @@ int netif_receive_skb(struct sk_buff *skb) } #ifdef CONFIG_NET_CLS_ACT - if (pt_prev) { - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = NULL; /* noone else should process this after*/ - } else { - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - } - - ret = ing_filter(skb); - - if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { - kfree_skb(skb); + skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + if (!skb) goto out; - } - - skb->tc_verd = 0; ncls: #endif @@ -2017,22 +2087,25 @@ out: return ret; } -static int process_backlog(struct net_device *backlog_dev, int *budget) +static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; - int quota = min(backlog_dev->quota, *budget); struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; - backlog_dev->weight = weight_p; - for (;;) { + napi->weight = weight_p; + do { struct sk_buff *skb; struct net_device *dev; local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); - if (!skb) - goto job_done; + if (!skb) { + __napi_complete(napi); + local_irq_enable(); + break; + } + local_irq_enable(); dev = skb->dev; @@ -2040,67 +2113,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) netif_receive_skb(skb); dev_put(dev); + } while (++work < quota && jiffies == start_time); - work++; - - if (work >= quota || jiffies - start_time > 1) - break; - - } - - backlog_dev->quota -= work; - *budget -= work; - return -1; - -job_done: - backlog_dev->quota -= work; - *budget -= work; + return work; +} - list_del(&backlog_dev->poll_list); - smp_mb__before_clear_bit(); - netif_poll_enable(backlog_dev); +/** + * __napi_schedule - schedule for receive + * @n: entry to schedule + * + * The entry's receive function will be scheduled to run + */ +void fastcall __napi_schedule(struct napi_struct *n) +{ + unsigned long flags; - local_irq_enable(); - return 0; + local_irq_save(flags); + list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); } +EXPORT_SYMBOL(__napi_schedule); + static void net_rx_action(struct softirq_action *h) { - struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct list_head *list = &__get_cpu_var(softnet_data).poll_list; unsigned long start_time = jiffies; int budget = netdev_budget; void *have; local_irq_disable(); - while (!list_empty(&queue->poll_list)) { - struct net_device *dev; + while (!list_empty(list)) { + struct napi_struct *n; + int work, weight; - if (budget <= 0 || jiffies - start_time > 1) + /* If softirq window is exhuasted then punt. + * + * Note that this is a slight policy change from the + * previous NAPI code, which would allow up to 2 + * jiffies to pass before breaking out. The test + * used to be "jiffies - start_time > 1". + */ + if (unlikely(budget <= 0 || jiffies != start_time)) goto softnet_break; local_irq_enable(); - dev = list_entry(queue->poll_list.next, - struct net_device, poll_list); - have = netpoll_poll_lock(dev); + /* Even though interrupts have been re-enabled, this + * access is safe because interrupts can only add new + * entries to the tail of this list, and only ->poll() + * calls can remove this head entry from the list. + */ + n = list_entry(list->next, struct napi_struct, poll_list); - if (dev->quota <= 0 || dev->poll(dev, &budget)) { - netpoll_poll_unlock(have); - local_irq_disable(); - list_move_tail(&dev->poll_list, &queue->poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - } else { - netpoll_poll_unlock(have); - dev_put(dev); - local_irq_disable(); - } + have = netpoll_poll_lock(n); + + weight = n->weight; + + work = n->poll(n, weight); + + WARN_ON_ONCE(work > weight); + + budget -= work; + + local_irq_disable(); + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (unlikely(work == weight)) + list_move_tail(&n->poll_list, list); + + netpoll_poll_unlock(have); } out: local_irq_enable(); + #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push @@ -2115,6 +2207,7 @@ out: } } #endif + return; softnet_break: @@ -2154,7 +2247,7 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf) * match. --pb */ -static int dev_ifname(struct ifreq __user *arg) +static int dev_ifname(struct net *net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; @@ -2167,7 +2260,7 @@ static int dev_ifname(struct ifreq __user *arg) return -EFAULT; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifr.ifr_ifindex); + dev = __dev_get_by_index(net, ifr.ifr_ifindex); if (!dev) { read_unlock(&dev_base_lock); return -ENODEV; @@ -2187,7 +2280,7 @@ static int dev_ifname(struct ifreq __user *arg) * Thus we will need a 'compatibility mode'. */ -static int dev_ifconf(char __user *arg) +static int dev_ifconf(struct net *net, char __user *arg) { struct ifconf ifc; struct net_device *dev; @@ -2211,7 +2304,7 @@ static int dev_ifconf(char __user *arg) */ total = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2245,6 +2338,7 @@ static int dev_ifconf(char __user *arg) */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; loff_t off; struct net_device *dev; @@ -2253,7 +2347,7 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) return SEQ_START_TOKEN; off = 1; - for_each_netdev(dev) + for_each_netdev(net, dev) if (off++ == *pos) return dev; @@ -2262,9 +2356,10 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq->private; ++*pos; return v == SEQ_START_TOKEN ? - first_net_device() : next_net_device((struct net_device *)v); + first_net_device(net) : next_net_device((struct net_device *)v); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -2360,7 +2455,26 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_proc_net(inode); + if (!seq->private) { + seq_release(inode, file); + res = -ENXIO; + } + } + return res; +} + +static int dev_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_seq_fops = { @@ -2368,7 +2482,7 @@ static const struct file_operations dev_seq_fops = { .open = dev_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_seq_release, }; static const struct seq_operations softnet_seq_ops = { @@ -2520,30 +2634,49 @@ static const struct file_operations ptype_seq_fops = { }; -static int __init dev_proc_init(void) +static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; - if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) + if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) goto out; - if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) + if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) goto out_dev; - if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops)) - goto out_dev2; - - if (wext_proc_init()) + if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; rc = 0; out: return rc; +out_ptype: + proc_net_remove(net, "ptype"); out_softnet: - proc_net_remove("ptype"); -out_dev2: - proc_net_remove("softnet_stat"); + proc_net_remove(net, "softnet_stat"); out_dev: - proc_net_remove("dev"); + proc_net_remove(net, "dev"); goto out; } + +static void __net_exit dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + proc_net_remove(net, "ptype"); + proc_net_remove(net, "softnet_stat"); + proc_net_remove(net, "dev"); +} + +static struct pernet_operations dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int __init dev_proc_init(void) +{ + return register_pernet_subsys(&dev_proc_ops); +} #else #define dev_proc_init() 0 #endif /* CONFIG_PROC_FS */ @@ -2906,8 +3039,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) if (dev->flags & IFF_UP && ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGE, dev); + call_netdevice_notifiers(NETDEV_CHANGE, dev); if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? +1 : -1; @@ -2953,8 +3085,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) else dev->mtu = new_mtu; if (!err && dev->flags & IFF_UP) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; } @@ -2970,18 +3101,17 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) return -ENODEV; err = dev->set_mac_address(dev, sa); if (!err) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEADDR, dev); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } /* - * Perform the SIOCxIFxxx calls. + * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) */ -static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) return -ENODEV; @@ -2991,25 +3121,15 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_flags = dev_get_flags(dev); return 0; - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ ifr->ifr_metric = 0; return 0; - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; - case SIOCGIFMTU: /* Get the MTU of a device */ ifr->ifr_mtu = dev->mtu; return 0; - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); - case SIOCGIFHWADDR: if (!dev->addr_len) memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); @@ -3019,17 +3139,9 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_hwaddr.sa_family = dev->type; return 0; - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEADDR, dev); - return 0; + case SIOCGIFSLAVE: + err = -EINVAL; + break; case SIOCGIFMAP: ifr->ifr_map.mem_start = dev->mem_start; @@ -3040,6 +3152,59 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_map.port = dev->if_port; return 0; + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + default: + /* dev_ioctl() should ensure this case + * is never reached + */ + WARN_ON(1); + err = -EINVAL; + break; + + } + return err; +} + +/* + * Perform the SIOCxIFxxx calls, inside rtnl_lock() + */ +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + + if (!dev) + return -ENODEV; + + switch (cmd) { + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); + + case SIOCSIFMETRIC: /* Set the metric on the interface + (currently unused) */ + return -EOPNOTSUPP; + + case SIOCSIFMTU: /* Set the MTU of a device */ + return dev_set_mtu(dev, ifr->ifr_mtu); + + case SIOCSIFHWADDR: + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); + + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family != dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return 0; + case SIOCSIFMAP: if (dev->set_config) { if (!netif_device_present(dev)) @@ -3066,14 +3231,6 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) return -EINVAL; @@ -3125,6 +3282,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) /** * dev_ioctl - network device ioctl + * @net: the applicable net namespace * @cmd: command to issue * @arg: pointer to a struct ifreq in user space * @@ -3134,7 +3292,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) * positive or a negative errno code on error. */ -int dev_ioctl(unsigned int cmd, void __user *arg) +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; int ret; @@ -3147,12 +3305,12 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (cmd == SIOCGIFCONF) { rtnl_lock(); - ret = dev_ifconf((char __user *) arg); + ret = dev_ifconf(net, (char __user *) arg); rtnl_unlock(); return ret; } if (cmd == SIOCGIFNAME) - return dev_ifname((struct ifreq __user *)arg); + return dev_ifname(net, (struct ifreq __user *)arg); if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -3182,9 +3340,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); read_lock(&dev_base_lock); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc_locked(net, &ifr, cmd); read_unlock(&dev_base_lock); if (!ret) { if (colon) @@ -3196,9 +3354,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) return ret; case SIOCETHTOOL: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(&ifr); + ret = dev_ethtool(net, &ifr); rtnl_unlock(); if (!ret) { if (colon) @@ -3220,9 +3378,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCSIFNAME: if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret) { if (colon) @@ -3261,9 +3419,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); return ret; @@ -3283,9 +3441,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (cmd == SIOCWANDEV || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) @@ -3294,7 +3452,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) } /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(&ifr, cmd, arg); + return wext_handle_ioctl(net, &ifr, cmd, arg); return -EINVAL; } } @@ -3302,24 +3460,23 @@ int dev_ioctl(unsigned int cmd, void __user *arg) /** * dev_new_index - allocate an ifindex + * @net: the applicable net namespace * * Returns a suitable unique value for a new device interface * number. The caller must hold the rtnl semaphore or the * dev_base_lock to be sure it remains unique. */ -static int dev_new_index(void) +static int dev_new_index(struct net *net) { static int ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; - if (!__dev_get_by_index(ifindex)) + if (!__dev_get_by_index(net, ifindex)) return ifindex; } } -static int dev_boot_phase = 1; - /* Delayed registration/unregisteration */ static DEFINE_SPINLOCK(net_todo_list_lock); static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); @@ -3353,6 +3510,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; + struct net *net; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3361,6 +3519,8 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + BUG_ON(!dev->nd_net); + net = dev->nd_net; spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); @@ -3385,12 +3545,12 @@ int register_netdevice(struct net_device *dev) goto err_uninit; } - dev->ifindex = dev_new_index(); + dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; /* Check for existence of name */ - head = dev_name_hash(dev->name); + head = dev_name_hash(net, dev->name); hlist_for_each(p, head) { struct net_device *d = hlist_entry(p, struct net_device, name_hlist); @@ -3446,15 +3606,7 @@ int register_netdevice(struct net_device *dev) } } - /* - * nil rebuild_header routine, - * that should be never called and used as just bug trap. - */ - - if (!dev->rebuild_header) - dev->rebuild_header = default_rebuild_header; - - ret = netdev_register_sysfs(dev); + ret = netdev_register_kobject(dev); if (ret) goto err_uninit; dev->reg_state = NETREG_REGISTERED; @@ -3467,15 +3619,11 @@ int register_netdevice(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); dev_init_scheduler(dev); - write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &dev_base_head); - hlist_add_head(&dev->name_hlist, head); - hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); - write_unlock_bh(&dev_base_lock); + list_netdevice(dev); /* Notify protocols, that a new device appeared. */ - ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); if (ret) unregister_netdevice(dev); @@ -3546,8 +3694,7 @@ static void netdev_wait_allrefs(struct net_device *dev) rtnl_lock(); /* Rebroadcast unregister notification */ - raw_notifier_call_chain(&netdev_chain, - NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -3692,6 +3839,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; + dev->nd_net = &init_net; if (sizeof_priv) { dev->priv = ((char *)dev + @@ -3704,6 +3852,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; + netpoll_netdev_init(dev); setup(dev); strcpy(dev->name, name); return dev; @@ -3720,7 +3869,6 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { -#ifdef CONFIG_SYSFS /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -3732,9 +3880,6 @@ void free_netdev(struct net_device *dev) /* will free via device release */ put_device(&dev->dev); -#else - kfree((char *)dev - dev->padded); -#endif } /* Synchronize with packet receive processing. */ @@ -3773,15 +3918,10 @@ void unregister_netdevice(struct net_device *dev) BUG_ON(dev->reg_state != NETREG_REGISTERED); /* If device is running, close it first. */ - if (dev->flags & IFF_UP) - dev_close(dev); + dev_close(dev); /* And unlink it from device chain. */ - write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - write_unlock_bh(&dev_base_lock); + unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; @@ -3794,7 +3934,7 @@ void unregister_netdevice(struct net_device *dev) /* Notify protocols, that we are about to destroy this device. They should clean all the things. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); /* * Flush the unicast and multicast chains @@ -3807,8 +3947,8 @@ void unregister_netdevice(struct net_device *dev) /* Notifier chain MUST detach us from master device. */ BUG_TRAP(!dev->master); - /* Remove entries from sysfs */ - netdev_unregister_sysfs(dev); + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); /* Finish processing unregister after unlock */ net_set_todo(dev); @@ -3839,6 +3979,121 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); +/** + * dev_change_net_namespace - move device to different nethost namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * a failure a netagive errno code is returned. + * + * Callers must hold the rtnl semaphore. + */ + +int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) +{ + char buf[IFNAMSIZ]; + const char *destname; + int err; + + ASSERT_RTNL(); + + /* Don't allow namespace local devices to be moved. */ + err = -EINVAL; + if (dev->features & NETIF_F_NETNS_LOCAL) + goto out; + + /* Ensure the device has been registrered */ + err = -EINVAL; + if (dev->reg_state != NETREG_REGISTERED) + goto out; + + /* Get out if there is nothing todo */ + err = 0; + if (dev->nd_net == net) + goto out; + + /* Pick the destination device name, and ensure + * we can use it in the destination network namespace. + */ + err = -EEXIST; + destname = dev->name; + if (__dev_get_by_name(net, destname)) { + /* We get here if we can't use the current device name */ + if (!pat) + goto out; + if (!dev_valid_name(pat)) + goto out; + if (strchr(pat, '%')) { + if (__dev_alloc_name(net, pat, buf) < 0) + goto out; + destname = buf; + } else + destname = pat; + if (__dev_get_by_name(net, destname)) + goto out; + } + + /* + * And now a mini version of register_netdevice unregister_netdevice. + */ + + /* If device is running close it first. */ + dev_close(dev); + + /* And unlink it from device chain */ + err = -ENODEV; + unlist_netdevice(dev); + + synchronize_net(); + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + /* + * Flush the unicast and multicast chains + */ + dev_addr_discard(dev); + + /* Actually switch the network namespace */ + dev->nd_net = net; + + /* Assign the new device name */ + if (destname != dev->name) + strcpy(dev->name, destname); + + /* If there is an ifindex conflict assign a new one */ + if (__dev_get_by_index(net, dev->ifindex)) { + int iflink = (dev->iflink == dev->ifindex); + dev->ifindex = dev_new_index(net); + if (iflink) + dev->iflink = dev->ifindex; + } + + /* Fixup kobjects */ + err = device_rename(&dev->dev, dev->name); + WARN_ON(err); + + /* Add the device back in the hashes */ + list_netdevice(dev); + + /* Notify protocols, that a new device appeared. */ + call_netdevice_notifiers(NETDEV_REGISTER, dev); + + synchronize_net(); + err = 0; +out: + return err; +} + static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -4032,6 +4287,82 @@ int netdev_compute_features(unsigned long all, unsigned long one) } EXPORT_SYMBOL(netdev_compute_features); +static struct hlist_head *netdev_create_hash(void) +{ + int i; + struct hlist_head *hash; + + hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); + if (hash != NULL) + for (i = 0; i < NETDEV_HASHENTRIES; i++) + INIT_HLIST_HEAD(&hash[i]); + + return hash; +} + +/* Initialize per network namespace state */ +static int __net_init netdev_init(struct net *net) +{ + INIT_LIST_HEAD(&net->dev_base_head); + rwlock_init(&dev_base_lock); + + net->dev_name_head = netdev_create_hash(); + if (net->dev_name_head == NULL) + goto err_name; + + net->dev_index_head = netdev_create_hash(); + if (net->dev_index_head == NULL) + goto err_idx; + + return 0; + +err_idx: + kfree(net->dev_name_head); +err_name: + return -ENOMEM; +} + +static void __net_exit netdev_exit(struct net *net) +{ + kfree(net->dev_name_head); + kfree(net->dev_index_head); +} + +static struct pernet_operations netdev_net_ops = { + .init = netdev_init, + .exit = netdev_exit, +}; + +static void __net_exit default_device_exit(struct net *net) +{ + struct net_device *dev, *next; + /* + * Push all migratable of the network devices back to the + * initial network namespace + */ + rtnl_lock(); + for_each_netdev_safe(net, dev, next) { + int err; + + /* Ignore unmoveable devices (i.e. loopback) */ + if (dev->features & NETIF_F_NETNS_LOCAL) + continue; + + /* Push remaing network devices to init_net */ + err = dev_change_net_namespace(dev, &init_net, "dev%d"); + if (err) { + printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", + __func__, dev->name, err); + unregister_netdevice(dev); + } + } + rtnl_unlock(); +} + +static struct pernet_operations default_device_ops = { + .exit = default_device_exit, +}; + /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -4052,18 +4383,18 @@ static int __init net_dev_init(void) if (dev_proc_init()) goto out; - if (netdev_sysfs_init()) + if (netdev_kobject_init()) goto out; INIT_LIST_HEAD(&ptype_all); for (i = 0; i < 16; i++) INIT_LIST_HEAD(&ptype_base[i]); - for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) - INIT_HLIST_HEAD(&dev_name_head[i]); + if (register_pernet_subsys(&netdev_net_ops)) + goto out; - for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) - INIT_HLIST_HEAD(&dev_index_head[i]); + if (register_pernet_device(&default_device_ops)) + goto out; /* * Initialise the packet receive queues. @@ -4076,10 +4407,9 @@ static int __init net_dev_init(void) skb_queue_head_init(&queue->input_pkt_queue); queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); - set_bit(__LINK_STATE_START, &queue->backlog_dev.state); - queue->backlog_dev.weight = weight_p; - queue->backlog_dev.poll = process_backlog; - atomic_set(&queue->backlog_dev.refcnt, 1); + + queue->backlog.poll = process_backlog; + queue->backlog.weight = weight_p; } netdev_dma_register(); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 20330c57261..ae354057d84 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -41,6 +41,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> #include <linux/skbuff.h> @@ -186,11 +187,12 @@ EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; struct net_device *dev; loff_t off = 0; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (off++ == *pos) return dev; } @@ -239,7 +241,26 @@ static const struct seq_operations dev_mc_seq_ops = { static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_mc_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_mc_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_proc_net(inode); + if (!seq->private) { + seq_release(inode, file); + res = -ENXIO; + } + } + return res; +} + +static int dev_mc_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_mc_seq_fops = { @@ -247,14 +268,31 @@ static const struct file_operations dev_mc_seq_fops = { .open = dev_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_mc_seq_release, }; #endif +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + proc_net_remove(net, "dev_mcast"); +} + +static struct pernet_operations dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + void __init dev_mcast_init(void) { - proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); + register_pernet_subsys(&dev_mc_net_ops); } EXPORT_SYMBOL(dev_mc_add); diff --git a/net/core/dst.c b/net/core/dst.c index c6a05879d58..16958e64e57 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -9,59 +9,84 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/workqueue.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> +#include <net/net_namespace.h> +#include <net/net_namespace.h> #include <net/dst.h> -/* Locking strategy: - * 1) Garbage collection state of dead destination cache - * entries is protected by dst_lock. - * 2) GC is run only from BH context, and is the only remover - * of entries. - * 3) Entries are added to the garbage list from both BH - * and non-BH context, so local BH disabling is needed. - * 4) All operations modify state, so a spinlock is used. +/* + * Theory of operations: + * 1) We use a list, protected by a spinlock, to add + * new entries from both BH and non-BH context. + * 2) In order to keep spinlock held for a small delay, + * we use a second list where are stored long lived + * entries, that are handled by the garbage collect thread + * fired by a workqueue. + * 3) This list is guarded by a mutex, + * so that the gc_task and dst_dev_event() can be synchronized. */ -static struct dst_entry *dst_garbage_list; #if RT_CACHE_DEBUG >= 2 static atomic_t dst_total = ATOMIC_INIT(0); #endif -static DEFINE_SPINLOCK(dst_lock); -static unsigned long dst_gc_timer_expires; -static unsigned long dst_gc_timer_inc = DST_GC_MAX; -static void dst_run_gc(unsigned long); +/* + * We want to keep lock & list close together + * to dirty as few cache lines as possible in __dst_free(). + * As this is not a very strong hint, we dont force an alignment on SMP. + */ +static struct { + spinlock_t lock; + struct dst_entry *list; + unsigned long timer_inc; + unsigned long timer_expires; +} dst_garbage = { + .lock = __SPIN_LOCK_UNLOCKED(dst_garbage.lock), + .timer_inc = DST_GC_MAX, +}; +static void dst_gc_task(struct work_struct *work); static void ___dst_free(struct dst_entry * dst); -static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0); +static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); -static void dst_run_gc(unsigned long dummy) +static DEFINE_MUTEX(dst_gc_mutex); +/* + * long lived entries are maintained in this list, guarded by dst_gc_mutex + */ +static struct dst_entry *dst_busy_list; + +static void dst_gc_task(struct work_struct *work) { int delayed = 0; - int work_performed; - struct dst_entry * dst, **dstp; + int work_performed = 0; + unsigned long expires = ~0L; + struct dst_entry *dst, *next, head; + struct dst_entry *last = &head; +#if RT_CACHE_DEBUG >= 2 + ktime_t time_start = ktime_get(); + struct timespec elapsed; +#endif - if (!spin_trylock(&dst_lock)) { - mod_timer(&dst_gc_timer, jiffies + HZ/10); - return; - } + mutex_lock(&dst_gc_mutex); + next = dst_busy_list; - del_timer(&dst_gc_timer); - dstp = &dst_garbage_list; - work_performed = 0; - while ((dst = *dstp) != NULL) { - if (atomic_read(&dst->__refcnt)) { - dstp = &dst->next; +loop: + while ((dst = next) != NULL) { + next = dst->next; + prefetch(&next->next); + if (likely(atomic_read(&dst->__refcnt))) { + last->next = dst; + last = dst; delayed++; continue; } - *dstp = dst->next; - work_performed = 1; + work_performed++; dst = dst_destroy(dst); if (dst) { @@ -77,38 +102,56 @@ static void dst_run_gc(unsigned long dummy) continue; ___dst_free(dst); - dst->next = *dstp; - *dstp = dst; - dstp = &dst->next; + dst->next = next; + next = dst; } } - if (!dst_garbage_list) { - dst_gc_timer_inc = DST_GC_MAX; - goto out; + + spin_lock_bh(&dst_garbage.lock); + next = dst_garbage.list; + if (next) { + dst_garbage.list = NULL; + spin_unlock_bh(&dst_garbage.lock); + goto loop; } - if (!work_performed) { - if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) - dst_gc_timer_expires = DST_GC_MAX; - dst_gc_timer_inc += DST_GC_INC; - } else { - dst_gc_timer_inc = DST_GC_INC; - dst_gc_timer_expires = DST_GC_MIN; + last->next = NULL; + dst_busy_list = head.next; + if (!dst_busy_list) + dst_garbage.timer_inc = DST_GC_MAX; + else { + /* + * if we freed less than 1/10 of delayed entries, + * we can sleep longer. + */ + if (work_performed <= delayed/10) { + dst_garbage.timer_expires += dst_garbage.timer_inc; + if (dst_garbage.timer_expires > DST_GC_MAX) + dst_garbage.timer_expires = DST_GC_MAX; + dst_garbage.timer_inc += DST_GC_INC; + } else { + dst_garbage.timer_inc = DST_GC_INC; + dst_garbage.timer_expires = DST_GC_MIN; + } + expires = dst_garbage.timer_expires; + /* + * if the next desired timer is more than 4 seconds in the future + * then round the timer to whole seconds + */ + if (expires > 4*HZ) + expires = round_jiffies_relative(expires); + schedule_delayed_work(&dst_gc_work, expires); } + + spin_unlock_bh(&dst_garbage.lock); + mutex_unlock(&dst_gc_mutex); #if RT_CACHE_DEBUG >= 2 - printk("dst_total: %d/%d %ld\n", - atomic_read(&dst_total), delayed, dst_gc_timer_expires); + elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start)); + printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d" + " expires: %lu elapsed: %lu us\n", + atomic_read(&dst_total), delayed, work_performed, + expires, + elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); #endif - /* if the next desired timer is more than 4 seconds in the future - * then round the timer to whole seconds - */ - if (dst_gc_timer_expires > 4*HZ) - mod_timer(&dst_gc_timer, - round_jiffies(jiffies + dst_gc_timer_expires)); - else - mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); - -out: - spin_unlock(&dst_lock); } static int dst_discard(struct sk_buff *skb) @@ -153,16 +196,16 @@ static void ___dst_free(struct dst_entry * dst) void __dst_free(struct dst_entry * dst) { - spin_lock_bh(&dst_lock); + spin_lock_bh(&dst_garbage.lock); ___dst_free(dst); - dst->next = dst_garbage_list; - dst_garbage_list = dst; - if (dst_gc_timer_inc > DST_GC_INC) { - dst_gc_timer_inc = DST_GC_INC; - dst_gc_timer_expires = DST_GC_MIN; - mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); + dst->next = dst_garbage.list; + dst_garbage.list = dst; + if (dst_garbage.timer_inc > DST_GC_INC) { + dst_garbage.timer_inc = DST_GC_INC; + dst_garbage.timer_expires = DST_GC_MIN; + schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); } - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&dst_garbage.lock); } struct dst_entry *dst_destroy(struct dst_entry * dst) @@ -236,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = init_net.loopback_dev; + dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = &loopback_dev; + dst->neighbour->dev = init_net.loopback_dev; dev_put(dev); - dev_hold(&loopback_dev); + dev_hold(dst->neighbour->dev); } } } @@ -250,16 +293,33 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct dst_entry *dst; + struct dst_entry *dst, *last = NULL; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: - spin_lock_bh(&dst_lock); - for (dst = dst_garbage_list; dst; dst = dst->next) { + mutex_lock(&dst_gc_mutex); + for (dst = dst_busy_list; dst; dst = dst->next) { + last = dst; + dst_ifdown(dst, dev, event != NETDEV_DOWN); + } + + spin_lock_bh(&dst_garbage.lock); + dst = dst_garbage.list; + dst_garbage.list = NULL; + spin_unlock_bh(&dst_garbage.lock); + + if (last) + last->next = dst; + else + dst_busy_list = dst; + for (; dst; dst = dst->next) { dst_ifdown(dst, dev, event != NETDEV_DOWN); } - spin_unlock_bh(&dst_lock); + mutex_unlock(&dst_gc_mutex); break; } return NOTIFY_DONE; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c5e059352d4..1163eb2256d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -109,6 +109,32 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) return 0; } +/* the following list of flags are the same as their associated + * NETIF_F_xxx values in include/linux/netdevice.h + */ +static const u32 flags_dup_features = + ETH_FLAG_LRO; + +u32 ethtool_op_get_flags(struct net_device *dev) +{ + /* in the future, this function will probably contain additional + * handling for flags which are not so easily handled + * by a simple masking operation + */ + + return dev->features & flags_dup_features; +} + +int ethtool_op_set_flags(struct net_device *dev, u32 data) +{ + if (data & ETH_FLAG_LRO) + dev->features |= NETIF_F_LRO; + else + dev->features &= ~NETIF_F_LRO; + + return 0; +} + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -153,10 +179,26 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); + if (ops->get_sset_count) { + int rc; + + rc = ops->get_sset_count(dev, ETH_SS_TEST); + if (rc >= 0) + info.testinfo_len = rc; + rc = ops->get_sset_count(dev, ETH_SS_STATS); + if (rc >= 0) + info.n_stats = rc; + rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); + if (rc >= 0) + info.n_priv_flags = rc; + } else { + /* code path for obsolete hooks */ + + if (ops->self_test_count) + info.testinfo_len = ops->self_test_count(dev); + if (ops->get_stats_count) + info.n_stats = ops->get_stats_count(dev); + } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); if (ops->get_eeprom_len) @@ -230,34 +272,6 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_wol(dev, &wol); } -static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GMSGLVL }; - - if (!dev->ethtool_ops->get_msglevel) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_msglevel(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_msglevel) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - dev->ethtool_ops->set_msglevel(dev, edata.data); - return 0; -} - static int ethtool_nway_reset(struct net_device *dev) { if (!dev->ethtool_ops->nway_reset) @@ -266,20 +280,6 @@ static int ethtool_nway_reset(struct net_device *dev) return dev->ethtool_ops->nway_reset(dev); } -static int ethtool_get_link(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GLINK }; - - if (!dev->ethtool_ops->get_link) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_link(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; @@ -447,48 +447,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); } -static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GRXCSUM }; - - if (!dev->ethtool_ops->get_rx_csum) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_rx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_rx_csum) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - dev->ethtool_ops->set_rx_csum(dev, edata.data); - return 0; -} - -static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTXCSUM }; - - if (!dev->ethtool_ops->get_tx_csum) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_tx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; @@ -527,20 +485,6 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } -static int ethtool_get_sg(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GSG }; - - if (!dev->ethtool_ops->get_sg) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_sg(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -558,20 +502,6 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) return __ethtool_set_sg(dev, edata.data); } -static int ethtool_get_tso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTSO }; - - if (!dev->ethtool_ops->get_tso) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_tso(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -588,18 +518,6 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tso(dev, edata.data); } -static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GUFO }; - - if (!dev->ethtool_ops->get_ufo) - return -EOPNOTSUPP; - edata.data = dev->ethtool_ops->get_ufo(dev); - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -643,16 +561,27 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) struct ethtool_test test; const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; - int ret; + int ret, test_len; - if (!ops->self_test || !ops->self_test_count) + if (!ops->self_test) return -EOPNOTSUPP; + if (!ops->get_sset_count && !ops->self_test_count) + return -EOPNOTSUPP; + + if (ops->get_sset_count) + test_len = ops->get_sset_count(dev, ETH_SS_TEST); + else + /* code path for obsolete hook */ + test_len = ops->self_test_count(dev); + if (test_len < 0) + return test_len; + WARN_ON(test_len == 0); if (copy_from_user(&test, useraddr, sizeof(test))) return -EFAULT; - test.len = ops->self_test_count(dev); - data = kmalloc(test.len * sizeof(u64), GFP_USER); + test.len = test_len; + data = kmalloc(test_len * sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -684,19 +613,29 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; + if (ops->get_sset_count) { + ret = ops->get_sset_count(dev, gstrings.string_set); + if (ret < 0) + return ret; + + gstrings.len = ret; + } else { + /* code path for obsolete hooks */ + + switch (gstrings.string_set) { + case ETH_SS_TEST: + if (!ops->self_test_count) + return -EOPNOTSUPP; + gstrings.len = ops->self_test_count(dev); + break; + case ETH_SS_STATS: + if (!ops->get_stats_count) + return -EOPNOTSUPP; + gstrings.len = ops->get_stats_count(dev); + break; + default: + return -EINVAL; + } } data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); @@ -736,16 +675,27 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) struct ethtool_stats stats; const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; - int ret; + int ret, n_stats; - if (!ops->get_ethtool_stats || !ops->get_stats_count) + if (!ops->get_ethtool_stats) + return -EOPNOTSUPP; + if (!ops->get_sset_count && !ops->get_stats_count) return -EOPNOTSUPP; + if (ops->get_sset_count) + n_stats = ops->get_sset_count(dev, ETH_SS_STATS); + else + /* code path for obsolete hook */ + n_stats = ops->get_stats_count(dev); + if (n_stats < 0) + return n_stats; + WARN_ON(n_stats == 0); + if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; - stats.n_stats = ops->get_stats_count(dev); - data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); + stats.n_stats = n_stats; + data = kmalloc(n_stats * sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -783,11 +733,55 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) return 0; } +static int ethtool_get_value(struct net_device *dev, char __user *useraddr, + u32 cmd, u32 (*actor)(struct net_device *)) +{ + struct ethtool_value edata = { cmd }; + + if (!actor) + return -EOPNOTSUPP; + + edata.data = actor(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr, + void (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; + + if (!actor) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + actor(dev, edata.data); + return 0; +} + +static int ethtool_set_value(struct net_device *dev, char __user *useraddr, + int (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; + + if (!actor) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return actor(dev, edata.data); +} + /* The main entry point in this file. Called from net/core/dev.c */ -int dev_ethtool(struct ifreq *ifr) +int dev_ethtool(struct net *net, struct ifreq *ifr) { - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; @@ -817,6 +811,8 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: case ETHTOOL_GGSO: + case ETHTOOL_GFLAGS: + case ETHTOOL_GPFLAGS: break; default: if (!capable(CAP_NET_ADMIN)) @@ -849,16 +845,19 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_wol(dev, useraddr); break; case ETHTOOL_GMSGLVL: - rc = ethtool_get_msglevel(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_msglevel); break; case ETHTOOL_SMSGLVL: - rc = ethtool_set_msglevel(dev, useraddr); + rc = ethtool_set_value_void(dev, useraddr, + dev->ethtool_ops->set_msglevel); break; case ETHTOOL_NWAY_RST: rc = ethtool_nway_reset(dev); break; case ETHTOOL_GLINK: - rc = ethtool_get_link(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_link); break; case ETHTOOL_GEEPROM: rc = ethtool_get_eeprom(dev, useraddr); @@ -885,25 +884,36 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_pauseparam(dev, useraddr); break; case ETHTOOL_GRXCSUM: - rc = ethtool_get_rx_csum(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_rx_csum); break; case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_rx_csum); break; case ETHTOOL_GTXCSUM: - rc = ethtool_get_tx_csum(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_tx_csum ? + dev->ethtool_ops->get_tx_csum : + ethtool_op_get_tx_csum)); break; case ETHTOOL_STXCSUM: rc = ethtool_set_tx_csum(dev, useraddr); break; case ETHTOOL_GSG: - rc = ethtool_get_sg(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_sg ? + dev->ethtool_ops->get_sg : + ethtool_op_get_sg)); break; case ETHTOOL_SSG: rc = ethtool_set_sg(dev, useraddr); break; case ETHTOOL_GTSO: - rc = ethtool_get_tso(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_tso ? + dev->ethtool_ops->get_tso : + ethtool_op_get_tso)); break; case ETHTOOL_STSO: rc = ethtool_set_tso(dev, useraddr); @@ -924,7 +934,10 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_get_perm_addr(dev, useraddr); break; case ETHTOOL_GUFO: - rc = ethtool_get_ufo(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_ufo ? + dev->ethtool_ops->get_ufo : + ethtool_op_get_ufo)); break; case ETHTOOL_SUFO: rc = ethtool_set_ufo(dev, useraddr); @@ -935,6 +948,22 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_SGSO: rc = ethtool_set_gso(dev, useraddr); break; + case ETHTOOL_GFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_flags); + break; + case ETHTOOL_SFLAGS: + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_flags); + break; + case ETHTOOL_GPFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_priv_flags); + break; + case ETHTOOL_SPFLAGS: + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_priv_flags); + break; default: rc = -EOPNOTSUPP; } @@ -959,3 +988,5 @@ EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); +EXPORT_SYMBOL(ethtool_op_set_flags); +EXPORT_SYMBOL(ethtool_op_get_flags); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 8c5474e1668..13de6f53f09 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -11,6 +11,8 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/list.h> +#include <net/net_namespace.h> +#include <net/sock.h> #include <net/fib_rules.h> static LIST_HEAD(rules_ops); @@ -82,7 +84,7 @@ static void cleanup_ops(struct fib_rules_ops *ops) { struct fib_rule *rule, *tmp; - list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { + list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) { list_del_rcu(&rule->list); fib_rule_put(rule); } @@ -137,7 +139,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, rcu_read_lock(); - list_for_each_entry_rcu(rule, ops->rules_list, list) { + list_for_each_entry_rcu(rule, &ops->rules_list, list) { jumped: if (!fib_rule_match(rule, ops, fl, flags)) continue; @@ -197,6 +199,7 @@ errout: static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r, *last = NULL; @@ -234,7 +237,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->ifindex = -1; nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(rule->ifname); + dev = __dev_get_by_name(net, rule->ifname); if (dev) rule->ifindex = dev->ifindex; } @@ -268,7 +271,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (rule->target <= rule->pref) goto errout_free; - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref == rule->target) { rule->ctarget = r; break; @@ -284,7 +287,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout_free; - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; last = r; @@ -297,7 +300,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) * There are unresolved goto rules in the list, check if * any of them are pointing to this new rule. */ - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && r->target == rule->pref) { BUG_ON(r->ctarget != NULL); @@ -317,7 +320,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (last) list_add_rcu(&rule->list, &last->list); else - list_add_rcu(&rule->list, ops->rules_list); + list_add_rcu(&rule->list, &ops->rules_list); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); flush_route_cache(ops); @@ -356,7 +359,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout; - list_for_each_entry(rule, ops->rules_list, list) { + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -399,7 +402,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) * actually been added. */ if (ops->nr_goto_rules > 0) { - list_for_each_entry(tmp, ops->rules_list, list) { + list_for_each_entry(tmp, &ops->rules_list, list) { if (tmp->ctarget == rule) { rcu_assign_pointer(tmp->ctarget, NULL); ops->unresolved_rules++; @@ -495,7 +498,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, int idx = 0; struct fib_rule *rule; - list_for_each_entry(rule, ops->rules_list, list) { + list_for_each_entry(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; @@ -596,18 +599,21 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct fib_rules_ops *ops; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + ASSERT_RTNL(); rcu_read_lock(); switch (event) { case NETDEV_REGISTER: list_for_each_entry(ops, &rules_ops, list) - attach_rules(ops->rules_list, dev); + attach_rules(&ops->rules_list, dev); break; case NETDEV_UNREGISTER: list_for_each_entry(ops, &rules_ops, list) - detach_rules(ops->rules_list, dev); + detach_rules(&ops->rules_list, dev); break; } diff --git a/net/core/filter.c b/net/core/filter.c index bd903aaf7aa..e0a06942c02 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -387,6 +387,25 @@ int sk_chk_filter(struct sock_filter *filter, int flen) } /** + * sk_filter_rcu_release: Release a socket filter by rcu_head + * @rcu: rcu_head that contains the sk_filter to free + */ +static void sk_filter_rcu_release(struct rcu_head *rcu) +{ + struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + + sk_filter_release(fp); +} + +static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) +{ + unsigned int size = sk_filter_len(fp); + + atomic_sub(size, &sk->sk_omem_alloc); + call_rcu_bh(&fp->rcu, sk_filter_rcu_release); +} + +/** * sk_attach_filter - attach a socket filter * @fprog: the filter program * @sk: the socket to use @@ -398,7 +417,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen) */ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct sk_filter *fp; + struct sk_filter *fp, *old_fp; unsigned int fsize = sizeof(struct sock_filter) * fprog->len; int err; @@ -418,19 +437,35 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) fp->len = fprog->len; err = sk_chk_filter(fp->insns, fp->len); - if (!err) { - struct sk_filter *old_fp; - - rcu_read_lock_bh(); - old_fp = rcu_dereference(sk->sk_filter); - rcu_assign_pointer(sk->sk_filter, fp); - rcu_read_unlock_bh(); - fp = old_fp; + if (err) { + sk_filter_uncharge(sk, fp); + return err; } - if (fp) - sk_filter_release(sk, fp); - return err; + rcu_read_lock_bh(); + old_fp = rcu_dereference(sk->sk_filter); + rcu_assign_pointer(sk->sk_filter, fp); + rcu_read_unlock_bh(); + + if (old_fp) + sk_filter_delayed_uncharge(sk, old_fp); + return 0; +} + +int sk_detach_filter(struct sock *sk) +{ + int ret = -ENOENT; + struct sk_filter *filter; + + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter) { + rcu_assign_pointer(sk->sk_filter, NULL); + sk_filter_delayed_uncharge(sk, filter); + ret = 0; + } + rcu_read_unlock_bh(); + return ret; } EXPORT_SYMBOL(sk_chk_filter); diff --git a/net/core/flow.c b/net/core/flow.c index 0ab5234b17d..3ed2b4b1d6d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -142,8 +142,6 @@ typedef u64 flow_compare_t; typedef u32 flow_compare_t; #endif -extern void flowi_is_missized(void); - /* I hear what you're saying, use memcmp. But memcmp cannot make * important assumptions that we can here, such as alignment and * constant size. @@ -153,8 +151,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) flow_compare_t *k1, *k1_lim, *k2; const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - if (sizeof(struct flowi) % sizeof(flow_compare_t)) - flowi_is_missized(); + BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (flow_compare_t *) key1; k1_lim = k1 + n_elem; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 590a767b029..daadbcc4e8d 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -15,7 +15,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <asm/bitops.h> +#include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f7de8f24d8d..05979e35696 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -25,6 +25,7 @@ #include <linux/sysctl.h> #endif #include <linux/times.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/sock.h> @@ -55,9 +56,8 @@ #define PNEIGH_HASHMASK 0xF static void neigh_timer_handler(unsigned long arg); -#ifdef CONFIG_ARPD -static void neigh_app_notify(struct neighbour *n); -#endif +static void __neigh_notify(struct neighbour *n, int type, int flags); +static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); @@ -105,6 +105,15 @@ static int neigh_blackhole(struct sk_buff *skb) return -ENETDOWN; } +static void neigh_cleanup_and_release(struct neighbour *neigh) +{ + if (neigh->parms->neigh_cleanup) + neigh->parms->neigh_cleanup(neigh); + + __neigh_notify(neigh, RTM_DELNEIGH, 0); + neigh_release(neigh); +} + /* * It is random distribution in the interval (1/2)*base...(3/2)*base. * It corresponds to default IPv6 settings and is not overridable, @@ -141,9 +150,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) n->dead = 1; shrunk = 1; write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); @@ -214,9 +221,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) NEIGH_PRINTK2("neigh %p is stray.\n", n); } write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); } } } @@ -476,6 +481,8 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, if (!creat) goto out; + ASSERT_RTNL(); + n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; @@ -677,9 +684,7 @@ static void neigh_periodic_timer(unsigned long arg) *np = n->next; n->dead = 1; write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); @@ -828,13 +833,10 @@ static void neigh_timer_handler(unsigned long arg) out: write_unlock(&neigh->lock); } + if (notify) - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + neigh_update_notify(neigh); -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif neigh_release(neigh); } @@ -897,8 +899,8 @@ out_unlock_bh: static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; - void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = - neigh->dev->header_cache_update; + void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) + = neigh->dev->header_ops->cache_update; if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { @@ -1063,11 +1065,8 @@ out: write_unlock_bh(&neigh->lock); if (notify) - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif + neigh_update_notify(neigh); + return err; } @@ -1098,7 +1097,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, hh->hh_type = protocol; atomic_set(&hh->hh_refcnt, 0); hh->hh_next = NULL; - if (dev->hard_header_cache(n, hh)) { + + if (dev->header_ops->cache(n, hh)) { kfree(hh); hh = NULL; } else { @@ -1128,10 +1128,9 @@ int neigh_compat_output(struct sk_buff *skb) __skb_pull(skb, skb_network_offset(skb)); - if (dev->hard_header && - dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, - skb->len) < 0 && - dev->rebuild_header(skb)) + if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, + skb->len) < 0 && + dev->header_ops->rebuild(skb)) return 0; return dev_queue_xmit(skb); @@ -1153,17 +1152,17 @@ int neigh_resolve_output(struct sk_buff *skb) if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; - if (dev->hard_header_cache && !dst->hh) { + if (dev->header_ops->cache && !dst->hh) { write_lock_bh(&neigh->lock); if (!dst->hh) neigh_hh_init(neigh, dst, dst->ops->protocol); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); write_unlock_bh(&neigh->lock); } else { read_lock_bh(&neigh->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); read_unlock_bh(&neigh->lock); } if (err >= 0) @@ -1194,8 +1193,8 @@ int neigh_connected_output(struct sk_buff *skb) __skb_pull(skb, skb_network_offset(skb)); read_lock_bh(&neigh->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); read_unlock_bh(&neigh->lock); if (err >= 0) err = neigh->ops->queue_xmit(skb); @@ -1354,7 +1353,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); + tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat); if (!tbl->pde) panic("cannot create neighbour proc dir entry"); tbl->pde->proc_fops = &neigh_stat_seq_fops; @@ -1439,11 +1438,15 @@ int neigh_table_clear(struct neigh_table *tbl) free_percpu(tbl->stats); tbl->stats = NULL; + kmem_cache_destroy(tbl->kmem_cachep); + tbl->kmem_cachep = NULL; + return 0; } static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; @@ -1459,7 +1462,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1509,6 +1512,7 @@ out: static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; @@ -1525,7 +1529,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -2000,6 +2004,11 @@ nla_put_failure: return -EMSGSIZE; } +static void neigh_update_notify(struct neighbour *neigh) +{ + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + __neigh_notify(neigh, RTM_NEWNEIGH, 0); +} static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) @@ -2095,11 +2104,8 @@ void __neigh_for_each_release(struct neigh_table *tbl, } else np = &n->next; write_unlock(&n->lock); - if (release) { - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); - } + if (release) + neigh_cleanup_and_release(n); } } } @@ -2422,7 +2428,6 @@ static const struct file_operations neigh_stat_seq_fops = { #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_ARPD static inline size_t neigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) @@ -2454,16 +2459,11 @@ errout: rtnl_set_sk_err(RTNLGRP_NEIGH, err); } +#ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); } - -static void neigh_app_notify(struct neighbour *n) -{ - __neigh_notify(n, RTM_NEWNEIGH, 0); -} - #endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL @@ -2499,7 +2499,6 @@ static struct neigh_sysctl_table { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_NEIGH_RETRANS_TIME, .procname = "retrans_time", .maxlen = sizeof(int), .mode = 0644, @@ -2544,27 +2543,40 @@ static struct neigh_sysctl_table { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_NEIGH_ANYCAST_DELAY, .procname = "anycast_delay", .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_PROXY_DELAY, .procname = "proxy_delay", .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_LOCKTIME, .procname = "locktime", .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_userhz_jiffies, }, { + .ctl_name = NET_NEIGH_RETRANS_TIME_MS, + .procname = "retrans_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, + { + .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, + .procname = "base_reachable_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, + { .ctl_name = NET_NEIGH_GC_INTERVAL, .procname = "gc_interval", .maxlen = sizeof(int), @@ -2593,22 +2605,7 @@ static struct neigh_sysctl_table { .mode = 0644, .proc_handler = &proc_dointvec, }, - { - .ctl_name = NET_NEIGH_RETRANS_TIME_MS, - .procname = "retrans_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, - }, - { - .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, - .procname = "base_reachable_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, - }, + {} }, .neigh_dev = { { @@ -2661,42 +2658,48 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[9].data = &p->anycast_delay; t->neigh_vars[10].data = &p->proxy_delay; t->neigh_vars[11].data = &p->locktime; + t->neigh_vars[12].data = &p->retrans_time; + t->neigh_vars[13].data = &p->base_reachable_time; if (dev) { dev_name_source = dev->name; t->neigh_dev[0].ctl_name = dev->ifindex; - t->neigh_vars[12].procname = NULL; - t->neigh_vars[13].procname = NULL; - t->neigh_vars[14].procname = NULL; - t->neigh_vars[15].procname = NULL; + /* Terminate the table early */ + memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); } else { dev_name_source = t->neigh_dev[0].procname; - t->neigh_vars[12].data = (int *)(p + 1); - t->neigh_vars[13].data = (int *)(p + 1) + 1; - t->neigh_vars[14].data = (int *)(p + 1) + 2; - t->neigh_vars[15].data = (int *)(p + 1) + 3; + t->neigh_vars[14].data = (int *)(p + 1); + t->neigh_vars[15].data = (int *)(p + 1) + 1; + t->neigh_vars[16].data = (int *)(p + 1) + 2; + t->neigh_vars[17].data = (int *)(p + 1) + 3; } - t->neigh_vars[16].data = &p->retrans_time; - t->neigh_vars[17].data = &p->base_reachable_time; if (handler || strategy) { /* RetransTime */ t->neigh_vars[3].proc_handler = handler; t->neigh_vars[3].strategy = strategy; t->neigh_vars[3].extra1 = dev; + if (!strategy) + t->neigh_vars[3].ctl_name = CTL_UNNUMBERED; /* ReachableTime */ t->neigh_vars[4].proc_handler = handler; t->neigh_vars[4].strategy = strategy; t->neigh_vars[4].extra1 = dev; + if (!strategy) + t->neigh_vars[4].ctl_name = CTL_UNNUMBERED; /* RetransTime (in milliseconds)*/ - t->neigh_vars[16].proc_handler = handler; - t->neigh_vars[16].strategy = strategy; - t->neigh_vars[16].extra1 = dev; + t->neigh_vars[12].proc_handler = handler; + t->neigh_vars[12].strategy = strategy; + t->neigh_vars[12].extra1 = dev; + if (!strategy) + t->neigh_vars[12].ctl_name = CTL_UNNUMBERED; /* ReachableTime (in milliseconds) */ - t->neigh_vars[17].proc_handler = handler; - t->neigh_vars[17].strategy = strategy; - t->neigh_vars[17].extra1 = dev; + t->neigh_vars[13].proc_handler = handler; + t->neigh_vars[13].strategy = strategy; + t->neigh_vars[13].extra1 = dev; + if (!strategy) + t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; } dev_name = kstrdup(dev_name_source, GFP_KERNEL); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5c19b0646d7..61ead1d1113 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -18,6 +18,9 @@ #include <linux/wireless.h> #include <net/iw_handler.h> +#include "net-sysfs.h" + +#ifdef CONFIG_SYSFS static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; @@ -216,20 +219,6 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } -NETDEVICE_SHOW(weight, fmt_dec); - -static int change_weight(struct net_device *net, unsigned long new_weight) -{ - net->weight = new_weight; - return 0; -} - -static ssize_t store_weight(struct device *dev, struct device_attribute *attr, - const char *buf, size_t len) -{ - return netdev_store(dev, attr, buf, len, change_weight); -} - static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(iflink, S_IRUGO, show_iflink, NULL), @@ -246,7 +235,6 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), - __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), {} }; @@ -407,29 +395,25 @@ static struct attribute_group wireless_group = { }; #endif +#endif /* CONFIG_SYSFS */ + #ifdef CONFIG_HOTPLUG -static int netdev_uevent(struct device *d, char **envp, - int num_envp, char *buf, int size) +static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { struct net_device *dev = to_net_dev(d); - int retval, len = 0, i = 0; + int retval; /* pass interface to uevent. */ - retval = add_uevent_var(envp, num_envp, &i, - buf, size, &len, - "INTERFACE=%s", dev->name); + retval = add_uevent_var(env, "INTERFACE=%s", dev->name); if (retval) goto exit; /* pass ifindex to uevent. * ifindex is useful as it won't change (interface name may change) * and is what RtNetlink uses natively. */ - retval = add_uevent_var(envp, num_envp, &i, - buf, size, &len, - "IFINDEX=%d", dev->ifindex); + retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex); exit: - envp[i] = NULL; return retval; } #endif @@ -450,7 +434,9 @@ static void netdev_release(struct device *d) static struct class net_class = { .name = "net", .dev_release = netdev_release, +#ifdef CONFIG_SYSFS .dev_attrs = net_class_attributes, +#endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, #endif @@ -459,7 +445,7 @@ static struct class net_class = { /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ -void netdev_unregister_sysfs(struct net_device * net) +void netdev_unregister_kobject(struct net_device * net) { struct device *dev = &(net->dev); @@ -468,7 +454,7 @@ void netdev_unregister_sysfs(struct net_device * net) } /* Create sysfs entries for network device. */ -int netdev_register_sysfs(struct net_device *net) +int netdev_register_kobject(struct net_device *net) { struct device *dev = &(net->dev); struct attribute_group **groups = net->sysfs_groups; @@ -481,6 +467,7 @@ int netdev_register_sysfs(struct net_device *net) BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); +#ifdef CONFIG_SYSFS if (net->get_stats) *groups++ = &netstat_group; @@ -488,11 +475,12 @@ int netdev_register_sysfs(struct net_device *net) if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif +#endif /* CONFIG_SYSFS */ return device_add(dev); } -int netdev_sysfs_init(void) +int netdev_kobject_init(void) { return class_register(&net_class); } diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h new file mode 100644 index 00000000000..f5f108db392 --- /dev/null +++ b/net/core/net-sysfs.h @@ -0,0 +1,8 @@ +#ifndef __NET_SYSFS_H__ +#define __NET_SYSFS_H__ + +int netdev_kobject_init(void); +int netdev_register_kobject(struct net_device *); +void netdev_unregister_kobject(struct net_device *); + +#endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c new file mode 100644 index 00000000000..6f71db8c442 --- /dev/null +++ b/net/core/net_namespace.c @@ -0,0 +1,317 @@ +#include <linux/workqueue.h> +#include <linux/rtnetlink.h> +#include <linux/cache.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <net/net_namespace.h> + +/* + * Our network namespace constructor/destructor lists + */ + +static LIST_HEAD(pernet_list); +static struct list_head *first_device = &pernet_list; +static DEFINE_MUTEX(net_mutex); + +LIST_HEAD(net_namespace_list); + +static struct kmem_cache *net_cachep; + +struct net init_net; +EXPORT_SYMBOL_GPL(init_net); + +static struct net *net_alloc(void) +{ + return kmem_cache_zalloc(net_cachep, GFP_KERNEL); +} + +static void net_free(struct net *net) +{ + if (!net) + return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + +static void cleanup_net(struct work_struct *work) +{ + struct pernet_operations *ops; + struct net *net; + + net = container_of(work, struct net, work); + + mutex_lock(&net_mutex); + + /* Don't let anyone else find us. */ + rtnl_lock(); + list_del(&net->list); + rtnl_unlock(); + + /* Run all of the network namespace exit methods */ + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit) + ops->exit(net); + } + + mutex_unlock(&net_mutex); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. + */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + net_free(net); +} + + +void __put_net(struct net *net) +{ + /* Cleanup the network namespace in process context */ + INIT_WORK(&net->work, cleanup_net); + schedule_work(&net->work); +} +EXPORT_SYMBOL_GPL(__put_net); + +/* + * setup_net runs the initializers for the network namespace object. + */ +static int setup_net(struct net *net) +{ + /* Must be called with net_mutex held */ + struct pernet_operations *ops; + int error; + + atomic_set(&net->count, 1); + atomic_set(&net->use_count, 0); + + error = 0; + list_for_each_entry(ops, &pernet_list, list) { + if (ops->init) { + error = ops->init(net); + if (error < 0) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* Walk through the list backwards calling the exit functions + * for the pernet modules whose init functions did not fail. + */ + list_for_each_entry_continue_reverse(ops, &pernet_list, list) { + if (ops->exit) + ops->exit(net); + } + goto out; +} + +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + struct net *new_net = NULL; + int err; + + get_net(old_net); + + if (!(flags & CLONE_NEWNET)) + return old_net; + +#ifndef CONFIG_NET_NS + return ERR_PTR(-EINVAL); +#endif + + err = -ENOMEM; + new_net = net_alloc(); + if (!new_net) + goto out; + + mutex_lock(&net_mutex); + err = setup_net(new_net); + if (err) + goto out_unlock; + + rtnl_lock(); + list_add_tail(&new_net->list, &net_namespace_list); + rtnl_unlock(); + + +out_unlock: + mutex_unlock(&net_mutex); +out: + put_net(old_net); + if (err) { + net_free(new_net); + new_net = ERR_PTR(err); + } + return new_net; +} + +static int __init net_ns_init(void) +{ + int err; + + printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); + net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), + SMP_CACHE_BYTES, + SLAB_PANIC, NULL); + mutex_lock(&net_mutex); + err = setup_net(&init_net); + + rtnl_lock(); + list_add_tail(&init_net.list, &net_namespace_list); + rtnl_unlock(); + + mutex_unlock(&net_mutex); + if (err) + panic("Could not setup the initial network namespace"); + + return 0; +} + +pure_initcall(net_ns_init); + +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + struct net *net, *undo_net; + int error; + + error = 0; + list_add_tail(&ops->list, list); + for_each_net(net) { + if (ops->init) { + error = ops->init(net); + if (error) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* If I have an error cleanup all namespaces I initialized */ + list_del(&ops->list); + for_each_net(undo_net) { + if (undo_net == net) + goto undone; + if (ops->exit) + ops->exit(undo_net); + } +undone: + goto out; +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + struct net *net; + + list_del(&ops->list); + for_each_net(net) + if (ops->exit) + ops->exit(net); +} + +/** + * register_pernet_subsys - register a network namespace subsystem + * @ops: pernet operations structure for the subsystem + * + * Register a subsystem which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_subsys(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(first_device, ops); + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_subsys); + +/** + * unregister_pernet_subsys - unregister a network namespace subsystem + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_subsys(struct pernet_operations *module) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(module); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_subsys); + +/** + * register_pernet_device - register a network namespace device + * @ops: pernet operations structure for the subsystem + * + * Register a device which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_device(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(&pernet_list, ops); + if (!error && (first_device == &pernet_list)) + first_device = &ops->list; + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_device); + +/** + * unregister_pernet_device - unregister a network namespace netdevice + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_device(struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + if (&ops->list == first_device) + first_device = first_device->next; + unregister_pernet_operations(ops); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_device); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index de1b26aa572..bf8d18f1b01 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -67,7 +67,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); netif_tx_lock(dev); if ((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb->queue_mapping)) || + netif_subqueue_stopped(dev, skb)) || dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); netif_tx_unlock(dev); @@ -119,19 +119,22 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, static void poll_napi(struct netpoll *np) { struct netpoll_info *npinfo = np->dev->npinfo; + struct napi_struct *napi; int budget = 16; - if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && - npinfo->poll_owner != smp_processor_id() && - spin_trylock(&npinfo->poll_lock)) { - npinfo->rx_flags |= NETPOLL_RX_DROP; - atomic_inc(&trapped); + list_for_each_entry(napi, &np->dev->napi_list, dev_list) { + if (test_bit(NAPI_STATE_SCHED, &napi->state) && + napi->poll_owner != smp_processor_id() && + spin_trylock(&napi->poll_lock)) { + npinfo->rx_flags |= NETPOLL_RX_DROP; + atomic_inc(&trapped); - np->dev->poll(np->dev, &budget); + napi->poll(napi, budget); - atomic_dec(&trapped); - npinfo->rx_flags &= ~NETPOLL_RX_DROP; - spin_unlock(&npinfo->poll_lock); + atomic_dec(&trapped); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + spin_unlock(&napi->poll_lock); + } } } @@ -157,7 +160,7 @@ void netpoll_poll(struct netpoll *np) /* Process pending work on NIC */ np->dev->poll_controller(np->dev); - if (np->dev->poll) + if (!list_empty(&np->dev->napi_list)) poll_napi(np); service_arp_queue(np->dev->npinfo); @@ -233,6 +236,17 @@ repeat: return skb; } +static int netpoll_owner_active(struct net_device *dev) +{ + struct napi_struct *napi; + + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (napi->poll_owner == smp_processor_id()) + return 1; + } + return 0; +} + static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status = NETDEV_TX_BUSY; @@ -246,8 +260,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) } /* don't get messages out of order, and no recursion */ - if (skb_queue_len(&npinfo->txq) == 0 && - npinfo->poll_owner != smp_processor_id()) { + if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { unsigned long flags; local_irq_save(flags); @@ -256,7 +269,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) tries > 0; --tries) { if (netif_tx_trylock(dev)) { if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb->queue_mapping)) + !netif_subqueue_stopped(dev, skb)) status = dev->hard_start_xmit(skb, dev); netif_tx_unlock(dev); @@ -402,11 +415,9 @@ static void arp_reply(struct sk_buff *skb) send_skb->protocol = htons(ETH_P_ARP); /* Fill the device header for the ARP frame */ - - if (np->dev->hard_header && - np->dev->hard_header(send_skb, skb->dev, ptype, - sha, np->local_mac, - send_skb->len) < 0) { + if (dev_hard_header(send_skb, skb->dev, ptype, + sha, np->local_mac, + send_skb->len) < 0) { kfree_skb(send_skb); return; } @@ -519,6 +530,23 @@ out: return 0; } +void netpoll_print_options(struct netpoll *np) +{ + DECLARE_MAC_BUF(mac); + printk(KERN_INFO "%s: local port %d\n", + np->name, np->local_port); + printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", + np->name, HIPQUAD(np->local_ip)); + printk(KERN_INFO "%s: interface %s\n", + np->name, np->dev_name); + printk(KERN_INFO "%s: remote port %d\n", + np->name, np->remote_port); + printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", + np->name, HIPQUAD(np->remote_ip)); + printk(KERN_INFO "%s: remote ethernet address %s\n", + np->name, print_mac(mac, np->remote_mac)); +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; @@ -531,7 +559,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) cur = delim; } cur++; - printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); if (*cur != '/') { if ((delim = strchr(cur, '/')) == NULL) @@ -539,9 +566,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) *delim = 0; np->local_ip = ntohl(in_aton(cur)); cur = delim; - - printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->local_ip)); } cur++; @@ -555,8 +579,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } cur++; - printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); - if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) @@ -566,7 +588,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) cur = delim; } cur++; - printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) @@ -575,9 +596,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) np->remote_ip = ntohl(in_aton(cur)); cur = delim + 1; - printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); - if (*cur != 0) { /* MAC address */ if ((delim = strchr(cur, ':')) == NULL) @@ -608,15 +626,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) np->remote_mac[5] = simple_strtol(cur, NULL, 16); } - printk(KERN_INFO "%s: remote ethernet address " - "%02x:%02x:%02x:%02x:%02x:%02x\n", - np->name, - np->remote_mac[0], - np->remote_mac[1], - np->remote_mac[2], - np->remote_mac[3], - np->remote_mac[4], - np->remote_mac[5]); + netpoll_print_options(np); return 0; @@ -635,7 +645,7 @@ int netpoll_setup(struct netpoll *np) int err; if (np->dev_name) - ndev = dev_get_by_name(np->dev_name); + ndev = dev_get_by_name(&init_net, np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); @@ -652,8 +662,6 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_flags = 0; npinfo->rx_np = NULL; - spin_lock_init(&npinfo->poll_lock); - npinfo->poll_owner = -1; spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); @@ -820,6 +828,7 @@ void netpoll_set_trap(int trap) EXPORT_SYMBOL(netpoll_set_trap); EXPORT_SYMBOL(netpoll_trap); +EXPORT_SYMBOL(netpoll_print_options); EXPORT_SYMBOL(netpoll_parse_options); EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 803d0c8826a..de33f36947e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -6,7 +6,7 @@ * * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * Ben Greear <greearb@candelatech.com> - * Jens Låås <jens.laas@data.slu.se> + * Jens Låås <jens.laas@data.slu.se> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -152,6 +152,7 @@ #include <linux/wait.h> #include <linux/etherdevice.h> #include <linux/kthread.h> +#include <net/net_namespace.h> #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> @@ -160,14 +161,14 @@ #endif #include <asm/byteorder.h> #include <linux/rcupdate.h> -#include <asm/bitops.h> +#include <linux/bitops.h> #include <asm/io.h> #include <asm/dma.h> #include <asm/uaccess.h> #include <asm/div64.h> /* do_div */ #include <asm/timex.h> -#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n" /* The buckets are exponential in 'width' */ #define LAT_BUCKETS_MAX 32 @@ -189,6 +190,7 @@ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ #define F_FLOW_SEQ (1<<11) /* Sequential flows */ #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ +#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -331,6 +333,7 @@ struct pktgen_dev { __be32 cur_daddr; __u16 cur_udp_dst; __u16 cur_udp_src; + __u16 cur_queue_map; __u32 cur_pkt_size; __u8 hh[14]; @@ -358,6 +361,10 @@ struct pktgen_dev { unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ unsigned curfl; /* current sequenced flow (state)*/ + + u16 queue_map_min; + u16 queue_map_max; + #ifdef CONFIG_XFRM __u8 ipsmode; /* IPSEC mode (config) */ __u8 ipsproto; /* IPSEC type (config) */ @@ -378,7 +385,6 @@ struct pktgen_thread { struct list_head th_list; struct task_struct *tsk; char result[512]; - u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ /* Field for thread to receive "posted" events terminate, stop ifs etc. */ @@ -593,11 +599,11 @@ static const struct file_operations pktgen_fops = { static int pktgen_if_show(struct seq_file *seq, void *v) { - int i; struct pktgen_dev *pkt_dev = seq->private; __u64 sa; __u64 stopped; __u64 now = getCurUs(); + DECLARE_MAC_BUF(mac); seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", @@ -613,6 +619,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); + seq_printf(seq, + " queue_map_min: %u queue_map_max: %u\n", + pkt_dev->queue_map_min, + pkt_dev->queue_map_max); + if (pkt_dev->flags & F_IPV6) { char b1[128], b2[128], b3[128]; fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); @@ -637,19 +648,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " src_mac: "); - if (is_zero_ether_addr(pkt_dev->src_mac)) - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], - i == 5 ? " " : ":"); - else - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], - i == 5 ? " " : ":"); + seq_printf(seq, "%s ", + print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ? + pkt_dev->odev->dev_addr : pkt_dev->src_mac)); seq_printf(seq, "dst_mac: "); - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], - i == 5 ? "\n" : ":"); + seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac)); seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", @@ -709,6 +713,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_MPLS_RND) seq_printf(seq, "MPLS_RND "); + if (pkt_dev->flags & F_QUEUE_MAP_RND) + seq_printf(seq, "QUEUE_MAP_RND "); + if (pkt_dev->cflows) { if (pkt_dev->flags & F_FLOW_SEQ) seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ @@ -764,6 +771,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); + seq_printf(seq, " cur_queue_map: %u\n", pkt_dev->cur_queue_map); + seq_printf(seq, " flows: %u\n", pkt_dev->nflows); if (pkt_dev->result[0]) @@ -1215,6 +1224,11 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "FLOW_SEQ") == 0) pkt_dev->flags |= F_FLOW_SEQ; + else if (strcmp(f, "QUEUE_MAP_RND") == 0) + pkt_dev->flags |= F_QUEUE_MAP_RND; + + else if (strcmp(f, "!QUEUE_MAP_RND") == 0) + pkt_dev->flags &= ~F_QUEUE_MAP_RND; #ifdef CONFIG_XFRM else if (strcmp(f, "IPSEC") == 0) pkt_dev->flags |= F_IPSEC_ON; @@ -1526,16 +1540,40 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + if (!strcmp(name, "queue_map_min")) { + len = num_arg(&user_buffer[i], 5, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->queue_map_min = value; + sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min); + return count; + } + + if (!strcmp(name, "queue_map_max")) { + len = num_arg(&user_buffer[i], 5, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->queue_map_max = value; + sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max); + return count; + } + if (!strcmp(name, "mpls")) { - unsigned n, offset; + unsigned n, cnt; + len = get_labels(&user_buffer[i], pkt_dev); - if (len < 0) { return len; } + if (len < 0) + return len; i += len; - offset = sprintf(pg_result, "OK: mpls="); + cnt = sprintf(pg_result, "OK: mpls="); for (n = 0; n < pkt_dev->nr_labels; n++) - offset += sprintf(pg_result + offset, - "%08x%s", ntohl(pkt_dev->labels[n]), - n == pkt_dev->nr_labels-1 ? "" : ","); + cnt += sprintf(pg_result + cnt, + "%08x%s", ntohl(pkt_dev->labels[n]), + n == pkt_dev->nr_labels-1 ? "" : ","); if (pkt_dev->nr_labels && pkt_dev->vlan_id != 0xffff) { pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */ @@ -1718,9 +1756,6 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); - seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->tsk->comm, t->max_before_softirq); - seq_printf(seq, "Running: "); if_lock(t); @@ -1753,7 +1788,6 @@ static ssize_t pktgen_thread_write(struct file *file, int i = 0, max, len, ret; char name[40]; char *pg_result; - unsigned long value = 0; if (count < 1) { // sprintf(pg_result, "Wrong command format"); @@ -1827,12 +1861,8 @@ static ssize_t pktgen_thread_write(struct file *file, } if (!strcmp(name, "max_before_softirq")) { - len = num_arg(&user_buffer[i], 10, &value); - mutex_lock(&pktgen_thread_lock); - t->max_before_softirq = value; - mutex_unlock(&pktgen_thread_lock); + sprintf(pg_result, "OK: Note! max_before_softirq is obsoleted -- Do not use"); ret = count; - sprintf(pg_result, "OK: max_before_softirq=%lu", value); goto out; } @@ -1940,6 +1970,9 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. */ @@ -1970,7 +2003,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) pkt_dev->odev = NULL; } - odev = dev_get_by_name(ifname); + odev = dev_get_by_name(&init_net, ifname); if (!odev) { printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -2111,7 +2144,6 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) if (spin_until_us - now > jiffies_to_usecs(1) + 1) schedule_timeout_interruptible(1); else if (spin_until_us - now > 100) { - do_softirq(); if (!pkt_dev->running) return; if (need_resched()) @@ -2387,6 +2419,20 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_pkt_size = t; } + if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { + t = random32() % + (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; + } else { + t = pkt_dev->cur_queue_map + 1; + if (t > pkt_dev->queue_map_max) + t = pkt_dev->queue_map_min; + } + pkt_dev->cur_queue_map = t; + } + pkt_dev->flows[flow].count++; } @@ -2408,7 +2454,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) spin_lock(&x->lock); iph = ip_hdr(skb); - err = x->mode->output(x, skb); + err = x->outer_mode->output(x, skb); if (err) goto error; err = x->type->output(x, skb); @@ -2557,7 +2603,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); - + skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2686,6 +2732,7 @@ static unsigned int scan_ip6(const char *s, char ip[16]) unsigned int prefixlen = 0; unsigned int suffixlen = 0; __be32 tmp; + char *pos; for (i = 0; i < 16; i++) ip[i] = 0; @@ -2700,12 +2747,9 @@ static unsigned int scan_ip6(const char *s, char ip[16]) } s++; } - { - char *tmp; - u = simple_strtoul(s, &tmp, 16); - i = tmp - s; - } + u = simple_strtoul(s, &pos, 16); + i = pos - s; if (!i) return 0; if (prefixlen == 12 && s[i] == '.') { @@ -2733,11 +2777,9 @@ static unsigned int scan_ip6(const char *s, char ip[16]) len++; } else if (suffixlen != 0) break; - { - char *tmp; - u = simple_strtol(s, &tmp, 16); - i = tmp - s; - } + + u = simple_strtol(s, &pos, 16); + i = pos - s; if (!i) { if (*s) len--; @@ -2898,7 +2940,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); - + skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3341,7 +3383,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) if ((netif_queue_stopped(odev) || (pkt_dev->skb && - netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping))) || + netif_subqueue_stopped(odev, pkt_dev->skb))) || need_resched()) { idle_start = getCurUs(); @@ -3358,7 +3400,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; if (netif_queue_stopped(odev) || - netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { + netif_subqueue_stopped(odev, pkt_dev->skb)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3387,7 +3429,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) netif_tx_lock_bh(odev); if (!netif_queue_stopped(odev) && - !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { + !netif_subqueue_stopped(odev, pkt_dev->skb)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3465,16 +3507,12 @@ static int pktgen_thread_worker(void *arg) struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - u32 max_before_softirq; - u32 tx_since_softirq = 0; BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); - pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid); - - max_before_softirq = t->max_before_softirq; + pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); set_current_state(TASK_INTERRUPTIBLE); @@ -3494,24 +3532,9 @@ static int pktgen_thread_worker(void *arg) __set_current_state(TASK_RUNNING); - if (pkt_dev) { - + if (pkt_dev) pktgen_xmit(pkt_dev); - /* - * We like to stay RUNNING but must also give - * others fair share. - */ - - tx_since_softirq += pkt_dev->last_ok; - - if (tx_since_softirq > max_before_softirq) { - if (local_softirq_pending()) - do_softirq(); - tx_since_softirq = 0; - } - } - if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); @@ -3778,7 +3801,7 @@ static int __init pg_init(void) printk(KERN_INFO "%s", version); - pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); if (!pg_proc_dir) return -ENODEV; pg_proc_dir->owner = THIS_MODULE; @@ -3787,7 +3810,7 @@ static int __init pg_init(void) if (pe == NULL) { printk(KERN_ERR "pktgen: ERROR: cannot create %s " "procfs entry.\n", PGCTRL); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -EINVAL; } @@ -3811,7 +3834,7 @@ static int __init pg_init(void) "all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -ENODEV; } @@ -3838,7 +3861,7 @@ static void __exit pg_cleanup(void) /* Clean up proc file system */ remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); } module_init(pg_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4756d5857ab..e1ba26fb4bf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,6 +35,7 @@ #include <linux/security.h> #include <linux/mutex.h> #include <linux/if_addr.h> +#include <linux/nsproxy.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -74,8 +75,6 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { mutex_unlock(&rtnl_mutex); - if (rtnl && rtnl->sk_receive_queue.qlen) - rtnl->sk_data_ready(rtnl, 0); netdev_run_todo(); } @@ -306,10 +305,13 @@ EXPORT_SYMBOL_GPL(rtnl_link_register); void __rtnl_link_unregister(struct rtnl_link_ops *ops) { struct net_device *dev, *n; + struct net *net; - for_each_netdev_safe(dev, n) { - if (dev->rtnl_link_ops == ops) - ops->dellink(dev); + for_each_net(net) { + for_each_netdev_safe(net, dev, n) { + if (dev->rtnl_link_ops == ops) + ops->dellink(dev); + } } list_del(&ops->list); } @@ -634,7 +636,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); - NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight); NLA_PUT_U8(skb, IFLA_OPERSTATE, netif_running(dev) ? dev->operstate : IF_OPER_DOWN); NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); @@ -694,12 +695,13 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx; int s_idx = cb->args[0]; struct net_device *dev; idx = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -714,7 +716,7 @@ cont: return skb->len; } -static const struct nla_policy ifla_policy[IFLA_MAX+1] = { +const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, @@ -724,6 +726,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, [IFLA_LINKMODE] = { .type = NLA_U8 }, + [IFLA_NET_NS_PID] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -731,12 +734,45 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +static struct net *get_net_ns_by_pid(pid_t pid) +{ + struct task_struct *tsk; + struct net *net; + + /* Lookup the network namespace */ + net = ERR_PTR(-ESRCH); + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (tsk) { + struct nsproxy *nsproxy; + nsproxy = task_nsproxy(tsk); + if (nsproxy) + net = get_net(nsproxy->net_ns); + } + rcu_read_unlock(); + return net; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { int send_addr_notify = 0; int err; + if (tb[IFLA_NET_NS_PID]) { + struct net *net; + net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + if (IS_ERR(net)) { + err = PTR_ERR(net); + goto errout; + } + err = dev_change_net_namespace(dev, net, ifname); + put_net(net); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; @@ -834,9 +870,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); @@ -862,6 +895,7 @@ errout: static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct net_device *dev; int err; @@ -880,9 +914,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); + dev = dev_get_by_name(net, ifname); else goto errout; @@ -908,6 +942,7 @@ errout: static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -924,9 +959,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(ifname); + dev = __dev_get_by_name(net, ifname); else return -EINVAL; @@ -941,8 +976,52 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return 0; } +struct net_device *rtnl_create_link(struct net *net, char *ifname, + const struct rtnl_link_ops *ops, struct nlattr *tb[]) +{ + int err; + struct net_device *dev; + + err = -ENOMEM; + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + goto err; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + + dev->nd_net = net; + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_ADDRESS]) + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); + if (tb[IFLA_BROADCAST]) + memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), + nla_len(tb[IFLA_BROADCAST])); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + return dev; + +err_free: + free_netdev(dev); +err: + return ERR_PTR(err); +} + static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -966,9 +1045,9 @@ replay: ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (ifname[0]) - dev = __dev_get_by_name(ifname); + dev = __dev_get_by_name(net, ifname); else dev = NULL; @@ -1053,40 +1132,17 @@ replay: if (!ifname[0]) snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - dev = alloc_netdev(ops->priv_size, ifname, ops->setup); - if (!dev) - return -ENOMEM; - - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto err_free; - } - dev->rtnl_link_ops = ops; - - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_ADDRESS]) - memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), - nla_len(tb[IFLA_ADDRESS])); - if (tb[IFLA_BROADCAST]) - memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), - nla_len(tb[IFLA_BROADCAST])); - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - if (tb[IFLA_LINKMODE]) - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); - - if (ops->newlink) + + dev = rtnl_create_link(net, ifname, ops, tb); + + if (IS_ERR(dev)) + err = PTR_ERR(dev); + else if (ops->newlink) err = ops->newlink(dev, tb, data); else err = register_netdevice(dev); -err_free: - if (err < 0) + + if (err < 0 && !IS_ERR(dev)) free_netdev(dev); return err; } @@ -1094,6 +1150,7 @@ err_free: static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; @@ -1106,7 +1163,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); if (dev == NULL) return -ENODEV; } else @@ -1255,22 +1312,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return doit(skb, nlh, (void *)&rta_buf[0]); } -static void rtnetlink_rcv(struct sock *sk, int len) +static void rtnetlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&rtnl_mutex); - netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg); - mutex_unlock(&rtnl_mutex); - - netdev_run_todo(); - } while (qlen); + rtnl_lock(); + netlink_rcv_skb(skb, &rtnetlink_rcv_msg); + rtnl_unlock(); } static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); @@ -1308,8 +1363,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, - &rtnl_mutex, THIS_MODULE); + rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); @@ -1335,3 +1390,5 @@ EXPORT_SYMBOL(rtnl_unlock); EXPORT_SYMBOL(rtnl_unicast); EXPORT_SYMBOL(rtnl_notify); EXPORT_SYMBOL(rtnl_set_sk_err); +EXPORT_SYMBOL(rtnl_create_link); +EXPORT_SYMBOL(ifla_policy); diff --git a/net/core/scm.c b/net/core/scm.c index 44c4ec2c876..100ba6d9d47 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,8 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> +#include <linux/pid.h> +#include <linux/nsproxy.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -42,7 +44,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) { - if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && ((creds->uid == current->uid || creds->uid == current->euid || creds->uid == current->suid) || capable(CAP_SETUID)) && ((creds->gid == current->gid || creds->gid == current->egid || @@ -167,7 +169,8 @@ error: int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { - struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control; + struct cmsghdr __user *cm + = (__force struct cmsghdr __user *)msg->msg_control; struct cmsghdr cmhdr; int cmlen = CMSG_LEN(len); int err; @@ -202,7 +205,8 @@ out: void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { - struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control; + struct cmsghdr __user *cm + = (__force struct cmsghdr __user*)msg->msg_control; int fdmax = 0; int fdnum = scm->fp->count; @@ -222,7 +226,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) if (fdnum < fdmax) fdmax = fdnum; - for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) + for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; + i++, cmfptr++) { int new_fd; err = security_file_receive(fp[i]); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 35021eb3ed0..573e1724019 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -362,6 +362,90 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) +{ + new->tstamp = old->tstamp; + new->dev = old->dev; + new->transport_header = old->transport_header; + new->network_header = old->network_header; + new->mac_header = old->mac_header; + new->dst = dst_clone(old->dst); +#ifdef CONFIG_INET + new->sp = secpath_get(old->sp); +#endif + memcpy(new->cb, old->cb, sizeof(old->cb)); + new->csum_start = old->csum_start; + new->csum_offset = old->csum_offset; + new->local_df = old->local_df; + new->pkt_type = old->pkt_type; + new->ip_summed = old->ip_summed; + skb_copy_queue_mapping(new, old); + new->priority = old->priority; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + new->ipvs_property = old->ipvs_property; +#endif + new->protocol = old->protocol; + new->mark = old->mark; + __nf_copy(new, old); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + new->nf_trace = old->nf_trace; +#endif +#ifdef CONFIG_NET_SCHED + new->tc_index = old->tc_index; +#ifdef CONFIG_NET_CLS_ACT + new->tc_verd = old->tc_verd; +#endif +#endif + skb_copy_secmark(new, old); +} + +static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) +{ +#define C(x) n->x = skb->x + + n->next = n->prev = NULL; + n->sk = NULL; + __copy_skb_header(n, skb); + + C(len); + C(data_len); + C(mac_len); + n->cloned = 1; + n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; + n->nohdr = 0; + n->destructor = NULL; + C(truesize); + atomic_set(&n->users, 1); + C(head); + C(data); + C(tail); + C(end); + + atomic_inc(&(skb_shinfo(skb)->dataref)); + skb->cloned = 1; + + return n; +#undef C +} + +/** + * skb_morph - morph one skb into another + * @dst: the skb to receive the contents + * @src: the skb to supply the contents + * + * This is identical to skb_clone except that the target skb is + * supplied by the user. + * + * The target skb is returned upon exit. + */ +struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) +{ + skb_release_data(dst); + return __skb_clone(dst, src); +} +EXPORT_SYMBOL_GPL(skb_morph); + /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone @@ -393,66 +477,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->fclone = SKB_FCLONE_UNAVAILABLE; } -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->sk = NULL; - C(tstamp); - C(dev); - C(transport_header); - C(network_header); - C(mac_header); - C(dst); - dst_clone(skb->dst); - C(sp); -#ifdef CONFIG_INET - secpath_get(skb->sp); -#endif - memcpy(n->cb, skb->cb, sizeof(skb->cb)); - C(len); - C(data_len); - C(mac_len); - C(csum); - C(local_df); - n->cloned = 1; - n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; - n->nohdr = 0; - C(pkt_type); - C(ip_summed); - skb_copy_queue_mapping(n, skb); - C(priority); -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - C(ipvs_property); -#endif - C(protocol); - n->destructor = NULL; - C(mark); - __nf_copy(n, skb); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - C(nf_trace); -#endif -#ifdef CONFIG_NET_SCHED - C(tc_index); -#ifdef CONFIG_NET_CLS_ACT - n->tc_verd = SET_TC_VERD(skb->tc_verd,0); - n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); - n->tc_verd = CLR_TC_MUNGED(n->tc_verd); - C(iif); -#endif -#endif - skb_copy_secmark(n, skb); - C(truesize); - atomic_set(&n->users, 1); - C(head); - C(data); - C(tail); - C(end); - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; - - return n; + return __skb_clone(n, skb); } static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) @@ -463,47 +488,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) */ unsigned long offset = new->data - old->data; #endif - new->sk = NULL; - new->dev = old->dev; - skb_copy_queue_mapping(new, old); - new->priority = old->priority; - new->protocol = old->protocol; - new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET - new->sp = secpath_get(old->sp); -#endif - new->transport_header = old->transport_header; - new->network_header = old->network_header; - new->mac_header = old->mac_header; + + __copy_skb_header(new, old); + #ifndef NET_SKBUFF_DATA_USES_OFFSET /* {transport,network,mac}_header are relative to skb->head */ new->transport_header += offset; new->network_header += offset; new->mac_header += offset; #endif - memcpy(new->cb, old->cb, sizeof(old->cb)); - new->local_df = old->local_df; - new->fclone = SKB_FCLONE_UNAVAILABLE; - new->pkt_type = old->pkt_type; - new->tstamp = old->tstamp; - new->destructor = NULL; - new->mark = old->mark; - __nf_copy(new, old); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - new->nf_trace = old->nf_trace; -#endif -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - new->ipvs_property = old->ipvs_property; -#endif -#ifdef CONFIG_NET_SCHED -#ifdef CONFIG_NET_CLS_ACT - new->tc_verd = old->tc_verd; -#endif - new->tc_index = old->tc_index; -#endif - skb_copy_secmark(new, old); - atomic_set(&new->users, 1); skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; @@ -545,8 +538,6 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) skb_reserve(n, headerlen); /* Set the tail pointer and length */ skb_put(n, skb->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) BUG(); @@ -589,8 +580,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) skb_put(n, skb_headlen(skb)); /* Copy the bytes */ skb_copy_from_linear_data(skb, n->data, n->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; n->truesize += skb->data_len; n->data_len = skb->data_len; @@ -686,6 +675,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->transport_header += off; skb->network_header += off; skb->mac_header += off; + skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; @@ -734,9 +724,6 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) * * You must pass %GFP_ATOMIC as the allocation priority if this function * is called from an interrupt. - * - * BUG ALERT: ip_summed is not copied. Why does this work? Is it used - * only by netfilter in the cases when checksum is recalculated? --ANK */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, @@ -749,7 +736,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, gfp_mask); int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; - int off = 0; + int off; if (!n) return NULL; @@ -773,12 +760,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, copy_skb_header(n, skb); -#ifdef NET_SKBUFF_DATA_USES_OFFSET off = newheadroom - oldheadroom; -#endif + n->csum_start += off; +#ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; n->mac_header += off; +#endif return n; } @@ -2050,9 +2038,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > 0) { if (copy > len) copy = len; - sg[elt].page = virt_to_page(skb->data + offset); - sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; - sg[elt].length = copy; + sg_set_buf(sg, skb->data + offset, copy); elt++; if ((len -= copy) == 0) return elt; @@ -2070,9 +2056,8 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > len) copy = len; - sg[elt].page = frag->page; - sg[elt].offset = frag->page_offset+offset-start; - sg[elt].length = copy; + sg_set_page(&sg[elt], frag->page, copy, + frag->page_offset+offset-start); elt++; if (!(len -= copy)) return elt; diff --git a/net/core/sock.c b/net/core/sock.c index 190de61cd64..bba9949681f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -119,6 +119,7 @@ #include <linux/netdevice.h> #include <net/protocol.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/request_sock.h> #include <net/sock.h> #include <net/xfrm.h> @@ -231,7 +232,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) warned++; printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " "tries to set negative timeout\n", - current->comm, current->pid); + current->comm, task_pid_nr(current)); return 0; } *timeo_p = MAX_SCHEDULE_TIMEOUT; @@ -366,6 +367,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES + struct net *net = sk->sk_net; char devname[IFNAMSIZ]; int index; @@ -394,7 +396,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) if (devname[0] == '\0') { index = 0; } else { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(net, devname); ret = -ENODEV; if (!dev) @@ -426,7 +428,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk=sock->sk; - struct sk_filter *filter; int val; int valbool; struct linger ling; @@ -650,16 +651,7 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); - if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_release(sk, filter); - rcu_read_unlock_bh(); - break; - } - rcu_read_unlock_bh(); - ret = -ENONET; + ret = sk_detach_filter(sk); break; case SO_PASSSEC: @@ -867,12 +859,13 @@ static inline void sock_lock_init(struct sock *sk) /** * sk_alloc - All socket objects are allocated here + * @net: the applicable net namespace * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, gfp_t priority, +struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -893,6 +886,7 @@ struct sock *sk_alloc(int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); + sk->sk_net = get_net(net); } if (security_sk_alloc(sk, family, priority)) @@ -921,7 +915,7 @@ void sk_free(struct sock *sk) filter = rcu_dereference(sk->sk_filter); if (filter) { - sk_filter_release(sk, filter); + sk_filter_uncharge(sk, filter); rcu_assign_pointer(sk->sk_filter, NULL); } @@ -932,6 +926,7 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + put_net(sk->sk_net); if (sk->sk_prot_creator->slab != NULL) kmem_cache_free(sk->sk_prot_creator->slab, sk); else @@ -941,7 +936,7 @@ void sk_free(struct sock *sk) struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { - struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); + struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); if (newsk != NULL) { struct sk_filter *filter; @@ -1585,9 +1580,9 @@ void fastcall lock_sock_nested(struct sock *sk, int subclass) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (sk->sk_lock.owner) + if (sk->sk_lock.owned) __lock_sock(sk); - sk->sk_lock.owner = (void *)1; + sk->sk_lock.owned = 1; spin_unlock(&sk->sk_lock.slock); /* * The sk_lock has mutex_lock() semantics here: @@ -1608,7 +1603,7 @@ void fastcall release_sock(struct sock *sk) spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); - sk->sk_lock.owner = NULL; + sk->sk_lock.owned = 0; if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); @@ -1654,7 +1649,6 @@ void sock_enable_timestamp(struct sock *sk) net_enable_timestamp(); } } -EXPORT_SYMBOL(sock_enable_timestamp); /* * Get a socket option on an socket. @@ -1973,7 +1967,7 @@ static const struct file_operations proto_seq_fops = { static int __init proto_init(void) { /* register /proc/net/protocols */ - return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; + return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; } subsys_initcall(proto_init); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 6d5ea976204..113cc728dc3 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -9,25 +9,12 @@ #include <linux/sysctl.h> #include <linux/module.h> #include <linux/socket.h> +#include <linux/netdevice.h> #include <net/sock.h> +#include <net/xfrm.h> #ifdef CONFIG_SYSCTL -extern int netdev_max_backlog; -extern int weight_p; - -extern __u32 sysctl_wmem_max; -extern __u32 sysctl_rmem_max; - -extern int sysctl_core_destroy_delay; - -#ifdef CONFIG_XFRM -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; -extern int sysctl_xfrm_larval_drop; -extern u32 sysctl_xfrm_acq_expires; -#endif - ctl_table core_table[] = { #ifdef CONFIG_NET { diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 7ac775f9a64..83378f379f7 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -69,21 +69,20 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; /* Figure out how many options do we need to represent the ackvec */ - const u16 nr_opts = (av->dccpav_vec_len + - DCCP_MAX_ACKVEC_OPT_LEN - 1) / - DCCP_MAX_ACKVEC_OPT_LEN; + const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len, + DCCP_MAX_ACKVEC_OPT_LEN); u16 len = av->dccpav_vec_len + 2 * nr_opts, i; - struct timeval now; u32 elapsed_time; const unsigned char *tail, *from; unsigned char *to; struct dccp_ackvec_record *avr; + suseconds_t delta; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) return -1; - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; + delta = ktime_us_delta(ktime_get_real(), av->dccpav_time); + elapsed_time = delta / 10; if (elapsed_time != 0 && dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) @@ -159,8 +158,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = UINT48_MAX + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; - av->dccpav_time.tv_sec = 0; - av->dccpav_time.tv_usec = 0; + av->dccpav_time = ktime_set(0, 0); av->dccpav_vec_len = 0; INIT_LIST_HEAD(&av->dccpav_records); } @@ -321,7 +319,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, } av->dccpav_buf_ackno = ackno; - dccp_timestamp(sk, &av->dccpav_time); + av->dccpav_time = ktime_get_real(); out: return 0; diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 96504a3b16e..9ef0737043e 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -12,8 +12,8 @@ */ #include <linux/compiler.h> +#include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> #include <linux/types.h> /* Read about the ECN nonce to see why it is 253 */ @@ -52,7 +52,7 @@ struct dccp_ackvec { u64 dccpav_buf_ackno; struct list_head dccpav_records; - struct timeval dccpav_time; + ktime_t dccpav_time; u16 dccpav_buf_head; u16 dccpav_vec_len; u8 dccpav_buf_nonce; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index d29b88fe723..d694656b880 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -59,7 +59,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) pipe++; /* packets are sent sequentially */ - BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); + BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq, + prev->ccid2s_seq ) >= 0); BUG_ON(time_before(seqp->ccid2s_sent, prev->ccid2s_sent)); @@ -83,8 +84,7 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) #define ccid2_hc_tx_check_sanity(hctx) #endif -static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, - gfp_t gfp) +static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) { struct ccid2_seq *seqp; int i; @@ -95,16 +95,16 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, return -ENOMEM; /* allocate buffer and initialize linked list */ - seqp = kmalloc(sizeof(*seqp) * num, gfp); + seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any()); if (seqp == NULL) return -ENOMEM; - for (i = 0; i < (num - 1); i++) { + for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) { seqp[i].ccid2s_next = &seqp[i + 1]; seqp[i + 1].ccid2s_prev = &seqp[i]; } - seqp[num - 1].ccid2s_next = seqp; - seqp->ccid2s_prev = &seqp[num - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp; + seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; /* This is the first allocation. Initiate the head and tail. */ if (hctx->ccid2hctx_seqbufc == 0) @@ -114,8 +114,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, hctx->ccid2hctx_seqh->ccid2s_next = seqp; seqp->ccid2s_prev = hctx->ccid2hctx_seqh; - hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1]; - seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt; + hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt; } /* store the original pointer to the buffer so we can free it */ @@ -127,19 +127,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { - struct ccid2_hc_tx_sock *hctx; - - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case 0: /* XXX data packets from userland come through like this */ - case DCCP_PKT_DATA: - case DCCP_PKT_DATAACK: - break; - /* No congestion control on other packets */ - default: - return 0; - } - - hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe, hctx->ccid2hctx_cwnd); @@ -180,16 +168,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val) dp->dccps_l_ack_ratio = val; } -static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) +static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val) { - if (val == 0) - val = 1; - /* XXX do we need to change ack ratio? */ - ccid2_pr_debug("change cwnd to %d\n", val); - - BUG_ON(val < 1); - hctx->ccid2hctx_cwnd = val; + hctx->ccid2hctx_cwnd = val? : 1; + ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd); } static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) @@ -295,12 +278,11 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) next = hctx->ccid2hctx_seqh->ccid2s_next; /* check if we need to alloc more space */ if (next == hctx->ccid2hctx_seqt) { - int rc; - - ccid2_pr_debug("allocating more space in history\n"); - rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, gfp_any()); - BUG_ON(rc); /* XXX what do we do? */ - + if (ccid2_hc_tx_alloc_seq(hctx)) { + DCCP_CRIT("packet history - out of memory!"); + /* FIXME: find a more graceful way to bail out */ + return; + } next = hctx->ccid2hctx_seqh->ccid2s_next; BUG_ON(next == hctx->ccid2hctx_seqt); } @@ -581,8 +563,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid2hctx_rpseq = seqno; } else { /* check if packet is consecutive */ - if ((hctx->ccid2hctx_rpseq + 1) == seqno) - hctx->ccid2hctx_rpseq++; + if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1) + hctx->ccid2hctx_rpseq = seqno; /* it's a later packet */ else if (after48(seqno, hctx->ccid2hctx_rpseq)) { hctx->ccid2hctx_rpdupack++; @@ -768,20 +750,16 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) */ hctx->ccid2hctx_ssthresh = ~0; hctx->ccid2hctx_numdupack = 3; - hctx->ccid2hctx_seqbufc = 0; /* XXX init ~ to window size... */ - if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0) + if (ccid2_hc_tx_alloc_seq(hctx)) return -ENOMEM; - hctx->ccid2hctx_sent = 0; hctx->ccid2hctx_rto = 3 * HZ; ccid2_change_srtt(hctx, -1); hctx->ccid2hctx_rttvar = -1; - hctx->ccid2hctx_lastrtt = 0; hctx->ccid2hctx_rpdupack = -1; hctx->ccid2hctx_last_cong = jiffies; - hctx->ccid2hctx_high_ack = 0; hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; @@ -835,7 +813,7 @@ static struct ccid_operations ccid2 = { }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -module_param(ccid2_debug, int, 0444); +module_param(ccid2_debug, bool, 0444); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index ebd79499c85..d9daa534c9b 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -50,7 +50,7 @@ struct ccid2_seq { * @ccid2hctx_rpdupack - dupacks since rpseq */ struct ccid2_hc_tx_sock { - int ccid2hctx_cwnd; + u32 ccid2hctx_cwnd; int ccid2hctx_ssacks; int ccid2hctx_acks; unsigned int ccid2hctx_ssthresh; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index e91c2b9dc27..19b33586333 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -40,6 +40,8 @@ #include "lib/tfrc.h" #include "ccid3.h" +#include <asm/unaligned.h> + #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static int ccid3_debug; #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) @@ -113,27 +115,24 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6)); } -/* - * Update X by - * If (p > 0) - * X_calc = calcX(s, R, p); - * X = max(min(X_calc, 2 * X_recv), s / t_mbi); - * Else - * If (now - tld >= R) - * X = max(min(2 * X, 2 * X_recv), s / R); - * tld = now; + +/** + * ccid3_hc_tx_update_x - Update allowed sending rate X + * @stamp: most recent time if available - can be left NULL. + * This function tracks draft rfc3448bis, check there for latest details. * * Note: X and X_recv are both stored in units of 64 * bytes/second, to support * fine-grained resolution of sending rates. This requires scaling by 2^6 * throughout the code. Only X_calc is unscaled (in bytes/second). * */ -static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) +static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; const __u64 old_x = hctx->ccid3hctx_x; + ktime_t now = stamp? *stamp : ktime_get_real(); /* * Handle IDLE periods: do not reduce below RFC3390 initial sending rate @@ -153,14 +152,14 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) (((__u64)hctx->ccid3hctx_s) << 6) / TFRC_T_MBI); - } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) - - (suseconds_t)hctx->ccid3hctx_rtt >= 0) { + } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld) + - (s64)hctx->ccid3hctx_rtt >= 0) { hctx->ccid3hctx_x = max(min(2 * hctx->ccid3hctx_x, min_rate), scaled_div(((__u64)hctx->ccid3hctx_s) << 6, hctx->ccid3hctx_rtt)); - hctx->ccid3hctx_t_ld = *now; + hctx->ccid3hctx_t_ld = now; } if (hctx->ccid3hctx_x != old_x) { @@ -214,7 +213,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); @@ -265,15 +263,12 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) max(hctx->ccid3hctx_x_recv / 2, (((__u64)hctx->ccid3hctx_s) << 6) / (2 * TFRC_T_MBI)); - - if (hctx->ccid3hctx_p == 0) - dccp_timestamp(sk, &now); } else { hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; hctx->ccid3hctx_x_recv <<= 4; } /* Now recalculate X [RFC 3448, 4.3, step (4)] */ - ccid3_hc_tx_update_x(sk, &now); + ccid3_hc_tx_update_x(sk, NULL); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) @@ -309,8 +304,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) ktime_t now = ktime_get_real(); s64 delay; - BUG_ON(hctx == NULL); - /* * This function is called only for Data and DataAck packets. Sending * zero-sized Data(Ack)s is theoretically possible, but for congestion @@ -341,7 +334,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = ktime_to_timeval(now); + hctx->ccid3hctx_t_ld = now; } else { /* Sender does not have RTT sample: X = MSS/second */ hctx->ccid3hctx_x = dp->dccps_mss_cache; @@ -388,11 +381,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; struct dccp_tx_hist_entry *packet; - BUG_ON(hctx == NULL); - ccid3_hc_tx_update_s(hctx, len); packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); @@ -402,8 +392,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, } dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet); - dccp_timestamp(sk, &now); - packet->dccphtx_tstamp = now; + packet->dccphtx_tstamp = ktime_get_real(); packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; @@ -414,12 +403,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; - struct timeval now; + ktime_t now; unsigned long t_nfb; u32 pinv, r_sample; - BUG_ON(hctx == NULL); - /* we are only interested in ACKs */ if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) @@ -452,13 +439,12 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) else /* can not exceed 100% */ hctx->ccid3hctx_p = 1000000 / pinv; - dccp_timestamp(sk, &now); - + now = ktime_get_real(); /* * Calculate new round trip sample as per [RFC 3448, 4.3] by * R_sample = (now - t_recvdata) - t_elapsed */ - r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp); + r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp)); /* * Update RTT estimate by @@ -560,8 +546,7 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; - - BUG_ON(hctx == NULL); + __be32 opt_val; opt_recv = &hctx->ccid3hctx_options_received; @@ -581,8 +566,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_loss_event_rate = - ntohl(*(__be32 *)value); + opt_val = get_unaligned((__be32 *)value); + opt_recv->ccid3or_loss_event_rate = ntohl(opt_val); ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_event_rate); @@ -603,8 +588,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_receive_rate = - ntohl(*(__be32 *)value); + opt_val = get_unaligned((__be32 *)value); + opt_recv->ccid3or_receive_rate = ntohl(opt_val); ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_receive_rate); @@ -619,8 +604,6 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); - hctx->ccid3hctx_s = 0; - hctx->ccid3hctx_rtt = 0; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); @@ -636,8 +619,6 @@ static void ccid3_hc_tx_exit(struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - BUG_ON(hctx == NULL); - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); @@ -647,14 +628,13 @@ static void ccid3_hc_tx_exit(struct sock *sk) static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hctx; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return; - BUG_ON(hctx == NULL); - + hctx = ccid3_hc_tx_sk(sk); info->tcpi_rto = hctx->ccid3hctx_t_rto; info->tcpi_rtt = hctx->ccid3hctx_rtt; } @@ -662,13 +642,14 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const struct ccid3_hc_tx_sock *hctx; const void *val; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return -EINVAL; + hctx = ccid3_hc_tx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(hctx->ccid3hctx_tfrc)) @@ -729,20 +710,20 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_rx_hist_entry *packet; - struct timeval now; + ktime_t now; suseconds_t delta; ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); - dccp_timestamp(sk, &now); + now = ktime_get_real(); switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; case TFRC_RSTATE_DATA: - delta = timeval_delta(&now, - &hcrx->ccid3hcrx_tstamp_last_feedback); + delta = ktime_us_delta(now, + hcrx->ccid3hcrx_tstamp_last_feedback); DCCP_BUG_ON(delta < 0); hcrx->ccid3hcrx_x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); @@ -764,7 +745,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_bytes_recv = 0; /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */ - delta = timeval_delta(&now, &packet->dccphrx_tstamp); + delta = ktime_us_delta(now, packet->dccphrx_tstamp); DCCP_BUG_ON(delta < 0); hcrx->ccid3hcrx_elapsed_time = delta / 10; @@ -782,14 +763,13 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; __be32 x_recv, pinv; - BUG_ON(hcrx == NULL); - if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; + hcrx = ccid3_hc_rx_sk(sk); DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; if (dccp_packet_without_ack(skb)) @@ -839,7 +819,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, dccp_li_update_li(sk, &hcrx->ccid3hcrx_li_hist, &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_tstamp_last_feedback, + hcrx->ccid3hcrx_tstamp_last_feedback, hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_bytes_recv, hcrx->ccid3hcrx_x_recv, @@ -876,11 +856,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; - struct timeval now; u32 p_prev, r_sample, rtt_prev; int loss, payload_size; - - BUG_ON(hcrx == NULL); + ktime_t now; opt_recv = &dccp_sk(sk)->dccps_options_received; @@ -891,9 +869,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_DATAACK: if (opt_recv->dccpor_timestamp_echo == 0) break; + r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo; rtt_prev = hcrx->ccid3hcrx_rtt; - dccp_timestamp(sk, &now); - r_sample = dccp_sample_rtt(sk, &now, NULL); + r_sample = dccp_sample_rtt(sk, 10 * r_sample); if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) hcrx->ccid3hcrx_rtt = r_sample; @@ -912,7 +890,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { DCCP_WARN("%s(%p), Not enough mem to add rx packet " @@ -941,9 +919,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (loss) break; - dccp_timestamp(sk, &now); - if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) - - (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) { + now = ktime_get_real(); + if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - + (s64)hcrx->ccid3hcrx_rtt) >= 0) { hcrx->ccid3hcrx_tstamp_last_ack = now; ccid3_hc_rx_send_feedback(sk); } @@ -984,10 +962,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); - hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; - hcrx->ccid3hcrx_s = 0; - hcrx->ccid3hcrx_rtt = 0; + hcrx->ccid3hcrx_tstamp_last_feedback = + hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real(); return 0; } @@ -995,8 +971,6 @@ static void ccid3_hc_rx_exit(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - BUG_ON(hcrx == NULL); - ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); /* Empty packet history */ @@ -1008,14 +982,13 @@ static void ccid3_hc_rx_exit(struct sock *sk) static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return; - BUG_ON(hcrx == NULL); - + hcrx = ccid3_hc_rx_sk(sk); info->tcpi_ca_state = hcrx->ccid3hcrx_state; info->tcpi_options |= TCPI_OPT_TIMESTAMPS; info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; @@ -1024,13 +997,14 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; const void *val; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return -EINVAL; + hcrx = ccid3_hc_rx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_RX_INFO: if (len < sizeof(hcrx->ccid3hcrx_tfrc)) @@ -1071,7 +1045,7 @@ static struct ccid_operations ccid3 = { }; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -module_param(ccid3_debug, int, 0444); +module_param(ccid3_debug, bool, 0444); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 51d4b804e33..0cdc982cfe4 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -38,7 +38,6 @@ #include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> #include <linux/types.h> #include <linux/tfrc.h> #include "../ccid.h" @@ -111,13 +110,20 @@ struct ccid3_hc_tx_sock { u8 ccid3hctx_idle; ktime_t ccid3hctx_t_last_win_count; struct timer_list ccid3hctx_no_feedback_timer; - struct timeval ccid3hctx_t_ld; + ktime_t ccid3hctx_t_ld; ktime_t ccid3hctx_t_nom; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; }; +static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) +{ + struct ccid3_hc_tx_sock *hctx = ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); + BUG_ON(hctx == NULL); + return hctx; +} + /* TFRC receiver states */ enum ccid3_hc_rx_states { TFRC_RSTATE_NO_DATA = 1, @@ -153,8 +159,8 @@ struct ccid3_hc_rx_sock { ccid3hcrx_ccval_last_counter:4; enum ccid3_hc_rx_states ccid3hcrx_state:8; u32 ccid3hcrx_bytes_recv; - struct timeval ccid3hcrx_tstamp_last_feedback; - struct timeval ccid3hcrx_tstamp_last_ack; + ktime_t ccid3hcrx_tstamp_last_feedback; + ktime_t ccid3hcrx_tstamp_last_ack; struct list_head ccid3hcrx_hist; struct list_head ccid3hcrx_li_hist; u16 ccid3hcrx_s; @@ -162,14 +168,11 @@ struct ccid3_hc_rx_sock { u32 ccid3hcrx_elapsed_time; }; -static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) -{ - return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); -} - static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) { - return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); + struct ccid3_hc_rx_sock *hcrx = ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); + BUG_ON(hcrx == NULL); + return hcrx; } #endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 174d3f13d93..40ad428a27f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -125,14 +125,14 @@ static int dccp_li_hist_interval_new(struct list_head *list, * returns estimated loss interval in usecs */ static u32 dccp_li_calc_first_li(struct sock *sk, struct list_head *hist_list, - struct timeval *last_feedback, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv) { struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 x_recv, p; suseconds_t rtt, delta; - struct timeval tstamp = { 0, 0 }; + ktime_t tstamp = ktime_set(0, 0); int interval = 0; int win_count = 0; int step = 0; @@ -176,7 +176,7 @@ found: return ~0; } - delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp); DCCP_BUG_ON(delta < 0); rtt = delta * 4 / interval; @@ -196,8 +196,7 @@ found: return ~0; } - dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, last_feedback); + delta = ktime_us_delta(ktime_get_real(), last_feedback); DCCP_BUG_ON(delta <= 0); x_recv = scaled_div32(bytes_recv, delta); @@ -226,7 +225,7 @@ found: void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, - struct timeval *last_feedback, u16 s, u32 bytes_recv, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv, u64 seq_loss, u8 win_loss) { struct dccp_li_hist_entry *head; diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 906c806d6d9..27bee92dae1 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -13,8 +13,8 @@ * any later version. */ +#include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> extern void dccp_li_hist_purge(struct list_head *list); @@ -23,7 +23,7 @@ extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); extern void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, - struct timeval *last_feedback, u16 s, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv, u64 seq_loss, u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 60d00f01539..032bb61c6e3 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -37,9 +37,9 @@ #ifndef _DCCP_PKT_HIST_ #define _DCCP_PKT_HIST_ +#include <linux/ktime.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/time.h> #include "../../dccp.h" @@ -57,7 +57,7 @@ struct dccp_tx_hist_entry { u64 dccphtx_seqno:48, dccphtx_sent:1; u32 dccphtx_rtt; - struct timeval dccphtx_tstamp; + ktime_t dccphtx_tstamp; }; struct dccp_tx_hist { @@ -124,7 +124,7 @@ struct dccp_rx_hist_entry { dccphrx_ccval:4, dccphrx_type:4; u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval dccphrx_tstamp; + ktime_t dccphrx_tstamp; }; struct dccp_rx_hist { @@ -136,7 +136,6 @@ extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const gfp_t prio) @@ -151,7 +150,7 @@ static inline struct dccp_rx_hist_entry * entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - dccp_timestamp(sk, &entry->dccphrx_tstamp); + entry->dccphrx_tstamp = ktime_get_real(); } return entry; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e2d74cd7eee..ee97950d77d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -13,6 +13,7 @@ */ #include <linux/dccp.h> +#include <linux/ktime.h> #include <net/snmp.h> #include <net/sock.h> #include <net/tcp.h> @@ -91,6 +92,7 @@ extern int sysctl_dccp_feat_ack_ratio; extern int sysctl_dccp_feat_send_ack_vector; extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; +extern int sysctl_dccp_sync_ratelimit; /* * 48-bit sequence number arithmetic (signed and unsigned) @@ -208,7 +210,6 @@ extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern void dccp_send_ack(struct sock *sk); -extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); extern void dccp_send_sync(struct sock *sk, const u64 seq, @@ -293,11 +294,12 @@ extern unsigned int dccp_poll(struct file *file, struct socket *sock, extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +extern struct sk_buff *dccp_ctl_make_reset(struct socket *ctl, + struct sk_buff *skb); extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int active); extern int dccp_invalid_packet(struct sk_buff *skb); -extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_history); +extern u32 dccp_sample_rtt(struct sock *sk, long delta); static inline int dccp_bad_service_code(const struct sock *sk, const __be32 service) @@ -309,10 +311,22 @@ static inline int dccp_bad_service_code(const struct sock *sk, return !dccp_list_has_service(dp->dccps_service_list, service); } +/** + * dccp_skb_cb - DCCP per-packet control information + * @dccpd_type: one of %dccp_pkt_type (or unknown) + * @dccpd_ccval: CCVal field (5.1), see e.g. RFC 4342, 8.1 + * @dccpd_reset_code: one of %dccp_reset_codes + * @dccpd_reset_data: Data1..3 fields (depend on @dccpd_reset_code) + * @dccpd_opt_len: total length of all options (5.8) in the packet + * @dccpd_seq: sequence number + * @dccpd_ack_seq: acknowledgment number subheader field value + * This is used for transmission as well as for reception. + */ struct dccp_skb_cb { __u8 dccpd_type:4; __u8 dccpd_ccval:4; - __u8 dccpd_reset_code; + __u8 dccpd_reset_code, + dccpd_reset_data[3]; __u16 dccpd_opt_len; __u64 dccpd_seq; __u64 dccpd_ack_seq; @@ -395,53 +409,14 @@ extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, u32 elapsed_time); +extern u32 dccp_timestamp(void); +extern void dccp_timestamping_init(void); extern int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char option, const void *value, unsigned char len); -extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); - -static inline suseconds_t timeval_usecs(const struct timeval *tv) -{ - return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; -} - -static inline suseconds_t timeval_delta(const struct timeval *large, - const struct timeval *small) -{ - time_t secs = large->tv_sec - small->tv_sec; - suseconds_t usecs = large->tv_usec - small->tv_usec; - - if (usecs < 0) { - secs--; - usecs += USEC_PER_SEC; - } - return secs * USEC_PER_SEC + usecs; -} - -static inline void timeval_add_usecs(struct timeval *tv, - const suseconds_t usecs) -{ - tv->tv_usec += usecs; - while (tv->tv_usec >= USEC_PER_SEC) { - tv->tv_sec++; - tv->tv_usec -= USEC_PER_SEC; - } -} - -static inline void timeval_sub_usecs(struct timeval *tv, - const suseconds_t usecs) -{ - tv->tv_usec -= usecs; - while (tv->tv_usec < 0) { - tv->tv_sec--; - tv->tv_usec += USEC_PER_SEC; - } - DCCP_BUG_ON(tv->tv_sec < 0); -} - #ifdef CONFIG_SYSCTL extern int dccp_sysctl_init(void); extern void dccp_sysctl_exit(void); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 0f3745585a9..d8a3509b26f 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -68,3 +68,4 @@ module_exit(dccp_diag_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCP inet_diag handler"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, DCCPDIAG_GETSOCK); diff --git a/net/dccp/input.c b/net/dccp/input.c index da6ec185ed5..1ce10106282 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -19,6 +19,9 @@ #include "ccid.h" #include "dccp.h" +/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ +int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; + static void dccp_fin(struct sock *sk, struct sk_buff *skb) { sk->sk_shutdown |= RCV_SHUTDOWN; @@ -55,6 +58,42 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) dccp_send_close(sk, 0); } +static u8 dccp_reset_code_convert(const u8 code) +{ + const u8 error_code[] = { + [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ + [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ + [DCCP_RESET_CODE_ABORTED] = ECONNRESET, + + [DCCP_RESET_CODE_NO_CONNECTION] = ECONNREFUSED, + [DCCP_RESET_CODE_CONNECTION_REFUSED] = ECONNREFUSED, + [DCCP_RESET_CODE_TOO_BUSY] = EUSERS, + [DCCP_RESET_CODE_AGGRESSION_PENALTY] = EDQUOT, + + [DCCP_RESET_CODE_PACKET_ERROR] = ENOMSG, + [DCCP_RESET_CODE_BAD_INIT_COOKIE] = EBADR, + [DCCP_RESET_CODE_BAD_SERVICE_CODE] = EBADRQC, + [DCCP_RESET_CODE_OPTION_ERROR] = EILSEQ, + [DCCP_RESET_CODE_MANDATORY_ERROR] = EOPNOTSUPP, + }; + + return code >= DCCP_MAX_RESET_CODES ? 0 : error_code[code]; +} + +static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) +{ + u8 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); + + sk->sk_err = err; + + /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ + dccp_fin(sk, skb); + + if (err && !sock_flag(sk, SOCK_DEAD)) + sk_wake_async(sk, 0, POLL_ERR); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); +} + static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); @@ -68,7 +107,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); struct dccp_sock *dp = dccp_sk(sk); - u64 lswl, lawl; + u64 lswl, lawl, seqno = DCCP_SKB_CB(skb)->dccpd_seq, + ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; /* * Step 5: Prepare sequence numbers for Sync @@ -84,11 +124,9 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) */ if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { - if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - dp->dccps_awl, dp->dccps_awh) && - dccp_delta_seqno(dp->dccps_swl, - DCCP_SKB_CB(skb)->dccpd_seq) >= 0) - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + if (between48(ackno, dp->dccps_awl, dp->dccps_awh) && + dccp_delta_seqno(dp->dccps_swl, seqno) >= 0) + dccp_update_gsr(sk, seqno); else return -1; } @@ -103,9 +141,6 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Update S.GSR, S.SWL, S.SWH * If P.type != Sync, * Update S.GAR - * Otherwise, - * Send Sync packet acknowledging P.seqno - * Drop packet and return */ lswl = dp->dccps_swl; lawl = dp->dccps_awl; @@ -113,35 +148,52 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) if (dh->dccph_type == DCCP_PKT_CLOSEREQ || dh->dccph_type == DCCP_PKT_CLOSE || dh->dccph_type == DCCP_PKT_RESET) { - lswl = dp->dccps_gsr; - dccp_inc_seqno(&lswl); + lswl = ADD48(dp->dccps_gsr, 1); lawl = dp->dccps_gar; } - if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && - (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || - between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - lawl, dp->dccps_awh))) { - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + if (between48(seqno, lswl, dp->dccps_swh) && + (ackno == DCCP_PKT_WITHOUT_ACK_SEQ || + between48(ackno, lawl, dp->dccps_awh))) { + dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && - (DCCP_SKB_CB(skb)->dccpd_ack_seq != - DCCP_PKT_WITHOUT_ACK_SEQ)) - dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; + (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) + dp->dccps_gar = ackno; } else { + unsigned long now = jiffies; + /* + * Step 6: Check sequence numbers + * Otherwise, + * If P.type == Reset, + * Send Sync packet acknowledging S.GSR + * Otherwise, + * Send Sync packet acknowledging P.seqno + * Drop packet and return + * + * These Syncs are rate-limited as per RFC 4340, 7.5.4: + * at most 1 / (dccp_sync_rate_limit * HZ) Syncs per second. + */ + if (time_before(now, (dp->dccps_rate_last + + sysctl_dccp_sync_ratelimit))) + return 0; + DCCP_WARN("DCCP: Step 6 failed for %s packet, " "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " "sending SYNC...\n", dccp_packet_name(dh->dccph_type), - (unsigned long long) lswl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq, + (unsigned long long) lswl, (unsigned long long) seqno, (unsigned long long) dp->dccps_swh, - (DCCP_SKB_CB(skb)->dccpd_ack_seq == - DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", - (unsigned long long) lawl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, + (ackno == DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" + : "exists", + (unsigned long long) lawl, (unsigned long long) ackno, (unsigned long long) dp->dccps_awh); - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); + + dp->dccps_rate_last = now; + + if (dh->dccph_type == DCCP_PKT_RESET) + seqno = dp->dccps_gsr; + dccp_send_sync(sk, seqno, DCCP_PKT_SYNC); return -1; } @@ -175,9 +227,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return - */ - dccp_fin(sk, skb); - dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + */ + dccp_rcv_reset(sk, skb); return 0; case DCCP_PKT_CLOSEREQ: dccp_rcv_closereq(sk, skb); @@ -280,6 +331,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dh->dccph_type == DCCP_PKT_RESPONSE) { const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); + long tstamp = dccp_timestamp(); /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -300,13 +352,10 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dccp_parse_options(sk, skb)) goto out_invalid_packet; - /* Obtain RTT sample from SYN exchange (used by CCID 3) */ - if (dp->dccps_options_received.dccpor_timestamp_echo) { - struct timeval now; - - dccp_timestamp(sk, &now); - dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL); - } + /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ + if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) + dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - + dp->dccps_options_received.dccpor_timestamp_echo)); if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, @@ -507,12 +556,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Drop packet and return */ if (dh->dccph_type == DCCP_PKT_RESET) { - /* - * Queue the equivalent of TCP fin so that dccp_recvmsg - * exits the loop - */ - dccp_fin(sk, skb); - dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + dccp_rcv_reset(sk, skb); return 0; /* * Step 7: Check for unexpected packet types @@ -540,11 +584,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } - if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); - goto discard; - } - switch (sk->sk_state) { case DCCP_CLOSED: dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; @@ -575,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); break; } + } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); + goto discard; } if (!queued) { @@ -587,37 +629,22 @@ discard: EXPORT_SYMBOL_GPL(dccp_rcv_state_process); /** - * dccp_sample_rtt - Sample RTT from packet exchange - * - * @sk: connected dccp_sock - * @t_recv: receive timestamp of packet with timestamp echo - * @t_hist: packet history timestamp or NULL + * dccp_sample_rtt - Validate and finalise computation of RTT sample + * @delta: number of microseconds between packet and acknowledgment + * The routine is kept generic to work in different contexts. It should be + * called immediately when the ACK used for the RTT sample arrives. */ -u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_hist) +u32 dccp_sample_rtt(struct sock *sk, long delta) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_options_received *or = &dp->dccps_options_received; - suseconds_t delta; - - if (t_hist == NULL) { - if (!or->dccpor_timestamp_echo) { - DCCP_WARN("packet without timestamp echo\n"); - return DCCP_SANE_RTT_MAX; - } - timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10); - delta = timeval_usecs(t_recv); - } else - delta = timeval_delta(t_recv, t_hist); - - delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */ + /* dccpor_elapsed_time is either zeroed out or set and > 0 */ + delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10; if (unlikely(delta <= 0)) { - DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta); + DCCP_WARN("unusable RTT sample %ld, using min\n", delta); return DCCP_SANE_RTT_MIN; } - if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) { - DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta); + if (unlikely(delta > DCCP_SANE_RTT_MAX)) { + DCCP_WARN("RTT sample %ld too large, using max\n", delta); return DCCP_SANE_RTT_MAX; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 718f2fa923a..01a6a808bdb 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -241,8 +241,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(skb); - if (sk->sk_state != DCCP_LISTEN && + seq = dccp_hdr_seq(dh); + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_swl, dp->dccps_swh)) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; @@ -381,7 +381,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, { struct inet_request_sock *ireq; struct inet_sock *newinet; - struct dccp_sock *newdp; struct sock *newsk; if (sk_acceptq_is_full(sk)) @@ -396,7 +395,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, sk_setup_caps(newsk, dst); - newdp = dccp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); newinet->daddr = ireq->rmt_addr; @@ -512,17 +510,12 @@ out: static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; const struct iphdr *rxiph; - const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct dst_entry *dst; - u64 seqno = 0; /* Never send a reset in response to a reset. */ - if (rxdh->dccph_type == DCCP_PKT_RESET) + if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) @@ -532,37 +525,14 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) if (dst == NULL) return; - skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); + skb = dccp_ctl_make_reset(dccp_v4_ctl_socket, rxskb); if (skb == NULL) goto out; - /* Reserve space for headers. */ - skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); - skb->dst = dst_clone(dst); - - dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = - DCCP_SKB_CB(rxskb)->dccpd_reset_code; - - /* See "8.3.1. Abnormal Termination" in RFC 4340 */ - if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); - - dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - - dccp_csum_outgoing(skb); rxiph = ip_hdr(rxskb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, - rxiph->daddr); + dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, + rxiph->daddr); + skb->dst = dst_clone(dst); bh_lock_sock(dccp_v4_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, @@ -598,17 +568,14 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; - goto drop; - } + (RTCF_BROADCAST | RTCF_MULTICAST)) + return 0; /* discard, don't send a reset here */ if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; } /* @@ -616,6 +583,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * limitations, they conserve resources and peer is * evidently real one. */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; if (inet_csk_reqsk_queue_is_full(sk)) goto drop; @@ -668,7 +636,6 @@ drop_and_free: reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; return -1; } @@ -828,7 +795,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) dh = dccp_hdr(skb); - DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; dccp_pr_debug("%8.8s " @@ -1070,8 +1037,8 @@ module_exit(dccp_v4_exit); * values directly, Also cover the case where the protocol is not specified, * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP */ -MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); -MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 33, 6); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 0, 6); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b158c661867..62428ff137d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -173,7 +173,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_err_convert(type, code, &err); - seq = DCCP_SKB_CB(skb)->dccpd_seq; + seq = dccp_hdr_seq(dh); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; @@ -301,50 +301,23 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; struct ipv6hdr *rxip6h; - const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct flowi fl; - u64 seqno = 0; - if (rxdh->dccph_type == DCCP_PKT_RESET) + if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (!ipv6_unicast_destination(rxskb)) return; - skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); + skb = dccp_ctl_make_reset(dccp_v6_ctl_socket, rxskb); if (skb == NULL) return; - skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); - - dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); - - /* Swap the send and the receive. */ - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = - DCCP_SKB_CB(rxskb)->dccpd_reset_code; - - /* See "8.3.1. Abnormal Termination" in RFC 4340 */ - if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); - - dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - - dccp_csum_outgoing(skb); rxip6h = ipv6_hdr(rxskb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, - &rxip6h->daddr); + dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, + &rxip6h->daddr); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); @@ -352,8 +325,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) fl.proto = IPPROTO_DCCP; fl.oif = inet6_iif(rxskb); - fl.fl_ip_dport = dh->dccph_dport; - fl.fl_ip_sport = dh->dccph_sport; + fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; + fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ @@ -417,21 +390,21 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) - goto drop; + return 0; /* discard, don't send a reset here */ if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; } /* * There are no SYN attacks on IPv6, yet... */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; if (inet_csk_reqsk_queue_is_full(sk)) goto drop; @@ -491,7 +464,6 @@ drop_and_free: reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; return -1; } @@ -795,10 +767,9 @@ discard: return 0; } -static int dccp_v6_rcv(struct sk_buff **pskb) +static int dccp_v6_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; - struct sk_buff *skb = *pskb; struct sock *sk; int min_cov; @@ -816,7 +787,7 @@ static int dccp_v6_rcv(struct sk_buff **pskb) dh = dccp_hdr(skb); - DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; if (dccp_packet_without_ack(skb)) @@ -1248,8 +1219,8 @@ module_exit(dccp_v6_exit); * values directly, Also cover the case where the protocol is not specified, * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP */ -MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-33-type-6"); -MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-0-type-6"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e18e249ac49..831b76e08d0 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -42,6 +42,16 @@ struct inet_timewait_death_row dccp_death_row = { EXPORT_SYMBOL_GPL(dccp_death_row); +void dccp_minisock_init(struct dccp_minisock *dmsk) +{ + dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; + dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; + dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; + dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; + dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; + dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; +} + void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; @@ -112,7 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_service_list = NULL; newdp->dccps_service = dreq->dreq_service; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; - do_gettimeofday(&newdp->dccps_epoch); if (dccp_feat_clone(sk, newsk)) goto out_free; diff --git a/net/dccp/options.c b/net/dccp/options.c index 34d536d5f1a..d286cffe2c4 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -14,6 +14,7 @@ #include <linux/dccp.h> #include <linux/module.h> #include <linux/types.h> +#include <asm/unaligned.h> #include <linux/kernel.h> #include <linux/skbuff.h> @@ -29,16 +30,6 @@ int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -void dccp_minisock_init(struct dccp_minisock *dmsk) -{ - dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; - dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; - dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; - dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; - dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; - dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; -} - static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) { u32 value = 0; @@ -69,6 +60,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) unsigned char opt, len; unsigned char *value; u32 elapsed_time; + __be32 opt_val; int rc; int mandatory = 0; @@ -155,10 +147,11 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (len != 4) goto out_invalid_option; - opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value); + opt_val = get_unaligned((__be32 *)value); + opt_recv->dccpor_timestamp = ntohl(opt_val); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - dccp_timestamp(sk, &dp->dccps_timestamp_time); + dp->dccps_timestamp_time = ktime_get_real(); dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", dccp_role(sk), opt_recv->dccpor_timestamp, @@ -169,7 +162,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (len != 4 && len != 6 && len != 8) goto out_invalid_option; - opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value); + opt_val = get_unaligned((__be32 *)value); + opt_recv->dccpor_timestamp_echo = ntohl(opt_val); dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, " "ackno=%llu", dccp_role(sk), @@ -178,18 +172,22 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); + value += 4; - if (len == 4) { + if (len == 4) { /* no elapsed time included */ dccp_pr_debug_cat("\n"); break; } - if (len == 6) - elapsed_time = ntohs(*(__be16 *)(value + 4)); - else - elapsed_time = ntohl(*(__be32 *)(value + 4)); + if (len == 6) { /* 2-byte elapsed time */ + __be16 opt_val2 = get_unaligned((__be16 *)value); + elapsed_time = ntohs(opt_val2); + } else { /* 4-byte elapsed time */ + opt_val = get_unaligned((__be32 *)value); + elapsed_time = ntohl(opt_val); + } - dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time); + dccp_pr_debug_cat(", ELAPSED_TIME=%u\n", elapsed_time); /* Give precedence to the biggest ELAPSED_TIME */ if (elapsed_time > opt_recv->dccpor_elapsed_time) @@ -202,10 +200,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (pkt_type == DCCP_PKT_DATA) continue; - if (len == 2) - elapsed_time = ntohs(*(__be16 *)value); - else - elapsed_time = ntohl(*(__be32 *)value); + if (len == 2) { + __be16 opt_val2 = get_unaligned((__be16 *)value); + elapsed_time = ntohs(opt_val2); + } else { + opt_val = get_unaligned((__be32 *)value); + elapsed_time = ntohl(opt_val); + } if (elapsed_time > opt_recv->dccpor_elapsed_time) opt_recv->dccpor_elapsed_time = elapsed_time; @@ -370,29 +371,9 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -void dccp_timestamp(const struct sock *sk, struct timeval *tv) -{ - const struct dccp_sock *dp = dccp_sk(sk); - - do_gettimeofday(tv); - tv->tv_sec -= dp->dccps_epoch.tv_sec; - tv->tv_usec -= dp->dccps_epoch.tv_usec; - - while (tv->tv_usec < 0) { - tv->tv_sec--; - tv->tv_usec += USEC_PER_SEC; - } -} - -EXPORT_SYMBOL_GPL(dccp_timestamp); - int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { - struct timeval tv; - __be32 now; - - dccp_timestamp(sk, &tv); - now = htonl(timeval_usecs(&tv) / 10); + __be32 now = htonl(dccp_timestamp()); /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -405,14 +386,12 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct timeval now; __be32 tstamp_echo; - u32 elapsed_time; int len, elapsed_time_len; unsigned char *to; - - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + const suseconds_t delta = ktime_us_delta(ktime_get_real(), + dp->dccps_timestamp_time); + u32 elapsed_time = delta / 10; elapsed_time_len = dccp_elapsed_time_len(elapsed_time); len = 6 + elapsed_time_len; @@ -438,8 +417,7 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk, } dp->dccps_timestamp_echo = 0; - dp->dccps_timestamp_time.tv_sec = 0; - dp->dccps_timestamp_time.tv_usec = 0; + dp->dccps_timestamp_time = ktime_set(0, 0); return 0; } diff --git a/net/dccp/output.c b/net/dccp/output.c index c8d843e983f..f49544618f2 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -61,6 +61,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) set_ack = 0; /* fall through */ case DCCP_PKT_DATAACK: + case DCCP_PKT_RESET: break; case DCCP_PKT_REQUEST: @@ -69,12 +70,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_SYNC: case DCCP_PKT_SYNCACK: - ackno = dcb->dccpd_seq; + ackno = dcb->dccpd_ack_seq; /* fall through */ default: /* - * Only data packets should come through with skb->sk - * set. + * Set owner/destructor: some skbs are allocated via + * alloc_skb (e.g. when retransmission may happen). + * Only Data, DataAck, and Reset packets should come + * through here with skb->sk set. */ WARN_ON(skb->sk); skb_set_owner_w(skb, sk); @@ -174,34 +177,38 @@ void dccp_write_space(struct sock *sk) /** * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet - * @sk: socket to wait for + * @sk: socket to wait for + * @skb: current skb to pass on for waiting + * @delay: sleep timeout in milliseconds (> 0) + * This function is called by default when the socket is closed, and + * when a non-zero linger time is set on the socket. For consistency */ -static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb) +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) { struct dccp_sock *dp = dccp_sk(sk); DEFINE_WAIT(wait); - unsigned long delay; + unsigned long jiffdelay; int rc; - while (1) { + do { + dccp_pr_debug("delayed send by %d msec\n", delay); + jiffdelay = msecs_to_jiffies(delay); + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + sk->sk_write_pending++; + release_sock(sk); + schedule_timeout(jiffdelay); + lock_sock(sk); + sk->sk_write_pending--; + if (sk->sk_err) goto do_error; if (signal_pending(current)) goto do_interrupted; rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - if (rc <= 0) - break; - dccp_pr_debug("delayed send by %d msec\n", rc); - delay = msecs_to_jiffies(rc); - sk->sk_write_pending++; - release_sock(sk); - schedule_timeout(delay); - lock_sock(sk); - sk->sk_write_pending--; - } + } while ((delay = rc) > 0); out: finish_wait(sk->sk_sleep, &wait); return rc; @@ -228,7 +235,7 @@ void dccp_write_xmit(struct sock *sk, int block) msecs_to_jiffies(err)+jiffies); break; } else - err = dccp_wait_for_ccid(sk, skb); + err = dccp_wait_for_ccid(sk, skb, err); if (err && err != -EINTR) DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } @@ -324,72 +331,81 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, EXPORT_SYMBOL_GPL(dccp_make_response); -static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, - const enum dccp_reset_codes code) +/* answer offending packet in @rcv_skb with Reset from control socket @ctl */ +struct sk_buff *dccp_ctl_make_reset(struct socket *ctl, struct sk_buff *rcv_skb) { - struct dccp_hdr *dh; - struct dccp_sock *dp = dccp_sk(sk); - const u32 dccp_header_size = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); - struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, - GFP_ATOMIC); - if (skb == NULL) - return NULL; - - /* Reserve space for headers. */ - skb_reserve(skb, sk->sk_prot->max_header); - - skb->dst = dst_clone(dst); - - dccp_inc_seqno(&dp->dccps_gss); - - DCCP_SKB_CB(skb)->dccpd_reset_code = code; - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; - DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; + struct dccp_hdr *rxdh = dccp_hdr(rcv_skb), *dh; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(rcv_skb); + const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct dccp_hdr_reset *dhr; + struct sk_buff *skb; - if (dccp_insert_options(sk, skb)) { - kfree_skb(skb); + skb = alloc_skb(ctl->sk->sk_prot->max_header, GFP_ATOMIC); + if (skb == NULL) return NULL; - } - dh = dccp_zeroed_hdr(skb, dccp_header_size); + skb_reserve(skb, ctl->sk->sk_prot->max_header); - dh->dccph_sport = inet_sk(sk)->sport; - dh->dccph_dport = inet_sk(sk)->dport; - dh->dccph_doff = (dccp_header_size + - DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + /* Swap the send and the receive. */ + dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_reset_len / 4; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dp->dccps_gss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); - dccp_hdr_reset(skb)->dccph_reset_code = code; - inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb); + dhr = dccp_hdr_reset(skb); + dhr->dccph_reset_code = dcb->dccpd_reset_code; + + switch (dcb->dccpd_reset_code) { + case DCCP_RESET_CODE_PACKET_ERROR: + dhr->dccph_reset_data[0] = rxdh->dccph_type; + break; + case DCCP_RESET_CODE_OPTION_ERROR: /* fall through */ + case DCCP_RESET_CODE_MANDATORY_ERROR: + memcpy(dhr->dccph_reset_data, dcb->dccpd_reset_data, 3); + break; + } + /* + * From RFC 4340, 8.3.1: + * If P.ackno exists, set R.seqno := P.ackno + 1. + * Else set R.seqno := 0. + */ + if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_hdr_set_seq(dh, ADD48(dcb->dccpd_ack_seq, 1)); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dcb->dccpd_seq); - DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + dccp_csum_outgoing(skb); return skb; } +EXPORT_SYMBOL_GPL(dccp_ctl_make_reset); + +/* send Reset on established socket, to close or abort the connection */ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) { + struct sk_buff *skb; /* * FIXME: what if rebuild_header fails? * Should we be doing a rebuild_header here? */ int err = inet_sk_rebuild_header(sk); - if (err == 0) { - struct sk_buff *skb = dccp_make_reset(sk, sk->sk_dst_cache, - code); - if (skb != NULL) { - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0); - return net_xmit_eval(err); - } - } + if (err != 0) + return err; + + skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, GFP_ATOMIC); + if (skb == NULL) + return -ENOBUFS; - return err; + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, sk->sk_prot->max_header); + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; + DCCP_SKB_CB(skb)->dccpd_reset_code = code; + + return dccp_transmit_skb(sk, skb); } /* @@ -477,6 +493,7 @@ void dccp_send_ack(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_send_ack); +/* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */ void dccp_send_delayed_ack(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -507,7 +524,7 @@ void dccp_send_delayed_ack(struct sock *sk) sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } -void dccp_send_sync(struct sock *sk, const u64 seq, +void dccp_send_sync(struct sock *sk, const u64 ackno, const enum dccp_pkt_type pkt_type) { /* @@ -517,14 +534,16 @@ void dccp_send_sync(struct sock *sk, const u64 seq, */ struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC); - if (skb == NULL) + if (skb == NULL) { /* FIXME: how to make sure the sync is sent? */ + DCCP_CRIT("could not send %s", dccp_packet_name(pkt_type)); return; + } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); DCCP_SKB_CB(skb)->dccpd_type = pkt_type; - DCCP_SKB_CB(skb)->dccpd_seq = seq; + DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index bae10b0f2fc..7053bb827bc 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/kfifo.h> #include <linux/vmalloc.h> +#include <net/net_namespace.h> #include "dccp.h" #include "ccid.h" @@ -168,7 +169,7 @@ static __init int dccpprobe_init(void) if (IS_ERR(dccpw.fifo)) return PTR_ERR(dccpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; ret = register_jprobe(&dccp_send_probe); @@ -178,7 +179,7 @@ static __init int dccpprobe_init(void) pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfifo_free(dccpw.fifo); return ret; @@ -188,7 +189,7 @@ module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { kfifo_free(dccpw.fifo); - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 04b59ec4f51..d8497392803 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -26,6 +26,7 @@ #include <net/sock.h> #include <net/xfrm.h> +#include <asm/ioctls.h> #include <asm/semaphore.h> #include <linux/spinlock.h> #include <linux/timer.h> @@ -172,7 +173,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) struct inet_connection_sock *icsk = inet_csk(sk); dccp_minisock_init(&dp->dccps_minisock); - do_gettimeofday(&dp->dccps_epoch); /* * FIXME: We're hardcoding the CCID, and doing this at this point makes @@ -220,6 +220,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) sk->sk_write_space = dccp_write_space; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; + dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; @@ -378,8 +379,36 @@ EXPORT_SYMBOL_GPL(dccp_poll); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) { - dccp_pr_debug("entry\n"); - return -ENOIOCTLCMD; + int rc = -ENOTCONN; + + lock_sock(sk); + + if (sk->sk_state == DCCP_LISTEN) + goto out; + + switch (cmd) { + case SIOCINQ: { + struct sk_buff *skb; + unsigned long amount = 0; + + skb = skb_peek(&sk->sk_receive_queue); + if (skb != NULL) { + /* + * We will only return the amount of this packet since + * that is all that will be read. + */ + amount = skb->len; + } + rc = put_user(amount, (int __user *)arg); + } + break; + default: + rc = -ENOIOCTLCMD; + break; + } +out: + release_sock(sk); + return rc; } EXPORT_SYMBOL_GPL(dccp_ioctl); @@ -587,6 +616,10 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); + case DCCP_SOCKOPT_GET_CUR_MPS: + val = dp->dccps_mss_cache; + len = sizeof(val); + break; case DCCP_SOCKOPT_SEND_CSCOV: val = dp->dccps_pcslen; len = sizeof(val); @@ -664,7 +697,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * so that the trick in dccp_rcv_request_sent_state_process. */ /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) + if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_release; @@ -988,7 +1021,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG int dccp_debug; -module_param(dccp_debug, int, 0444); +module_param(dccp_debug, bool, 0444); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); @@ -1077,6 +1110,8 @@ static int __init dccp_init(void) rc = dccp_sysctl_init(); if (rc) goto out_ackvec_exit; + + dccp_timestamping_init(); out: return rc; out_ackvec_exit: diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 1260aabac5e..c62c05039f6 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -89,6 +89,13 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sync_ratelimit", + .data = &sysctl_dccp_sync_ratelimit, + .maxlen = sizeof(sysctl_dccp_sync_ratelimit), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, { .ctl_name = 0, } }; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 0197a41c256..3af067354bd 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -291,3 +291,24 @@ void dccp_init_xmit_timers(struct sock *sk) inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } + +static ktime_t dccp_timestamp_seed; +/** + * dccp_timestamp - 10s of microseconds time source + * Returns the number of 10s of microseconds since loading DCCP. This is native + * DCCP time difference format (RFC 4340, sec. 13). + * Please note: This will wrap around about circa every 11.9 hours. + */ +u32 dccp_timestamp(void) +{ + s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); + + do_div(delta, 10); + return delta; +} +EXPORT_SYMBOL_GPL(dccp_timestamp); + +void __init dccp_timestamping_init(void) +{ + dccp_timestamp_seed = ktime_get_real(); +} diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index ed76d4aab4a..aabe98d9402 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -128,6 +128,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include <linux/stat.h> #include <linux/init.h> #include <linux/poll.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/fib_rules.h> @@ -470,10 +471,10 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) { struct dn_scp *scp; - struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1); if (!sk) goto out; @@ -674,10 +675,13 @@ char *dn_addr2asc(__u16 addr, char *buf) -static int dn_create(struct socket *sock, int protocol) +static int dn_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; + if (net != &init_net) + return -EAFNOSUPPORT; + switch(sock->type) { case SOCK_SEQPACKET: if (protocol != DNPROTO_NSP) @@ -690,7 +694,7 @@ static int dn_create(struct socket *sock, int protocol) } - if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -747,7 +751,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (dn_ntohs(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); ldev = NULL; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!dev->dn_ptr) continue; if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { @@ -1090,7 +1094,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); @@ -2085,6 +2089,9 @@ static int dn_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch(event) { case NETDEV_UP: dn_dev_up(dev); @@ -2399,7 +2406,7 @@ static int __init decnet_init(void) dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops); + proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; @@ -2428,7 +2435,7 @@ static void __exit decnet_exit(void) dn_neigh_cleanup(); dn_fib_cleanup(); - proc_net_remove("decnet"); + proc_net_remove(&init_net, "decnet"); proto_unregister(&dn_proto); } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 8def68209ed..26130afd802 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -42,6 +42,7 @@ #include <linux/notifier.h> #include <asm/uaccess.h> #include <asm/system.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> @@ -149,7 +150,7 @@ static struct dn_dev_parms dn_dev_list[] = { } }; -#define DN_DEV_LIST_SIZE (sizeof(dn_dev_list)/sizeof(struct dn_dev_parms)) +#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list) #define DN_DEV_PARMS_OFFSET(x) ((int) ((char *) &((struct dn_dev_parms *)0)->x)) @@ -512,7 +513,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) ifr->ifr_name[IFNAMSIZ-1] = 0; #ifdef CONFIG_KMOD - dev_load(ifr->ifr_name); + dev_load(&init_net, ifr->ifr_name); #endif switch(cmd) { @@ -530,7 +531,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) { + if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) { ret = -ENODEV; goto done; } @@ -628,7 +629,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex) { struct net_device *dev; struct dn_dev *dn_dev = NULL; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev) { dn_dev = dev->dn_ptr; dev_put(dev); @@ -693,7 +694,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -EINVAL; ifm = nlmsg_data(nlh); - if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) return -ENODEV; if ((dn_db = dev->dn_ptr) == NULL) { @@ -799,7 +800,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_naddr = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < skip_ndevs) goto cont; else if (idx > skip_ndevs) { @@ -868,10 +869,10 @@ last_chance: rv = dn_dev_get_first(dev, addr); read_unlock(&dev_base_lock); dev_put(dev); - if (rv == 0 || dev == &loopback_dev) + if (rv == 0 || dev == init_net.loopback_dev) return rv; } - dev = &loopback_dev; + dev = init_net.loopback_dev; dev_hold(dev); goto last_chance; } @@ -1296,7 +1297,7 @@ void dn_dev_devices_off(void) struct net_device *dev; rtnl_lock(); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) dn_dev_down(dev); rtnl_unlock(); @@ -1307,7 +1308,7 @@ void dn_dev_devices_on(void) struct net_device *dev; rtnl_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (dev->flags & IFF_UP) dn_dev_up(dev); } @@ -1341,7 +1342,7 @@ static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) return SEQ_START_TOKEN; i = 1; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1360,9 +1361,9 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) dev = (struct net_device *)v; if (v == SEQ_START_TOKEN) - dev = net_device_entry(&dev_base_head); + dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(dev) { + for_each_netdev_continue(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1462,7 +1463,7 @@ void __init dn_dev_init(void) rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL); rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr); - proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops); + proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); #ifdef CONFIG_SYSCTL { @@ -1483,7 +1484,7 @@ void __exit dn_dev_cleanup(void) } #endif /* CONFIG_SYSCTL */ - proc_net_remove("decnet_dev"); + proc_net_remove(&init_net, "decnet_dev"); dn_dev_devices_off(); } diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index d2bc19d4795..3760a20d10d 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -212,7 +212,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct return -EINVAL; if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -255,7 +255,7 @@ out: if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - dev = __dev_get_by_index(nh->nh_oif); + dev = __dev_get_by_index(&init_net, nh->nh_oif); if (dev == NULL || dev->dn_ptr == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) @@ -355,7 +355,7 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -602,7 +602,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) /* Scan device list */ read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 174d8a7a6da..e851b143cca 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -38,6 +38,7 @@ #include <linux/rcupdate.h> #include <linux/jhash.h> #include <asm/atomic.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> @@ -210,7 +211,8 @@ static int dn_neigh_output_packet(struct sk_buff *skb) char mac_addr[ETH_ALEN]; dn_dn2eth(mac_addr, rt->rt_local_src); - if (!dev->hard_header || dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, mac_addr, skb->len) >= 0) + if (dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, + mac_addr, skb->len) >= 0) return neigh->ops->queue_xmit(skb); if (net_ratelimit()) @@ -578,24 +580,8 @@ static const struct seq_operations dn_neigh_seq_ops = { static int dn_neigh_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &dn_neigh_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &dn_neigh_seq_ops, + sizeof(struct neigh_seq_state)); } static const struct file_operations dn_neigh_seq_fops = { @@ -611,11 +597,11 @@ static const struct file_operations dn_neigh_seq_fops = { void __init dn_neigh_init(void) { neigh_table_init(&dn_neigh_table); - proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); } void __exit dn_neigh_cleanup(void) { - proc_net_remove("decnet_neigh"); + proc_net_remove(&init_net, "decnet_neigh"); neigh_table_clear(&dn_neigh_table); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a4a620971ef..97eee5e8fbb 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -77,6 +77,7 @@ #include <linux/rcupdate.h> #include <linux/times.h> #include <asm/errno.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/neighbour.h> #include <net/dst.h> @@ -583,6 +584,9 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; + if (dev->nd_net != &init_net) + goto dump_it; + if (dn == NULL) goto dump_it; @@ -883,7 +887,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old .scope = RT_SCOPE_UNIVERSE, } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = init_net.loopback_dev->ifindex, .oif = oldflp->oif }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; @@ -900,11 +904,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), dn_ntohs(oldflp->fld_src), - oldflp->mark, loopback_dev.ifindex, oldflp->oif); + oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(&init_net, oldflp->oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -925,7 +929,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old goto out; } read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!dev->dn_ptr) continue; if (!dn_dev_islocal(dev, oldflp->fld_src)) @@ -953,7 +957,7 @@ source_ok: err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); if (!fl.fld_dst) { fl.fld_dst = @@ -962,7 +966,7 @@ source_ok: if (!fl.fld_dst) goto out; } - fl.oif = loopback_dev.ifindex; + fl.oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -1008,7 +1012,7 @@ source_ok: if (dev_out) dev_put(dev_out); if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; res.type = RTN_LOCAL; } else { dev_out = neigh->dev; @@ -1029,7 +1033,7 @@ source_ok: /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fl.fld_dst)) { dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); res.type = RTN_LOCAL; goto select_source; @@ -1065,7 +1069,7 @@ select_source: fl.fld_src = fl.fld_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) @@ -1552,7 +1556,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (fl.iif) { struct net_device *dev; - if ((dev = dev_get_by_index(fl.iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1735,23 +1739,8 @@ static const struct seq_operations dn_rt_cache_seq_ops = { static int dn_rt_cache_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct dn_rt_cache_iter_state *s; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - rc = seq_open(file, &dn_rt_cache_seq_ops); - if (rc) - goto out_kfree; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &dn_rt_cache_seq_ops, + sizeof(struct dn_rt_cache_iter_state)); } static const struct file_operations dn_rt_cache_seq_fops = { @@ -1814,7 +1803,7 @@ void __init dn_route_init(void) dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump); @@ -1829,6 +1818,6 @@ void __exit dn_route_cleanup(void) del_timer(&dn_route_timer); dn_run_flush(0); - proc_net_remove("decnet_cache"); + proc_net_remove(&init_net, "decnet_cache"); } diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 84ff3dd3707..ddd3f04f091 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -57,8 +57,6 @@ static struct dn_fib_rule default_rule = { }, }; -static LIST_HEAD(dn_fib_rules); - int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) { @@ -228,9 +226,9 @@ static u32 dn_fib_rule_default_pref(void) struct list_head *pos; struct fib_rule *rule; - if (!list_empty(&dn_fib_rules)) { - pos = dn_fib_rules.next; - if (pos->next != &dn_fib_rules) { + if (!list_empty(&dn_fib_rules_ops.rules_list)) { + pos = dn_fib_rules_ops.rules_list.next; + if (pos->next != &dn_fib_rules_ops.rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -258,13 +256,14 @@ static struct fib_rules_ops dn_fib_rules_ops = { .flush_cache = dn_fib_rule_flush_cache, .nlgroup = RTNLGRP_DECnet_RULE, .policy = dn_fib_rule_policy, - .rules_list = &dn_fib_rules, + .rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list), .owner = THIS_MODULE, }; void __init dn_fib_rules_init(void) { - list_add_tail(&default_rule.common.list, &dn_fib_rules); + list_add_tail(&default_rule.common.list, + &dn_fib_rules_ops.rules_list); fib_rules_register(&dn_fib_rules_ops); } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 696234688cf..43fcd29046d 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -88,12 +88,12 @@ static void dnrmg_send_peer(struct sk_buff *skb) static unsigned int dnrmg_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - dnrmg_send_peer(*pskb); + dnrmg_send_peer(skb); return NF_ACCEPT; } @@ -115,17 +115,6 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) RCV_SKB_FAIL(-EINVAL); } -static void dnrmg_receive_user_sk(struct sock *sk, int len) -{ - struct sk_buff *skb; - unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); - - for (; qlen && (skb = skb_dequeue(&sk->sk_receive_queue)); qlen--) { - dnrmg_receive_user_skb(skb); - kfree_skb(skb); - } -} - static struct nf_hook_ops dnrmg_ops = { .hook = dnrmg_hook, .pf = PF_DECnet, @@ -137,8 +126,10 @@ static int __init dn_rtmsg_init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, - dnrmg_receive_user_sk, NULL, THIS_MODULE); + dnrmg = netlink_kernel_create(&init_net, + NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg_receive_user_skb, + NULL, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 52e40d7eb22..ae354a43fb9 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -259,7 +259,7 @@ static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, devname[newlen] = 0; - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) return -ENODEV; @@ -299,7 +299,7 @@ static int dn_def_dev_handler(ctl_table *table, int write, devname[*lenp] = 0; strip_it(devname); - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) return -ENODEV; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 35c96bcc0f3..9cae16b4e0b 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -336,6 +336,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, /* Real hardware Econet. We're not worthy etc. */ #ifdef CONFIG_ECONET_NATIVE unsigned short proto = 0; + int res; dev_hold(dev); @@ -354,12 +355,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, eb->sec = *saddr; eb->sent = ec_tx_done; - if (dev->hard_header) { - int res; + err = -EINVAL; + res = dev_hard_header(skb, dev, ntohs(proto), &addr, NULL, len); + if (res < 0) + goto out_free; + if (res > 0) { struct ec_framehdr *fh; - err = -EINVAL; - res = dev->hard_header(skb, dev, ntohs(proto), - &addr, NULL, len); /* Poke in our control byte and port number. Hack, hack. */ fh = (struct ec_framehdr *)(skb->data); @@ -368,8 +369,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, if (sock->type != SOCK_DGRAM) { skb_reset_tail_pointer(skb); skb->len = 0; - } else if (res < 0) - goto out_free; + } } /* Copy the data. Returns -EFAULT on error */ @@ -608,12 +608,15 @@ static struct proto econet_proto = { * Create an Econet socket */ -static int econet_create(struct socket *sock, int protocol) +static int econet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct econet_sock *eo; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + /* Econet only provides datagram services. */ if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; @@ -621,7 +624,7 @@ static int econet_create(struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1); + sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1); if (sk == NULL) goto out; @@ -659,7 +662,7 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) return -ENODEV; sec = (struct sockaddr_ec *)&ifr.ifr_addr; @@ -1062,6 +1065,9 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet struct sock *sk; struct ec_device *edev = dev->ec_ptr; + if (dev->nd_net != &init_net) + goto drop; + if (skb->pkt_type == PACKET_OTHERHOST) goto drop; @@ -1116,6 +1122,9 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = (struct net_device *)data; struct ec_device *edev; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (msg) { case NETDEV_UNREGISTER: /* A device has gone down - kill any data we hold for it. */ diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 12c765715ac..6b2e454ae31 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -75,8 +75,9 @@ __setup("ether=", netdev_boot_setup); * Set the protocol type. For a packet of type ETH_P_802_3 we put the length * in here instead. It is up to the 802.2 layer to carry protocol information. */ -int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +int eth_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); @@ -91,10 +92,10 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, if (!saddr) saddr = dev->dev_addr; - memcpy(eth->h_source, saddr, dev->addr_len); + memcpy(eth->h_source, saddr, ETH_ALEN); if (daddr) { - memcpy(eth->h_dest, daddr, dev->addr_len); + memcpy(eth->h_dest, daddr, ETH_ALEN); return ETH_HLEN; } @@ -103,12 +104,13 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { - memset(eth->h_dest, 0, dev->addr_len); + memset(eth->h_dest, 0, ETH_ALEN); return ETH_HLEN; } return -ETH_HLEN; } +EXPORT_SYMBOL(eth_header); /** * eth_rebuild_header- rebuild the Ethernet MAC header. @@ -135,12 +137,13 @@ int eth_rebuild_header(struct sk_buff *skb) "%s: unable to resolve type %X addresses.\n", dev->name, (int)eth->h_proto); - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); break; } return 0; } +EXPORT_SYMBOL(eth_rebuild_header); /** * eth_type_trans - determine the packet's protocol ID. @@ -207,12 +210,13 @@ EXPORT_SYMBOL(eth_type_trans); * @skb: packet to extract header from * @haddr: destination buffer */ -static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) +int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr) { - struct ethhdr *eth = eth_hdr(skb); + const struct ethhdr *eth = eth_hdr(skb); memcpy(haddr, eth->h_source, ETH_ALEN); return ETH_ALEN; } +EXPORT_SYMBOL(eth_header_parse); /** * eth_header_cache - fill cache entry from neighbour @@ -220,11 +224,11 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) * @hh: destination cache entry * Create an Ethernet header template from the neighbour. */ -int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) +int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh) { __be16 type = hh->hh_type; struct ethhdr *eth; - struct net_device *dev = neigh->dev; + const struct net_device *dev = neigh->dev; eth = (struct ethhdr *) (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); @@ -233,11 +237,12 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) return -1; eth->h_proto = type; - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); - memcpy(eth->h_dest, neigh->ha, dev->addr_len); + memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); + memcpy(eth->h_dest, neigh->ha, ETH_ALEN); hh->hh_len = ETH_HLEN; return 0; } +EXPORT_SYMBOL(eth_header_cache); /** * eth_header_cache_update - update cache entry @@ -247,12 +252,14 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) * * Called by Address Resolution module to notify changes in address. */ -void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, - unsigned char *haddr) +void eth_header_cache_update(struct hh_cache *hh, + const struct net_device *dev, + const unsigned char *haddr) { memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), - haddr, dev->addr_len); + haddr, ETH_ALEN); } +EXPORT_SYMBOL(eth_header_cache_update); /** * eth_mac_addr - set new Ethernet hardware address @@ -271,7 +278,7 @@ static int eth_mac_addr(struct net_device *dev, void *p) return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } @@ -291,6 +298,22 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static int eth_validate_addr(struct net_device *dev) +{ + if (!is_valid_ether_addr(dev->dev_addr)) + return -EINVAL; + + return 0; +} + +const struct header_ops eth_header_ops ____cacheline_aligned = { + .create = eth_header, + .parse = eth_header_parse, + .rebuild = eth_rebuild_header, + .cache = eth_header_cache, + .cache_update = eth_header_cache_update, +}; + /** * ether_setup - setup Ethernet network device * @dev: network device @@ -298,13 +321,11 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) */ void ether_setup(struct net_device *dev) { + dev->header_ops = ð_header_ops; + dev->change_mtu = eth_change_mtu; - dev->hard_header = eth_header; - dev->rebuild_header = eth_rebuild_header; dev->set_mac_address = eth_mac_addr; - dev->hard_header_cache = eth_header_cache; - dev->header_cache_update= eth_header_cache_update; - dev->hard_header_parse = eth_header_parse; + dev->validate_addr = eth_validate_addr; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; @@ -337,3 +358,11 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count) return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count); } EXPORT_SYMBOL(alloc_etherdev_mq); + +char *print_mac(char *buf, const u8 *addr) +{ + sprintf(buf, MAC_FMT, + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + return buf; +} +EXPORT_SYMBOL(print_mac); diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c index 9d57b4fb644..d60e15d9365 100644 --- a/net/ethernet/pe2.c +++ b/net/ethernet/pe2.c @@ -12,9 +12,7 @@ static int pEII_request(struct datalink_proto *dl, struct net_device *dev = skb->dev; skb->protocol = htons(ETH_P_IPX); - if (dev->hard_header) - dev->hard_header(skb, dev, ETH_P_IPX, - dest_node, NULL, skb->len); + dev_hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len); return dev_queue_xmit(skb); } diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index b016b4104de..0936a3e0210 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -9,6 +9,7 @@ * more details. */ +#include <linux/kernel.h> #include <linux/err.h> #include <linux/module.h> #include <linux/init.h> @@ -241,7 +242,7 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) hdr = (struct ieee80211_hdr_4addr *)skb->data; ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); - blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last = data_len % AES_BLOCK_LEN; for (i = 1; i <= blocks; i++) { @@ -296,6 +297,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) int i, blocks, last, len; size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN; u8 *mic = skb->data + skb->len - CCMP_MIC_LEN; + DECLARE_MAC_BUF(mac); if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { key->dot11RSNAStatsCCMPFormatErrors++; @@ -308,7 +310,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (!(keyidx & (1 << 5))) { if (net_ratelimit()) { printk(KERN_DEBUG "CCMP: received packet without ExtIV" - " flag from " MAC_FMT "\n", MAC_ARG(hdr->addr2)); + " flag from %s\n", print_mac(mac, hdr->addr2)); } key->dot11RSNAStatsCCMPFormatErrors++; return -2; @@ -321,9 +323,9 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } if (!key->key_set) { if (net_ratelimit()) { - printk(KERN_DEBUG "CCMP: received packet from " MAC_FMT + printk(KERN_DEBUG "CCMP: received packet from %s" " with keyid=%d that does not have a configured" - " key\n", MAC_ARG(hdr->addr2), keyidx); + " key\n", print_mac(mac, hdr->addr2), keyidx); } return -3; } @@ -338,11 +340,13 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (ccmp_replay_check(pn, key->rx_pn)) { if (net_ratelimit()) { - IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=" MAC_FMT - " previous PN %02x%02x%02x%02x%02x%02x " - "received PN %02x%02x%02x%02x%02x%02x\n", - MAC_ARG(hdr->addr2), MAC_ARG(key->rx_pn), - MAC_ARG(pn)); + IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%s " + "previous PN %02x%02x%02x%02x%02x%02x " + "received PN %02x%02x%02x%02x%02x%02x\n", + print_mac(mac, hdr->addr2), + key->rx_pn[0], key->rx_pn[1], key->rx_pn[2], + key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], + pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); } key->dot11RSNAStatsCCMPReplays++; return -4; @@ -351,7 +355,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b); xor_block(mic, b, CCMP_MIC_LEN); - blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last = data_len % AES_BLOCK_LEN; for (i = 1; i <= blocks; i++) { @@ -370,7 +374,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (memcmp(mic, a, CCMP_MIC_LEN) != 0) { if (net_ratelimit()) { printk(KERN_DEBUG "CCMP: decrypt failed: STA=" - MAC_FMT "\n", MAC_ARG(hdr->addr2)); + "%s\n", print_mac(mac, hdr->addr2)); } key->dot11RSNAStatsCCMPDecryptErrors++; return -5; @@ -442,12 +446,16 @@ static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) static char *ieee80211_ccmp_print_stats(char *p, void *priv) { struct ieee80211_ccmp_data *ccmp = priv; + p += sprintf(p, "key[%d] alg=CCMP key_set=%d " "tx_pn=%02x%02x%02x%02x%02x%02x " "rx_pn=%02x%02x%02x%02x%02x%02x " "format_errors=%d replays=%d decrypt_errors=%d\n", ccmp->key_idx, ccmp->key_set, - MAC_ARG(ccmp->tx_pn), MAC_ARG(ccmp->rx_pn), + ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], + ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], + ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], + ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], ccmp->dot11RSNAStatsCCMPFormatErrors, ccmp->dot11RSNAStatsCCMPReplays, ccmp->dot11RSNAStatsCCMPDecryptErrors); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 5a48d8e0aec..4cce3534e40 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/random.h> +#include <linux/scatterlist.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/mm.h> @@ -24,7 +25,7 @@ #include <net/ieee80211.h> #include <linux/crypto.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <linux/crc32.h> MODULE_AUTHOR("Jouni Malinen"); @@ -359,14 +360,15 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) u8 rc4key[16], *pos, *icv; u32 crc; struct scatterlist sg; + DECLARE_MAC_BUF(mac); if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { struct ieee80211_hdr_4addr *hdr = (struct ieee80211_hdr_4addr *)skb->data; printk(KERN_DEBUG ": TKIP countermeasures: dropped " - "TX packet to " MAC_FMT "\n", - MAC_ARG(hdr->addr1)); + "TX packet to %s\n", + print_mac(mac, hdr->addr1)); } return -1; } @@ -389,9 +391,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[3] = crc >> 24; crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = len + 4; + sg_init_one(&sg, pos, len + 4); return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } @@ -421,14 +421,15 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) u32 crc; struct scatterlist sg; int plen; + DECLARE_MAC_BUF(mac); hdr = (struct ieee80211_hdr_4addr *)skb->data; if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { printk(KERN_DEBUG ": TKIP countermeasures: dropped " - "received packet from " MAC_FMT "\n", - MAC_ARG(hdr->addr2)); + "received packet from %s\n", + print_mac(mac, hdr->addr2)); } return -1; } @@ -441,7 +442,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (!(keyidx & (1 << 5))) { if (net_ratelimit()) { printk(KERN_DEBUG "TKIP: received packet without ExtIV" - " flag from " MAC_FMT "\n", MAC_ARG(hdr->addr2)); + " flag from %s\n", print_mac(mac, hdr->addr2)); } return -2; } @@ -453,9 +454,9 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } if (!tkey->key_set) { if (net_ratelimit()) { - printk(KERN_DEBUG "TKIP: received packet from " MAC_FMT + printk(KERN_DEBUG "TKIP: received packet from %s" " with keyid=%d that does not have a configured" - " key\n", MAC_ARG(hdr->addr2), keyidx); + " key\n", print_mac(mac, hdr->addr2), keyidx); } return -3; } @@ -465,9 +466,9 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { if (net_ratelimit()) { - IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=" MAC_FMT + IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%s" " previous TSC %08x%04x received TSC " - "%08x%04x\n", MAC_ARG(hdr->addr2), + "%08x%04x\n", print_mac(mac, hdr->addr2), tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); } tkey->dot11RSNAStatsTKIPReplays++; @@ -483,14 +484,12 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) plen = skb->len - hdr_len - 12; crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = plen + 4; + sg_init_one(&sg, pos, plen + 4); if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { if (net_ratelimit()) { printk(KERN_DEBUG ": TKIP: failed to decrypt " - "received packet from " MAC_FMT "\n", - MAC_ARG(hdr->addr2)); + "received packet from %s\n", + print_mac(mac, hdr->addr2)); } return -7; } @@ -508,7 +507,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } if (net_ratelimit()) { IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA=" - MAC_FMT "\n", MAC_ARG(hdr->addr2)); + "%s\n", print_mac(mac, hdr->addr2)); } tkey->dot11RSNAStatsTKIPICVErrors++; return -5; @@ -537,13 +536,9 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); return -1; } - sg[0].page = virt_to_page(hdr); - sg[0].offset = offset_in_page(hdr); - sg[0].length = 16; - - sg[1].page = virt_to_page(data); - sg[1].offset = offset_in_page(data); - sg[1].length = data_len; + sg_init_table(sg, 2); + sg_set_buf(&sg[0], hdr, 16); + sg_set_buf(&sg[1], data, data_len); if (crypto_hash_setkey(tfm_michael, key, 8)) return -1; @@ -584,7 +579,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) if (stype & IEEE80211_STYPE_QOS_DATA) { const struct ieee80211_hdr_3addrqos *qoshdr = (struct ieee80211_hdr_3addrqos *)skb->data; - hdr[12] = qoshdr->qos_ctl & cpu_to_le16(IEEE80211_QCTL_TID); + hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID; } else hdr[12] = 0; /* priority */ @@ -639,6 +634,7 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, { struct ieee80211_tkip_data *tkey = priv; u8 mic[8]; + DECLARE_MAC_BUF(mac); if (!tkey->key_set) return -1; @@ -651,8 +647,8 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, struct ieee80211_hdr_4addr *hdr; hdr = (struct ieee80211_hdr_4addr *)skb->data; printk(KERN_DEBUG "%s: Michael MIC verification failed for " - "MSDU from " MAC_FMT " keyidx=%d\n", - skb->dev ? skb->dev->name : "N/A", MAC_ARG(hdr->addr2), + "MSDU from %s keyidx=%d\n", + skb->dev ? skb->dev->name : "N/A", print_mac(mac, hdr->addr2), keyidx); if (skb->dev) ieee80211_michael_mic_failure(skb->dev, hdr, keyidx); diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 8d182459344..866fc04c44f 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/random.h> +#include <linux/scatterlist.h> #include <linux/skbuff.h> #include <linux/mm.h> #include <asm/string.h> @@ -21,7 +22,7 @@ #include <net/ieee80211.h> #include <linux/crypto.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <linux/crc32.h> MODULE_AUTHOR("Jouni Malinen"); @@ -170,9 +171,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[3] = crc >> 24; crypto_blkcipher_setkey(wep->tx_tfm, key, klen); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = len + 4; + sg_init_one(&sg, pos, len + 4); return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } @@ -212,9 +211,7 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) plen = skb->len - hdr_len - 8; crypto_blkcipher_setkey(wep->rx_tfm, key, klen); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = plen + 4; + sg_init_one(&sg, pos, plen + 4); if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) return -7; diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 17ad278696e..69cb6aad25b 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -47,6 +47,7 @@ #include <linux/wireless.h> #include <linux/etherdevice.h> #include <asm/uaccess.h> +#include <net/net_namespace.h> #include <net/arp.h> #include <net/ieee80211.h> @@ -264,7 +265,7 @@ static int __init ieee80211_init(void) struct proc_dir_entry *e; ieee80211_debug_level = debug; - ieee80211_proc = proc_mkdir(DRV_NAME, proc_net); + ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net); if (ieee80211_proc == NULL) { IEEE80211_ERROR("Unable to create " DRV_NAME " proc directory\n"); @@ -273,7 +274,7 @@ static int __init ieee80211_init(void) e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, ieee80211_proc); if (!e) { - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); ieee80211_proc = NULL; return -EIO; } @@ -293,7 +294,7 @@ static void __exit ieee80211_exit(void) #ifdef CONFIG_IEEE80211_DEBUG if (ieee80211_proc) { remove_proc_entry("debug_level", ieee80211_proc); - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); ieee80211_proc = NULL; } #endif /* CONFIG_IEEE80211_DEBUG */ diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 6284c99b456..21c0fadde03 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -271,6 +271,7 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, { struct ieee80211_hdr_3addr *hdr; int res, hdrlen; + DECLARE_MAC_BUF(mac); if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL) return 0; @@ -282,8 +283,8 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv); atomic_dec(&crypt->refcnt); if (res < 0) { - IEEE80211_DEBUG_DROP("decryption failed (SA=" MAC_FMT - ") res=%d\n", MAC_ARG(hdr->addr2), res); + IEEE80211_DEBUG_DROP("decryption failed (SA=%s" + ") res=%d\n", print_mac(mac, hdr->addr2), res); if (res == -2) IEEE80211_DEBUG_DROP("Decryption failed ICV " "mismatch (key %d)\n", @@ -303,6 +304,7 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, { struct ieee80211_hdr_3addr *hdr; int res, hdrlen; + DECLARE_MAC_BUF(mac); if (crypt == NULL || crypt->ops->decrypt_msdu == NULL) return 0; @@ -315,8 +317,8 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, atomic_dec(&crypt->refcnt); if (res < 0) { printk(KERN_DEBUG "%s: MSDU decryption/MIC verification failed" - " (SA=" MAC_FMT " keyidx=%d)\n", - ieee->dev->name, MAC_ARG(hdr->addr2), keyidx); + " (SA=%s keyidx=%d)\n", + ieee->dev->name, print_mac(mac, hdr->addr2), keyidx); return -1; } @@ -350,6 +352,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_crypt_data *crypt = NULL; int keyidx = 0; int can_be_decrypted = 0; + DECLARE_MAC_BUF(mac); hdr = (struct ieee80211_hdr_4addr *)skb->data; stats = &ieee->stats; @@ -459,8 +462,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * frames silently instead of filling system log with * these reports. */ IEEE80211_DEBUG_DROP("Decryption failed (not set)" - " (SA=" MAC_FMT ")\n", - MAC_ARG(hdr->addr2)); + " (SA=%s)\n", + print_mac(mac, hdr->addr2)); ieee->ieee_stats.rx_discards_undecryptable++; goto rx_dropped; } @@ -471,8 +474,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, fc & IEEE80211_FCTL_PROTECTED && ieee->host_decrypt && (keyidx = hostap_rx_frame_decrypt(ieee, skb, crypt)) < 0) { printk(KERN_DEBUG "%s: failed to decrypt mgmt::auth " - "from " MAC_FMT "\n", dev->name, - MAC_ARG(hdr->addr2)); + "from %s\n", dev->name, + print_mac(mac, hdr->addr2)); /* TODO: could inform hostapd about this so that it * could send auth failure report */ goto rx_dropped; @@ -650,8 +653,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * configured */ } else { IEEE80211_DEBUG_DROP("encryption configured, but RX " - "frame not encrypted (SA=" MAC_FMT - ")\n", MAC_ARG(hdr->addr2)); + "frame not encrypted (SA=%s" + ")\n", print_mac(mac, hdr->addr2)); goto rx_dropped; } } @@ -659,9 +662,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep && !ieee80211_is_eapol_frame(ieee, skb)) { IEEE80211_DEBUG_DROP("dropped unencrypted RX data " - "frame from " MAC_FMT + "frame from %s" " (drop_unencrypted=1)\n", - MAC_ARG(hdr->addr2)); + print_mac(mac, hdr->addr2)); goto rx_dropped; } @@ -1411,6 +1414,8 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 struct ieee80211_network *network, struct ieee80211_rx_stats *stats) { + DECLARE_MAC_BUF(mac); + network->qos_data.active = 0; network->qos_data.supported = 0; network->qos_data.param_count = 0; @@ -1457,11 +1462,11 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 } if (network->mode == 0) { - IEEE80211_DEBUG_SCAN("Filtered out '%s (" MAC_FMT ")' " + IEEE80211_DEBUG_SCAN("Filtered out '%s (%s)' " "network.\n", escape_essid(network->ssid, network->ssid_len), - MAC_ARG(network->bssid)); + print_mac(mac, network->bssid)); return 1; } @@ -1490,6 +1495,7 @@ static void update_network(struct ieee80211_network *dst, { int qos_active; u8 old_param; + DECLARE_MAC_BUF(mac); ieee80211_network_reset(dst); dst->ibss_dfs = src->ibss_dfs; @@ -1503,8 +1509,8 @@ static void update_network(struct ieee80211_network *dst, memcpy(&dst->stats, &src->stats, sizeof(struct ieee80211_rx_stats)); else - IEEE80211_DEBUG_SCAN("Network " MAC_FMT " info received " - "off channel (%d vs. %d)\n", MAC_ARG(src->bssid), + IEEE80211_DEBUG_SCAN("Network %s info received " + "off channel (%d vs. %d)\n", print_mac(mac, src->bssid), dst->channel, src->stats.received_channel); dst->capability = src->capability; @@ -1576,12 +1582,13 @@ static void ieee80211_process_probe_response(struct ieee80211_device struct ieee80211_info_element *info_element = beacon->info_element; #endif unsigned long flags; + DECLARE_MAC_BUF(mac); - IEEE80211_DEBUG_SCAN("'%s' (" MAC_FMT + IEEE80211_DEBUG_SCAN("'%s' (%s" "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", escape_essid(info_element->data, info_element->len), - MAC_ARG(beacon->header.addr3), + print_mac(mac, beacon->header.addr3), (beacon->capability & (1 << 0xf)) ? '1' : '0', (beacon->capability & (1 << 0xe)) ? '1' : '0', (beacon->capability & (1 << 0xd)) ? '1' : '0', @@ -1600,10 +1607,10 @@ static void ieee80211_process_probe_response(struct ieee80211_device (beacon->capability & (1 << 0x0)) ? '1' : '0'); if (ieee80211_network_init(ieee, beacon, &network, stats)) { - IEEE80211_DEBUG_SCAN("Dropped '%s' (" MAC_FMT ") via %s.\n", + IEEE80211_DEBUG_SCAN("Dropped '%s' (%s) via %s.\n", escape_essid(info_element->data, info_element->len), - MAC_ARG(beacon->header.addr3), + print_mac(mac, beacon->header.addr3), is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); return; @@ -1637,11 +1644,11 @@ static void ieee80211_process_probe_response(struct ieee80211_device /* If there are no more slots, expire the oldest */ list_del(&oldest->list); target = oldest; - IEEE80211_DEBUG_SCAN("Expired '%s' (" MAC_FMT ") from " + IEEE80211_DEBUG_SCAN("Expired '%s' (%s) from " "network list.\n", escape_essid(target->ssid, target->ssid_len), - MAC_ARG(target->bssid)); + print_mac(mac, target->bssid)); ieee80211_network_reset(target); } else { /* Otherwise just pull from the free list */ @@ -1651,10 +1658,10 @@ static void ieee80211_process_probe_response(struct ieee80211_device } #ifdef CONFIG_IEEE80211_DEBUG - IEEE80211_DEBUG_SCAN("Adding '%s' (" MAC_FMT ") via %s.\n", + IEEE80211_DEBUG_SCAN("Adding '%s' (%s) via %s.\n", escape_essid(network.ssid, network.ssid_len), - MAC_ARG(network.bssid), + print_mac(mac, network.bssid), is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); #endif @@ -1662,10 +1669,10 @@ static void ieee80211_process_probe_response(struct ieee80211_device network.ibss_dfs = NULL; list_add_tail(&target->list, &ieee->network_list); } else { - IEEE80211_DEBUG_SCAN("Updating '%s' (" MAC_FMT ") via %s.\n", + IEEE80211_DEBUG_SCAN("Updating '%s' (%s) via %s.\n", escape_essid(target->ssid, target->ssid_len), - MAC_ARG(target->bssid), + print_mac(mac, target->bssid), is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); update_network(target, &network); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 465b73d5053..d309e8f1999 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -257,6 +257,7 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, char *ev = extra; char *stop = ev + wrqu->data.length; int i = 0; + DECLARE_MAC_BUF(mac); IEEE80211_DEBUG_WX("Getting scan\n"); @@ -274,10 +275,10 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, ev = ieee80211_translate_scan(ieee, ev, stop, network); else IEEE80211_DEBUG_SCAN("Not showing network '%s (" - MAC_FMT ")' due to age (%dms).\n", + "%s)' due to age (%dms).\n", escape_essid(network->ssid, network->ssid_len), - MAC_ARG(network->bssid), + print_mac(mac, network->bssid), jiffies_to_msecs(jiffies - network-> last_scanned)); @@ -408,7 +409,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, (*crypt)->priv); sec.flags |= (1 << key); /* This ensures a key will be activated if no key is - * explicitely set */ + * explicitly set */ if (key == sec.active_key) sec.flags |= SEC_ACTIVE_KEY; diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index e475f2e1be1..c4d122ddd72 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -53,7 +53,7 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft /* Set a timer for timeout */ /* FIXME: make timeout configurable */ if (likely(mac->running)) - schedule_delayed_work(&mac->associnfo.timeout, 5 * HZ); + queue_delayed_work(mac->wq, &mac->associnfo.timeout, 5 * HZ); spin_unlock_irqrestore(&mac->lock, flags); } @@ -372,6 +372,7 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, u16 status = le16_to_cpup(&resp->status); struct ieee80211softmac_network *network = NULL; unsigned long flags; + DECLARE_MAC_BUF(mac2); if (unlikely(!mac->running)) return -ENODEV; @@ -388,7 +389,8 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, /* someone sending us things without us knowing him? Ignore. */ if (!network) { - dprintk(KERN_INFO PFX "Received unrequested assocation response from " MAC_FMT "\n", MAC_ARG(resp->header.addr3)); + dprintk(KERN_INFO PFX "Received unrequested assocation response from %s\n", + print_mac(mac2, resp->header.addr3)); spin_unlock_irqrestore(&mac->lock, flags); return 0; } @@ -417,7 +419,7 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, network->authenticated = 0; /* we don't want to do this more than once ... */ network->auth_desynced_once = 1; - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); break; } default: @@ -439,7 +441,7 @@ ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac) spin_lock_irqsave(&mac->lock, flags); mac->associnfo.associating = 1; - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); spin_unlock_irqrestore(&mac->lock, flags); } @@ -481,7 +483,7 @@ ieee80211softmac_handle_reassoc_req(struct net_device * dev, dprintkl(KERN_INFO PFX "reassoc request from unknown network\n"); return 0; } - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c index 826c32d2446..a53a751d070 100644 --- a/net/ieee80211/softmac/ieee80211softmac_auth.c +++ b/net/ieee80211/softmac/ieee80211softmac_auth.c @@ -35,6 +35,7 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, { struct ieee80211softmac_auth_queue_item *auth; unsigned long flags; + DECLARE_MAC_BUF(mac2); if (net->authenticating || net->authenticated) return 0; @@ -43,7 +44,7 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, /* Add the network if it's not already added */ ieee80211softmac_add_network(mac, net); - dprintk(KERN_NOTICE PFX "Queueing Authentication Request to "MAC_FMT"\n", MAC_ARG(net->bssid)); + dprintk(KERN_NOTICE PFX "Queueing Authentication Request to %s\n", print_mac(mac2, net->bssid)); /* Queue the auth request */ auth = (struct ieee80211softmac_auth_queue_item *) kmalloc(sizeof(struct ieee80211softmac_auth_queue_item), GFP_KERNEL); @@ -61,7 +62,7 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, /* add to list */ list_add_tail(&auth->list, &mac->auth_queue); - schedule_delayed_work(&auth->work, 0); + queue_delayed_work(mac->wq, &auth->work, 0); spin_unlock_irqrestore(&mac->lock, flags); return 0; @@ -76,6 +77,7 @@ ieee80211softmac_auth_queue(struct work_struct *work) struct ieee80211softmac_auth_queue_item *auth; struct ieee80211softmac_network *net; unsigned long flags; + DECLARE_MAC_BUF(mac2); auth = container_of(work, struct ieee80211softmac_auth_queue_item, work.work); @@ -95,17 +97,18 @@ ieee80211softmac_auth_queue(struct work_struct *work) } net->authenticated = 0; /* add a timeout call so we eventually give up waiting for an auth reply */ - schedule_delayed_work(&auth->work, IEEE80211SOFTMAC_AUTH_TIMEOUT); + queue_delayed_work(mac->wq, &auth->work, IEEE80211SOFTMAC_AUTH_TIMEOUT); auth->retry--; spin_unlock_irqrestore(&mac->lock, flags); if (ieee80211softmac_send_mgt_frame(mac, auth->net, IEEE80211_STYPE_AUTH, auth->state)) - dprintk(KERN_NOTICE PFX "Sending Authentication Request to "MAC_FMT" failed (this shouldn't happen, wait for the timeout).\n", MAC_ARG(net->bssid)); + dprintk(KERN_NOTICE PFX "Sending Authentication Request to %s failed (this shouldn't happen, wait for the timeout).\n", + print_mac(mac2, net->bssid)); else - dprintk(KERN_NOTICE PFX "Sent Authentication Request to "MAC_FMT".\n", MAC_ARG(net->bssid)); + dprintk(KERN_NOTICE PFX "Sent Authentication Request to %s.\n", print_mac(mac2, net->bssid)); return; } - printkl(KERN_WARNING PFX "Authentication timed out with "MAC_FMT"\n", MAC_ARG(net->bssid)); + printkl(KERN_WARNING PFX "Authentication timed out with %s\n", print_mac(mac2, net->bssid)); /* Remove this item from the queue */ spin_lock_irqsave(&mac->lock, flags); net->authenticating = 0; @@ -142,6 +145,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) struct ieee80211softmac_network *net = NULL; unsigned long flags; u8 * data; + DECLARE_MAC_BUF(mac2); if (unlikely(!mac->running)) return -ENODEV; @@ -161,7 +165,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) /* Make sure that we've got an auth queue item for this request */ if(aq == NULL) { - dprintkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but no queue item exists.\n", MAC_ARG(auth->header.addr2)); + dprintkl(KERN_DEBUG PFX "Authentication response received from %s but no queue item exists.\n", print_mac(mac2, auth->header.addr2)); /* Error #? */ return -1; } @@ -169,7 +173,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) /* Check for out of order authentication */ if(!net->authenticating) { - dprintkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but did not request authentication.\n",MAC_ARG(auth->header.addr2)); + dprintkl(KERN_DEBUG PFX "Authentication response received from %s but did not request authentication.\n",print_mac(mac2, auth->header.addr2)); return -1; } @@ -187,7 +191,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) spin_unlock_irqrestore(&mac->lock, flags); /* Send event */ - printkl(KERN_NOTICE PFX "Open Authentication completed with "MAC_FMT"\n", MAC_ARG(net->bssid)); + printkl(KERN_NOTICE PFX "Open Authentication completed with %s\n", print_mac(mac2, net->bssid)); ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_AUTHENTICATED, net); break; default: @@ -197,8 +201,8 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) net->authenticating = 0; spin_unlock_irqrestore(&mac->lock, flags); - printkl(KERN_NOTICE PFX "Open Authentication with "MAC_FMT" failed, error code: %i\n", - MAC_ARG(net->bssid), le16_to_cpup(&auth->status)); + printkl(KERN_NOTICE PFX "Open Authentication with %s failed, error code: %i\n", + print_mac(mac2, net->bssid), le16_to_cpup(&auth->status)); /* Count the error? */ break; } @@ -238,7 +242,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) * request. */ cancel_delayed_work(&aq->work); INIT_DELAYED_WORK(&aq->work, &ieee80211softmac_auth_challenge_response); - schedule_delayed_work(&aq->work, 0); + queue_delayed_work(mac->wq, &aq->work, 0); spin_unlock_irqrestore(&mac->lock, flags); return 0; case IEEE80211SOFTMAC_AUTH_SHARED_PASS: @@ -253,13 +257,13 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) net->authenticating = 0; net->authenticated = 1; spin_unlock_irqrestore(&mac->lock, flags); - printkl(KERN_NOTICE PFX "Shared Key Authentication completed with "MAC_FMT"\n", - MAC_ARG(net->bssid)); + printkl(KERN_NOTICE PFX "Shared Key Authentication completed with %s\n", + print_mac(mac2, net->bssid)); ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_AUTHENTICATED, net); break; default: - printkl(KERN_NOTICE PFX "Shared Key Authentication with "MAC_FMT" failed, error code: %i\n", - MAC_ARG(net->bssid), le16_to_cpup(&auth->status)); + printkl(KERN_NOTICE PFX "Shared Key Authentication with %s failed, error code: %i\n", + print_mac(mac2, net->bssid), le16_to_cpup(&auth->status)); /* Lock and reset flags */ spin_lock_irqsave(&mac->lock, flags); net->authenticating = 0; @@ -375,6 +379,7 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de struct ieee80211softmac_network *net = NULL; struct ieee80211softmac_device *mac = ieee80211_priv(dev); + DECLARE_MAC_BUF(mac2); if (unlikely(!mac->running)) return -ENODEV; @@ -387,8 +392,8 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de net = ieee80211softmac_get_network_by_bssid(mac, deauth->header.addr2); if (net == NULL) { - dprintkl(KERN_DEBUG PFX "Received deauthentication packet from "MAC_FMT", but that network is unknown.\n", - MAC_ARG(deauth->header.addr2)); + dprintkl(KERN_DEBUG PFX "Received deauthentication packet from %s, but that network is unknown.\n", + print_mac(mac2, deauth->header.addr2)); return 0; } @@ -403,6 +408,6 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de ieee80211softmac_deauth_from_net(mac, net); /* let's try to re-associate */ - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_event.c b/net/ieee80211/softmac/ieee80211softmac_event.c index b3e33a4d486..8cef05b60f1 100644 --- a/net/ieee80211/softmac/ieee80211softmac_event.c +++ b/net/ieee80211/softmac/ieee80211softmac_event.c @@ -172,7 +172,7 @@ ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, int eve /* User may have subscribed to ANY event, so * we tell them which event triggered it. */ eventptr->event_type = event; - schedule_delayed_work(&eventptr->work, 0); + queue_delayed_work(mac->wq, &eventptr->work, 0); } } } diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index 6398e6e6749..07505ca859a 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -36,8 +36,13 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv) dev = alloc_ieee80211(sizeof(*softmac) + sizeof_priv); if (!dev) return NULL; - softmac = ieee80211_priv(dev); + softmac->wq = create_freezeable_workqueue("softmac"); + if (!softmac->wq) { + free_ieee80211(dev); + return NULL; + } + softmac->dev = dev; softmac->ieee = netdev_priv(dev); spin_lock_init(&softmac->lock); @@ -105,7 +110,7 @@ ieee80211softmac_clear_pending_work(struct ieee80211softmac_device *sm) cancel_delayed_work(&eventptr->work); spin_unlock_irqrestore(&sm->lock, flags); - flush_scheduled_work(); + flush_workqueue(sm->wq); /* now we should be save and no longer need locking... */ spin_lock_irqsave(&sm->lock, flags); @@ -139,6 +144,7 @@ void free_ieee80211softmac(struct net_device *dev) ieee80211softmac_clear_pending_work(sm); kfree(sm->scaninfo); kfree(sm->wpa.IE); + destroy_workqueue(sm->wq); free_ieee80211(dev); } EXPORT_SYMBOL_GPL(free_ieee80211softmac); diff --git a/net/ieee80211/softmac/ieee80211softmac_scan.c b/net/ieee80211/softmac/ieee80211softmac_scan.c index abea3648680..bfab8d7db88 100644 --- a/net/ieee80211/softmac/ieee80211softmac_scan.c +++ b/net/ieee80211/softmac/ieee80211softmac_scan.c @@ -123,7 +123,7 @@ void ieee80211softmac_scan(struct work_struct *work) spin_unlock_irqrestore(&sm->lock, flags); break; } - schedule_delayed_work(&si->softmac_scan, IEEE80211SOFTMAC_PROBE_DELAY); + queue_delayed_work(sm->wq, &si->softmac_scan, IEEE80211SOFTMAC_PROBE_DELAY); spin_unlock_irqrestore(&sm->lock, flags); return; } else { @@ -190,7 +190,7 @@ int ieee80211softmac_start_scan_implementation(struct net_device *dev) sm->scaninfo->started = 1; sm->scaninfo->stop = 0; INIT_COMPLETION(sm->scaninfo->finished); - schedule_delayed_work(&sm->scaninfo->softmac_scan, 0); + queue_delayed_work(sm->wq, &sm->scaninfo->softmac_scan, 0); spin_unlock_irqrestore(&sm->lock, flags); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 5742dc803b7..ac36767b56e 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -72,6 +72,7 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, struct ieee80211softmac_device *sm = ieee80211_priv(net_dev); struct ieee80211softmac_auth_queue_item *authptr; int length = 0; + DECLARE_MAC_BUF(mac); check_assoc_again: mutex_lock(&sm->associnfo.mutex); @@ -90,7 +91,7 @@ check_assoc_again: /* We must unlock to avoid deadlocks with the assoc workqueue * on the associnfo.mutex */ mutex_unlock(&sm->associnfo.mutex); - flush_scheduled_work(); + flush_workqueue(sm->wq); /* Avoid race! Check assoc status again. Maybe someone started an * association while we flushed. */ goto check_assoc_again; @@ -113,7 +114,7 @@ check_assoc_again: sm->associnfo.associating = 1; /* queue lower level code to do work (if necessary) */ - schedule_delayed_work(&sm->associnfo.work, 0); + queue_delayed_work(sm->wq, &sm->associnfo.work, 0); mutex_unlock(&sm->associnfo.mutex); @@ -348,7 +349,7 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev, /* force reassociation */ mac->associnfo.bssvalid = 0; if (mac->associnfo.associated) - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); } else if (is_zero_ether_addr(data->ap_addr.sa_data)) { /* the bssid we have is no longer fixed */ mac->associnfo.bssfixed = 0; @@ -365,7 +366,7 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev, /* tell the other code that this bssid should be used no matter what */ mac->associnfo.bssfixed = 1; /* queue associate if new bssid or (old one again and not associated) */ - schedule_delayed_work(&mac->associnfo.work, 0); + queue_delayed_work(mac->wq, &mac->associnfo.work, 0); } out: diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index fb790977425..9f9fd2c6f6e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -394,6 +394,14 @@ config INET_XFRM_MODE_BEET If unsure, say Y. +config INET_LRO + tristate "Large Receive Offload (ipv4/tcp)" + + ---help--- + Support for Large Receive Offload (ipv4/tcp). + + If unsure, say Y. + config INET_DIAG tristate "INET: socket monitoring interface" default y @@ -552,7 +560,7 @@ config TCP_CONG_ILLINOIS depends on EXPERIMENTAL default n ---help--- - TCP-Illinois is a sender-side modificatio of TCP Reno for + TCP-Illinois is a sender-side modification of TCP Reno for high speed long delay links. It uses round-trip-time to adjust the alpha and beta parameters to achieve a higher average throughput and maintain fairness. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index fbf1674e0c2..93fe3966805 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,7 +10,8 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \ + inet_fragment.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o @@ -25,6 +26,7 @@ obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o +obj-$(CONFIG_INET_LRO) += inet_lro.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e68103475cc..621b128897d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -241,7 +241,7 @@ EXPORT_SYMBOL(build_ehash_secret); * Create an inet socket. */ -static int inet_create(struct socket *sock, int protocol) +static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct list_head *p; @@ -253,6 +253,9 @@ static int inet_create(struct socket *sock, int protocol) int try_loading_module = 0; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -320,7 +323,7 @@ lookup_protocol: BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; @@ -939,7 +942,7 @@ static struct inet_protosw inetsw_array[] = } }; -#define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw)) +#define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array) void inet_register_protosw(struct inet_protosw *p) { @@ -1299,6 +1302,10 @@ static int __init init_ipv4_mibs(void) sizeof(struct icmp_mib), __alignof__(struct icmp_mib)) < 0) goto err_icmp_mib; + if (snmp_mib_init((void **)icmpmsg_statistics, + sizeof(struct icmpmsg_mib), + __alignof__(struct icmpmsg_mib)) < 0) + goto err_icmpmsg_mib; if (snmp_mib_init((void **)tcp_statistics, sizeof(struct tcp_mib), __alignof__(struct tcp_mib)) < 0) @@ -1321,6 +1328,8 @@ err_udplite_mib: err_udp_mib: snmp_mib_free((void **)tcp_statistics); err_tcp_mib: + snmp_mib_free((void **)icmpmsg_statistics); +err_icmpmsg_mib: snmp_mib_free((void **)icmp_statistics); err_icmp_mib: snmp_mib_free((void **)ip_statistics); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 39f6211f149..4e8e3b079f5 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -5,6 +5,7 @@ #include <net/ah.h> #include <linux/crypto.h> #include <linux/pfkeyv2.h> +#include <linux/spinlock.h> #include <net/icmp.h> #include <net/protocol.h> #include <asm/scatterlist.h> @@ -65,6 +66,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) char buf[60]; } tmp_iph; + skb_push(skb, -skb_network_offset(skb)); top_iph = ip_hdr(skb); iph = &tmp_iph.iph; @@ -80,28 +82,30 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } - ah = (struct ip_auth_hdr *)((char *)top_iph+top_iph->ihl*4); - ah->nexthdr = top_iph->protocol; + ah = ip_auth_hdr(skb); + ah->nexthdr = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_AH; top_iph->tos = 0; top_iph->tot_len = htons(skb->len); top_iph->frag_off = 0; top_iph->ttl = 0; - top_iph->protocol = IPPROTO_AH; top_iph->check = 0; ahp = x->data; - ah->hdrlen = (XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + - ahp->icv_trunc_len) >> 2) - 2; + ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(++x->replay.oseq); - xfrm_aevent_doreplay(x); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + + spin_lock_bh(&x->lock); err = ah_mac_digest(ahp, skb, ah->auth_data); + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); + spin_unlock_bh(&x->lock); + if (err) goto error; - memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; @@ -111,8 +115,6 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); } - ip_send_check(top_iph); - err = 0; error: @@ -123,21 +125,23 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; int ihl; + int nexthdr; int err = -EINVAL; struct iphdr *iph; struct ip_auth_hdr *ah; struct ah_data *ahp; char work_buf[60]; - if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) + if (!pskb_may_pull(skb, sizeof(*ah))) goto out; - ah = (struct ip_auth_hdr*)skb->data; + ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; + nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; - if (ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_full_len) && - ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len)) + if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) goto out; if (!pskb_may_pull(skb, ah_hlen)) @@ -151,7 +155,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - ah = (struct ip_auth_hdr*)skb->data; + ah = (struct ip_auth_hdr *)skb->data; iph = ip_hdr(skb); ihl = skb->data - skb_network_header(skb); @@ -180,13 +184,12 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } } - ((struct iphdr*)work_buf)->protocol = ah->nexthdr; skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_buf, ihl); skb->transport_header = skb->network_header; __skb_pull(skb, ah_hlen + ihl); - return 0; + return nexthdr; out: return err; @@ -219,10 +222,6 @@ static int ah_init_state(struct xfrm_state *x) if (!x->aalg) goto error; - /* null auth can use a zero length key */ - if (x->aalg->alg_key_len > 512) - goto error; - if (x->encap) goto error; @@ -230,14 +229,13 @@ static int ah_init_state(struct xfrm_state *x) if (ahp == NULL) return -ENOMEM; - ahp->key = x->aalg->alg_key; - ahp->key_len = (x->aalg->alg_key_len+7)/8; tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto error; ahp->tfm = tfm; - if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) + if (crypto_hash_setkey(tfm, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; /* @@ -266,7 +264,8 @@ static int ah_init_state(struct xfrm_state *x) if (!ahp->work_icv) goto error; - x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); + x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; @@ -302,6 +301,7 @@ static struct xfrm_type ah_type = .description = "AH4", .owner = THIS_MODULE, .proto = IPPROTO_AH, + .flags = XFRM_TYPE_REPLAY_PROT, .init_state = ah_init_state, .destructor = ah_destroy, .input = ah_input, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9ab9d534fba..36d6798947b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -103,6 +103,7 @@ #include <linux/sysctl.h> #endif +#include <net/net_namespace.h> #include <net/ip.h> #include <net/icmp.h> #include <net/route.h> @@ -252,7 +253,7 @@ static int arp_constructor(struct neighbour *neigh) neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); - if (dev->hard_header == NULL) { + if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &arp_direct_ops; neigh->output = neigh->ops->queue_xmit; @@ -309,10 +310,12 @@ static int arp_constructor(struct neighbour *neigh) neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } - if (dev->hard_header_cache) + + if (dev->header_ops->cache) neigh->ops = &arp_hh_ops; else neigh->ops = &arp_generic_ops; + if (neigh->nud_state&NUD_VALID) neigh->output = neigh->ops->connected_output; else @@ -590,8 +593,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, /* * Fill the device header for the ARP frame */ - if (dev->hard_header && - dev->hard_header(skb,dev,ptype,dest_hw,src_hw,skb->len) < 0) + if (dev_hard_header(skb, dev, ptype, dest_hw, src_hw, skb->len) < 0) goto out; /* @@ -931,6 +933,9 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, { struct arphdr *arp; + if (dev->nd_net != &init_net) + goto freeskb; + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull(skb, (sizeof(struct arphdr) + (2 * dev->addr_len) + @@ -977,7 +982,7 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); + dev = dev_getbyhwaddr(&init_net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; } @@ -1165,7 +1170,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; - if ((dev = __dev_get_by_name(r.arp_dev)) == NULL) + if ((dev = __dev_get_by_name(&init_net, r.arp_dev)) == NULL) goto out; /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ @@ -1201,6 +1206,9 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); @@ -1370,24 +1378,8 @@ static const struct seq_operations arp_seq_ops = { static int arp_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &arp_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &arp_seq_ops, + sizeof(struct neigh_seq_state)); } static const struct file_operations arp_seq_fops = { @@ -1400,7 +1392,7 @@ static const struct file_operations arp_seq_fops = { static int __init arp_proc_init(void) { - if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops)) + if (!proc_net_fops_create(&init_net, "arp", S_IRUGO, &arp_seq_fops)) return -ENOMEM; return 0; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index ab56a052ce3..f18e88bc86e 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -504,22 +504,16 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) INIT_RCU_HEAD(&doi_def->rcu); INIT_LIST_HEAD(&doi_def->dom_list); - rcu_read_lock(); - if (cipso_v4_doi_search(doi_def->doi) != NULL) - goto doi_add_failure_rlock; spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi) != NULL) - goto doi_add_failure_slock; + goto doi_add_failure; list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list); spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); return 0; -doi_add_failure_slock: +doi_add_failure: spin_unlock(&cipso_v4_doi_list_lock); -doi_add_failure_rlock: - rcu_read_unlock(); return -EEXIST; } @@ -543,29 +537,23 @@ int cipso_v4_doi_remove(u32 doi, struct cipso_v4_doi *doi_def; struct cipso_v4_domhsh_entry *dom_iter; - rcu_read_lock(); - if (cipso_v4_doi_search(doi) != NULL) { - spin_lock(&cipso_v4_doi_list_lock); - doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) { - spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); - return -ENOENT; - } + spin_lock(&cipso_v4_doi_list_lock); + doi_def = cipso_v4_doi_search(doi); + if (doi_def != NULL) { doi_def->valid = 0; list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_lock(); list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) if (dom_iter->valid) netlbl_domhsh_remove(dom_iter->domain, audit_info); - cipso_v4_cache_invalidate(); rcu_read_unlock(); - + cipso_v4_cache_invalidate(); call_rcu(&doi_def->rcu, callback); return 0; } - rcu_read_unlock(); + spin_unlock(&cipso_v4_doi_list_lock); return -ENOENT; } @@ -653,22 +641,19 @@ int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) new_dom->valid = 1; INIT_RCU_HEAD(&new_dom->rcu); - rcu_read_lock(); spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + list_for_each_entry(iter, &doi_def->dom_list, list) if (iter->valid && ((domain != NULL && iter->domain != NULL && strcmp(iter->domain, domain) == 0) || (domain == NULL && iter->domain == NULL))) { spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); kfree(new_dom->domain); kfree(new_dom); return -EEXIST; } list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); return 0; } @@ -689,9 +674,8 @@ int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, { struct cipso_v4_domhsh_entry *iter; - rcu_read_lock(); spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + list_for_each_entry(iter, &doi_def->dom_list, list) if (iter->valid && ((domain != NULL && iter->domain != NULL && strcmp(iter->domain, domain) == 0) || @@ -699,13 +683,10 @@ int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, iter->valid = 0; list_del_rcu(&iter->list); spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); - return 0; } spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); return -ENOENT; } @@ -1831,68 +1812,75 @@ socket_setattr_failure: } /** - * cipso_v4_sock_getattr - Get the security attributes from a sock - * @sk: the sock + * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions + * @cipso: the CIPSO v4 option * @secattr: the security attributes * * Description: - * Query @sk to see if there is a CIPSO option attached to the sock and if - * there is return the CIPSO security attributes in @secattr. This function - * requires that @sk be locked, or privately held, but it does not do any - * locking itself. Returns zero on success and negative values on failure. + * Inspect @cipso and return the security attributes in @secattr. Returns zero + * on success and negative values on failure. * */ -int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +static int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; - struct inet_sock *sk_inet; - unsigned char *cipso_ptr; u32 doi; struct cipso_v4_doi *doi_def; - sk_inet = inet_sk(sk); - if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) - return -ENOMSG; - cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - - sizeof(struct iphdr); - ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); - if (ret_val == 0) - return ret_val; + if (cipso_v4_cache_check(cipso, cipso[1], secattr) == 0) + return 0; - doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2])); + doi = ntohl(get_unaligned((__be32 *)&cipso[2])); rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) { - rcu_read_unlock(); - return -ENOMSG; - } - + if (doi_def == NULL) + goto getattr_return; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC * tags right now it is a safe assumption. */ - switch (cipso_ptr[6]) { + switch (cipso[6]) { case CIPSO_V4_TAG_RBITMAP: - ret_val = cipso_v4_parsetag_rbm(doi_def, - &cipso_ptr[6], - secattr); + ret_val = cipso_v4_parsetag_rbm(doi_def, &cipso[6], secattr); break; case CIPSO_V4_TAG_ENUM: - ret_val = cipso_v4_parsetag_enum(doi_def, - &cipso_ptr[6], - secattr); + ret_val = cipso_v4_parsetag_enum(doi_def, &cipso[6], secattr); break; case CIPSO_V4_TAG_RANGE: - ret_val = cipso_v4_parsetag_rng(doi_def, - &cipso_ptr[6], - secattr); + ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); break; } - rcu_read_unlock(); +getattr_return: + rcu_read_unlock(); return ret_val; } /** + * cipso_v4_sock_getattr - Get the security attributes from a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Query @sk to see if there is a CIPSO option attached to the sock and if + * there is return the CIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. + * + */ +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +{ + struct ip_options *opt; + + opt = inet_sk(sk)->opt; + if (opt == NULL || opt->cipso == 0) + return -ENOMSG; + + return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), + secattr); +} + +/** * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option * @skb: the packet * @secattr: the security attributes @@ -1905,45 +1893,7 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr) { - int ret_val = -ENOMSG; - unsigned char *cipso_ptr; - u32 doi; - struct cipso_v4_doi *doi_def; - - cipso_ptr = CIPSO_V4_OPTPTR(skb); - if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) - return 0; - - doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2])); - rcu_read_lock(); - doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) - goto skbuff_getattr_return; - - /* XXX - This code assumes only one tag per CIPSO option which isn't - * really a good assumption to make but since we only support the MAC - * tags right now it is a safe assumption. */ - switch (cipso_ptr[6]) { - case CIPSO_V4_TAG_RBITMAP: - ret_val = cipso_v4_parsetag_rbm(doi_def, - &cipso_ptr[6], - secattr); - break; - case CIPSO_V4_TAG_ENUM: - ret_val = cipso_v4_parsetag_enum(doi_def, - &cipso_ptr[6], - secattr); - break; - case CIPSO_V4_TAG_RANGE: - ret_val = cipso_v4_parsetag_rng(doi_def, - &cipso_ptr[6], - secattr); - break; - } - -skbuff_getattr_return: - rcu_read_unlock(); - return ret_val; + return cipso_v4_getattr(CIPSO_V4_OPTPTR(skb), secattr); } /* diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5dbe5803b7d..55d199e4ae2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -203,8 +203,6 @@ static void inetdev_destroy(struct in_device *in_dev) ASSERT_RTNL(); dev = in_dev->dev; - if (dev == &loopback_dev) - return; in_dev->dead = 1; @@ -420,7 +418,7 @@ struct in_device *inetdev_by_index(int ifindex) struct net_device *dev; struct in_device *in_dev = NULL; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(&init_net, ifindex); if (dev) in_dev = in_dev_get(dev); read_unlock(&dev_base_lock); @@ -506,7 +504,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) goto errout; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if (dev == NULL) { err = -ENODEV; goto errout; @@ -628,7 +626,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) *colon = 0; #ifdef CONFIG_KMOD - dev_load(ifr.ifr_name); + dev_load(&init_net, ifr.ifr_name); #endif switch (cmd) { @@ -669,7 +667,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) goto done; if (colon) @@ -909,7 +907,7 @@ no_in_dev: */ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -988,7 +986,7 @@ __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -1051,6 +1049,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + ASSERT_RTNL(); if (!in_dev) { @@ -1058,7 +1059,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, in_dev = inetdev_init(dev); if (!in_dev) return notifier_from_errno(-ENOMEM); - if (dev == &loopback_dev) { + if (dev->flags & IFF_LOOPBACK) { IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } @@ -1074,7 +1075,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, case NETDEV_UP: if (dev->mtu < 68) break; - if (dev == &loopback_dev) { + if (dev->flags & IFF_LOOPBACK) { struct in_ifaddr *ifa; if ((ifa = inet_alloc_ifa()) != NULL) { ifa->ifa_local = @@ -1182,7 +1183,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1241,7 +1242,7 @@ static void devinet_copy_dflt_conf(int i) struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -1330,7 +1331,7 @@ void inet_forward_change(void) IPV4_DEVCONF_DFLT(FORWARDING) = on; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 98767a4f118..cad4278025a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -3,11 +3,12 @@ #include <net/ip.h> #include <net/xfrm.h> #include <net/esp.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/random.h> +#include <linux/spinlock.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/udp.h> @@ -15,7 +16,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - struct iphdr *top_iph; struct ip_esp_hdr *esph; struct crypto_blkcipher *tfm; struct blkcipher_desc desc; @@ -27,9 +27,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int alen; int nfrags; - /* Strip IP+ESP header. */ - __skb_pull(skb, skb_transport_offset(skb)); - /* Now skb is pure payload to encrypt */ + /* skb is pure payload to encrypt */ err = -ENOMEM; @@ -59,12 +57,12 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) tail[clen - skb->len - 2] = (clen - skb->len) - 2; pskb_put(skb, trailer, clen - skb->len); - __skb_push(skb, skb->data - skb_network_header(skb)); - top_iph = ip_hdr(skb); - esph = (struct ip_esp_hdr *)(skb_network_header(skb) + - top_iph->ihl * 4); - top_iph->tot_len = htons(skb->len + alen); - *(skb_tail_pointer(trailer) - 1) = top_iph->protocol; + skb_push(skb, -skb_network_offset(skb)); + esph = ip_esp_hdr(skb); + *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_ESP; + + spin_lock_bh(&x->lock); /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { @@ -75,7 +73,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) uh = (struct udphdr *)esph; uh->source = encap->encap_sport; uh->dest = encap->encap_dport; - uh->len = htons(skb->len + alen - top_iph->ihl*4); + uh->len = htons(skb->len + alen - skb_transport_offset(skb)); uh->check = 0; switch (encap->encap_type) { @@ -90,13 +88,11 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) break; } - top_iph->protocol = IPPROTO_UDP; - } else - top_iph->protocol = IPPROTO_ESP; + *skb_mac_header(skb) = IPPROTO_UDP; + } esph->spi = x->id.spi; - esph->seq_no = htonl(++x->replay.oseq); - xfrm_aevent_doreplay(x); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); if (esp->conf.ivlen) { if (unlikely(!esp->conf.ivinitted)) { @@ -112,16 +108,19 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) if (unlikely(nfrags > ESP_NUM_FAST_SG)) { sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); if (!sg) - goto error; + goto unlock; } - skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); + sg_init_table(sg, nfrags); + sg_mark_end(sg, skb_to_sgvec(skb, sg, esph->enc_data + + esp->conf.ivlen - + skb->data, clen)); err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); if (unlikely(err)) - goto error; + goto unlock; if (esp->conf.ivlen) { memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); @@ -134,7 +133,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } - ip_send_check(top_iph); +unlock: + spin_unlock_bh(&x->lock); error: return err; @@ -155,7 +155,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct sk_buff *trailer; int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; - int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; + int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen; int nfrags; int ihl; u8 nexthdr[2]; @@ -163,7 +163,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) int padlen; int err; - if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr))) + if (!pskb_may_pull(skb, sizeof(*esph))) goto out; if (elen <= 0 || (elen & (blksize-1))) @@ -191,7 +191,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - esph = (struct ip_esp_hdr*)skb->data; + esph = (struct ip_esp_hdr *)skb->data; /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) @@ -204,7 +204,9 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) if (!sg) goto out; } - skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen); + sg_init_table(sg, nfrags); + sg_mark_end(sg, skb_to_sgvec(skb, sg, sizeof(*esph) + esp->conf.ivlen, + elen)); err = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); @@ -256,17 +258,15 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (x->props.mode == XFRM_MODE_TRANSPORT || - x->props.mode == XFRM_MODE_BEET) + if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } - iph->protocol = nexthdr[1]; pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen); skb_set_transport_header(skb, -ihl); - return 0; + return nexthdr[1]; out: return -EINVAL; @@ -343,11 +343,6 @@ static int esp_init_state(struct xfrm_state *x) struct crypto_blkcipher *tfm; u32 align; - /* null auth and encryption can have zero length keys */ - if (x->aalg) { - if (x->aalg->alg_key_len > 512) - goto error; - } if (x->ealg == NULL) goto error; @@ -359,15 +354,14 @@ static int esp_init_state(struct xfrm_state *x) struct xfrm_algo_desc *aalg_desc; struct crypto_hash *hash; - esp->auth.key = x->aalg->alg_key; - esp->auth.key_len = (x->aalg->alg_key_len+7)/8; hash = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(hash)) goto error; esp->auth.tfm = hash; - if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) + if (crypto_hash_setkey(hash, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); @@ -389,8 +383,7 @@ static int esp_init_state(struct xfrm_state *x) if (!esp->auth.work_icv) goto error; } - esp->conf.key = x->ealg->alg_key; - esp->conf.key_len = (x->ealg->alg_key_len+7)/8; + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto error; @@ -403,7 +396,8 @@ static int esp_init_state(struct xfrm_state *x) goto error; esp->conf.ivinitted = 0; } - if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, + (x->ealg->alg_key_len + 7) / 8)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; if (x->props.mode == XFRM_MODE_TUNNEL) @@ -443,6 +437,7 @@ static struct xfrm_type esp_type = .description = "ESP4", .owner = THIS_MODULE, .proto = IPPROTO_ESP, + .flags = XFRM_TYPE_REPLAY_PROT, .init_state = esp_init_state, .destructor = esp_destroy, .get_mtu = esp4_get_mtu, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eff6bce453e..60123905dbb 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -49,6 +49,8 @@ #define FFprint(a...) printk(KERN_DEBUG a) +static struct sock *fibnl; + #ifndef CONFIG_IP_MULTIPLE_TABLES struct fib_table *ip_fib_local_table; @@ -126,13 +128,14 @@ struct net_device * ip_dev_find(__be32 addr) struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; struct net_device *dev = NULL; + struct fib_table *local_table; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif - if (!ip_fib_local_table || - ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res)) + local_table = fib_get_table(RT_TABLE_LOCAL); + if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) return NULL; if (res.type != RTN_LOCAL) goto out; @@ -150,6 +153,7 @@ unsigned inet_addr_type(__be32 addr) struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; unsigned ret = RTN_BROADCAST; + struct fib_table *local_table; if (ZERONET(addr) || BADCLASS(addr)) return RTN_BROADCAST; @@ -160,10 +164,10 @@ unsigned inet_addr_type(__be32 addr) res.r = NULL; #endif - if (ip_fib_local_table) { + local_table = fib_get_table(RT_TABLE_LOCAL); + if (local_table) { ret = RTN_UNICAST; - if (!ip_fib_local_table->tb_lookup(ip_fib_local_table, - &fl, &res)) { + if (!local_table->tb_lookup(local_table, &fl, &res)) { ret = res.type; fib_res_put(&res); } @@ -334,7 +338,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt, colon = strchr(devname, ':'); if (colon) *colon = 0; - dev = __dev_get_by_name(devname); + dev = __dev_get_by_name(&init_net, devname); if (!dev) return -ENODEV; cfg->fc_oif = dev->ifindex; @@ -487,7 +491,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, } nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { - switch (attr->nla_type) { + switch (nla_type(attr)) { case RTA_DST: cfg->fc_dst = nla_get_be32(attr); break; @@ -784,17 +788,12 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) } } -static void nl_fib_input(struct sock *sk, int len) +static void nl_fib_input(struct sk_buff *skb) { - struct sk_buff *skb = NULL; - struct nlmsghdr *nlh = NULL; struct fib_result_nl *frn; - u32 pid; + struct nlmsghdr *nlh; struct fib_table *tb; - - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) - return; + u32 pid; nlh = nlmsg_hdr(skb); if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || @@ -811,13 +810,13 @@ static void nl_fib_input(struct sock *sk, int len) pid = NETLINK_CB(skb).pid; /* pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT); } static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, - THIS_MODULE); + fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, + nl_fib_input, NULL, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) @@ -860,6 +859,9 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); return NOTIFY_DONE; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 9ad1d9ff9ce..527a6e0af5b 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -35,6 +35,7 @@ #include <linux/netlink.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -1038,24 +1039,8 @@ static const struct seq_operations fib_seq_ops = { static int fib_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct fib_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &fib_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &fib_seq_ops, + sizeof(struct fib_iter_state)); } static const struct file_operations fib_seq_fops = { @@ -1068,13 +1053,13 @@ static const struct file_operations fib_seq_fops = { int __init fib_proc_init(void) { - if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops)) + if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_seq_fops)) return -ENOMEM; return 0; } void __init fib_proc_exit(void) { - proc_net_remove("route"); + proc_net_remove(&init_net, "route"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 2a947840210..f16839c6a72 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -76,8 +76,6 @@ static struct fib4_rule local_rule = { }, }; -static LIST_HEAD(fib4_rules); - #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) { @@ -279,9 +277,9 @@ static u32 fib4_rule_default_pref(void) struct list_head *pos; struct fib_rule *rule; - if (!list_empty(&fib4_rules)) { - pos = fib4_rules.next; - if (pos->next != &fib4_rules) { + if (!list_empty(&fib4_rules_ops.rules_list)) { + pos = fib4_rules_ops.rules_list.next; + if (pos->next != &fib4_rules_ops.rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -317,15 +315,15 @@ static struct fib_rules_ops fib4_rules_ops = { .flush_cache = fib4_rule_flush_cache, .nlgroup = RTNLGRP_IPV4_RULE, .policy = fib4_rule_policy, - .rules_list = &fib4_rules, + .rules_list = LIST_HEAD_INIT(fib4_rules_ops.rules_list), .owner = THIS_MODULE, }; void __init fib4_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib4_rules); - list_add_tail(&main_rule.common.list, &fib4_rules); - list_add_tail(&default_rule.common.list, &fib4_rules); + list_add_tail(&local_rule.common.list, &fib4_rules_ops.rules_list); + list_add_tail(&main_rule.common.list, &fib4_rules_ops.rules_list); + list_add_tail(&default_rule.common.list, &fib4_rules_ops.rules_list); fib_rules_register(&fib4_rules_ops); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c434119deb5..1351a2617dc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -533,7 +533,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, return -EINVAL; if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -743,7 +743,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) int remaining; nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla->nla_type; + int type = nla_type(nla); if (type) { if (type > RTAX_MAX) @@ -799,7 +799,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9ca786a6fd3..8d8c2915e06 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -54,7 +54,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <asm/bitops.h> +#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -73,6 +73,7 @@ #include <linux/netlink.h> #include <linux/init.h> #include <linux/list.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -85,23 +86,14 @@ #define MAX_STAT_DEPTH 32 #define KEYLENGTH (8*sizeof(t_key)) -#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) -#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) typedef unsigned int t_key; #define T_TNODE 0 #define T_LEAF 1 #define NODE_TYPE_MASK 0x1UL -#define NODE_PARENT(node) \ - ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK))) - #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) -#define NODE_SET_PARENT(node, ptr) \ - rcu_assign_pointer((node)->parent, \ - ((unsigned long)(ptr)) | NODE_TYPE(node)) - #define IS_TNODE(n) (!(n->parent & T_LEAF)) #define IS_LEAF(n) (n->parent & T_LEAF) @@ -174,6 +166,19 @@ static void tnode_free(struct tnode *tn); static struct kmem_cache *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; +static inline struct tnode *node_parent(struct node *node) +{ + struct tnode *ret; + + ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK); + return rcu_dereference(ret); +} + +static inline void node_set_parent(struct node *node, struct tnode *ptr) +{ + rcu_assign_pointer(node->parent, + (unsigned long)ptr | NODE_TYPE(node)); +} /* rcu_read_lock needs to be hold by caller from readside */ @@ -189,6 +194,11 @@ static inline int tnode_child_length(const struct tnode *tn) return 1 << tn->bits; } +static inline t_key mask_pfx(t_key k, unsigned short l) +{ + return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); +} + static inline t_key tkey_extract_bits(t_key a, int offset, int bits) { if (offset < KEYLENGTH) @@ -446,7 +456,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w tn->full_children++; if (n) - NODE_SET_PARENT(n, tn); + node_set_parent(n, tn); rcu_assign_pointer(tn->child[i], n); } @@ -481,7 +491,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) continue; /* compress one level */ - NODE_SET_PARENT(n, NULL); + node_set_parent(n, NULL); tnode_free(tn); return n; } @@ -636,7 +646,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* compress one level */ - NODE_SET_PARENT(n, NULL); + node_set_parent(n, NULL); tnode_free(tn); return n; } @@ -673,7 +683,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) inode->pos == oldtnode->pos + oldtnode->bits && inode->bits > 1) { struct tnode *left, *right; - t_key m = TKEY_GET_MASK(inode->pos, 1); + t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; left = tnode_new(inode->key&(~m), inode->pos + 1, inode->bits - 1); @@ -961,24 +971,21 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key; - struct tnode *tp = NULL; - - key = tn->key; - - while (tn != NULL && NODE_PARENT(tn) != NULL) { + t_key cindex, key = tn->key; + struct tnode *tp; - tp = NODE_PARENT(tn); + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = (struct tnode *) resize (t, (struct tnode *)tn); tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); - if (!NODE_PARENT(tn)) + tp = node_parent((struct node *) tn); + if (!tp) break; - - tn = NODE_PARENT(tn); + tn = tp; } + /* Handle last (top) tnode */ if (IS_TNODE(tn)) tn = (struct tnode*) resize(t, (struct tnode *)tn); @@ -1031,7 +1038,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) pos = tn->pos + tn->bits; n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); - BUG_ON(n && NODE_PARENT(n) != tn); + BUG_ON(n && node_parent(n) != tn); } else break; } @@ -1083,7 +1090,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) if (t->trie && n == NULL) { /* Case 2: n is NULL, and will just insert a new leaf */ - NODE_SET_PARENT(l, tp); + node_set_parent((struct node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, (struct node *)l); @@ -1114,7 +1121,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) goto err; } - NODE_SET_PARENT(tn, tp); + node_set_parent((struct node *)tn, tp); missbit = tkey_extract_bits(key, newpos, 1); put_child(t, tn, missbit, (struct node *)l); @@ -1364,7 +1371,8 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result bits = pn->bits; if (!chopped_off) - cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits); + cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), + pos, bits); n = tnode_get_child(pn, cindex); @@ -1450,8 +1458,8 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result * to find a matching prefix. */ - node_prefix = MASK_PFX(cn->key, cn->pos); - key_prefix = MASK_PFX(key, cn->pos); + node_prefix = mask_pfx(cn->key, cn->pos); + key_prefix = mask_pfx(key, cn->pos); pref_mismatch = key_prefix^node_prefix; mp = 0; @@ -1495,12 +1503,13 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - if (NODE_PARENT(pn) == NULL) + struct tnode *parent = node_parent((struct node *) pn); + if (!parent) goto failed; /* Get Child's index */ - cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); - pn = NODE_PARENT(pn); + cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); + pn = parent; chopped_off = 0; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -1536,7 +1545,7 @@ static int trie_leaf_remove(struct trie *t, t_key key) check_tnode(tn); n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); - BUG_ON(n && NODE_PARENT(n) != tn); + BUG_ON(n && node_parent(n) != tn); } l = (struct leaf *) n; @@ -1551,7 +1560,7 @@ static int trie_leaf_remove(struct trie *t, t_key key) t->revision++; t->size--; - tp = NODE_PARENT(n); + tp = node_parent(n); tnode_free((struct tnode *) n); if (tp) { @@ -1703,7 +1712,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) p = (struct tnode*) trie; /* Start */ } else - p = (struct tnode *) NODE_PARENT(c); + p = node_parent(c); while (p) { int pos, last; @@ -1740,7 +1749,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) up: /* No more children go up one step */ c = (struct node *) p; - p = (struct tnode *) NODE_PARENT(p); + p = node_parent(c); } return NULL; /* Ready. Root of trie */ } @@ -2043,7 +2052,7 @@ rescan: } /* Current node exhausted, pop back up */ - p = NODE_PARENT(tn); + p = node_parent((struct node *)tn); if (p) { cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; tn = p; @@ -2317,7 +2326,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) return 0; - if (!NODE_PARENT(n)) { + if (!node_parent(n)) { if (iter->trie == trie_local) seq_puts(seq, "<local>:\n"); else @@ -2326,7 +2335,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; - __be32 prf = htonl(MASK_PFX(tn->key, tn->pos)); + __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); seq_indent(seq, iter->depth-1); seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", @@ -2370,25 +2379,8 @@ static const struct seq_operations fib_trie_seq_ops = { static int fib_trie_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &fib_trie_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; - memset(s, 0, sizeof(*s)); -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &fib_trie_seq_ops, + sizeof(struct fib_trie_iter)); } static const struct file_operations fib_trie_fops = { @@ -2491,25 +2483,8 @@ static const struct seq_operations fib_route_seq_ops = { static int fib_route_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &fib_route_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; - memset(s, 0, sizeof(*s)); -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &fib_route_seq_ops, + sizeof(struct fib_trie_iter)); } static const struct file_operations fib_route_fops = { @@ -2522,30 +2497,30 @@ static const struct file_operations fib_route_fops = { int __init fib_proc_init(void) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + if (!proc_net_fops_create(&init_net, "fib_trie", S_IRUGO, &fib_trie_fops)) goto out1; - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + if (!proc_net_fops_create(&init_net, "fib_triestat", S_IRUGO, &fib_triestat_fops)) goto out2; - if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_route_fops)) goto out3; return 0; out3: - proc_net_remove("fib_triestat"); + proc_net_remove(&init_net, "fib_triestat"); out2: - proc_net_remove("fib_trie"); + proc_net_remove(&init_net, "fib_trie"); out1: return -ENOMEM; } void __init fib_proc_exit(void) { - proc_net_remove("fib_trie"); - proc_net_remove("fib_triestat"); - proc_net_remove("route"); + proc_net_remove(&init_net, "fib_trie"); + proc_net_remove(&init_net, "fib_triestat"); + proc_net_remove(&init_net, "route"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 02a899bec19..233de063429 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -115,6 +115,7 @@ struct icmp_bxm { * Statistics */ DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; +DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly; /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ @@ -214,8 +215,6 @@ int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; */ struct icmp_control { - int output_entry; /* Field for increment on output */ - int input_entry; /* Field for increment on input */ void (*handler)(struct sk_buff *skb); short error; /* This ICMP is classed as an error message */ }; @@ -316,12 +315,10 @@ out: /* * Maintain the counters used in the SNMP statistics for outgoing ICMP */ -static void icmp_out_count(int type) +void icmp_out_count(unsigned char type) { - if (type <= NR_ICMP_TYPES) { - ICMP_INC_STATS(icmp_pointers[type].output_entry); - ICMP_INC_STATS(ICMP_MIB_OUTMSGS); - } + ICMPMSGOUT_INC_STATS(type); + ICMP_INC_STATS(ICMP_MIB_OUTMSGS); } /* @@ -390,7 +387,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) return; icmp_param->data.icmph.checksum = 0; - icmp_out_count(icmp_param->data.icmph.type); inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; @@ -517,7 +513,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct net_device *dev = NULL; if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index(rt->fl.iif); + dev = dev_get_by_index(&init_net, rt->fl.iif); if (dev) { saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -952,6 +948,7 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); + ICMPMSGIN_INC_STATS_BH(icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -986,7 +983,6 @@ int icmp_rcv(struct sk_buff *skb) } } - ICMP_INC_STATS_BH(icmp_pointers[icmph->type].input_entry); icmp_pointers[icmph->type].handler(skb); drop: @@ -1002,109 +998,71 @@ error: */ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { [ICMP_ECHOREPLY] = { - .output_entry = ICMP_MIB_OUTECHOREPS, - .input_entry = ICMP_MIB_INECHOREPS, .handler = icmp_discard, }, [1] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [2] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_DEST_UNREACH] = { - .output_entry = ICMP_MIB_OUTDESTUNREACHS, - .input_entry = ICMP_MIB_INDESTUNREACHS, .handler = icmp_unreach, .error = 1, }, [ICMP_SOURCE_QUENCH] = { - .output_entry = ICMP_MIB_OUTSRCQUENCHS, - .input_entry = ICMP_MIB_INSRCQUENCHS, .handler = icmp_unreach, .error = 1, }, [ICMP_REDIRECT] = { - .output_entry = ICMP_MIB_OUTREDIRECTS, - .input_entry = ICMP_MIB_INREDIRECTS, .handler = icmp_redirect, .error = 1, }, [6] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [7] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_ECHO] = { - .output_entry = ICMP_MIB_OUTECHOS, - .input_entry = ICMP_MIB_INECHOS, .handler = icmp_echo, }, [9] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [10] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_TIME_EXCEEDED] = { - .output_entry = ICMP_MIB_OUTTIMEEXCDS, - .input_entry = ICMP_MIB_INTIMEEXCDS, .handler = icmp_unreach, .error = 1, }, [ICMP_PARAMETERPROB] = { - .output_entry = ICMP_MIB_OUTPARMPROBS, - .input_entry = ICMP_MIB_INPARMPROBS, .handler = icmp_unreach, .error = 1, }, [ICMP_TIMESTAMP] = { - .output_entry = ICMP_MIB_OUTTIMESTAMPS, - .input_entry = ICMP_MIB_INTIMESTAMPS, .handler = icmp_timestamp, }, [ICMP_TIMESTAMPREPLY] = { - .output_entry = ICMP_MIB_OUTTIMESTAMPREPS, - .input_entry = ICMP_MIB_INTIMESTAMPREPS, .handler = icmp_discard, }, [ICMP_INFO_REQUEST] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_DUMMY, .handler = icmp_discard, }, [ICMP_INFO_REPLY] = { - .output_entry = ICMP_MIB_DUMMY, - .input_entry = ICMP_MIB_DUMMY, .handler = icmp_discard, }, [ICMP_ADDRESS] = { - .output_entry = ICMP_MIB_OUTADDRMASKS, - .input_entry = ICMP_MIB_INADDRMASKS, .handler = icmp_address, }, [ICMP_ADDRESSREPLY] = { - .output_entry = ICMP_MIB_OUTADDRMASKREPS, - .input_entry = ICMP_MIB_INADDRMASKREPS, .handler = icmp_address_reply, }, }; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a646409c2d0..7dbc282d4f9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -91,6 +91,7 @@ #include <linux/rtnetlink.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> @@ -1694,8 +1695,8 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST - struct in_device *in_dev = pmc->interface; struct ip_sf_list *psf; + in_dev = pmc->interface; #endif /* filter mode change */ @@ -1798,7 +1799,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, { int err; - if (iml->sflist == 0) { + if (iml->sflist == NULL) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); @@ -2166,7 +2167,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, return -EFAULT; } for (i=0; i<copycount; i++) { - struct sockaddr_in *psin; struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; @@ -2291,7 +2291,7 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct in_device *in_dev; in_dev = in_dev_get(state->dev); if (!in_dev) @@ -2410,23 +2410,8 @@ static const struct seq_operations igmp_mc_seq_ops = { static int igmp_mc_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - rc = seq_open(file, &igmp_mc_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &igmp_mc_seq_ops, + sizeof(struct igmp_mc_iter_state)); } static const struct file_operations igmp_mc_seq_fops = { @@ -2453,7 +2438,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) state->idev = NULL; state->im = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct in_device *idev; idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2584,23 +2569,8 @@ static const struct seq_operations igmp_mcf_seq_ops = { static int igmp_mcf_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - rc = seq_open(file, &igmp_mcf_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &igmp_mcf_seq_ops, + sizeof(struct igmp_mcf_iter_state)); } static const struct file_operations igmp_mcf_seq_fops = { @@ -2613,8 +2583,8 @@ static const struct file_operations igmp_mcf_seq_fops = { int __init igmp_mc_proc_init(void) { - proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops); - proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); return 0; } #endif diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fbe7714f21d..8fb6ca23700 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -33,6 +33,19 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * This array holds the first and last local port number. */ int sysctl_local_port_range[2] = { 32768, 61000 }; +DEFINE_SEQLOCK(sysctl_port_range_lock); + +void inet_get_local_port_range(int *low, int *high) +{ + unsigned seq; + do { + seq = read_seqbegin(&sysctl_port_range_lock); + + *low = sysctl_local_port_range[0]; + *high = sysctl_local_port_range[1]; + } while (read_seqretry(&sysctl_port_range_lock, seq)); +} +EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) @@ -77,10 +90,11 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, local_bh_disable(); if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; + int remaining, rover, low, high; + + inet_get_local_port_range(&low, &high); + remaining = (high - low) + 1; + rover = net_random() % remaining + low; do { head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index def007ec1d6..dc429b6b0ba 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -11,6 +11,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -112,7 +113,7 @@ static int inet_csk_diag_fill(struct sock *sk, } #endif -#define EXPIRES_IN_MS(tmo) ((tmo - jiffies) * 1000 + HZ - 1) / HZ +#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) if (icsk->icsk_pending == ICSK_TIME_RETRANS) { r->idiag_timer = 1; @@ -190,7 +191,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->id.idiag_dst[0] = tw->tw_daddr; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; + r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; @@ -814,6 +815,12 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) nlmsg_len(nlh) < hdrlen) return -EINVAL; +#ifdef CONFIG_KMOD + if (inet_diag_table[nlh->nlmsg_type] == NULL) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_INET_DIAG, nlh->nlmsg_type); +#endif + if (inet_diag_table[nlh->nlmsg_type] == NULL) return -ENOENT; @@ -838,15 +845,11 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) static DEFINE_MUTEX(inet_diag_mutex); -static void inet_diag_rcv(struct sock *sk, int len) +static void inet_diag_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&inet_diag_mutex); - netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg); - mutex_unlock(&inet_diag_mutex); - } while (qlen); + mutex_lock(&inet_diag_mutex); + netlink_rcv_skb(skb, &inet_diag_rcv_msg); + mutex_unlock(&inet_diag_mutex); } static DEFINE_SPINLOCK(inet_diag_register_lock); @@ -896,8 +899,8 @@ static int __init inet_diag_init(void) if (!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, - NULL, THIS_MODULE); + idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, + inet_diag_rcv, NULL, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0; @@ -917,3 +920,4 @@ static void __exit inet_diag_exit(void) module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c new file mode 100644 index 00000000000..e15e04fc666 --- /dev/null +++ b/net/ipv4/inet_fragment.c @@ -0,0 +1,261 @@ +/* + * inet fragments management + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Pavel Emelyanov <xemul@openvz.org> + * Started as consolidation of ipv4/ip_fragment.c, + * ipv6/reassembly. and ipv6 nf conntrack reassembly + */ + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/random.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> + +#include <net/inet_frag.h> + +static void inet_frag_secret_rebuild(unsigned long dummy) +{ + struct inet_frags *f = (struct inet_frags *)dummy; + unsigned long now = jiffies; + int i; + + write_lock(&f->lock); + get_random_bytes(&f->rnd, sizeof(u32)); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_queue *q; + struct hlist_node *p, *n; + + hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { + unsigned int hval = f->hashfn(q); + + if (hval != i) { + hlist_del(&q->list); + + /* Relink to new hash chain. */ + hlist_add_head(&q->list, &f->hash[hval]); + } + } + } + write_unlock(&f->lock); + + mod_timer(&f->secret_timer, now + f->ctl->secret_interval); +} + +void inet_frags_init(struct inet_frags *f) +{ + int i; + + for (i = 0; i < INETFRAGS_HASHSZ; i++) + INIT_HLIST_HEAD(&f->hash[i]); + + INIT_LIST_HEAD(&f->lru_list); + rwlock_init(&f->lock); + + f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + + f->nqueues = 0; + atomic_set(&f->mem, 0); + + init_timer(&f->secret_timer); + f->secret_timer.function = inet_frag_secret_rebuild; + f->secret_timer.data = (unsigned long)f; + f->secret_timer.expires = jiffies + f->ctl->secret_interval; + add_timer(&f->secret_timer); +} +EXPORT_SYMBOL(inet_frags_init); + +void inet_frags_fini(struct inet_frags *f) +{ + del_timer(&f->secret_timer); +} +EXPORT_SYMBOL(inet_frags_fini); + +static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) +{ + write_lock(&f->lock); + hlist_del(&fq->list); + list_del(&fq->lru_list); + f->nqueues--; + write_unlock(&f->lock); +} + +void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) +{ + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq, f); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + } +} + +EXPORT_SYMBOL(inet_frag_kill); + +static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb, + int *work) +{ + if (work) + *work -= skb->truesize; + + atomic_sub(skb->truesize, &f->mem); + if (f->skb_free) + f->skb_free(skb); + kfree_skb(skb); +} + +void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, + int *work) +{ + struct sk_buff *fp; + + BUG_TRAP(q->last_in & COMPLETE); + BUG_TRAP(del_timer(&q->timer) == 0); + + /* Release all fragment data. */ + fp = q->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(f, fp, work); + fp = xp; + } + + if (work) + *work -= f->qsize; + atomic_sub(f->qsize, &f->mem); + + if (f->destructor) + f->destructor(q); + kfree(q); + +} +EXPORT_SYMBOL(inet_frag_destroy); + +int inet_frag_evictor(struct inet_frags *f) +{ + struct inet_frag_queue *q; + int work, evicted = 0; + + work = atomic_read(&f->mem) - f->ctl->low_thresh; + while (work > 0) { + read_lock(&f->lock); + if (list_empty(&f->lru_list)) { + read_unlock(&f->lock); + break; + } + + q = list_first_entry(&f->lru_list, + struct inet_frag_queue, lru_list); + atomic_inc(&q->refcnt); + read_unlock(&f->lock); + + spin_lock(&q->lock); + if (!(q->last_in & COMPLETE)) + inet_frag_kill(q, f); + spin_unlock(&q->lock); + + if (atomic_dec_and_test(&q->refcnt)) + inet_frag_destroy(q, f, &work); + evicted++; + } + + return evicted; +} +EXPORT_SYMBOL(inet_frag_evictor); + +static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, + struct inet_frags *f, unsigned int hash, void *arg) +{ + struct inet_frag_queue *qp; +#ifdef CONFIG_SMP + struct hlist_node *n; +#endif + + write_lock(&f->lock); +#ifdef CONFIG_SMP + /* With SMP race we have to recheck hash table, because + * such entry could be created on other cpu, while we + * promoted read lock to write lock. + */ + hlist_for_each_entry(qp, n, &f->hash[hash], list) { + if (f->match(qp, arg)) { + atomic_inc(&qp->refcnt); + write_unlock(&f->lock); + qp_in->last_in |= COMPLETE; + inet_frag_put(qp_in, f); + return qp; + } + } +#endif + qp = qp_in; + if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout)) + atomic_inc(&qp->refcnt); + + atomic_inc(&qp->refcnt); + hlist_add_head(&qp->list, &f->hash[hash]); + list_add_tail(&qp->lru_list, &f->lru_list); + f->nqueues++; + write_unlock(&f->lock); + return qp; +} + +static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) +{ + struct inet_frag_queue *q; + + q = kzalloc(f->qsize, GFP_ATOMIC); + if (q == NULL) + return NULL; + + f->constructor(q, arg); + atomic_add(f->qsize, &f->mem); + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); + spin_lock_init(&q->lock); + atomic_set(&q->refcnt, 1); + + return q; +} + +static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, + void *arg, unsigned int hash) +{ + struct inet_frag_queue *q; + + q = inet_frag_alloc(f, arg); + if (q == NULL) + return NULL; + + return inet_frag_intern(q, f, hash, arg); +} + +struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, + unsigned int hash) +{ + struct inet_frag_queue *q; + struct hlist_node *n; + + read_lock(&f->lock); + hlist_for_each_entry(q, n, &f->hash[hash], list) { + if (f->match(q, key)) { + atomic_inc(&q->refcnt); + read_unlock(&f->lock); + return q; + } + } + read_unlock(&f->lock); + + return inet_frag_create(f, key, hash); +} +EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fb662621c54..16eecc7046a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -279,19 +279,18 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, int ret; if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; + int i, remaining, low, high, port; static u32 hint; u32 offset = hint + inet_sk_port_offset(sk); struct hlist_node *node; struct inet_timewait_sock *tw = NULL; + inet_get_local_port_range(&low, &high); + remaining = (high - low) + 1; + local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; + for (i = 1; i <= remaining; i++) { + port = low + (i + offset) % remaining; head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; spin_lock(&head->lock); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c new file mode 100644 index 00000000000..ac3b1d3dba2 --- /dev/null +++ b/net/ipv4/inet_lro.c @@ -0,0 +1,600 @@ +/* + * linux/net/ipv4/inet_lro.c + * + * Large Receive Offload (ipv4 / tcp) + * + * (C) Copyright IBM Corp. 2007 + * + * Authors: + * Jan-Bernd Themann <themann@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#include <linux/module.h> +#include <linux/if_vlan.h> +#include <linux/inet_lro.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); +MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); + +#define TCP_HDR_LEN(tcph) (tcph->doff << 2) +#define IP_HDR_LEN(iph) (iph->ihl << 2) +#define TCP_PAYLOAD_LENGTH(iph, tcph) \ + (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) + +#define IPH_LEN_WO_OPTIONS 5 +#define TCPH_LEN_WO_OPTIONS 5 +#define TCPH_LEN_W_TIMESTAMP 8 + +#define LRO_MAX_PG_HLEN 64 + +#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; } + +/* + * Basic tcp checks whether packet is suitable for LRO + */ + +static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, + int len, struct net_lro_desc *lro_desc) +{ + /* check ip header: don't aggregate padded frames */ + if (ntohs(iph->tot_len) != len) + return -1; + + if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0) + return -1; + + if (iph->ihl != IPH_LEN_WO_OPTIONS) + return -1; + + if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack + || tcph->rst || tcph->syn || tcph->fin) + return -1; + + if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) + return -1; + + if (tcph->doff != TCPH_LEN_WO_OPTIONS + && tcph->doff != TCPH_LEN_W_TIMESTAMP) + return -1; + + /* check tcp options (only timestamp allowed) */ + if (tcph->doff == TCPH_LEN_W_TIMESTAMP) { + __be32 *topt = (__be32 *)(tcph + 1); + + if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) + | TCPOLEN_TIMESTAMP)) + return -1; + + /* timestamp should be in right order */ + topt++; + if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval), + ntohl(*topt))) + return -1; + + /* timestamp reply should not be zero */ + topt++; + if (*topt == 0) + return -1; + } + + return 0; +} + +static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) +{ + struct iphdr *iph = lro_desc->iph; + struct tcphdr *tcph = lro_desc->tcph; + __be32 *p; + __wsum tcp_hdr_csum; + + tcph->ack_seq = lro_desc->tcp_ack; + tcph->window = lro_desc->tcp_window; + + if (lro_desc->tcp_saw_tstamp) { + p = (__be32 *)(tcph + 1); + *(p+2) = lro_desc->tcp_rcv_tsecr; + } + + iph->tot_len = htons(lro_desc->ip_tot_len); + + iph->check = 0; + iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); + + tcph->check = 0; + tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); + lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); + tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + lro_desc->ip_tot_len - + IP_HDR_LEN(iph), IPPROTO_TCP, + lro_desc->data_csum); +} + +static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) +{ + __wsum tcp_csum; + __wsum tcp_hdr_csum; + __wsum tcp_ps_hdr_csum; + + tcp_csum = ~csum_unfold(tcph->check); + tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); + + tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + len + TCP_HDR_LEN(tcph), + IPPROTO_TCP, 0); + + return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum), + tcp_ps_hdr_csum); +} + +static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, + struct iphdr *iph, struct tcphdr *tcph, + u16 vlan_tag, struct vlan_group *vgrp) +{ + int nr_frags; + __be32 *ptr; + u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + nr_frags = skb_shinfo(skb)->nr_frags; + lro_desc->parent = skb; + lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]); + lro_desc->iph = iph; + lro_desc->tcph = tcph; + lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len; + lro_desc->tcp_ack = tcph->ack_seq; + lro_desc->tcp_window = tcph->window; + + lro_desc->pkt_aggr_cnt = 1; + lro_desc->ip_tot_len = ntohs(iph->tot_len); + + if (tcph->doff == 8) { + ptr = (__be32 *)(tcph+1); + lro_desc->tcp_saw_tstamp = 1; + lro_desc->tcp_rcv_tsval = *(ptr+1); + lro_desc->tcp_rcv_tsecr = *(ptr+2); + } + + lro_desc->mss = tcp_data_len; + lro_desc->vgrp = vgrp; + lro_desc->vlan_tag = vlan_tag; + lro_desc->active = 1; + + lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, + tcp_data_len); +} + +static inline void lro_clear_desc(struct net_lro_desc *lro_desc) +{ + memset(lro_desc, 0, sizeof(struct net_lro_desc)); +} + +static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph, + struct tcphdr *tcph, int tcp_data_len) +{ + struct sk_buff *parent = lro_desc->parent; + __be32 *topt; + + lro_desc->pkt_aggr_cnt++; + lro_desc->ip_tot_len += tcp_data_len; + lro_desc->tcp_next_seq += tcp_data_len; + lro_desc->tcp_window = tcph->window; + lro_desc->tcp_ack = tcph->ack_seq; + + /* don't update tcp_rcv_tsval, would not work with PAWS */ + if (lro_desc->tcp_saw_tstamp) { + topt = (__be32 *) (tcph + 1); + lro_desc->tcp_rcv_tsecr = *(topt + 2); + } + + lro_desc->data_csum = csum_block_add(lro_desc->data_csum, + lro_tcp_data_csum(iph, tcph, + tcp_data_len), + parent->len); + + parent->len += tcp_data_len; + parent->data_len += tcp_data_len; + if (tcp_data_len > lro_desc->mss) + lro_desc->mss = tcp_data_len; +} + +static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct sk_buff *parent = lro_desc->parent; + int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + lro_add_common(lro_desc, iph, tcph, tcp_data_len); + + skb_pull(skb, (skb->len - tcp_data_len)); + parent->truesize += skb->truesize; + + if (lro_desc->last_skb) + lro_desc->last_skb->next = skb; + else + skb_shinfo(parent)->frag_list = skb; + + lro_desc->last_skb = skb; +} + +static void lro_add_frags(struct net_lro_desc *lro_desc, + int len, int hlen, int truesize, + struct skb_frag_struct *skb_frags, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct sk_buff *skb = lro_desc->parent; + int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + lro_add_common(lro_desc, iph, tcph, tcp_data_len); + + skb->truesize += truesize; + + skb_frags[0].page_offset += hlen; + skb_frags[0].size -= hlen; + + while (tcp_data_len > 0) { + *(lro_desc->next_frag) = *skb_frags; + tcp_data_len -= skb_frags->size; + lro_desc->next_frag++; + skb_frags++; + skb_shinfo(skb)->nr_frags++; + } +} + +static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, + struct iphdr *iph, + struct tcphdr *tcph) +{ + if ((lro_desc->iph->saddr != iph->saddr) + || (lro_desc->iph->daddr != iph->daddr) + || (lro_desc->tcph->source != tcph->source) + || (lro_desc->tcph->dest != tcph->dest)) + return -1; + return 0; +} + +static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr, + struct net_lro_desc *lro_arr, + struct iphdr *iph, + struct tcphdr *tcph) +{ + struct net_lro_desc *lro_desc = NULL; + struct net_lro_desc *tmp; + int max_desc = lro_mgr->max_desc; + int i; + + for (i = 0; i < max_desc; i++) { + tmp = &lro_arr[i]; + if (tmp->active) + if (!lro_check_tcp_conn(tmp, iph, tcph)) { + lro_desc = tmp; + goto out; + } + } + + for (i = 0; i < max_desc; i++) { + if (!lro_arr[i].active) { + lro_desc = &lro_arr[i]; + goto out; + } + } + + LRO_INC_STATS(lro_mgr, no_desc); +out: + return lro_desc; +} + +static void lro_flush(struct net_lro_mgr *lro_mgr, + struct net_lro_desc *lro_desc) +{ + if (lro_desc->pkt_aggr_cnt > 1) + lro_update_tcp_ip_header(lro_desc); + + skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; + + if (lro_desc->vgrp) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + vlan_hwaccel_receive_skb(lro_desc->parent, + lro_desc->vgrp, + lro_desc->vlan_tag); + else + vlan_hwaccel_rx(lro_desc->parent, + lro_desc->vgrp, + lro_desc->vlan_tag); + + } else { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(lro_desc->parent); + else + netif_rx(lro_desc->parent); + } + + LRO_INC_STATS(lro_mgr, flushed); + lro_clear_desc(lro_desc); +} + +static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, + struct vlan_group *vgrp, u16 vlan_tag, void *priv) +{ + struct net_lro_desc *lro_desc; + struct iphdr *iph; + struct tcphdr *tcph; + u64 flags; + int vlan_hdr_len = 0; + + if (!lro_mgr->get_skb_header + || lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, + &flags, priv)) + goto out; + + if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) + goto out; + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (!lro_desc) + goto out; + + if ((skb->protocol == htons(ETH_P_8021Q)) + && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) + vlan_hdr_len = VLAN_HLEN; + + if (!lro_desc->active) { /* start new lro session */ + if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL)) + goto out; + + skb->ip_summed = lro_mgr->ip_summed_aggr; + lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp); + LRO_INC_STATS(lro_mgr, aggregated); + return 0; + } + + if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) + goto out2; + + if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc)) + goto out2; + + lro_add_packet(lro_desc, skb, iph, tcph); + LRO_INC_STATS(lro_mgr, aggregated); + + if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) || + lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) + lro_flush(lro_mgr, lro_desc); + + return 0; + +out2: /* send aggregated SKBs to stack */ + lro_flush(lro_mgr, lro_desc); + +out: /* Original SKB has to be posted to stack */ + skb->ip_summed = lro_mgr->ip_summed; + return 1; +} + + +static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + void *mac_hdr, + int hlen, __wsum sum, + u32 ip_summed) +{ + struct sk_buff *skb; + struct skb_frag_struct *skb_frags; + int data_len = len; + int hdr_len = min(len, hlen); + + skb = netdev_alloc_skb(lro_mgr->dev, hlen); + if (!skb) + return NULL; + + skb->len = len; + skb->data_len = len - hdr_len; + skb->truesize += true_size; + skb->tail += hdr_len; + + memcpy(skb->data, mac_hdr, hdr_len); + + skb_frags = skb_shinfo(skb)->frags; + while (data_len > 0) { + *skb_frags = *frags; + data_len -= frags->size; + skb_frags++; + frags++; + skb_shinfo(skb)->nr_frags++; + } + + skb_shinfo(skb)->frags[0].page_offset += hdr_len; + skb_shinfo(skb)->frags[0].size -= hdr_len; + + skb->ip_summed = ip_summed; + skb->csum = sum; + skb->protocol = eth_type_trans(skb, lro_mgr->dev); + return skb; +} + +static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + struct vlan_group *vgrp, + u16 vlan_tag, void *priv, __wsum sum) +{ + struct net_lro_desc *lro_desc; + struct iphdr *iph; + struct tcphdr *tcph; + struct sk_buff *skb; + u64 flags; + void *mac_hdr; + int mac_hdr_len; + int hdr_len = LRO_MAX_PG_HLEN; + int vlan_hdr_len = 0; + + if (!lro_mgr->get_frag_header + || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, + (void *)&tcph, &flags, priv)) { + mac_hdr = page_address(frags->page) + frags->page_offset; + goto out1; + } + + if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) + goto out1; + + hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr); + mac_hdr_len = (int)((void *)(iph) - mac_hdr); + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (!lro_desc) + goto out1; + + if (!lro_desc->active) { /* start new lro session */ + if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL)) + goto out1; + + skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, + hdr_len, 0, lro_mgr->ip_summed_aggr); + if (!skb) + goto out; + + if ((skb->protocol == htons(ETH_P_8021Q)) + && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) + vlan_hdr_len = VLAN_HLEN; + + iph = (void *)(skb->data + vlan_hdr_len); + tcph = (void *)((u8 *)skb->data + vlan_hdr_len + + IP_HDR_LEN(iph)); + + lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL); + LRO_INC_STATS(lro_mgr, aggregated); + return NULL; + } + + if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) + goto out2; + + if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc)) + goto out2; + + lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph); + LRO_INC_STATS(lro_mgr, aggregated); + + if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) || + lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) + lro_flush(lro_mgr, lro_desc); + + return NULL; + +out2: /* send aggregated packets to the stack */ + lro_flush(lro_mgr, lro_desc); + +out1: /* Original packet has to be posted to the stack */ + skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, + hdr_len, sum, lro_mgr->ip_summed); +out: + return skb; +} + +void lro_receive_skb(struct net_lro_mgr *lro_mgr, + struct sk_buff *skb, + void *priv) +{ + if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(skb); + else + netif_rx(skb); + } +} +EXPORT_SYMBOL(lro_receive_skb); + +void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr, + struct sk_buff *skb, + struct vlan_group *vgrp, + u16 vlan_tag, + void *priv) +{ + if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); + else + vlan_hwaccel_rx(skb, vgrp, vlan_tag); + } +} +EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb); + +void lro_receive_frags(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, void *priv, __wsum sum) +{ + struct sk_buff *skb; + + skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0, + priv, sum); + if (!skb) + return; + + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(skb); + else + netif_rx(skb); +} +EXPORT_SYMBOL(lro_receive_frags); + +void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + struct vlan_group *vgrp, + u16 vlan_tag, void *priv, __wsum sum) +{ + struct sk_buff *skb; + + skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp, + vlan_tag, priv, sum); + if (!skb) + return; + + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); + else + vlan_hwaccel_rx(skb, vgrp, vlan_tag); +} +EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags); + +void lro_flush_all(struct net_lro_mgr *lro_mgr) +{ + int i; + struct net_lro_desc *lro_desc = lro_mgr->lro_arr; + + for (i = 0; i < lro_mgr->max_desc; i++) { + if (lro_desc[i].active) + lro_flush(lro_mgr, &lro_desc[i]); + } +} +EXPORT_SYMBOL(lro_flush_all); + +void lro_flush_pkt(struct net_lro_mgr *lro_mgr, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct net_lro_desc *lro_desc; + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (lro_desc->active) + lro_flush(lro_mgr, lro_desc); +} +EXPORT_SYMBOL(lro_flush_pkt); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2586df09b9b..4e189e28f30 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -8,7 +8,7 @@ * From code orinally in TCP */ - +#include <linux/kernel.h> #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/ip.h> @@ -292,7 +292,7 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, if (timeo >= timewait_len) { slot = INET_TWDR_TWKILL_SLOTS - 1; } else { - slot = (timeo + twdr->period - 1) / twdr->period; + slot = DIV_ROUND_UP(timeo, twdr->period); if (slot >= INET_TWDR_TWKILL_SLOTS) slot = INET_TWDR_TWKILL_SLOTS - 1; } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 8c95cf09f87..877da3ed52e 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -40,7 +40,7 @@ #include <net/route.h> #include <net/xfrm.h> -static inline int ip_forward_finish(struct sk_buff *skb) +static int ip_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); @@ -105,7 +105,7 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr) + if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0231bdcb2ab..2143bf30597 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -39,6 +39,7 @@ #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> +#include <net/inet_frag.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> @@ -49,21 +50,8 @@ * as well. Or notify me, at least. --ANK */ -/* Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to measurably - * harm machine performance. - */ -int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; -int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; - int sysctl_ipfrag_max_dist __read_mostly = 64; -/* Important NOTE! Fragment queue must be destroyed before MSL expires. - * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. - */ -int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; - struct ipfrag_skb_cb { struct inet_skb_parm h; @@ -74,153 +62,123 @@ struct ipfrag_skb_cb /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; + u32 user; __be32 saddr; __be32 daddr; __be16 id; u8 protocol; - u8 last_in; -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 - - struct sk_buff *fragments; /* linked list of received fragments */ - int len; /* total length of original datagram */ - int meat; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* when will this queue expire? */ - ktime_t stamp; int iif; unsigned int rid; struct inet_peer *peer; }; -/* Hash table. */ +struct inet_frags_ctl ip4_frags_ctl __read_mostly = { + /* + * Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to + * measurably harm machine performance. + */ + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, -#define IPQ_HASHSZ 64 + /* + * Important NOTE! Fragment queue must be destroyed before MSL expires. + * RFC791 is wrong proposing to prolongate timer each fragment arrival + * by TTL. + */ + .timeout = IP_FRAG_TIME, + .secret_interval = 10 * 60 * HZ, +}; -/* Per-bucket lock is easy to add now. */ -static struct hlist_head ipq_hash[IPQ_HASHSZ]; -static DEFINE_RWLOCK(ipfrag_lock); -static u32 ipfrag_hash_rnd; -static LIST_HEAD(ipq_lru_list); -int ip_frag_nqueues = 0; +static struct inet_frags ip4_frags; -static __inline__ void __ipq_unlink(struct ipq *qp) +int ip_frag_nqueues(void) { - hlist_del(&qp->list); - list_del(&qp->lru_list); - ip_frag_nqueues--; + return ip4_frags.nqueues; } -static __inline__ void ipq_unlink(struct ipq *ipq) +int ip_frag_mem(void) { - write_lock(&ipfrag_lock); - __ipq_unlink(ipq); - write_unlock(&ipfrag_lock); + return atomic_read(&ip4_frags.mem); } +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + struct net_device *dev); + +struct ip4_create_arg { + struct iphdr *iph; + u32 user; +}; + static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, - ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); + ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); } -static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; - -static void ipfrag_secret_rebuild(unsigned long dummy) +static unsigned int ip4_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; + struct ipq *ipq; - write_lock(&ipfrag_lock); - get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); - for (i = 0; i < IPQ_HASHSZ; i++) { - struct ipq *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) { - unsigned int hval = ipqhashfn(q->id, q->saddr, - q->daddr, q->protocol); - - if (hval != i) { - hlist_del(&q->list); - - /* Relink to new hash chain. */ - hlist_add_head(&q->list, &ipq_hash[hval]); - } - } - } - write_unlock(&ipfrag_lock); - - mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval); + ipq = container_of(q, struct ipq, q); + return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); } -atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ +static int ip4_frag_match(struct inet_frag_queue *q, void *a) +{ + struct ipq *qp; + struct ip4_create_arg *arg = a; + + qp = container_of(q, struct ipq, q); + return (qp->id == arg->iph->id && + qp->saddr == arg->iph->saddr && + qp->daddr == arg->iph->daddr && + qp->protocol == arg->iph->protocol && + qp->user == arg->user); +} /* Memory Tracking Functions. */ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip_frag_mem); + atomic_sub(skb->truesize, &ip4_frags.mem); kfree_skb(skb); } -static __inline__ void frag_free_queue(struct ipq *qp, int *work) +static void ip4_frag_init(struct inet_frag_queue *q, void *a) { - if (work) - *work -= sizeof(struct ipq); - atomic_sub(sizeof(struct ipq), &ip_frag_mem); - kfree(qp); + struct ipq *qp = container_of(q, struct ipq, q); + struct ip4_create_arg *arg = a; + + qp->protocol = arg->iph->protocol; + qp->id = arg->iph->id; + qp->saddr = arg->iph->saddr; + qp->daddr = arg->iph->daddr; + qp->user = arg->user; + qp->peer = sysctl_ipfrag_max_dist ? + inet_getpeer(arg->iph->saddr, 1) : NULL; } -static __inline__ struct ipq *frag_alloc_queue(void) +static __inline__ void ip4_frag_free(struct inet_frag_queue *q) { - struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); - - if (!qp) - return NULL; - atomic_add(sizeof(struct ipq), &ip_frag_mem); - return qp; -} - - -/* Destruction primitives. */ - -/* Complete destruction of ipq. */ -static void ip_frag_destroy(struct ipq *qp, int *work) -{ - struct sk_buff *fp; - - BUG_TRAP(qp->last_in&COMPLETE); - BUG_TRAP(del_timer(&qp->timer) == 0); + struct ipq *qp; + qp = container_of(q, struct ipq, q); if (qp->peer) inet_putpeer(qp->peer); +} - /* Release all fragment data. */ - fp = qp->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - /* Finally, release the queue descriptor itself. */ - frag_free_queue(qp, work); -} +/* Destruction primitives. */ -static __inline__ void ipq_put(struct ipq *ipq, int *work) +static __inline__ void ipq_put(struct ipq *ipq) { - if (atomic_dec_and_test(&ipq->refcnt)) - ip_frag_destroy(ipq, work); + inet_frag_put(&ipq->q, &ip4_frags); } /* Kill ipq entry. It is not destroyed immediately, @@ -228,14 +186,7 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work) */ static void ipq_kill(struct ipq *ipq) { - if (del_timer(&ipq->timer)) - atomic_dec(&ipq->refcnt); - - if (!(ipq->last_in & COMPLETE)) { - ipq_unlink(ipq); - atomic_dec(&ipq->refcnt); - ipq->last_in |= COMPLETE; - } + inet_frag_kill(&ipq->q, &ip4_frags); } /* Memory limiting on fragments. Evictor trashes the oldest @@ -243,33 +194,11 @@ static void ipq_kill(struct ipq *ipq) */ static void ip_evictor(void) { - struct ipq *qp; - struct list_head *tmp; - int work; - - work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; - if (work <= 0) - return; - - while (work > 0) { - read_lock(&ipfrag_lock); - if (list_empty(&ipq_lru_list)) { - read_unlock(&ipfrag_lock); - return; - } - tmp = ipq_lru_list.next; - qp = list_entry(tmp, struct ipq, lru_list); - atomic_inc(&qp->refcnt); - read_unlock(&ipfrag_lock); - - spin_lock(&qp->lock); - if (!(qp->last_in&COMPLETE)) - ipq_kill(qp); - spin_unlock(&qp->lock); + int evicted; - ipq_put(qp, &work); - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - } + evicted = inet_frag_evictor(&ip4_frags); + if (evicted) + IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted); } /* @@ -277,11 +206,13 @@ static void ip_evictor(void) */ static void ip_expire(unsigned long arg) { - struct ipq *qp = (struct ipq *) arg; + struct ipq *qp; + + qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); - spin_lock(&qp->lock); + spin_lock(&qp->q.lock); - if (qp->last_in & COMPLETE) + if (qp->q.last_in & COMPLETE) goto out; ipq_kill(qp); @@ -289,130 +220,43 @@ static void ip_expire(unsigned long arg) IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { - struct sk_buff *head = qp->fragments; + if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { + struct sk_buff *head = qp->q.fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if ((head->dev = dev_get_by_index(qp->iif)) != NULL) { + if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); dev_put(head->dev); } } out: - spin_unlock(&qp->lock); - ipq_put(qp, NULL); + spin_unlock(&qp->q.lock); + ipq_put(qp); } -/* Creation primitives. */ - -static struct ipq *ip_frag_intern(struct ipq *qp_in) +/* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and create new one, if nothing is found. + */ +static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { - struct ipq *qp; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif + struct inet_frag_queue *q; + struct ip4_create_arg arg; unsigned int hash; - write_lock(&ipfrag_lock); - hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, - qp_in->protocol); -#ifdef CONFIG_SMP - /* With SMP race we have to recheck hash table, because - * such entry could be created on other cpu, while we - * promoted read lock to write lock. - */ - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { - if (qp->id == qp_in->id && - qp->saddr == qp_in->saddr && - qp->daddr == qp_in->daddr && - qp->protocol == qp_in->protocol && - qp->user == qp_in->user) { - atomic_inc(&qp->refcnt); - write_unlock(&ipfrag_lock); - qp_in->last_in |= COMPLETE; - ipq_put(qp_in, NULL); - return qp; - } - } -#endif - qp = qp_in; - - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) - atomic_inc(&qp->refcnt); - - atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &ipq_hash[hash]); - INIT_LIST_HEAD(&qp->lru_list); - list_add_tail(&qp->lru_list, &ipq_lru_list); - ip_frag_nqueues++; - write_unlock(&ipfrag_lock); - return qp; -} - -/* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) -{ - struct ipq *qp; + arg.iph = iph; + arg.user = user; + hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - if ((qp = frag_alloc_queue()) == NULL) + q = inet_frag_find(&ip4_frags, &arg, hash); + if (q == NULL) goto out_nomem; - qp->protocol = iph->protocol; - qp->last_in = 0; - qp->id = iph->id; - qp->saddr = iph->saddr; - qp->daddr = iph->daddr; - qp->user = user; - qp->len = 0; - qp->meat = 0; - qp->fragments = NULL; - qp->iif = 0; - qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; - - /* Initialize a timer for this entry. */ - init_timer(&qp->timer); - qp->timer.data = (unsigned long) qp; /* pointer to queue */ - qp->timer.function = ip_expire; /* expire function */ - spin_lock_init(&qp->lock); - atomic_set(&qp->refcnt, 1); - - return ip_frag_intern(qp); + return container_of(q, struct ipq, q); out_nomem: LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); return NULL; } -/* Find the correct entry in the "incomplete datagrams" queue for - * this IP datagram, and create new one, if nothing is found. - */ -static inline struct ipq *ip_find(struct iphdr *iph, u32 user) -{ - __be16 id = iph->id; - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; - __u8 protocol = iph->protocol; - unsigned int hash; - struct ipq *qp; - struct hlist_node *n; - - read_lock(&ipfrag_lock); - hash = ipqhashfn(id, saddr, daddr, protocol); - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { - if (qp->id == id && - qp->saddr == saddr && - qp->daddr == daddr && - qp->protocol == protocol && - qp->user == user) { - atomic_inc(&qp->refcnt); - read_unlock(&ipfrag_lock); - return qp; - } - } - read_unlock(&ipfrag_lock); - - return ip_frag_create(iph, user); -} - /* Is the fragment too far ahead to be part of ipq? */ static inline int ip_frag_too_far(struct ipq *qp) { @@ -429,7 +273,7 @@ static inline int ip_frag_too_far(struct ipq *qp) end = atomic_inc_return(&peer->rid); qp->rid = end; - rc = qp->fragments && (end - start) > max; + rc = qp->q.fragments && (end - start) > max; if (rc) { IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); @@ -442,39 +286,42 @@ static int ip_frag_reinit(struct ipq *qp) { struct sk_buff *fp; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { - atomic_inc(&qp->refcnt); + if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) { + atomic_inc(&qp->q.refcnt); return -ETIMEDOUT; } - fp = qp->fragments; + fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; frag_kfree_skb(fp, NULL); fp = xp; } while (fp); - qp->last_in = 0; - qp->len = 0; - qp->meat = 0; - qp->fragments = NULL; + qp->q.last_in = 0; + qp->q.len = 0; + qp->q.meat = 0; + qp->q.fragments = NULL; qp->iif = 0; return 0; } /* Add new segment to existing queue. */ -static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) +static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct sk_buff *prev, *next; + struct net_device *dev; int flags, offset; int ihl, end; + int err = -ENOENT; - if (qp->last_in & COMPLETE) + if (qp->q.last_in & COMPLETE) goto err; if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && - unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { + unlikely(ip_frag_too_far(qp)) && + unlikely(err = ip_frag_reinit(qp))) { ipq_kill(qp); goto err; } @@ -487,36 +334,40 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) /* Determine the position of this fragment. */ end = offset + skb->len - ihl; + err = -EINVAL; /* Is this the final fragment? */ if ((flags & IP_MF) == 0) { /* If we already have some bits beyond end * or have different end, the segment is corrrupted. */ - if (end < qp->len || - ((qp->last_in & LAST_IN) && end != qp->len)) + if (end < qp->q.len || + ((qp->q.last_in & LAST_IN) && end != qp->q.len)) goto err; - qp->last_in |= LAST_IN; - qp->len = end; + qp->q.last_in |= LAST_IN; + qp->q.len = end; } else { if (end&7) { end &= ~7; if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->ip_summed = CHECKSUM_NONE; } - if (end > qp->len) { + if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ - if (qp->last_in & LAST_IN) + if (qp->q.last_in & LAST_IN) goto err; - qp->len = end; + qp->q.len = end; } } if (end == offset) goto err; + err = -ENOMEM; if (pskb_pull(skb, ihl) == NULL) goto err; - if (pskb_trim_rcsum(skb, end-offset)) + + err = pskb_trim_rcsum(skb, end - offset); + if (err) goto err; /* Find out which fragments are in front and at the back of us @@ -524,7 +375,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * this fragment, right? */ prev = NULL; - for (next = qp->fragments; next != NULL; next = next->next) { + for (next = qp->q.fragments; next != NULL; next = next->next) { if (FRAG_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -539,8 +390,10 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (i > 0) { offset += i; + err = -EINVAL; if (end <= offset) goto err; + err = -ENOMEM; if (!pskb_pull(skb, i)) goto err; if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -548,6 +401,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } } + err = -ENOMEM; + while (next && FRAG_CB(next)->offset < end) { int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ @@ -558,7 +413,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (!pskb_pull(next, i)) goto err; FRAG_CB(next)->offset += i; - qp->meat -= i; + qp->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -573,9 +428,9 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (prev) prev->next = next; else - qp->fragments = next; + qp->q.fragments = next; - qp->meat -= free_it->len; + qp->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -587,46 +442,71 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (prev) prev->next = skb; else - qp->fragments = skb; - - if (skb->dev) - qp->iif = skb->dev->ifindex; - skb->dev = NULL; - qp->stamp = skb->tstamp; - qp->meat += skb->len; - atomic_add(skb->truesize, &ip_frag_mem); + qp->q.fragments = skb; + + dev = skb->dev; + if (dev) { + qp->iif = dev->ifindex; + skb->dev = NULL; + } + qp->q.stamp = skb->tstamp; + qp->q.meat += skb->len; + atomic_add(skb->truesize, &ip4_frags.mem); if (offset == 0) - qp->last_in |= FIRST_IN; + qp->q.last_in |= FIRST_IN; - write_lock(&ipfrag_lock); - list_move_tail(&qp->lru_list, &ipq_lru_list); - write_unlock(&ipfrag_lock); + if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) + return ip_frag_reasm(qp, prev, dev); - return; + write_lock(&ip4_frags.lock); + list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list); + write_unlock(&ip4_frags.lock); + return -EINPROGRESS; err: kfree_skb(skb); + return err; } /* Build a new IP datagram from all its fragments. */ -static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + struct net_device *dev) { struct iphdr *iph; - struct sk_buff *fp, *head = qp->fragments; + struct sk_buff *fp, *head = qp->q.fragments; int len; int ihlen; + int err; ipq_kill(qp); + /* Make the one we just received the head. */ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + if (!fp) + goto out_nomem; + + fp->next = head->next; + prev->next = fp; + + skb_morph(head, qp->q.fragments); + head->next = qp->q.fragments->next; + + kfree_skb(qp->q.fragments); + qp->q.fragments = head; + } + BUG_TRAP(head != NULL); BUG_TRAP(FRAG_CB(head)->offset == 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); - len = ihlen + qp->len; + len = ihlen + qp->q.len; + err = -E2BIG; if (len > 65535) goto out_oversize; @@ -654,12 +534,12 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip_frag_mem); + atomic_add(clone->truesize, &ip4_frags.mem); } skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip_frag_mem); + atomic_sub(head->truesize, &ip4_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -669,23 +549,24 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip_frag_mem); + atomic_sub(fp->truesize, &ip4_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = qp->stamp; + head->tstamp = qp->q.stamp; iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - qp->fragments = NULL; - return head; + qp->q.fragments = NULL; + return 0; out_nomem: LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " "queue %p\n", qp); + err = -ENOMEM; goto out_fail; out_oversize: if (net_ratelimit()) @@ -694,54 +575,49 @@ out_oversize: NIPQUAD(qp->saddr)); out_fail: IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - return NULL; + return err; } /* Process an incoming IP datagram fragment. */ -struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) +int ip_defrag(struct sk_buff *skb, u32 user) { struct ipq *qp; - struct net_device *dev; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. */ - if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) + if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh) ip_evictor(); - dev = skb->dev; - /* Lookup (or create) queue header */ if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { - struct sk_buff *ret = NULL; + int ret; - spin_lock(&qp->lock); + spin_lock(&qp->q.lock); - ip_frag_queue(qp, skb); + ret = ip_frag_queue(qp, skb); - if (qp->last_in == (FIRST_IN|LAST_IN) && - qp->meat == qp->len) - ret = ip_frag_reasm(qp, dev); - - spin_unlock(&qp->lock); - ipq_put(qp, NULL); + spin_unlock(&qp->q.lock); + ipq_put(qp); return ret; } IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); kfree_skb(skb); - return NULL; + return -ENOMEM; } void __init ipfrag_init(void) { - ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - init_timer(&ipfrag_secret_timer); - ipfrag_secret_timer.function = ipfrag_secret_rebuild; - ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval; - add_timer(&ipfrag_secret_timer); + ip4_frags.ctl = &ip4_frags_ctl; + ip4_frags.hashfn = ip4_hashfn; + ip4_frags.constructor = ip4_frag_init; + ip4_frags.destructor = ip4_frag_free; + ip4_frags.skb_free = NULL; + ip4_frags.qsize = sizeof(struct ipq); + ip4_frags.match = ip4_frag_match; + ip4_frags.frag_expire = ip_expire; + inet_frags_init(&ip4_frags); } EXPORT_SYMBOL(ip_defrag); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5c14ed63e56..02b02a8d681 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -262,7 +262,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int int i; for (i=1; i<100; i++) { sprintf(name, "gre%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -674,7 +674,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ - int max_headroom; /* The extra header space needed */ + unsigned int max_headroom; /* The extra header space needed */ int gre_hlen; __be32 dst; int mtu; @@ -684,7 +684,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error; } - if (dev->hard_header) { + if (dev->header_ops) { gre_hlen = 0; tiph = (struct iphdr*)skb->data; } else { @@ -1033,7 +1033,6 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) return 0; } -#ifdef CONFIG_NET_IPGRE_BROADCAST /* Nice toy. Unfortunately, useless in real life :-) It allows to construct virtual multiprotocol broadcast "LAN" over the Internet, provided multicast routing is tuned. @@ -1063,8 +1062,9 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) */ -static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int ipgre_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { struct ip_tunnel *t = netdev_priv(dev); struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); @@ -1091,6 +1091,19 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh return -t->hlen; } +static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); + memcpy(haddr, &iph->saddr, 4); + return 4; +} + +static const struct header_ops ipgre_header_ops = { + .create = ipgre_header, + .parse = ipgre_header_parse, +}; + +#ifdef CONFIG_NET_IPGRE_BROADCAST static int ipgre_open(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); @@ -1132,7 +1145,6 @@ static int ipgre_close(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { - SET_MODULE_OWNER(dev); dev->uninit = ipgre_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipgre_tunnel_xmit; @@ -1188,15 +1200,17 @@ static int ipgre_tunnel_init(struct net_device *dev) if (!iph->saddr) return -EINVAL; dev->flags = IFF_BROADCAST; - dev->hard_header = ipgre_header; + dev->header_ops = &ipgre_header_ops; dev->open = ipgre_open; dev->stop = ipgre_close; } #endif + } else { + dev->header_ops = &ipgre_header_ops; } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 97069399d86..168c871fcd7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -172,8 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb) (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN); - if (skb == NULL) { + if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); return 1; } @@ -196,7 +195,7 @@ int ip_call_ra_chain(struct sk_buff *skb) return 0; } -static inline int ip_local_deliver_finish(struct sk_buff *skb) +static int ip_local_deliver_finish(struct sk_buff *skb) { __skb_pull(skb, ip_hdrlen(skb)); @@ -265,8 +264,7 @@ int ip_local_deliver(struct sk_buff *skb) */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER); - if (!skb) + if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } @@ -326,7 +324,7 @@ drop: return -1; } -static inline int ip_rcv_finish(struct sk_buff *skb) +static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; @@ -382,6 +380,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct iphdr *iph; u32 len; + if (dev->nd_net != &init_net) + goto drop; + /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0f1d7beacf7..e5f7dc2de30 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -161,7 +161,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb->dst; struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; - int hh_len = LL_RESERVED_SPACE(dev); + unsigned int hh_len = LL_RESERVED_SPACE(dev); if (rt->rt_type == RTN_MULTICAST) IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); @@ -169,7 +169,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS); /* Be paranoid, rather than too clever. */ - if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) { + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { struct sk_buff *skb2; skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); @@ -202,7 +202,7 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb) skb->dst->dev->mtu : dst_mtu(skb->dst); } -static inline int ip_finish_output(struct sk_buff *skb) +static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ @@ -1261,6 +1261,10 @@ int ip_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->dst = dst_clone(&rt->u.dst); + if (iph->protocol == IPPROTO_ICMP) + icmp_out_count(((struct icmphdr *) + skb_transport_header(skb))->type); + /* Netfilter gets whole the not fragmented skb. */ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6b420aedcdc..f51f20e487c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -602,7 +602,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, dev_put(dev); } } else - dev = __dev_get_by_index(mreq.imr_ifindex); + dev = __dev_get_by_index(&init_net, mreq.imr_ifindex); err = -EADDRNOTAVAIL; @@ -659,7 +659,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } msf = kmalloc(optlen, GFP_KERNEL); - if (msf == 0) { + if (!msf) { err = -ENOBUFS; break; } @@ -816,7 +816,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } gsf = kmalloc(optlen,GFP_KERNEL); - if (gsf == 0) { + if (!gsf) { err = -ENOBUFS; break; } @@ -836,7 +836,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, } msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); msf = kmalloc(msize,GFP_KERNEL); - if (msf == 0) { + if (!msf) { err = -ENOBUFS; goto mc_msf_out; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index e787044a851..0bfeb02a5f8 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -75,7 +75,6 @@ out: static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -ENOMEM; - struct iphdr *iph; struct ip_comp_hdr *ipch; if (skb_linearize_cow(skb)) @@ -84,12 +83,14 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; /* Remove ipcomp header and decompress original payload */ - iph = ip_hdr(skb); ipch = (void *)skb->data; - iph->protocol = ipch->nexthdr; skb->transport_header = skb->network_header + sizeof(*ipch); __skb_pull(skb, sizeof(*ipch)); err = ipcomp_decompress(x, skb); + if (err) + goto out; + + err = ipch->nexthdr; out: return err; @@ -98,10 +99,9 @@ out: static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) { struct ipcomp_data *ipcd = x->data; - const int ihlen = ip_hdrlen(skb); - const int plen = skb->len - ihlen; + const int plen = skb->len; int dlen = IPCOMP_SCRATCH_SIZE; - u8 *start = skb->data + ihlen; + u8 *start = skb->data; const int cpu = get_cpu(); u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); @@ -118,7 +118,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); put_cpu(); - pskb_trim(skb, ihlen + dlen + sizeof(struct ip_comp_hdr)); + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); return 0; out: @@ -131,12 +131,8 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) int err; struct ip_comp_hdr *ipch; struct ipcomp_data *ipcd = x->data; - int hdr_len = 0; - struct iphdr *iph = ip_hdr(skb); - iph->tot_len = htons(skb->len); - hdr_len = iph->ihl * 4; - if ((skb->len - hdr_len) < ipcd->threshold) { + if (skb->len < ipcd->threshold) { /* Don't bother compressing */ goto out_ok; } @@ -145,25 +141,19 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) goto out_ok; err = ipcomp_compress(x, skb); - iph = ip_hdr(skb); if (err) { goto out_ok; } /* Install ipcomp header, convert into ipcomp datagram. */ - iph->tot_len = htons(skb->len); - ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4); - ipch->nexthdr = iph->protocol; + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); ipch->flags = 0; ipch->cpi = htons((u16 )ntohl(x->id.spi)); - iph->protocol = IPPROTO_COMP; - ip_send_check(iph); - return 0; - + *skb_mac_header(skb) = IPPROTO_COMP; out_ok: - if (x->props.mode == XFRM_MODE_TUNNEL) - ip_send_check(iph); + skb_push(skb, -skb_network_offset(skb)); return 0; } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index c5b24707753..c5c107a0182 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -55,6 +55,7 @@ #include <linux/root_dev.h> #include <linux/delay.h> #include <linux/nfs_fs.h> +#include <net/net_namespace.h> #include <net/arp.h> #include <net/ip.h> #include <net/ipconfig.h> @@ -189,11 +190,15 @@ static int __init ic_open_devs(void) rtnl_lock(); /* bring loopback device up first */ - if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0) - printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); + for_each_netdev(&init_net, dev) { + if (!(dev->flags & IFF_LOOPBACK)) + continue; + if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) + printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name); + } - for_each_netdev(dev) { - if (dev == &loopback_dev) + for_each_netdev(&init_net, dev) { + if (dev->flags & IFF_LOOPBACK) continue; if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : (!(dev->flags & IFF_LOOPBACK) && @@ -425,6 +430,9 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; + if (dev->nd_net != &init_net) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -749,8 +757,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d /* Chain packet down the line... */ skb->dev = dev; skb->protocol = htons(ETH_P_IP); - if ((dev->hard_header && - dev->hard_header(skb, dev, ntohs(skb->protocol), dev->broadcast, dev->dev_addr, skb->len) < 0) || + if (dev_hard_header(skb, dev, ntohs(skb->protocol), + dev->broadcast, dev->dev_addr, skb->len) < 0 || dev_queue_xmit(skb) < 0) printk("E"); } @@ -834,6 +842,9 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str struct ic_device *d; int len, ext_len; + if (dev->nd_net != &init_net) + goto drop; + /* Perform verifications before taking the lock. */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; @@ -1253,7 +1264,7 @@ static int __init ip_auto_config(void) __be32 addr; #ifdef CONFIG_PROC_FS - proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops); + proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 396437242a1..8c2b2b0741d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -225,7 +225,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c int i; for (i=1; i<100; i++) { sprintf(name, "tunl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -237,7 +237,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c return NULL; nt = netdev_priv(dev); - SET_MODULE_OWNER(dev); dev->init = ipip_tunnel_init; nt->parms = *parms; @@ -516,7 +515,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = ip_hdr(skb); struct iphdr *iph; /* Our new IP header */ - int max_headroom; /* The extra header space needed */ + unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; int mtu; @@ -775,7 +774,6 @@ static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) static void ipip_tunnel_setup(struct net_device *dev) { - SET_MODULE_OWNER(dev); dev->uninit = ipip_tunnel_uninit; dev->hard_start_xmit = ipip_tunnel_xmit; dev->get_stats = ipip_tunnel_get_stats; @@ -822,7 +820,7 @@ static int ipip_tunnel_init(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7003cc1b7fe..37bb497d92a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -49,6 +49,7 @@ #include <linux/mroute.h> #include <linux/init.h> #include <linux/if_ether.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -124,7 +125,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) { struct net_device *dev; - dev = __dev_get_by_name("tunl0"); + dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { int err; @@ -148,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev = NULL; - if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { + if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); @@ -1082,13 +1083,18 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; struct vif_device *v; int ct; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v=&vif_table[0]; for (ct=0;ct<maxvif;ct++,v++) { - if (v->dev==ptr) + if (v->dev==dev) vif_delete(ct); } return NOTIFY_DONE; @@ -1708,26 +1714,8 @@ static const struct seq_operations ipmr_vif_seq_ops = { static int ipmr_vif_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ipmr_vif_seq_ops); - if (rc) - goto out_kfree; - - s->ct = 0; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; - + return seq_open_private(file, &ipmr_vif_seq_ops, + sizeof(struct ipmr_vif_iter)); } static const struct file_operations ipmr_vif_fops = { @@ -1871,25 +1859,8 @@ static const struct seq_operations ipmr_mfc_seq_ops = { static int ipmr_mfc_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ipmr_mfc_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; - + return seq_open_private(file, &ipmr_mfc_seq_ops, + sizeof(struct ipmr_mfc_iter)); } static const struct file_operations ipmr_mfc_fops = { @@ -1922,7 +1893,7 @@ void __init ip_mr_init(void) ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); + proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); + proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); #endif } diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 8d6901d4e94..664cb8e97c1 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -25,6 +25,8 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/netfilter.h> +#include <net/net_namespace.h> #include <net/protocol.h> #include <net/tcp.h> #include <asm/system.h> @@ -327,18 +329,18 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, spin_unlock(&cp->lock); } -static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, +static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app) { int diff; - const unsigned int tcp_offset = ip_hdrlen(*pskb); + const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) + if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) return 0; - th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized @@ -359,7 +361,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, if (app->pkt_out == NULL) return 1; - if (!app->pkt_out(app, cp, pskb, &diff)) + if (!app->pkt_out(app, cp, skb, &diff)) return 0; /* @@ -377,7 +379,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, * called by ipvs packet handler, assumes previously checked cp!=NULL * returns false if it can't handle packet (oom) */ -int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) +int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_app *app; @@ -390,7 +392,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) - return app_tcp_pkt_out(cp, pskb, app); + return app_tcp_pkt_out(cp, skb, app); /* * Call private output hook function @@ -398,22 +400,22 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) if (app->pkt_out == NULL) return 1; - return app->pkt_out(app, cp, pskb, NULL); + return app->pkt_out(app, cp, skb, NULL); } -static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, +static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app) { int diff; - const unsigned int tcp_offset = ip_hdrlen(*pskb); + const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) + if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) return 0; - th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized @@ -434,7 +436,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, if (app->pkt_in == NULL) return 1; - if (!app->pkt_in(app, cp, pskb, &diff)) + if (!app->pkt_in(app, cp, skb, &diff)) return 0; /* @@ -452,7 +454,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, * called by ipvs packet handler, assumes previously checked cp!=NULL. * returns false if can't handle packet (oom). */ -int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) +int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_app *app; @@ -465,7 +467,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) - return app_tcp_pkt_in(cp, pskb, app); + return app_tcp_pkt_in(cp, skb, app); /* * Call private input hook function @@ -473,7 +475,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) if (app->pkt_in == NULL) return 1; - return app->pkt_in(app, cp, pskb, NULL); + return app->pkt_in(app, cp, skb, NULL); } @@ -616,12 +618,12 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, int ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ - proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops); + proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } void ip_vs_app_cleanup(void) { - proc_net_remove("ip_vs_app"); + proc_net_remove(&init_net, "ip_vs_app"); } diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index d612a6a5d95..4b702f708d3 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -35,6 +35,7 @@ #include <linux/jhash.h> #include <linux/random.h> +#include <net/net_namespace.h> #include <net/ip_vs.h> @@ -922,7 +923,7 @@ int ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops); + proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); @@ -938,6 +939,6 @@ void ip_vs_conn_cleanup(void) /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - proc_net_remove("ip_vs_conn"); + proc_net_remove(&init_net, "ip_vs_conn"); vfree(ip_vs_conn_tab); } diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index f005a2f929f..c6ed7654e83 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -EXPORT_SYMBOL(ip_vs_make_skb_writable); /* ID used in ICMP lookups */ @@ -163,42 +162,6 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction, } -int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) -{ - struct sk_buff *skb = *pskb; - - /* skb is already used, better copy skb and its payload */ - if (unlikely(skb_shared(skb) || skb->sk)) - goto copy_skb; - - /* skb data is already used, copy it */ - if (unlikely(skb_cloned(skb))) - goto copy_data; - - return pskb_may_pull(skb, writable_len); - - copy_data: - if (unlikely(writable_len > skb->len)) - return 0; - return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - - copy_skb: - if (unlikely(writable_len > skb->len)) - return 0; - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) - return 0; - BUG_ON(skb_is_nonlinear(skb)); - - /* Rest of kernel will get very unhappy if we pass it a - suddenly-orphaned skbuff */ - if ((*pskb)->sk) - skb_set_owner_w(skb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = skb; - return 1; -} - /* * IPVS persistent scheduling function * It creates a connection entry according to its template if exists, @@ -525,12 +488,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * for VS/NAT. */ static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (!((*pskb)->ipvs_property)) + if (!skb->ipvs_property) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ return NF_STOP; @@ -541,13 +504,14 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); } -static inline struct sk_buff * -ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) +static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - skb = ip_defrag(skb, user); - if (skb) + int err = ip_defrag(skb, user); + + if (!err) ip_send_check(ip_hdr(skb)); - return skb; + + return err; } /* @@ -605,9 +569,8 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, * Currently handles error types - unreachable, quench, ttl exceeded. * (Only used in VS/NAT) */ -static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) +static int ip_vs_out_icmp(struct sk_buff *skb, int *related) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -619,10 +582,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); - if (!skb) + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; - *pskb = skb; } iph = ip_hdr(skb); @@ -690,9 +651,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); - if (!ip_vs_make_skb_writable(pskb, offset)) + if (!skb_make_writable(skb, offset)) goto out; - skb = *pskb; ip_vs_nat_icmp(skb, pp, cp, 1); @@ -724,11 +684,10 @@ static inline int is_tcp_reset(const struct sk_buff *skb) * rewrite addresses of the packet and send it on its way... */ static unsigned int -ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_out(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -741,11 +700,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_out_icmp(pskb, &related); + int related, verdict = ip_vs_out_icmp(skb, &related); if (related) return verdict; - skb = *pskb; iph = ip_hdr(skb); } @@ -756,11 +714,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, /* reassemble IP fragments */ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && !pp->dont_defrag)) { - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); - if (!skb) + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; iph = ip_hdr(skb); - *pskb = skb; } ihl = iph->ihl << 2; @@ -802,13 +758,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); - if (!ip_vs_make_skb_writable(pskb, ihl)) + if (!skb_make_writable(skb, ihl)) goto drop; /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) goto drop; - skb = *pskb; ip_hdr(skb)->saddr = cp->vaddr; ip_send_check(ip_hdr(skb)); @@ -818,9 +773,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + if (ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; - skb = *pskb; IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); @@ -835,7 +789,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, drop: ip_vs_conn_put(cp); - kfree_skb(*pskb); + kfree_skb(skb); return NF_STOLEN; } @@ -847,9 +801,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, * Currently handles error types - unreachable, quench, ttl exceeded. */ static int -ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) +ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -861,12 +814,9 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_vs_gather_frags(skb, - hooknum == NF_IP_LOCAL_IN ? - IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); - if (!skb) + if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ? + IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) return NF_STOLEN; - *pskb = skb; } iph = ip_hdr(skb); @@ -945,11 +895,10 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) * and send it on its way... */ static unsigned int -ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -961,7 +910,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, * ... don't know why 1st test DOES NOT include 2nd (?) */ if (unlikely(skb->pkt_type != PACKET_HOST - || skb->dev == &loopback_dev || skb->sk)) { + || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", skb->pkt_type, ip_hdr(skb)->protocol, @@ -971,11 +920,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); + int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); if (related) return verdict; - skb = *pskb; iph = ip_hdr(skb); } @@ -1056,16 +1004,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { int r; - if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP) + if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; - return ip_vs_in_icmp(pskb, &r, hooknum); + return ip_vs_in_icmp(skb, &r, hooknum); } diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index f656d41d8d4..7345fc252a2 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -35,6 +35,7 @@ #include <linux/netfilter_ipv4.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> #include <net/sock.h> @@ -1791,24 +1792,8 @@ static const struct seq_operations ip_vs_info_seq_ops = { static int ip_vs_info_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ip_vs_info_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &ip_vs_info_seq_ops, + sizeof(struct ip_vs_iter)); } static const struct file_operations ip_vs_info_fops = { @@ -2356,8 +2341,8 @@ int ip_vs_control_init(void) return ret; } - proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops); + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); sysctl_header = register_sysctl_table(vs_root_table); @@ -2390,8 +2375,8 @@ void ip_vs_control_cleanup(void) cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header); - proc_net_remove("ip_vs_stats"); - proc_net_remove("ip_vs"); + proc_net_remove(&init_net, "ip_vs_stats"); + proc_net_remove(&init_net, "ip_vs"); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 344ddbbdc75..59aa166b767 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -30,6 +30,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/netfilter.h> #include <net/protocol.h> #include <net/tcp.h> #include <asm/unaligned.h> @@ -135,7 +136,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. */ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, - struct sk_buff **pskb, int *diff) + struct sk_buff *skb, int *diff) { struct iphdr *iph; struct tcphdr *th; @@ -155,14 +156,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; /* Linear packets are much easier to deal with. */ - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (cp->app_data == &ip_vs_ftp_pasv) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); data = (char *)th + (th->doff << 2); - data_limit = skb_tail_pointer(*pskb); + data_limit = skb_tail_pointer(skb); if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, @@ -213,7 +214,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, memcpy(start, buf, buf_len); ret = 1; } else { - ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start, + ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, end-start, buf, buf_len); } @@ -238,7 +239,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * the client. */ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, - struct sk_buff **pskb, int *diff) + struct sk_buff *skb, int *diff) { struct iphdr *iph; struct tcphdr *th; @@ -256,20 +257,20 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; /* Linear packets are much easier to deal with. */ - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; /* * Detecting whether it is passive */ - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); /* Since there may be OPTIONS in the TCP packet and the HLEN is the length of the header in 32-bit multiples, it is accurate to calculate data address by th+HLEN*4 */ data = data_start = (char *)th + (th->doff << 2); - data_limit = skb_tail_pointer(*pskb); + data_limit = skb_tail_pointer(skb); while (data <= data_limit - 6) { if (strnicmp(data, "PASV\r\n", 6) == 0) { diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 6225acac7a3..6a1fec416ea 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -50,6 +50,7 @@ #include <linux/sysctl.h> /* for proc_net_create/proc_net_remove */ #include <linux/proc_fs.h> +#include <net/net_namespace.h> #include <net/ip_vs.h> @@ -843,7 +844,7 @@ static int __init ip_vs_lblcr_init(void) INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_table(lblcr_root_table); #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); + proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); #endif return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); } @@ -852,7 +853,7 @@ static int __init ip_vs_lblcr_init(void) static void __exit ip_vs_lblcr_cleanup(void) { #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_remove("ip_vs_lblcr"); + proc_net_remove(&init_net, "ip_vs_lblcr"); #endif unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index e65577a7700..12dc0d640b6 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -20,6 +20,7 @@ #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ +#include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <net/ip_vs.h> @@ -122,27 +123,27 @@ tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, static int -tcp_snat_handler(struct sk_buff **pskb, +tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(*pskb); + const unsigned int tcphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, pskb)) + if (!ip_vs_app_pkt_out(cp, skb)) return 0; } - tcph = (void *)ip_hdr(*pskb) + tcphoff; + tcph = (void *)ip_hdr(skb) + tcphoff; tcph->source = cp->vport; /* Adjust TCP checksums */ @@ -150,17 +151,15 @@ tcp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, tcphoff, - (*pskb)->len - tcphoff, 0); + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - (*pskb)->len - tcphoff, - cp->protocol, - (*pskb)->csum); + skb->len - tcphoff, + cp->protocol, skb->csum); IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, tcph->check, (char*)&(tcph->check) - (char*)tcph); @@ -170,30 +169,30 @@ tcp_snat_handler(struct sk_buff **pskb, static int -tcp_dnat_handler(struct sk_buff **pskb, +tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(*pskb); + const unsigned int tcphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn and iph ack_seq stuff */ - if (!ip_vs_app_pkt_in(cp, pskb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } - tcph = (void *)ip_hdr(*pskb) + tcphoff; + tcph = (void *)ip_hdr(skb) + tcphoff; tcph->dest = cp->dport; /* @@ -203,18 +202,16 @@ tcp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, tcphoff, - (*pskb)->len - tcphoff, 0); + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - (*pskb)->len - tcphoff, - cp->protocol, - (*pskb)->csum); - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; + skb->len - tcphoff, + cp->protocol, skb->csum); + skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 8ee5fe6a101..1fa7b330b9a 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -18,6 +18,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> +#include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/udp.h> @@ -129,29 +130,29 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, } static int -udp_snat_handler(struct sk_buff **pskb, +udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - const unsigned int udphoff = ip_hdrlen(*pskb); + const unsigned int udphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) + if (!skb_make_writable(skb, udphoff+sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, pskb)) + if (!ip_vs_app_pkt_out(cp, skb)) return 0; } - udph = (void *)ip_hdr(*pskb) + udphoff; + udph = (void *)ip_hdr(skb) + udphoff; udph->source = cp->vport; /* @@ -161,17 +162,15 @@ udp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, udphoff, - (*pskb)->len - udphoff, 0); + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - (*pskb)->len - udphoff, - cp->protocol, - (*pskb)->csum); + skb->len - udphoff, + cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", @@ -183,30 +182,30 @@ udp_snat_handler(struct sk_buff **pskb, static int -udp_dnat_handler(struct sk_buff **pskb, +udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - unsigned int udphoff = ip_hdrlen(*pskb); + unsigned int udphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) + if (!skb_make_writable(skb, udphoff+sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn */ - if (!ip_vs_app_pkt_in(cp, pskb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } - udph = (void *)ip_hdr(*pskb) + udphoff; + udph = (void *)ip_hdr(skb) + udphoff; udph->dest = cp->dport; /* @@ -216,20 +215,18 @@ udp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, udphoff, - (*pskb)->len - udphoff, 0); + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - (*pskb)->len - udphoff, - cp->protocol, - (*pskb)->csum); + skb->len - udphoff, + cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 356f067484e..c99f2a33fb9 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -387,7 +387,7 @@ static int set_mcast_if(struct sock *sk, char *ifname) struct net_device *dev; struct inet_sock *inet = inet_sk(sk); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) @@ -412,7 +412,7 @@ static int set_sync_mesg_maxlen(int sync_state) int num; if (sync_state == IP_VS_STATE_MASTER) { - if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) return -ENODEV; num = (dev->mtu - sizeof(struct iphdr) - @@ -423,7 +423,7 @@ static int set_sync_mesg_maxlen(int sync_state) IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { - if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) return -ENODEV; sync_recv_mesg_maxlen = dev->mtu - @@ -451,7 +451,7 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -472,7 +472,7 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) __be32 addr; struct sockaddr_in sin; - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); @@ -794,7 +794,7 @@ static int sync_thread(void *startup) add_wait_queue(&sync_wait, &wait); - set_sync_pid(state, current->pid); + set_sync_pid(state, task_pid_nr(current)); complete(tinfo->startup); /* @@ -877,7 +877,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if (!tinfo) return -ENOMEM; - IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); + IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", sizeof(struct ip_vs_sync_conn)); @@ -917,7 +917,7 @@ int stop_sync_thread(int state) (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) return -ESRCH; - IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); + IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); IP_VS_INFO("stopping sync thread %d ...\n", (state == IP_VS_STATE_MASTER) ? sync_master_pid : sync_backup_pid); diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 666e080a74a..7c074e386c1 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -253,7 +253,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) @@ -264,7 +264,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dst = &rt->u.dst; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; ip_hdr(skb)->daddr = cp->daddr; ip_send_check(ip_hdr(skb)); @@ -325,7 +325,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, __be16 df = old_iph->frag_off; sk_buff_data_t old_transport_header = skb->transport_header; struct iphdr *iph; /* Our new IP header */ - int max_headroom; /* The extra header space needed */ + unsigned int max_headroom; /* The extra header space needed */ int mtu; EnterFunction(10); @@ -529,7 +529,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!ip_vs_make_skb_writable(&skb, offset)) + if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index b44192924f9..5539debf497 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -3,14 +3,15 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/ip.h> +#include <linux/skbuff.h> #include <net/route.h> #include <net/xfrm.h> #include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) +int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { - const struct iphdr *iph = ip_hdr(*pskb); + const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; struct dst_entry *odst; @@ -29,14 +30,14 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) if (type == RTN_LOCAL) fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; - fl.mark = (*pskb)->mark; + fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl.mark = skb->mark; if (ip_route_output_key(&rt, &fl) != 0) return -1; /* Drop old route. */ - dst_release((*pskb)->dst); - (*pskb)->dst = &rt->u.dst; + dst_release(skb->dst); + skb->dst = &rt->u.dst; } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ @@ -44,8 +45,8 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) if (ip_route_output_key(&rt, &fl) != 0) return -1; - odst = (*pskb)->dst; - if (ip_route_input(*pskb, iph->daddr, iph->saddr, + odst = skb->dst; + if (ip_route_input(skb, iph->daddr, iph->saddr, RT_TOS(iph->tos), rt->u.dst.dev) != 0) { dst_release(&rt->u.dst); return -1; @@ -54,70 +55,54 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) dst_release(odst); } - if ((*pskb)->dst->error) + if (skb->dst->error) return -1; #ifdef CONFIG_XFRM - if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(*pskb, &fl, AF_INET) == 0) - if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0)) + if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + xfrm_decode_session(skb, &fl, AF_INET) == 0) + if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) return -1; #endif /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } + hh_len = skb->dst->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; return 0; } EXPORT_SYMBOL(ip_route_me_harder); #ifdef CONFIG_XFRM -int ip_xfrm_me_harder(struct sk_buff **pskb) +int ip_xfrm_me_harder(struct sk_buff *skb) { struct flowi fl; unsigned int hh_len; struct dst_entry *dst; - if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) + if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return 0; - if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) + if (xfrm_decode_session(skb, &fl, AF_INET) < 0) return -1; - dst = (*pskb)->dst; + dst = skb->dst; if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0) + if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) return -1; - dst_release((*pskb)->dst); - (*pskb)->dst = dst; + dst_release(skb->dst); + skb->dst = dst; /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } + hh_len = skb->dst->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; return 0; } EXPORT_SYMBOL(ip_xfrm_me_harder); @@ -150,17 +135,17 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) +static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info) { const struct ip_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(*pskb); + const struct iphdr *iph = ip_hdr(skb); if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(pskb, RTN_UNSPEC); + return ip_route_me_harder(skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 29114a9ccd1..2909c92ecd9 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -197,7 +197,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff **pskb, +static unsigned int arpt_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -215,7 +215,7 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset) return (struct arpt_entry *)(base + offset); } -unsigned int arpt_do_table(struct sk_buff **pskb, +unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -231,9 +231,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb, struct xt_table_info *private; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + - (2 * (*pskb)->dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, (sizeof(struct arphdr) + + (2 * skb->dev->addr_len) + + (2 * sizeof(u32))))) return NF_DROP; indev = in ? in->name : nulldevname; @@ -245,14 +245,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - arp = arp_hdr(*pskb); + arp = arp_hdr(skb); do { - if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) { + if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { struct arpt_entry_target *t; int hdr_len; hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * (*pskb)->dev->addr_len); + (2 * skb->dev->addr_len); ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -290,14 +290,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb, /* Targets which reenter must return * abs. verdicts */ - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, t->data); /* Target might have changed stuff. */ - arp = arp_hdr(*pskb); + arp = arp_hdr(skb); if (verdict == ARPT_CONTINUE) e = (void *)e + e->next_offset; diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index c4bdab47597..45fa4e20094 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -1,5 +1,6 @@ /* module that allows mangling of the arp payload */ #include <linux/module.h> +#include <linux/netfilter.h> #include <linux/netfilter_arp/arpt_mangle.h> #include <net/sock.h> @@ -8,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -18,47 +19,38 @@ target(struct sk_buff **pskb, unsigned char *arpptr; int pln, hln; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (skb_make_writable(skb, skb->len)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - - arp = arp_hdr(*pskb); - arpptr = skb_network_header(*pskb) + sizeof(*arp); + arp = arp_hdr(skb); + arpptr = skb_network_header(skb) + sizeof(*arp); pln = arp->ar_pln; hln = arp->ar_hln; /* We assume that pln and hln were checked in the match */ if (mangle->flags & ARPT_MANGLE_SDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > skb_tail_pointer(*pskb))) + (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->src_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_SIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > skb_tail_pointer(*pskb))) + (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_s.src_ip, pln); } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > skb_tail_pointer(*pskb))) + (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->tgt_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > skb_tail_pointer(*pskb))) + (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_t.tgt_ip, pln); } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 75c02306253..302d3da5f69 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -56,12 +56,12 @@ static struct arpt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int arpt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(pskb, hook, in, out, &packet_filter); + return arpt_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops arpt_ops[] = { diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 702d94db19b..10a2ce09fd8 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -24,6 +24,7 @@ #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/route.h> @@ -249,10 +250,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) if (entry->info->indev && entry->skb->dev) { pmsg->hw_type = entry->skb->dev->type; - if (entry->skb->dev->hard_header_parse) - pmsg->hw_addrlen = - entry->skb->dev->hard_header_parse(entry->skb, - pmsg->hw_addr); + pmsg->hw_addrlen = dev_parse_header(entry->skb, + pmsg->hw_addr); } if (data_len) @@ -336,6 +335,7 @@ static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; + int err; struct iphdr *user_iph = (struct iphdr *)v->payload; if (v->data_len < sizeof(*user_iph)) @@ -348,25 +348,18 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { - printk(KERN_WARNING "ip_queue: OOM " - "in mangle, dropping packet\n"); - return -ENOMEM; + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { + printk(KERN_WARNING "ip_queue: error " + "in mangle, dropping packet: %d\n", -err); + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(e->skb, v->data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; @@ -476,7 +469,7 @@ ipq_dev_drop(int ifindex) #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) static inline void -ipq_rcv_skb(struct sk_buff *skb) +__ipq_rcv_skb(struct sk_buff *skb) { int status, type, pid, flags, nlmsglen, skblen; struct nlmsghdr *nlh; @@ -534,19 +527,10 @@ ipq_rcv_skb(struct sk_buff *skb) } static void -ipq_rcv_sk(struct sock *sk, int len) +ipq_rcv_skb(struct sk_buff *skb) { - struct sk_buff *skb; - unsigned int qlen; - mutex_lock(&ipqnl_mutex); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - ipq_rcv_skb(skb); - kfree_skb(skb); - } - + __ipq_rcv_skb(skb); mutex_unlock(&ipqnl_mutex); } @@ -556,6 +540,9 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); @@ -575,7 +562,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -667,14 +654,14 @@ static int __init ip_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, - NULL, THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, + ipq_rcv_skb, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -695,8 +682,7 @@ static int __init ip_queue_init(void) cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); - + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); @@ -715,7 +701,7 @@ static void __exit ip_queue_fini(void) unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6486894f450..4b10b98640a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -169,7 +169,7 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff **pskb, +ipt_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -312,7 +312,7 @@ static void trace_packet(struct sk_buff *skb, /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ipt_do_table(struct sk_buff **pskb, +ipt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -331,8 +331,8 @@ ipt_do_table(struct sk_buff **pskb, struct xt_table_info *private; /* Initialization */ - ip = ip_hdr(*pskb); - datalen = (*pskb)->len - ip->ihl * 4; + ip = ip_hdr(skb); + datalen = skb->len - ip->ihl * 4; indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; /* We handle fragments by dealing with the first fragment as @@ -359,7 +359,7 @@ ipt_do_table(struct sk_buff **pskb, struct ipt_entry_target *t; if (IPT_MATCH_ITERATE(e, do_match, - *pskb, in, out, + skb, in, out, offset, &hotdrop) != 0) goto no_match; @@ -371,8 +371,8 @@ ipt_do_table(struct sk_buff **pskb, #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* The packet is traced: log it */ - if (unlikely((*pskb)->nf_trace)) - trace_packet(*pskb, hook, in, out, + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, table->name, private, e); #endif /* Standard target? */ @@ -410,7 +410,7 @@ ipt_do_table(struct sk_buff **pskb, ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, @@ -428,8 +428,8 @@ ipt_do_table(struct sk_buff **pskb, = 0x57acc001; #endif /* Target might have changed stuff. */ - ip = ip_hdr(*pskb); - datalen = (*pskb)->len - ip->ihl * 4; + ip = ip_hdr(skb); + datalen = skb->len - ip->ihl * 4; if (verdict == IPT_CONTINUE) e = (void *)e + e->next_offset; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 69bd362b5fa..2f544dac72d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -25,6 +25,7 @@ #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <net/netfilter/nf_conntrack.h> +#include <net/net_namespace.h> #include <net/checksum.h> #define CLUSTERIP_VERSION "0.8" @@ -288,7 +289,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -304,7 +305,7 @@ target(struct sk_buff **pskb, * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) { printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); /* FIXME: need to drop invalid ones, since replies @@ -315,7 +316,7 @@ target(struct sk_buff **pskb, /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP + if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) return XT_CONTINUE; @@ -324,7 +325,7 @@ target(struct sk_buff **pskb, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ - hash = clusterip_hashfn(*pskb, cipinfo->config); + hash = clusterip_hashfn(skb, cipinfo->config); switch (ctinfo) { case IP_CT_NEW: @@ -354,7 +355,7 @@ target(struct sk_buff **pskb, /* despite being received via linklayer multicast, this is * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ - (*pskb)->pkt_type = PACKET_HOST; + skb->pkt_type = PACKET_HOST; return XT_CONTINUE; } @@ -400,7 +401,7 @@ checkentry(const char *tablename, return false; } - dev = dev_get_by_name(e->ip.iniface); + dev = dev_get_by_name(&init_net, e->ip.iniface); if (!dev) { printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); return false; @@ -504,12 +505,12 @@ static void arp_print(struct arp_payload *payload) static unsigned int arp_mangle(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct arphdr *arp = arp_hdr(*pskb); + struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; @@ -726,7 +727,7 @@ static int __init ipt_clusterip_init(void) goto cleanup_target; #ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); + clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); if (!clusterip_procdir) { printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); ret = -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f1253bd3837..add110060a2 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -26,15 +26,15 @@ MODULE_DESCRIPTION("iptables ECN modification module"); /* set ECT codepoint from IP header. * return false if there was an error. */ static inline bool -set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) +set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { __u8 oldtos; - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return false; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); oldtos = iph->tos; iph->tos &= ~IPT_ECN_IP_MASK; iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); @@ -45,14 +45,13 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) /* Return false if there was an error. */ static inline bool -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) +set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; __be16 oldval; /* Not enought header? */ - tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb), - sizeof(_tcph), &_tcph); + tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (!tcph) return false; @@ -62,9 +61,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) tcph->cwr == einfo->proto.tcp.cwr)) return true; - if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) return false; - tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb); + tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb); oldval = ((__be16 *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) @@ -72,13 +71,13 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, oldval, ((__be16 *)tcph)[6], 0); return true; } static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -88,12 +87,12 @@ target(struct sk_buff **pskb, const struct ipt_ECN_info *einfo = targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) - if (!set_ect_ip(pskb, einfo)) + if (!set_ect_ip(skb, einfo)) return NF_DROP; if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) - && ip_hdr(*pskb)->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo)) + && ip_hdr(skb)->protocol == IPPROTO_TCP) + if (!set_ect_tcp(skb, einfo)) return NF_DROP; return XT_CONTINUE; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 127a5e89bf1..4b5e8216a4e 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -418,7 +418,7 @@ ipt_log_packet(unsigned int pf, } static unsigned int -ipt_log_target(struct sk_buff **pskb, +ipt_log_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -432,7 +432,7 @@ ipt_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, + ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, loginfo->prefix); return XT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 7c4e4be7c8b..44b516e7cb7 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -52,7 +52,7 @@ masquerade_check(const char *tablename, } static unsigned int -masquerade_target(struct sk_buff **pskb, +masquerade_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -69,7 +69,7 @@ masquerade_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED @@ -82,7 +82,7 @@ masquerade_target(struct sk_buff **pskb, return NF_ACCEPT; mr = targinfo; - rt = (struct rtable *)(*pskb)->dst; + rt = (struct rtable *)skb->dst; newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { printk("MASQUERADE: %s ate my IP address\n", out->name); @@ -125,6 +125,9 @@ static int masq_device_event(struct notifier_block *this, { const struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for conntracks which were associated with that device, diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 41a011d5a06..f8699291e33 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -43,7 +43,7 @@ check(const char *tablename, } static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -59,14 +59,14 @@ target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) - new_ip = ip_hdr(*pskb)->daddr & ~netmask; + new_ip = ip_hdr(skb)->daddr & ~netmask; else - new_ip = ip_hdr(*pskb)->saddr & ~netmask; + new_ip = ip_hdr(skb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; newrange = ((struct nf_nat_range) diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 6ac7a237331..f7cf7d61a2d 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -47,7 +47,7 @@ redirect_check(const char *tablename, } static unsigned int -redirect_target(struct sk_buff **pskb, +redirect_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -63,7 +63,7 @@ redirect_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); /* Local packets: make them go to loopback */ @@ -76,7 +76,7 @@ redirect_target(struct sk_buff **pskb, newdst = 0; rcu_read_lock(); - indev = __in_dev_get_rcu((*pskb)->dev); + indev = __in_dev_get_rcu(skb->dev); if (indev && (ifa = indev->ifa_list)) newdst = ifa->ifa_local; rcu_read_unlock(); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index cb038c8fbc9..dcf4d21d511 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -131,7 +131,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) ) addr_type = RTN_LOCAL; - if (ip_route_me_harder(&nskb, addr_type)) + if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; nskb->ip_summed = CHECKSUM_NONE; @@ -162,7 +162,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); } -static unsigned int reject(struct sk_buff **pskb, +static unsigned int reject(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -173,7 +173,7 @@ static unsigned int reject(struct sk_buff **pskb, /* Our naive response construction doesn't deal with IP options, and probably shouldn't try. */ - if (ip_hdrlen(*pskb) != sizeof(struct iphdr)) + if (ip_hdrlen(skb) != sizeof(struct iphdr)) return NF_DROP; /* WARNING: This code causes reentry within iptables. @@ -181,28 +181,28 @@ static unsigned int reject(struct sk_buff **pskb, must return an absolute verdict. --RR */ switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - send_unreach(*pskb, ICMP_NET_UNREACH); + send_unreach(skb, ICMP_NET_UNREACH); break; case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(*pskb, ICMP_HOST_UNREACH); + send_unreach(skb, ICMP_HOST_UNREACH); break; case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(*pskb, ICMP_PROT_UNREACH); + send_unreach(skb, ICMP_PROT_UNREACH); break; case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(*pskb, ICMP_PORT_UNREACH); + send_unreach(skb, ICMP_PORT_UNREACH); break; case IPT_ICMP_NET_PROHIBITED: - send_unreach(*pskb, ICMP_NET_ANO); + send_unreach(skb, ICMP_NET_ANO); break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(*pskb, ICMP_HOST_ANO); + send_unreach(skb, ICMP_HOST_ANO); break; case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(*pskb, ICMP_PKT_FILTERED); + send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(*pskb, hooknum); + send_reset(skb, hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 97641f1a97f..8988571436b 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -104,7 +104,7 @@ same_destroy(const struct xt_target *target, void *targinfo) } static unsigned int -same_target(struct sk_buff **pskb, +same_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -121,7 +121,7 @@ same_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 25f5d0b3906..d4573baa7f2 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -21,7 +21,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables TOS mangling module"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -29,13 +29,13 @@ target(struct sk_buff **pskb, const void *targinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { __u8 oldtos; - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return NF_DROP; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); oldtos = iph->tos; iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 2b54e7b0cfe..c620a052766 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("IP tables TTL modification module"); MODULE_LICENSE("GPL"); static unsigned int -ipt_ttl_target(struct sk_buff **pskb, +ipt_ttl_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -29,10 +29,10 @@ ipt_ttl_target(struct sk_buff **pskb, const struct ipt_TTL_info *info = targinfo; int new_ttl; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); switch (info->mode) { case IPT_TTL_SET: diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 6ca43e4ca7e..212b830765a 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -279,7 +279,7 @@ alloc_failure: spin_unlock_bh(&ulog_lock); } -static unsigned int ipt_ulog_target(struct sk_buff **pskb, +static unsigned int ipt_ulog_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -288,7 +288,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); + ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); return XT_CONTINUE; } @@ -409,7 +409,8 @@ static int __init ipt_ulog_init(void) for (i = 0; i < ULOG_MAXNLGROUPS; i++) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + nflognl = netlink_kernel_create(&init_net, + NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 6d0c0f7364a..11d39fb5f38 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -24,6 +24,7 @@ #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/inet.h> +#include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_recent.h> @@ -380,25 +381,14 @@ static const struct seq_operations recent_seq_ops = { static int recent_seq_open(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); - struct seq_file *seq; struct recent_iter_state *st; - int ret; - st = kzalloc(sizeof(*st), GFP_KERNEL); + st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); if (st == NULL) return -ENOMEM; - ret = seq_open(file, &recent_seq_ops); - if (ret) { - kfree(st); - goto out; - } - st->table = pde->data; - seq = file->private_data; - seq->private = st; -out: - return ret; + return 0; } static ssize_t recent_proc_write(struct file *file, const char __user *input, @@ -487,7 +477,7 @@ static int __init ipt_recent_init(void) #ifdef CONFIG_PROC_FS if (err) return err; - proc_dir = proc_mkdir("ipt_recent", proc_net); + proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); if (proc_dir == NULL) { xt_unregister_match(&recent_match); err = -ENOMEM; @@ -501,7 +491,7 @@ static void __exit ipt_recent_exit(void) BUG_ON(!list_empty(&tables)); xt_unregister_match(&recent_match); #ifdef CONFIG_PROC_FS - remove_proc_entry("ipt_recent", proc_net); + remove_proc_entry("ipt_recent", init_net.proc_net); #endif } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 4f51c1d7d2d..ba3262c6043 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -62,31 +62,31 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, &packet_filter); } static unsigned int ipt_local_out_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_filter: ignoring short SOCK_RAW " "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops ipt_ops[] = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 902446f7cbc..b4360a69d5c 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -75,17 +75,17 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_route_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_mangler); + return ipt_do_table(skb, hook, in, out, &packet_mangler); } static unsigned int ipt_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -97,8 +97,8 @@ ipt_local_hook(unsigned int hook, u_int32_t mark; /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_mangle: ignoring short SOCK_RAW " "packet.\n"); @@ -106,22 +106,22 @@ ipt_local_hook(unsigned int hook, } /* Save things which could affect route */ - mark = (*pskb)->mark; - iph = ip_hdr(*pskb); + mark = skb->mark; + iph = ip_hdr(skb); saddr = iph->saddr; daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); + ret = ipt_do_table(skb, hook, in, out, &packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || - (*pskb)->mark != mark || + skb->mark != mark || iph->tos != tos) - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(skb, RTN_UNSPEC)) ret = NF_DROP; } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index d6e50339568..5de6e57ac55 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -47,30 +47,30 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, &packet_raw); } static unsigned int ipt_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_raw: ignoring short SOCK_RAW" "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, &packet_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index f813e02aab3..831e9b29806 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -63,19 +63,20 @@ static int ipv4_print_conntrack(struct seq_file *s, } /* Returns new sk_buff, or NULL */ -static struct sk_buff * -nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { + int err; + skb_orphan(skb); local_bh_disable(); - skb = ip_defrag(skb, user); + err = ip_defrag(skb, user); local_bh_enable(); - if (skb) + if (!err) ip_send_check(ip_hdr(skb)); - return skb; + return err; } static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, @@ -99,17 +100,17 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } static unsigned int ipv4_confirm(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(pskb); + return nf_conntrack_confirm(skb); } static unsigned int ipv4_conntrack_help(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -120,7 +121,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, struct nf_conntrack_helper *helper; /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) return NF_ACCEPT; @@ -131,56 +132,55 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; - return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb), + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if ((*pskb)->nfct) + if (skb->nfct) return NF_ACCEPT; /* Gather fragments. */ - if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) { - *pskb = nf_ct_ipv4_gather_frags(*pskb, - hooknum == NF_IP_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT); - if (!*pskb) + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_IP_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) return NF_STOLEN; } return NF_ACCEPT; } static unsigned int ipv4_conntrack_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, pskb); + return nf_conntrack_in(PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, pskb); + return nf_conntrack_in(PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so @@ -360,35 +360,32 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int ipv4_tuple_to_nfattr(struct sk_buff *skb, +static int ipv4_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), + NLA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.u3.ip); - NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), + NLA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.u3.ip); return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), - [CTA_IP_V4_DST-1] = sizeof(u_int32_t), +static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = { + [CTA_IP_V4_SRC] = { .type = NLA_U32 }, + [CTA_IP_V4_DST] = { .type = NLA_U32 }, }; -static int ipv4_nfattr_to_tuple(struct nfattr *tb[], +static int ipv4_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t) { - if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1]) - return -EINVAL; - - if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) return -EINVAL; - t->src.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); - t->dst.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + t->src.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_SRC]); + t->dst.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_DST]); return 0; } @@ -411,8 +408,9 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .print_conntrack = ipv4_print_conntrack, .get_l4proto = ipv4_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = ipv4_tuple_to_nfattr, - .nfattr_to_tuple = ipv4_nfattr_to_tuple, + .tuple_to_nlattr = ipv4_tuple_to_nlattr, + .nlattr_to_tuple = ipv4_nlattr_to_tuple, + .nla_policy = ipv4_nla_policy, #endif #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index b3dd5de9a25..741f3dfaa5a 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -11,6 +11,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/percpu.h> +#include <net/net_namespace.h> #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack_core.h> @@ -173,22 +174,8 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct ct_iter_state *st; - int ret; - - st = kzalloc(sizeof(struct ct_iter_state), GFP_KERNEL); - if (st == NULL) - return -ENOMEM; - ret = seq_open(file, &ct_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - return ret; -out_free: - kfree(st); - return ret; + return seq_open_private(file, &ct_seq_ops, + sizeof(struct ct_iter_state)); } static const struct file_operations ct_file_ops = { @@ -290,22 +277,8 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct ct_expect_iter_state *st; - int ret; - - st = kzalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); - if (!st) - return -ENOMEM; - ret = seq_open(file, &exp_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - return ret; -out_free: - kfree(st); - return ret; + return seq_open_private(file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); } static const struct file_operations ip_exp_file_ops = { @@ -408,16 +381,16 @@ int __init nf_conntrack_ipv4_compat_init(void) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat); if (!proc_stat) goto err3; @@ -427,16 +400,16 @@ int __init nf_conntrack_ipv4_compat_init(void) return 0; err3: - proc_net_remove("ip_conntrack_expect"); + proc_net_remove(&init_net, "ip_conntrack_expect"); err2: - proc_net_remove("ip_conntrack"); + proc_net_remove(&init_net, "ip_conntrack"); err1: return -ENOMEM; } void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", proc_net_stat); - proc_net_remove("ip_conntrack_expect"); - proc_net_remove("ip_conntrack"); + remove_proc_entry("ip_conntrack", init_net.proc_net_stat); + proc_net_remove(&init_net, "ip_conntrack_expect"); + proc_net_remove(&init_net, "ip_conntrack"); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 6593fd2c5b1..adcbaf6d429 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -232,45 +232,42 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int icmp_tuple_to_nfattr(struct sk_buff *skb, +static int icmp_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + NLA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), &t->src.u.icmp.id); - NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), &t->dst.u.icmp.type); - NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), &t->dst.u.icmp.code); return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t) +static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 }, }; -static int icmp_nfattr_to_tuple(struct nfattr *tb[], +static int icmp_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple) { - if (!tb[CTA_PROTO_ICMP_TYPE-1] - || !tb[CTA_PROTO_ICMP_CODE-1] - || !tb[CTA_PROTO_ICMP_ID-1]) - return -EINVAL; - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + if (!tb[CTA_PROTO_ICMP_TYPE] + || !tb[CTA_PROTO_ICMP_CODE] + || !tb[CTA_PROTO_ICMP_ID]) return -EINVAL; tuple->dst.u.icmp.type = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_TYPE]); tuple->dst.u.icmp.code = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_CODE]); tuple->src.u.icmp.id = - *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + *(__be16 *)nla_data(tb[CTA_PROTO_ICMP_ID]); if (tuple->dst.u.icmp.type >= sizeof(invmap) || !invmap[tuple->dst.u.icmp.type]) @@ -284,7 +281,6 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT, .procname = "nf_conntrack_icmp_timeout", .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), @@ -298,7 +294,6 @@ static struct ctl_table icmp_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table icmp_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT, .procname = "ip_conntrack_icmp_timeout", .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), @@ -327,8 +322,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .destroy = NULL, .me = NULL, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = icmp_tuple_to_nfattr, - .nfattr_to_tuple = icmp_nfattr_to_tuple, + .tuple_to_nlattr = icmp_tuple_to_nlattr, + .nlattr_to_tuple = icmp_nlattr_to_tuple, + .nla_policy = icmp_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_header = &icmp_sysctl_header, diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index bd93a1d7105..35a5aa69cd9 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -24,7 +24,7 @@ MODULE_DESCRIPTION("Amanda NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_amanda"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff **pskb, return NF_DROP; sprintf(buffer, "%u", port); - ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo, + ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index deab27facba..56e93f692e8 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -349,7 +349,7 @@ EXPORT_SYMBOL(nf_nat_setup_info); /* Returns true if succeeded. */ static int manip_pkt(u_int16_t proto, - struct sk_buff **pskb, + struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype) @@ -357,19 +357,19 @@ manip_pkt(u_int16_t proto, struct iphdr *iph; struct nf_nat_protocol *p; - if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) return 0; - iph = (void *)(*pskb)->data + iphdroff; + iph = (void *)skb->data + iphdroff; /* Manipulate protcol part. */ /* rcu_read_lock()ed by nf_hook_slow */ p = __nf_nat_proto_find(proto); - if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) + if (!p->manip_pkt(skb, iphdroff, target, maniptype)) return 0; - iph = (void *)(*pskb)->data + iphdroff; + iph = (void *)skb->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); @@ -385,7 +385,7 @@ manip_pkt(u_int16_t proto, unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff **pskb) + struct sk_buff *skb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -407,7 +407,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct, /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype)) + if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) return NF_DROP; } return NF_ACCEPT; @@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); int nf_nat_icmp_reply_translation(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff **pskb) + struct sk_buff *skb) { struct { struct icmphdr icmp; @@ -426,24 +426,24 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, } *inside; struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple inner, target; - int hdrlen = ip_hdrlen(*pskb); + int hdrlen = ip_hdrlen(skb); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; - inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); + inside = (void *)skb->data + ip_hdrlen(skb); /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) return 0; /* Must be RELATED */ - NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || - (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || + skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be @@ -458,15 +458,15 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, } pr_debug("icmp_reply_translation: translating error %p manip %u " - "dir %s\n", *pskb, manip, + "dir %s\n", skb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - if (!nf_ct_get_tuple(*pskb, - ip_hdrlen(*pskb) + sizeof(struct icmphdr), - (ip_hdrlen(*pskb) + + if (!nf_ct_get_tuple(skb, + ip_hdrlen(skb) + sizeof(struct icmphdr), + (ip_hdrlen(skb) + sizeof(struct icmphdr) + inside->ip.ihl * 4), (u_int16_t)AF_INET, inside->ip.protocol, @@ -478,19 +478,19 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, pass all hooks (locally-generated ICMP). Consider incoming packet: PREROUTING (DST manip), routing produces ICMP, goes through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, pskb, - ip_hdrlen(*pskb) + sizeof(inside->icmp), + if (!manip_pkt(inside->ip.protocol, skb, + ip_hdrlen(skb) + sizeof(inside->icmp), &ct->tuplehash[!dir].tuple, !manip)) return 0; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (skb->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); + inside = (void *)skb->data + ip_hdrlen(skb); inside->icmp.checksum = 0; inside->icmp.checksum = - csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0)); + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); } /* Change outer to look the reply to an incoming packet @@ -506,7 +506,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, if (ct->status & statusbit) { nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(0, pskb, 0, &target, manip)) + if (!manip_pkt(0, skb, 0, &target, manip)) return 0; } @@ -544,46 +544,46 @@ EXPORT_SYMBOL(nf_nat_protocol_unregister); #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) int -nf_nat_port_range_to_nfattr(struct sk_buff *skb, +nf_nat_port_range_to_nlattr(struct sk_buff *skb, const struct nf_nat_range *range) { - NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), + NLA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), &range->min.tcp.port); - NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), + NLA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), &range->max.tcp.port); return 0; -nfattr_failure: +nla_put_failure: return -1; } -EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range); +EXPORT_SYMBOL_GPL(nf_nat_port_nlattr_to_range); int -nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range) +nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range) { int ret = 0; /* we have to return whether we actually parsed something or not */ - if (tb[CTA_PROTONAT_PORT_MIN-1]) { + if (tb[CTA_PROTONAT_PORT_MIN]) { ret = 1; range->min.tcp.port = - *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + *(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MIN]); } - if (!tb[CTA_PROTONAT_PORT_MAX-1]) { + if (!tb[CTA_PROTONAT_PORT_MAX]) { if (ret) range->max.tcp.port = range->min.tcp.port; } else { ret = 1; range->max.tcp.port = - *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + *(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MAX]); } return ret; } -EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr); +EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nlattr); #endif /* Noone using conntrack by the time this called. */ diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index 3663bd879c3..e1a16d3ea4c 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip_nat_ftp"); /* FIXME: Time out? --RR */ static int -mangle_rfc959_packet(struct sk_buff **pskb, +mangle_rfc959_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -43,13 +43,13 @@ mangle_rfc959_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } /* |1|132.235.1.2|6275| */ static int -mangle_eprt_packet(struct sk_buff **pskb, +mangle_eprt_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -63,13 +63,13 @@ mangle_eprt_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } /* |1|132.235.1.2|6275| */ static int -mangle_epsv_packet(struct sk_buff **pskb, +mangle_epsv_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -83,11 +83,11 @@ mangle_epsv_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } -static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, +static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, unsigned int, unsigned int, struct nf_conn *, enum ip_conntrack_info) = { @@ -99,7 +99,7 @@ static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int nf_nat_ftp(struct sk_buff **pskb, +static unsigned int nf_nat_ftp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, @@ -132,7 +132,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, if (port == 0) return NF_DROP; - if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) { + if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index c1b059a7370..a868c8c4132 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -22,12 +22,12 @@ #include <linux/netfilter/nf_conntrack_h323.h> /****************************************************************************/ -static int set_addr(struct sk_buff **pskb, +static int set_addr(struct sk_buff *skb, unsigned char **data, int dataoff, unsigned int addroff, __be32 ip, __be16 port) { enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo); + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct { __be32 ip; __be16 port; @@ -38,8 +38,8 @@ static int set_addr(struct sk_buff **pskb, buf.port = port; addroff += dataoff; - if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) { - if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if (ip_hdr(skb)->protocol == IPPROTO_TCP) { + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) @@ -49,14 +49,13 @@ static int set_addr(struct sk_buff **pskb, } /* Relocate data pointer */ - th = skb_header_pointer(*pskb, ip_hdrlen(*pskb), + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (th == NULL) return -1; - *data = (*pskb)->data + ip_hdrlen(*pskb) + - th->doff * 4 + dataoff; + *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; } else { - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) @@ -67,36 +66,35 @@ static int set_addr(struct sk_buff **pskb, /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy * or pull everything in a linear buffer, so we can safely * use the skb pointers now */ - *data = ((*pskb)->data + ip_hdrlen(*pskb) + - sizeof(struct udphdr)); + *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); } return 0; } /****************************************************************************/ -static int set_h225_addr(struct sk_buff **pskb, +static int set_h225_addr(struct sk_buff *skb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) { - return set_addr(pskb, data, dataoff, taddr->ipAddress.ip, + return set_addr(skb, data, dataoff, taddr->ipAddress.ip, addr->ip, port); } /****************************************************************************/ -static int set_h245_addr(struct sk_buff **pskb, +static int set_h245_addr(struct sk_buff *skb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) { - return set_addr(pskb, data, dataoff, + return set_addr(skb, data, dataoff, taddr->unicastAddress.iPAddress.network, addr->ip, port); } /****************************************************************************/ -static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, +static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -125,7 +123,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), info->sig_port[!dir]); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.dst.u3, info->sig_port[!dir]); @@ -137,7 +135,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), info->sig_port[!dir]); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.src.u3, info->sig_port[!dir]); @@ -149,7 +147,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, +static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -168,7 +166,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), ntohs(port), NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple. dst.u.udp.port); @@ -179,7 +177,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, @@ -244,7 +242,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(pskb, data, dataoff, taddr, + if (set_h245_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons((port & htons(1)) ? nated_port + 1 : nated_port)) == 0) { @@ -273,7 +271,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, @@ -301,7 +299,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(pskb, data, dataoff, taddr, + if (set_h245_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) < 0) { nf_ct_unexpect_related(exp); @@ -318,7 +316,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, @@ -351,7 +349,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(pskb, data, dataoff, taddr, + if (set_h225_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -406,7 +404,7 @@ static void ip_nat_q931_expect(struct nf_conn *new, } /****************************************************************************/ -static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) @@ -439,7 +437,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(pskb, data, 0, &taddr[idx], + if (set_h225_addr(skb, data, 0, &taddr[idx], &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -450,7 +448,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, if (idx > 0 && get_h225_addr(ct, *data, &taddr[0], &addr, &port) && (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr(pskb, data, 0, &taddr[0], + set_h225_addr(skb, data, 0, &taddr[0], &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); } @@ -495,7 +493,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new, } /****************************************************************************/ -static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, @@ -525,7 +523,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (!set_h225_addr(pskb, data, dataoff, taddr, + if (!set_h225_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 93d8a0a8f03..8718da00ef2 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -111,22 +111,14 @@ static void mangle_contents(struct sk_buff *skb, } /* Unusual, but possible case. */ -static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) +static int enlarge_skb(struct sk_buff *skb, unsigned int extra) { - struct sk_buff *nskb; - - if ((*pskb)->len + extra > 65535) + if (skb->len + extra > 65535) return 0; - nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC); - if (!nskb) + if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) return 0; - /* Transfer socket to new skb. */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; return 1; } @@ -139,7 +131,7 @@ static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) * * */ int -nf_nat_mangle_tcp_packet(struct sk_buff **pskb, +nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, @@ -147,37 +139,37 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; struct tcphdr *tcph; int oldlen, datalen; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (rep_len > match_len && - rep_len - match_len > skb_tailroom(*pskb) && - !enlarge_skb(pskb, rep_len - match_len)) + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) return 0; - SKB_LINEAR_ASSERT(*pskb); + SKB_LINEAR_ASSERT(skb); - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); tcph = (void *)iph + iph->ihl*4; - oldlen = (*pskb)->len - iph->ihl*4; - mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, + oldlen = skb->len - iph->ihl*4; + mangle_contents(skb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); - datalen = (*pskb)->len - iph->ihl*4; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + datalen = skb->len - iph->ihl*4; + if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_V4_CSUM) { - (*pskb)->ip_summed = CHECKSUM_PARTIAL; - (*pskb)->csum_start = skb_headroom(*pskb) + - skb_network_offset(*pskb) + - iph->ihl * 4; - (*pskb)->csum_offset = offsetof(struct tcphdr, check); + skb->dev->features & NETIF_F_V4_CSUM) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + iph->ihl * 4; + skb->csum_offset = offsetof(struct tcphdr, check); tcph->check = ~tcp_v4_check(datalen, iph->saddr, iph->daddr, 0); } else { @@ -188,7 +180,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen, 0)); } } else - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(oldlen), htons(datalen), 1); if (rep_len != match_len) { @@ -197,7 +189,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, (int)rep_len - (int)match_len, ct, ctinfo); /* Tell TCP window tracking about seq change */ - nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); } return 1; @@ -215,7 +207,7 @@ EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); * should be fairly easy to do. */ int -nf_nat_mangle_udp_packet(struct sk_buff **pskb, +nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, @@ -223,48 +215,48 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; struct udphdr *udph; int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ - iph = ip_hdr(*pskb); - if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + + iph = ip_hdr(skb); + if (skb->len < iph->ihl*4 + sizeof(*udph) + match_offset + match_len) return 0; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (rep_len > match_len && - rep_len - match_len > skb_tailroom(*pskb) && - !enlarge_skb(pskb, rep_len - match_len)) + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) return 0; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); udph = (void *)iph + iph->ihl*4; - oldlen = (*pskb)->len - iph->ihl*4; - mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), + oldlen = skb->len - iph->ihl*4; + mangle_contents(skb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - datalen = (*pskb)->len - iph->ihl*4; + datalen = skb->len - iph->ihl*4; udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) return 1; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_V4_CSUM) { - (*pskb)->ip_summed = CHECKSUM_PARTIAL; - (*pskb)->csum_start = skb_headroom(*pskb) + - skb_network_offset(*pskb) + - iph->ihl * 4; - (*pskb)->csum_offset = offsetof(struct udphdr, check); + skb->dev->features & NETIF_F_V4_CSUM) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + iph->ihl * 4; + skb->csum_offset = offsetof(struct udphdr, check); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, 0); @@ -278,7 +270,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, udph->check = CSUM_MANGLED_0; } } else - nf_proto_csum_replace2(&udph->check, *pskb, + nf_proto_csum_replace2(&udph->check, skb, htons(oldlen), htons(datalen), 1); return 1; @@ -330,7 +322,7 @@ sack_adjust(struct sk_buff *skb, /* TCP SACK sequence number adjustment */ static inline unsigned int -nf_nat_sack_adjust(struct sk_buff **pskb, +nf_nat_sack_adjust(struct sk_buff *skb, struct tcphdr *tcph, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -338,17 +330,17 @@ nf_nat_sack_adjust(struct sk_buff **pskb, unsigned int dir, optoff, optend; struct nf_conn_nat *nat = nfct_nat(ct); - optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr); - optend = ip_hdrlen(*pskb) + tcph->doff * 4; + optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); + optend = ip_hdrlen(skb) + tcph->doff * 4; - if (!skb_make_writable(pskb, optend)) + if (!skb_make_writable(skb, optend)) return 0; dir = CTINFO2DIR(ctinfo); while (optoff < optend) { /* Usually: option, length. */ - unsigned char *op = (*pskb)->data + optoff; + unsigned char *op = skb->data + optoff; switch (op[0]) { case TCPOPT_EOL: @@ -365,7 +357,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb, if (op[0] == TCPOPT_SACK && op[1] >= 2+TCPOLEN_SACK_PERBLOCK && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(*pskb, tcph, optoff+2, + sack_adjust(skb, tcph, optoff+2, optoff+op[1], &nat->seq[!dir]); optoff += op[1]; } @@ -375,7 +367,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb, /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ int -nf_nat_seq_adjust(struct sk_buff **pskb, +nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -390,10 +382,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb, this_way = &nat->seq[dir]; other_way = &nat->seq[!dir]; - if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) return 0; - tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb); + tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); else @@ -405,8 +397,8 @@ nf_nat_seq_adjust(struct sk_buff **pskb, else newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); - nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); - nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); + nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); + nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), @@ -415,10 +407,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb, tcph->seq = newseq; tcph->ack_seq = newack; - if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo)) + if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); return 1; } diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index bcf274bba60..766e2c16c6b 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_irc"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -58,7 +58,7 @@ static unsigned int help(struct sk_buff **pskb, pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", buffer, NIPQUAD(ip), port); - ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, + ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 984ec8308b2..e1385a09907 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -110,7 +110,7 @@ static void pptp_nat_expected(struct nf_conn *ct, /* outbound packets == from PNS to PAC */ static int -pptp_outbound_pkt(struct sk_buff **pskb, +pptp_outbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, @@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ - if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, cid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_callid), (char *)&new_callid, @@ -213,7 +213,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, /* inbound packets == from PAC to PNS */ static int -pptp_inbound_pkt(struct sk_buff **pskb, +pptp_inbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, @@ -268,7 +268,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, pr_debug("altering peer call id from 0x%04x to 0x%04x\n", ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); - if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, pcid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_pcid), (char *)&new_pcid, diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 2e40cc83526..b820f996035 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -98,21 +98,21 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* manipulate a GRE packet according to maniptype */ static int -gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, +gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { struct gre_hdr *greh; struct gre_hdr_pptp *pgreh; - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); unsigned int hdroff = iphdroff + iph->ihl * 4; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ - if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8)) + if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) return 0; - greh = (void *)(*pskb)->data + hdroff; + greh = (void *)skb->data + hdroff; pgreh = (struct gre_hdr_pptp *)greh; /* we only have destination manip of a packet, since 'source key' @@ -142,8 +142,8 @@ static struct nf_nat_protocol gre __read_mostly = { .in_range = gre_in_range, .unique_tuple = gre_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nfattr = nf_nat_port_range_to_nfattr, - .nfattr_to_range = nf_nat_port_nfattr_to_range, + .range_to_nlattr = nf_nat_port_range_to_nlattr, + .nlattr_to_range = nf_nat_port_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index f71ef9b5f42..b9fc724388f 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -52,20 +52,20 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -icmp_manip_pkt(struct sk_buff **pskb, +icmp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct icmphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return 0; - hdr = (struct icmphdr *)((*pskb)->data + hdroff); - nf_proto_csum_replace2(&hdr->checksum, *pskb, + hdr = (struct icmphdr *)(skb->data + hdroff); + nf_proto_csum_replace2(&hdr->checksum, skb, hdr->un.echo.id, tuple->src.u.icmp.id, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; @@ -79,7 +79,7 @@ struct nf_nat_protocol nf_nat_protocol_icmp = { .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nfattr = nf_nat_port_range_to_nfattr, - .nfattr_to_range = nf_nat_port_nfattr_to_range, + .range_to_nlattr = nf_nat_port_range_to_nlattr, + .nlattr_to_range = nf_nat_port_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 123c95913f2..6bab2e18445 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -88,12 +88,12 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -tcp_manip_pkt(struct sk_buff **pskb, +tcp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; @@ -103,14 +103,14 @@ tcp_manip_pkt(struct sk_buff **pskb, /* this could be a inner header returned in icmp packet; in such cases we cannot update the checksum field since it is outside of the 8 bytes of transport layer headers we are guaranteed */ - if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) + if (skb->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); - if (!skb_make_writable(pskb, hdroff + hdrsize)) + if (!skb_make_writable(skb, hdroff + hdrsize)) return 0; - iph = (struct iphdr *)((*pskb)->data + iphdroff); - hdr = (struct tcphdr *)((*pskb)->data + hdroff); + iph = (struct iphdr *)(skb->data + iphdroff); + hdr = (struct tcphdr *)(skb->data + hdroff); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ @@ -132,8 +132,8 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); - nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0); + nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); return 1; } @@ -145,7 +145,7 @@ struct nf_nat_protocol nf_nat_protocol_tcp = { .in_range = tcp_in_range, .unique_tuple = tcp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nfattr = nf_nat_port_range_to_nfattr, - .nfattr_to_range = nf_nat_port_nfattr_to_range, + .range_to_nlattr = nf_nat_port_range_to_nlattr, + .nlattr_to_range = nf_nat_port_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 1c4c70e25cd..cbf1a61e290 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -86,22 +86,22 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -udp_manip_pkt(struct sk_buff **pskb, +udp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; __be16 *portptr, newport; - if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return 0; - iph = (struct iphdr *)((*pskb)->data + iphdroff); - hdr = (struct udphdr *)((*pskb)->data + hdroff); + iph = (struct iphdr *)(skb->data + iphdroff); + hdr = (struct udphdr *)(skb->data + hdroff); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ @@ -116,9 +116,9 @@ udp_manip_pkt(struct sk_buff **pskb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { - nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); - nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, + if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); if (!hdr->check) hdr->check = CSUM_MANGLED_0; @@ -135,7 +135,7 @@ struct nf_nat_protocol nf_nat_protocol_udp = { .in_range = udp_in_range, .unique_tuple = udp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nfattr = nf_nat_port_range_to_nfattr, - .nfattr_to_range = nf_nat_port_nfattr_to_range, + .range_to_nlattr = nf_nat_port_range_to_nlattr, + .nlattr_to_range = nf_nat_port_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index f50d0203f9c..cfd2742e970 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -37,7 +37,7 @@ static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -unknown_manip_pkt(struct sk_buff **pskb, +unknown_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 76ec59ae524..46b25ab5f78 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -65,7 +65,7 @@ static struct xt_table nat_table = { }; /* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff **pskb, +static unsigned int ipt_snat_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -78,7 +78,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || @@ -107,7 +107,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) ip_rt_put(rt); } -static unsigned int ipt_dnat_target(struct sk_buff **pskb, +static unsigned int ipt_dnat_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -121,14 +121,14 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); if (hooknum == NF_IP_LOCAL_OUT && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(ip_hdr(*pskb)->daddr, + warn_if_extra_mangle(ip_hdr(skb)->daddr, mr->range[0].min_ip); return nf_nat_setup_info(ct, &mr->range[0], hooknum); @@ -204,7 +204,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum) return nf_nat_setup_info(ct, &range, hooknum); } -int nf_nat_rule_find(struct sk_buff **pskb, +int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, @@ -212,7 +212,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, { int ret; - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); + ret = ipt_do_table(skb, hooknum, in, out, &nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e14d41976c2..ce9edbcc01e 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -60,7 +60,7 @@ static void addr_map_init(struct nf_conn *ct, struct addr_map *map) } } -static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, +static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr, size_t dlen, enum sip_header_pos pos, struct addr_map *map) { @@ -84,15 +84,15 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, } else return 1; - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, addr, addrlen)) return 0; - *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); + *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); return 1; } -static unsigned int ip_nat_sip(struct sk_buff **pskb, +static unsigned int ip_nat_sip(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr) @@ -101,8 +101,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, struct addr_map map; int dataoff, datalen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); - datalen = (*pskb)->len - dataoff; + dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); + datalen = skb->len - dataoff; if (datalen < sizeof("SIP/2.0") - 1) return NF_ACCEPT; @@ -121,19 +121,19 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, else pos = POS_REQ_URI; - if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map)) + if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map)) return NF_DROP; } - if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) + if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) return NF_DROP; return NF_ACCEPT; } -static unsigned int mangle_sip_packet(struct sk_buff **pskb, +static unsigned int mangle_sip_packet(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr, size_t dlen, @@ -145,16 +145,16 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb, if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) return 0; - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, bufflen)) return 0; /* We need to reload this. Thanks Patrick. */ - *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); + *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); return 1; } -static int mangle_content_len(struct sk_buff **pskb, +static int mangle_content_len(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char *dptr) @@ -163,22 +163,22 @@ static int mangle_content_len(struct sk_buff **pskb, char buffer[sizeof("65536")]; int bufflen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); + dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); /* Get actual SDP lenght */ - if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, &matchlen, POS_SDP_HEADER) > 0) { /* since ct_sip_get_info() give us a pointer passing 'v=' we need to add 2 bytes in this count. */ - int c_len = (*pskb)->len - dataoff - matchoff + 2; + int c_len = skb->len - dataoff - matchoff + 2; /* Now, update SDP length */ - if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, &matchlen, POS_CONTENT) > 0) { bufflen = sprintf(buffer, "%u", c_len); - return nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + return nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, bufflen); } @@ -186,7 +186,7 @@ static int mangle_content_len(struct sk_buff **pskb, return 0; } -static unsigned int mangle_sdp(struct sk_buff **pskb, +static unsigned int mangle_sdp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, __be32 newip, u_int16_t port, @@ -195,25 +195,25 @@ static unsigned int mangle_sdp(struct sk_buff **pskb, char buffer[sizeof("nnn.nnn.nnn.nnn")]; unsigned int dataoff, bufflen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); + dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); /* Mangle owner and contact info. */ bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_OWNER_IP4)) return 0; - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_CONNECTION_IP4)) return 0; /* Mangle media port. */ bufflen = sprintf(buffer, "%u", port); - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_MEDIA)) return 0; - return mangle_content_len(pskb, ctinfo, ct, dptr); + return mangle_content_len(skb, ctinfo, ct, dptr); } static void ip_nat_sdp_expect(struct nf_conn *ct, @@ -241,7 +241,7 @@ static void ip_nat_sdp_expect(struct nf_conn *ct, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp(struct sk_buff **pskb, +static unsigned int ip_nat_sdp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr) @@ -277,7 +277,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, if (port == 0) return NF_DROP; - if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { + if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 6bfcd3a90f0..03709d6b4b0 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1188,9 +1188,9 @@ static int snmp_parse_mangle(unsigned char *msg, */ static int snmp_translate(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - struct sk_buff **pskb) + struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); u_int16_t udplen = ntohs(udph->len); u_int16_t paylen = udplen - sizeof(struct udphdr); @@ -1225,13 +1225,13 @@ static int snmp_translate(struct nf_conn *ct, /* We don't actually set up expectations, just adjust internal IP * addresses if this is being NATted */ -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { int dir = CTINFO2DIR(ctinfo); unsigned int ret; - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); /* SNMP replies and originating SNMP traps get mangled */ @@ -1250,7 +1250,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, * enough room for a UDP header. Just verify the UDP length field so we * can mess around with the payload. */ - if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) { + if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { if (net_ratelimit()) printk(KERN_WARNING "SNMP: dropping malformed packet " "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", @@ -1258,11 +1258,11 @@ static int help(struct sk_buff **pskb, unsigned int protoff, return NF_DROP; } - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; spin_lock_bh(&snmp_lock); - ret = snmp_translate(ct, ctinfo, pskb); + ret = snmp_translate(ct, ctinfo, skb); spin_unlock_bh(&snmp_lock); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 46cc99def16..7db76ea9af9 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -67,7 +67,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) static unsigned int nf_nat_fn(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -80,9 +80,9 @@ nf_nat_fn(unsigned int hooknum, /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. */ - NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to packet filter it out, or implement conntrack/NAT for that @@ -91,10 +91,10 @@ nf_nat_fn(unsigned int hooknum, /* Exception: ICMP redirect to new connection (not in hash table yet). We must not let this through, in case we're doing NAT to the same network. */ - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { struct icmphdr _hdr, *hp; - hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb), + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp != NULL && hp->type == ICMP_REDIRECT) @@ -119,9 +119,9 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, - hooknum, pskb)) + hooknum, skb)) return NF_DROP; else return NF_ACCEPT; @@ -141,7 +141,7 @@ nf_nat_fn(unsigned int hooknum, /* LOCAL_IN hook doesn't have a chain! */ ret = alloc_null_binding(ct, hooknum); else - ret = nf_nat_rule_find(pskb, hooknum, in, out, + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) { @@ -159,31 +159,31 @@ nf_nat_fn(unsigned int hooknum, ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); } - return nf_nat_packet(ct, ctinfo, hooknum, pskb); + return nf_nat_packet(ct, ctinfo, hooknum, skb); } static unsigned int nf_nat_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { unsigned int ret; - __be32 daddr = ip_hdr(*pskb)->daddr; + __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - daddr != ip_hdr(*pskb)->daddr) { - dst_release((*pskb)->dst); - (*pskb)->dst = NULL; + daddr != ip_hdr(skb)->daddr) { + dst_release(skb->dst); + skb->dst = NULL; } return ret; } static unsigned int nf_nat_out(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -195,14 +195,14 @@ nf_nat_out(unsigned int hooknum, unsigned int ret; /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.u3.ip != @@ -210,7 +210,7 @@ nf_nat_out(unsigned int hooknum, || ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all ) - return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; + return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; } #endif return ret; @@ -218,7 +218,7 @@ nf_nat_out(unsigned int hooknum, static unsigned int nf_nat_local_fn(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -228,24 +228,24 @@ nf_nat_local_fn(unsigned int hooknum, unsigned int ret; /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(skb, RTN_UNSPEC)) ret = NF_DROP; } #ifdef CONFIG_XFRM else if (ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) - if (ip_xfrm_me_harder(pskb)) + if (ip_xfrm_me_harder(skb)) ret = NF_DROP; #endif } @@ -254,7 +254,7 @@ nf_nat_local_fn(unsigned int hooknum, static unsigned int nf_nat_adjust(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -262,10 +262,10 @@ nf_nat_adjust(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { pr_debug("nf_nat_standalone: adjusting sequence number\n"); - if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) + if (!nf_nat_seq_adjust(skb, ct, ctinfo)) return NF_DROP; } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 04dfeaefec0..0ecec701cb4 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("TFTP NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_tftp"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3b690cf2a4e..9be0daa9c0e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -34,6 +34,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> +#include <net/net_namespace.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/tcp.h> @@ -69,8 +70,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); - seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, - atomic_read(&ip_frag_mem)); + seq_printf(seq, "FRAG: inuse %d memory %d\n", + ip_frag_nqueues(), ip_frag_mem()); return 0; } @@ -120,36 +121,25 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_SENTINEL }; -static const struct snmp_mib snmp4_icmp_list[] = { - SNMP_MIB_ITEM("InMsgs", ICMP_MIB_INMSGS), - SNMP_MIB_ITEM("InErrors", ICMP_MIB_INERRORS), - SNMP_MIB_ITEM("InDestUnreachs", ICMP_MIB_INDESTUNREACHS), - SNMP_MIB_ITEM("InTimeExcds", ICMP_MIB_INTIMEEXCDS), - SNMP_MIB_ITEM("InParmProbs", ICMP_MIB_INPARMPROBS), - SNMP_MIB_ITEM("InSrcQuenchs", ICMP_MIB_INSRCQUENCHS), - SNMP_MIB_ITEM("InRedirects", ICMP_MIB_INREDIRECTS), - SNMP_MIB_ITEM("InEchos", ICMP_MIB_INECHOS), - SNMP_MIB_ITEM("InEchoReps", ICMP_MIB_INECHOREPS), - SNMP_MIB_ITEM("InTimestamps", ICMP_MIB_INTIMESTAMPS), - SNMP_MIB_ITEM("InTimestampReps", ICMP_MIB_INTIMESTAMPREPS), - SNMP_MIB_ITEM("InAddrMasks", ICMP_MIB_INADDRMASKS), - SNMP_MIB_ITEM("InAddrMaskReps", ICMP_MIB_INADDRMASKREPS), - SNMP_MIB_ITEM("OutMsgs", ICMP_MIB_OUTMSGS), - SNMP_MIB_ITEM("OutErrors", ICMP_MIB_OUTERRORS), - SNMP_MIB_ITEM("OutDestUnreachs", ICMP_MIB_OUTDESTUNREACHS), - SNMP_MIB_ITEM("OutTimeExcds", ICMP_MIB_OUTTIMEEXCDS), - SNMP_MIB_ITEM("OutParmProbs", ICMP_MIB_OUTPARMPROBS), - SNMP_MIB_ITEM("OutSrcQuenchs", ICMP_MIB_OUTSRCQUENCHS), - SNMP_MIB_ITEM("OutRedirects", ICMP_MIB_OUTREDIRECTS), - SNMP_MIB_ITEM("OutEchos", ICMP_MIB_OUTECHOS), - SNMP_MIB_ITEM("OutEchoReps", ICMP_MIB_OUTECHOREPS), - SNMP_MIB_ITEM("OutTimestamps", ICMP_MIB_OUTTIMESTAMPS), - SNMP_MIB_ITEM("OutTimestampReps", ICMP_MIB_OUTTIMESTAMPREPS), - SNMP_MIB_ITEM("OutAddrMasks", ICMP_MIB_OUTADDRMASKS), - SNMP_MIB_ITEM("OutAddrMaskReps", ICMP_MIB_OUTADDRMASKREPS), - SNMP_MIB_SENTINEL +static struct { + char *name; + int index; +} icmpmibmap[] = { + { "DestUnreachs", ICMP_DEST_UNREACH }, + { "TimeExcds", ICMP_TIME_EXCEEDED }, + { "ParmProbs", ICMP_PARAMETERPROB }, + { "SrcQuenchs", ICMP_SOURCE_QUENCH }, + { "Redirects", ICMP_REDIRECT }, + { "Echos", ICMP_ECHO }, + { "EchoReps", ICMP_ECHOREPLY }, + { "Timestamps", ICMP_TIMESTAMP }, + { "TimestampReps", ICMP_TIMESTAMPREPLY }, + { "AddrMasks", ICMP_ADDRESS }, + { "AddrMaskReps", ICMP_ADDRESSREPLY }, + { NULL, 0 } }; + static const struct snmp_mib snmp4_tcp_list[] = { SNMP_MIB_ITEM("RtoAlgorithm", TCP_MIB_RTOALGORITHM), SNMP_MIB_ITEM("RtoMin", TCP_MIB_RTOMIN), @@ -244,9 +234,79 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER), SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED), SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES), + SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD), + SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), + SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), + SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_SENTINEL }; +static void icmpmsg_put(struct seq_file *seq) +{ +#define PERLINE 16 + + int j, i, count; + static int out[PERLINE]; + + count = 0; + for (i = 0; i < ICMPMSG_MIB_MAX; i++) { + + if (snmp_fold_field((void **) icmpmsg_statistics, i)) + out[count++] = i; + if (count < PERLINE) + continue; + + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < PERLINE; ++j) + seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In", + i & 0xff); + seq_printf(seq, "\nIcmpMsg: "); + for (j = 0; j < PERLINE; ++j) + seq_printf(seq, " %lu", + snmp_fold_field((void **) icmpmsg_statistics, + out[j])); + seq_putc(seq, '\n'); + } + if (count) { + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" : + "In", out[j] & 0xff); + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %lu", snmp_fold_field((void **) + icmpmsg_statistics, out[j])); + } + +#undef PERLINE +} + +static void icmp_put(struct seq_file *seq) +{ + int i; + + seq_puts(seq, "\nIcmp: InMsgs InErrors"); + for (i=0; icmpmibmap[i].name != NULL; i++) + seq_printf(seq, " In%s", icmpmibmap[i].name); + seq_printf(seq, " OutMsgs OutErrors"); + for (i=0; icmpmibmap[i].name != NULL; i++) + seq_printf(seq, " Out%s", icmpmibmap[i].name); + seq_printf(seq, "\nIcmp: %lu %lu", + snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS)); + for (i=0; icmpmibmap[i].name != NULL; i++) + seq_printf(seq, " %lu", + snmp_fold_field((void **) icmpmsg_statistics, + icmpmibmap[i].index)); + seq_printf(seq, " %lu %lu", + snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS)); + for (i=0; icmpmibmap[i].name != NULL; i++) + seq_printf(seq, " %lu", + snmp_fold_field((void **) icmpmsg_statistics, + icmpmibmap[i].index)); +} + /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ @@ -267,15 +327,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v) snmp_fold_field((void **)ip_statistics, snmp4_ipstats_list[i].entry)); - seq_puts(seq, "\nIcmp:"); - for (i = 0; snmp4_icmp_list[i].name != NULL; i++) - seq_printf(seq, " %s", snmp4_icmp_list[i].name); - - seq_puts(seq, "\nIcmp:"); - for (i = 0; snmp4_icmp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field((void **)icmp_statistics, - snmp4_icmp_list[i].entry)); + icmp_put(seq); /* RFC 2011 compatibility */ + icmpmsg_put(seq); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) @@ -332,6 +385,8 @@ static const struct file_operations snmp_seq_fops = { .release = single_release, }; + + /* * Output /proc/net/netstat */ @@ -380,20 +435,20 @@ int __init ip_misc_proc_init(void) { int rc = 0; - if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops)) + if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) goto out_netstat; - if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops)) + if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) goto out_snmp; - if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops)) + if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops)) goto out_sockstat; out: return rc; out_sockstat: - proc_net_remove("snmp"); + proc_net_remove(&init_net, "snmp"); out_snmp: - proc_net_remove("netstat"); + proc_net_remove(&init_net, "netstat"); out_netstat: rc = -ENOMEM; goto out; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c6d71526f62..3916faca3af 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -59,6 +59,7 @@ #include <linux/in_route.h> #include <linux/route.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/dst.h> #include <net/sock.h> #include <linux/gfp.h> @@ -313,6 +314,9 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } + if (iph->protocol == IPPROTO_ICMP) + icmp_out_count(((struct icmphdr *) + skb_transport_header(skb))->type); err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); @@ -898,24 +902,8 @@ static const struct seq_operations raw_seq_ops = { static int raw_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct raw_iter_state *s; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - rc = seq_open(file, &raw_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &raw_seq_ops, + sizeof(struct raw_iter_state)); } static const struct file_operations raw_seq_fops = { @@ -928,13 +916,13 @@ static const struct file_operations raw_seq_fops = { int __init raw_proc_init(void) { - if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops)) + if (!proc_net_fops_create(&init_net, "raw", S_IRUGO, &raw_seq_fops)) return -ENOMEM; return 0; } void __init raw_proc_exit(void) { - proc_net_remove("raw"); + proc_net_remove(&init_net, "raw"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c7ca94bd152..21b12de9e65 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -81,6 +81,7 @@ #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/init.h> +#include <linux/workqueue.h> #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> @@ -91,6 +92,7 @@ #include <linux/jhash.h> #include <linux/rcupdate.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <net/protocol.h> #include <net/ip.h> #include <net/route.h> @@ -135,7 +137,8 @@ static unsigned long rt_deadline; #define RTprint(a...) printk(KERN_DEBUG a) static struct timer_list rt_flush_timer; -static struct timer_list rt_periodic_timer; +static void rt_check_expire(struct work_struct *work); +static DECLARE_DELAYED_WORK(expires_work, rt_check_expire); static struct timer_list rt_secret_timer; /* @@ -243,7 +246,7 @@ static spinlock_t *rt_hash_locks; static struct rt_hash_bucket *rt_hash_table; static unsigned rt_hash_mask; -static int rt_hash_log; +static unsigned int rt_hash_log; static unsigned int rt_hash_rnd; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); @@ -372,23 +375,8 @@ static const struct seq_operations rt_cache_seq_ops = { static int rt_cache_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct rt_cache_iter_state *s; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - rc = seq_open(file, &rt_cache_seq_ops); - if (rc) - goto out_kfree; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &rt_cache_seq_ops, + sizeof(struct rt_cache_iter_state)); } static const struct file_operations rt_cache_seq_fops = { @@ -571,33 +559,32 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) (fl1->iif ^ fl2->iif)) == 0; } -/* This runs via a timer and thus is always in BH context. */ -static void rt_check_expire(unsigned long dummy) +static void rt_check_expire(struct work_struct *work) { static unsigned int rover; unsigned int i = rover, goal; struct rtable *rth, **rthp; - unsigned long now = jiffies; u64 mult; mult = ((u64)ip_rt_gc_interval) << rt_hash_log; if (ip_rt_gc_timeout > 1) do_div(mult, ip_rt_gc_timeout); goal = (unsigned int)mult; - if (goal > rt_hash_mask) goal = rt_hash_mask + 1; + if (goal > rt_hash_mask) + goal = rt_hash_mask + 1; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; - if (*rthp == 0) + if (*rthp == NULL) continue; - spin_lock(rt_hash_lock_addr(i)); + spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ - if (time_before_eq(now, rth->u.dst.expires)) { + if (time_before_eq(jiffies, rth->u.dst.expires)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; continue; @@ -612,14 +599,10 @@ static void rt_check_expire(unsigned long dummy) *rthp = rth->u.dst.rt_next; rt_free(rth); } - spin_unlock(rt_hash_lock_addr(i)); - - /* Fallback loop breaker. */ - if (time_after(jiffies, now)) - break; + spin_unlock_bh(rt_hash_lock_addr(i)); } rover = i; - mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval); + schedule_delayed_work(&expires_work, ip_rt_gc_interval); } /* This can run from both BH and non-BH contexts, the latter @@ -1404,8 +1387,8 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rtable *rt = (struct rtable *) dst; struct in_device *idev = rt->idev; - if (dev != &loopback_dev && idev && idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + if (dev != init_net.loopback_dev && idev && idev->dev == dev) { + struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev); if (loopback_idev) { rt->idev = loopback_idev; in_dev_put(idev); @@ -1557,7 +1540,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = init_net.loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->fl.oif = 0; @@ -1814,7 +1797,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { int result; result = fib_validate_source(saddr, daddr, tos, - loopback_dev.ifindex, + init_net.loopback_dev->ifindex, dev, &spec_dst, &itag); if (result < 0) goto martian_source; @@ -1881,7 +1864,7 @@ local_input: #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = init_net.loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->rt_gateway = daddr; @@ -2151,7 +2134,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) RT_SCOPE_UNIVERSE), } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = init_net.loopback_dev->ifindex, .oif = oldflp->oif }; struct fib_result res; unsigned flags = 0; @@ -2212,7 +2195,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(&init_net, oldflp->oif); err = -ENODEV; if (dev_out == NULL) goto out; @@ -2245,9 +2228,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); - fl.oif = loopback_dev.ifindex; + fl.oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2292,7 +2275,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) fl.fl4_src = fl.fl4_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = init_net.loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) @@ -2591,7 +2574,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(&init_net, iif); if (dev == NULL) { err = -ENODEV; goto errout_free; @@ -2992,17 +2975,14 @@ int __init ip_rt_init(void) init_timer(&rt_flush_timer); rt_flush_timer.function = rt_run_flush; - init_timer(&rt_periodic_timer); - rt_periodic_timer.function = rt_check_expire; init_timer(&rt_secret_timer); rt_secret_timer.function = rt_secret_rebuild; /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ - rt_periodic_timer.expires = jiffies + net_random() % ip_rt_gc_interval + - ip_rt_gc_interval; - add_timer(&rt_periodic_timer); + schedule_delayed_work(&expires_work, + net_random() % ip_rt_gc_interval + ip_rt_gc_interval); rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + ip_rt_secret_interval; @@ -3011,15 +2991,15 @@ int __init ip_rt_init(void) #ifdef CONFIG_PROC_FS { struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ - if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || + if (!proc_net_fops_create(&init_net, "rt_cache", S_IRUGO, &rt_cache_seq_fops) || !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, - proc_net_stat))) { + init_net.proc_net_stat))) { return -ENOMEM; } rtstat_pde->proc_fops = &rt_cpu_seq_fops; } #ifdef CONFIG_NET_CLS_ROUTE - create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); + create_proc_read_entry("rt_acct", 0, init_net.proc_net, ip_rt_acct_read, NULL); #endif #endif #ifdef CONFIG_XFRM diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 53ef0f4bbda..ffddd2b4535 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -12,12 +12,14 @@ #include <linux/sysctl.h> #include <linux/igmp.h> #include <linux/inetdevice.h> +#include <linux/seqlock.h> #include <net/snmp.h> #include <net/icmp.h> #include <net/ip.h> #include <net/route.h> #include <net/tcp.h> #include <net/cipso_ipv4.h> +#include <net/inet_frag.h> /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -89,6 +91,74 @@ static int ipv4_sysctl_forward_strategy(ctl_table *table, return 1; } +extern seqlock_t sysctl_port_range_lock; +extern int sysctl_local_port_range[2]; + +/* Update system visible IP port range */ +static void set_local_port_range(int range[2]) +{ + write_seqlock(&sysctl_port_range_lock); + sysctl_local_port_range[0] = range[0]; + sysctl_local_port_range[1] = range[1]; + write_sequnlock(&sysctl_port_range_lock); +} + +/* Validate changes from /proc interface. */ +static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + int range[2] = { sysctl_local_port_range[0], + sysctl_local_port_range[1] }; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &ip_local_port_range_min, + .extra2 = &ip_local_port_range_max, + }; + + ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + + if (write && ret == 0) { + if (range[1] < range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + + return ret; +} + +/* Validate changes from sysctl interface. */ +static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + int ret; + int range[2] = { sysctl_local_port_range[0], + sysctl_local_port_range[1] }; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &ip_local_port_range_min, + .extra2 = &ip_local_port_range_max, + }; + + ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); + if (ret == 0 && newval && newlen) { + if (range[1] < range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + return ret; +} + + static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -288,7 +358,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", - .data = &sysctl_ipfrag_high_thresh, + .data = &ip4_frags_ctl.high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -296,7 +366,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, .procname = "ipfrag_low_thresh", - .data = &sysctl_ipfrag_low_thresh, + .data = &ip4_frags_ctl.low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -312,7 +382,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_TIME, .procname = "ipfrag_time", - .data = &sysctl_ipfrag_time, + .data = &ip4_frags_ctl.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -427,10 +497,8 @@ ctl_table ipv4_table[] = { .data = &sysctl_local_port_range, .maxlen = sizeof(sysctl_local_port_range), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = ip_local_port_range_min, - .extra2 = ip_local_port_range_max + .proc_handler = &ipv4_local_port_range, + .strategy = &ipv4_sysctl_local_port_range, }, { .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, @@ -665,14 +733,13 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", - .data = &sysctl_ipfrag_secret_interval, + .data = &ip4_frags_ctl.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies }, { - .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, .procname = "ipfrag_max_dist", .data = &sysctl_ipfrag_max_dist, .maxlen = sizeof(int), @@ -797,7 +864,6 @@ ctl_table ipv4_table[] = { }, #endif /* CONFIG_NETLABEL */ { - .ctl_name = NET_TCP_AVAIL_CONG_CONTROL, .procname = "tcp_available_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0444, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7e740112b23..2e6ad6dbba6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -247,6 +247,7 @@ * TCP_CLOSE socket is finished */ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -1333,7 +1334,7 @@ do_prequeue: if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", - current->comm, current->pid); + current->comm, task_pid_nr(current)); peek_seq = tp->copied_seq; } continue; @@ -2014,7 +2015,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (tp->rx_opt.tstamp_ok) info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) info->tcpi_options |= TCPI_OPT_SACK; if (tp->rx_opt.wscale_ok) { info->tcpi_options |= TCPI_OPT_WSCALE; @@ -2030,8 +2031,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; + if (sk->sk_state == TCP_LISTEN) { + info->tcpi_unacked = sk->sk_ack_backlog; + info->tcpi_sacked = sk->sk_max_ack_backlog; + } else { + info->tcpi_unacked = tp->packets_out; + info->tcpi_sacked = tp->sacked_out; + } info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; @@ -2210,7 +2216,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) goto out; mss = skb_shinfo(skb)->gso_size; - skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); segs = NULL; goto out; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 4586211e375..5dba0fc8f57 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -210,7 +210,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) { const struct inet_connection_sock *icsk = inet_csk(sk); - if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 485d7ea35f7..80bd084a9f9 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -314,7 +314,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) struct bictcp *ca = inet_csk_ca(sk); u32 delay; - if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 57c5f0b10e6..2fbcc7d1b1a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -25,11 +25,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; - if (sk->sk_state == TCP_LISTEN) + if (sk->sk_state == TCP_LISTEN) { r->idiag_rqueue = sk->sk_ack_backlog; - else + r->idiag_wqueue = sk->sk_max_ack_backlog; + } else { r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->idiag_wqueue = tp->write_seq - tp->snd_una; + r->idiag_wqueue = tp->write_seq - tp->snd_una; + } if (info != NULL) tcp_get_info(sk, info); } @@ -54,3 +56,4 @@ static void __exit tcp_diag_exit(void) module_init(tcp_diag_init); module_exit(tcp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f893e90061e..69d8c38ccd3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,7 +85,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 2; int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_frto __read_mostly; +int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_nometrics_save __read_mostly; @@ -103,7 +103,8 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ -#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained DSACK info */ +#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ +#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -111,13 +112,10 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) -#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0) -#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) -#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) - #define IsSackFrto() (sysctl_tcp_frto == 0x2) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) +#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) /* Adapt the MSS value used to make delayed ack decision to the * real world. @@ -198,6 +196,55 @@ static inline int tcp_in_quickack_mode(const struct sock *sk) return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; } +static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) +{ + if (tp->ecn_flags&TCP_ECN_OK) + tp->ecn_flags |= TCP_ECN_QUEUE_CWR; +} + +static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (tcp_hdr(skb)->cwr) + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; +} + +static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) +{ + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; +} + +static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (tp->ecn_flags&TCP_ECN_OK) { + if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + /* Funny extension: if ECT is not set on a segment, + * it is surely retransmit. It is not in ECN RFC, + * but Linux follows this rule. */ + else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) + tcp_enter_quickack_mode((struct sock *)tp); + } +} + +static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) +{ + if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr)) + tp->ecn_flags &= ~TCP_ECN_OK; +} + +static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) +{ + if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr)) + tp->ecn_flags &= ~TCP_ECN_OK; +} + +static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) +{ + if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK)) + return 1; + return 0; +} + /* Buffer size and advertised window tuning. * * 1. Tuning sk->sk_sndbuf, when connection enters established state. @@ -810,6 +857,21 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) } } +/* + * Packet counting of FACK is based on in-order assumptions, therefore TCP + * disables it when reordering is detected + */ +static void tcp_disable_fack(struct tcp_sock *tp) +{ + tp->rx_opt.sack_ok &= ~2; +} + +/* Take a notice that peer is sending D-SACKs */ +static void tcp_dsack_seen(struct tcp_sock *tp) +{ + tp->rx_opt.sack_ok |= 4; +} + /* Initialize metrics on socket. */ static void tcp_init_metrics(struct sock *sk) @@ -831,7 +893,7 @@ static void tcp_init_metrics(struct sock *sk) } if (dst_metric(dst, RTAX_REORDERING) && tp->reordering != dst_metric(dst, RTAX_REORDERING)) { - tp->rx_opt.sack_ok &= ~2; + tcp_disable_fack(tp); tp->reordering = dst_metric(dst, RTAX_REORDERING); } @@ -893,9 +955,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric, /* This exciting event is worth to be remembered. 8) */ if (ts) NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER); - else if (IsReno(tp)) + else if (tcp_is_reno(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER); - else if (IsFack(tp)) + else if (tcp_is_fack(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); @@ -907,8 +969,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif - /* Disable FACK yet. */ - tp->rx_opt.sack_ok &= ~2; + tcp_disable_fack(tp); } } @@ -959,7 +1020,216 @@ static void tcp_update_reordering(struct sock *sk, const int metric, * for retransmitted and already SACKed segment -> reordering.. * Both of these heuristics are not used in Loss state, when we cannot * account for retransmits accurately. + * + * SACK block validation. + * ---------------------- + * + * SACK block range validation checks that the received SACK block fits to + * the expected sequence limits, i.e., it is between SND.UNA and SND.NXT. + * Note that SND.UNA is not included to the range though being valid because + * it means that the receiver is rather inconsistent with itself reporting + * SACK reneging when it should advance SND.UNA. Such SACK block this is + * perfectly valid, however, in light of RFC2018 which explicitly states + * that "SACK block MUST reflect the newest segment. Even if the newest + * segment is going to be discarded ...", not that it looks very clever + * in case of head skb. Due to potentional receiver driven attacks, we + * choose to avoid immediate execution of a walk in write queue due to + * reneging and defer head skb's loss recovery to standard loss recovery + * procedure that will eventually trigger (nothing forbids us doing this). + * + * Implements also blockage to start_seq wrap-around. Problem lies in the + * fact that though start_seq (s) is before end_seq (i.e., not reversed), + * there's no guarantee that it will be before snd_nxt (n). The problem + * happens when start_seq resides between end_seq wrap (e_w) and snd_nxt + * wrap (s_w): + * + * <- outs wnd -> <- wrapzone -> + * u e n u_w e_w s n_w + * | | | | | | | + * |<------------+------+----- TCP seqno space --------------+---------->| + * ...-- <2^31 ->| |<--------... + * ...---- >2^31 ------>| |<--------... + * + * Current code wouldn't be vulnerable but it's better still to discard such + * crazy SACK blocks. Doing this check for start_seq alone closes somewhat + * similar case (end_seq after snd_nxt wrap) as earlier reversed check in + * snd_nxt wrap -> snd_una region will then become "well defined", i.e., + * equal to the ideal case (infinite seqno space without wrap caused issues). + * + * With D-SACK the lower bound is extended to cover sequence space below + * SND.UNA down to undo_marker, which is the last point of interest. Yet + * again, D-SACK block must not to go across snd_una (for the same reason as + * for the normal SACK blocks, explained above). But there all simplicity + * ends, TCP might receive valid D-SACKs below that. As long as they reside + * fully below undo_marker they do not affect behavior in anyway and can + * therefore be safely ignored. In rare cases (which are more or less + * theoretical ones), the D-SACK will nicely cross that boundary due to skb + * fragmentation and packet reordering past skb's retransmission. To consider + * them correctly, the acceptable range must be extended even more though + * the exact amount is rather hard to quantify. However, tp->max_window can + * be used as an exaggerated estimate. + */ +static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, + u32 start_seq, u32 end_seq) +{ + /* Too far in future, or reversed (interpretation is ambiguous) */ + if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) + return 0; + + /* Nasty start_seq wrap-around check (see comments above) */ + if (!before(start_seq, tp->snd_nxt)) + return 0; + + /* In outstanding window? ...This is valid exit for D-SACKs too. + * start_seq == snd_una is non-sensical (see comments above) + */ + if (after(start_seq, tp->snd_una)) + return 1; + + if (!is_dsack || !tp->undo_marker) + return 0; + + /* ...Then it's D-SACK, and must reside below snd_una completely */ + if (!after(end_seq, tp->snd_una)) + return 0; + + if (!before(start_seq, tp->undo_marker)) + return 1; + + /* Too old */ + if (!after(end_seq, tp->undo_marker)) + return 0; + + /* Undo_marker boundary crossing (overestimates a lot). Known already: + * start_seq < undo_marker and end_seq >= undo_marker. + */ + return !before(start_seq, end_seq - tp->max_window); +} + +/* Check for lost retransmit. This superb idea is borrowed from "ratehalving". + * Event "C". Later note: FACK people cheated me again 8), we have to account + * for reordering! Ugly, but should help. + * + * Search retransmitted skbs from write_queue that were sent when snd_nxt was + * less than what is now known to be received by the other end (derived from + * SACK blocks by the caller). Also calculate the lowest snd_nxt among the + * remaining retransmitted skbs to avoid some costly processing per ACKs. */ +static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int flag = 0; + int cnt = 0; + u32 new_low_seq = tp->snd_nxt; + + tcp_for_write_queue(skb, sk) { + u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; + + if (skb == tcp_send_head(sk)) + break; + if (cnt == tp->retrans_out) + break; + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + continue; + + if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) + continue; + + if (after(received_upto, ack_seq) && + (tcp_is_fack(tp) || + !before(received_upto, + ack_seq + tp->reordering * tp->mss_cache))) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + + /* clear lost hint */ + tp->retransmit_skb_hint = NULL; + + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + flag |= FLAG_DATA_SACKED; + NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); + } + } else { + if (before(ack_seq, new_low_seq)) + new_low_seq = ack_seq; + cnt += tcp_skb_pcount(skb); + } + } + + if (tp->retrans_out) + tp->lost_retrans_low = new_low_seq; + + return flag; +} + +static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, + struct tcp_sack_block_wire *sp, int num_sacks, + u32 prior_snd_una) +{ + u32 start_seq_0 = ntohl(get_unaligned(&sp[0].start_seq)); + u32 end_seq_0 = ntohl(get_unaligned(&sp[0].end_seq)); + int dup_sack = 0; + + if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { + dup_sack = 1; + tcp_dsack_seen(tp); + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); + } else if (num_sacks > 1) { + u32 end_seq_1 = ntohl(get_unaligned(&sp[1].end_seq)); + u32 start_seq_1 = ntohl(get_unaligned(&sp[1].start_seq)); + + if (!after(end_seq_0, end_seq_1) && + !before(start_seq_0, start_seq_1)) { + dup_sack = 1; + tcp_dsack_seen(tp); + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); + } + } + + /* D-SACK for already forgotten data... Do dumb counting. */ + if (dup_sack && + !after(end_seq_0, prior_snd_una) && + after(end_seq_0, tp->undo_marker)) + tp->undo_retrans--; + + return dup_sack; +} + +/* Check if skb is fully within the SACK block. In presence of GSO skbs, + * the incoming SACK may not exactly match but we can find smaller MSS + * aligned portion of it that matches. Therefore we might need to fragment + * which may fail and creates some hassle (caller must handle error case + * returns). + */ +static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, + u32 start_seq, u32 end_seq) +{ + int in_sack, err; + unsigned int pkt_len; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + + if (tcp_skb_pcount(skb) > 1 && !in_sack && + after(TCP_SKB_CB(skb)->end_seq, start_seq)) { + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); + + if (!in_sack) + pkt_len = start_seq - TCP_SKB_CB(skb)->seq; + else + pkt_len = end_seq - TCP_SKB_CB(skb)->seq; + err = tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size); + if (err < 0) + return err; + } + + return in_sack; +} + static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { @@ -972,38 +1242,25 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; - u32 lost_retrans = 0; + u32 highest_sack_end_seq = tp->lost_retrans_low; int flag = 0; int found_dup_sack = 0; int cached_fack_count; int i; int first_sack_index; + int force_one_sack; - if (!tp->sacked_out) - tp->fackets_out = 0; + if (!tp->sacked_out) { + if (WARN_ON(tp->fackets_out)) + tp->fackets_out = 0; + tp->highest_sack = tp->snd_una; + } prior_fackets = tp->fackets_out; - /* Check for D-SACK. */ - if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { + found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, + num_sacks, prior_snd_una); + if (found_dup_sack) flag |= FLAG_DSACKING_ACK; - found_dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); - } else if (num_sacks > 1 && - !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && - !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { - flag |= FLAG_DSACKING_ACK; - found_dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); - } - - /* D-SACK for already forgotten data... - * Do dumb counting. */ - if (found_dup_sack && - !after(ntohl(sp[0].end_seq), prior_snd_una) && - after(ntohl(sp[0].end_seq), tp->undo_marker)) - tp->undo_retrans--; /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -1016,18 +1273,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ * if the only SACK change is the increase of the end_seq of * the first block then only apply that SACK block * and use retrans queue hinting otherwise slowpath */ - flag = 1; + force_one_sack = 1; for (i = 0; i < num_sacks; i++) { __be32 start_seq = sp[i].start_seq; __be32 end_seq = sp[i].end_seq; if (i == 0) { if (tp->recv_sack_cache[i].start_seq != start_seq) - flag = 0; + force_one_sack = 0; } else { if ((tp->recv_sack_cache[i].start_seq != start_seq) || (tp->recv_sack_cache[i].end_seq != end_seq)) - flag = 0; + force_one_sack = 0; } tp->recv_sack_cache[i].start_seq = start_seq; tp->recv_sack_cache[i].end_seq = end_seq; @@ -1039,7 +1296,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } first_sack_index = 0; - if (flag) + if (force_one_sack) num_sacks = 1; else { int j; @@ -1065,9 +1322,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } } - /* clear flag as used for different purpose in following code */ - flag = 0; - /* Use SACK fastpath hint if valid */ cached_skb = tp->fastpath_skb_hint; cached_fack_count = tp->fastpath_cnt_hint; @@ -1083,6 +1337,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int fack_count; int dup_sack = (found_dup_sack && (i == first_sack_index)); + if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) { + if (dup_sack) { + if (!tp->undo_marker) + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); + else + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); + } else { + /* Don't count olds caused by ACK reordering */ + if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && + !after(end_seq, tp->snd_una)) + continue; + NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); + } + continue; + } + skb = cached_skb; fack_count = cached_fack_count; @@ -1091,7 +1361,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ flag |= FLAG_DATA_LOST; tcp_for_write_queue_from(skb, sk) { - int in_sack, pcount; + int in_sack; u8 sacked; if (skb == tcp_send_head(sk)) @@ -1110,30 +1380,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; - in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && - !before(end_seq, TCP_SKB_CB(skb)->end_seq); - - pcount = tcp_skb_pcount(skb); - - if (pcount > 1 && !in_sack && - after(TCP_SKB_CB(skb)->end_seq, start_seq)) { - unsigned int pkt_len; - - in_sack = !after(start_seq, - TCP_SKB_CB(skb)->seq); - - if (!in_sack) - pkt_len = (start_seq - - TCP_SKB_CB(skb)->seq); - else - pkt_len = (end_seq - - TCP_SKB_CB(skb)->seq); - if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size)) - break; - pcount = tcp_skb_pcount(skb); - } + in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); + if (in_sack < 0) + break; - fack_count += pcount; + fack_count += tcp_skb_pcount(skb); sacked = TCP_SKB_CB(skb)->sacked; @@ -1160,11 +1411,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ continue; } - if ((sacked&TCPCB_SACKED_RETRANS) && - after(end_seq, TCP_SKB_CB(skb)->ack_seq) && - (!lost_retrans || after(end_seq, lost_retrans))) - lost_retrans = end_seq; - if (!in_sack) continue; @@ -1217,6 +1463,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; + + if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) { + tp->highest_sack = TCP_SKB_CB(skb)->seq; + highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq; + } } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); @@ -1236,45 +1487,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } } - /* Check for lost retransmit. This superb idea is - * borrowed from "ratehalving". Event "C". - * Later note: FACK people cheated me again 8), - * we have to account for reordering! Ugly, - * but should help. - */ - if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { - struct sk_buff *skb; - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) - break; - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - continue; - if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) && - after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) && - (IsFack(tp) || - !before(lost_retrans, - TCP_SKB_CB(skb)->ack_seq + tp->reordering * - tp->mss_cache))) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; - - if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out += tcp_skb_pcount(skb); - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - flag |= FLAG_DATA_SACKED; - NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); - } - } - } - } + if (tp->retrans_out && + after(highest_sack_end_seq, tp->lost_retrans_low) && + icsk->icsk_ca_state == TCP_CA_Recovery) + flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq); - tp->left_out = tp->sacked_out + tp->lost_out; + tcp_verify_left_out(tp); if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) @@ -1289,6 +1507,56 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ return flag; } +/* If we receive more dupacks than we expected counting segments + * in assumption of absent reordering, interpret this as reordering. + * The only another reason could be bug in receiver TCP. + */ +static void tcp_check_reno_reordering(struct sock *sk, const int addend) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 holes; + + holes = max(tp->lost_out, 1U); + holes = min(holes, tp->packets_out); + + if ((tp->sacked_out + holes) > tp->packets_out) { + tp->sacked_out = tp->packets_out - holes; + tcp_update_reordering(sk, tp->packets_out + addend, 0); + } +} + +/* Emulate SACKs for SACKless connection: account for a new dupack. */ + +static void tcp_add_reno_sack(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + tp->sacked_out++; + tcp_check_reno_reordering(sk, 0); + tcp_verify_left_out(tp); +} + +/* Account for ACK, ACKing some data in Reno Recovery phase. */ + +static void tcp_remove_reno_sacks(struct sock *sk, int acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (acked > 0) { + /* One ACK acked hole. The rest eat duplicate ACKs. */ + if (acked-1 >= tp->sacked_out) + tp->sacked_out = 0; + else + tp->sacked_out -= acked-1; + } + tcp_check_reno_reordering(sk, acked); + tcp_verify_left_out(tp); +} + +static inline void tcp_reset_reno_sack(struct tcp_sock *tp) +{ + tp->sacked_out = 0; +} + /* F-RTO can only be used if TCP has never retransmitted anything other than * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) */ @@ -1345,7 +1613,7 @@ void tcp_enter_frto(struct sock *sk) !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); /* Our state is too optimistic in ssthresh() call because cwnd - * is not reduced until tcp_enter_frto_loss() when previous FRTO + * is not reduced until tcp_enter_frto_loss() when previous F-RTO * recovery has not yet completed. Pattern would be this: RTO, * Cumulative ACK, RTO (2xRTO for the same segment does not end * up here twice). @@ -1376,11 +1644,13 @@ void tcp_enter_frto(struct sock *sk) tp->undo_retrans = 0; skb = tcp_write_queue_head(sk); + if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) + tp->undo_marker = 0; if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Earlier loss recovery underway (see RFC4138; Appendix B). * The last condition is necessary at least in tp->frto_counter case. @@ -1405,17 +1675,15 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt = 0; - tp->sacked_out = 0; tp->lost_out = 0; - tp->fackets_out = 0; tp->retrans_out = 0; + if (tcp_is_reno(tp)) + tcp_reset_reno_sack(tp); tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; - cnt += tcp_skb_pcount(skb); /* * Count the retransmission made on RTO correctly (only when * waiting for the first ACK and did not get it)... @@ -1427,30 +1695,25 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) /* ...enter this if branch just for the first segment */ flag |= FLAG_DATA_ACKED; } else { + if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) + tp->undo_marker = 0; TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); } - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { - /* Do not mark those segments lost that were - * forward transmitted after RTO - */ - if (!after(TCP_SKB_CB(skb)->end_seq, - tp->frto_highmark)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - } - } else { - tp->sacked_out += tcp_skb_pcount(skb); - tp->fackets_out = cnt; + /* Don't lost mark skbs that were fwd transmitted after RTO */ + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) && + !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out += tcp_skb_pcount(skb); } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tp->undo_marker = 0; tp->frto_counter = 0; + tp->bytes_acked = 0; tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); @@ -1458,22 +1721,26 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->high_seq = tp->frto_highmark; TCP_ECN_queue_cwr(tp); - clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); } -void tcp_clear_retrans(struct tcp_sock *tp) +static void tcp_clear_retrans_partial(struct tcp_sock *tp) { - tp->left_out = 0; tp->retrans_out = 0; - - tp->fackets_out = 0; - tp->sacked_out = 0; tp->lost_out = 0; tp->undo_marker = 0; tp->undo_retrans = 0; } +void tcp_clear_retrans(struct tcp_sock *tp) +{ + tcp_clear_retrans_partial(tp); + + tp->fackets_out = 0; + tp->sacked_out = 0; +} + /* Enter Loss state. If "how" is not zero, forget all SACK information * and reset tags completely, otherwise preserve SACKs. If receiver * dropped its ofo queue, we will know this due to reneging detection. @@ -1483,7 +1750,6 @@ void tcp_enter_loss(struct sock *sk, int how) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt = 0; /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || @@ -1497,17 +1763,26 @@ void tcp_enter_loss(struct sock *sk, int how) tp->snd_cwnd_stamp = tcp_time_stamp; tp->bytes_acked = 0; - tcp_clear_retrans(tp); + tcp_clear_retrans_partial(tp); - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - if (!how) + if (tcp_is_reno(tp)) + tcp_reset_reno_sack(tp); + + if (!how) { + /* Push undo marker, if it was plain RTO and nothing + * was retransmitted. */ tp->undo_marker = tp->snd_una; + tcp_clear_retrans_hints_partial(tp); + } else { + tp->sacked_out = 0; + tp->fackets_out = 0; + tcp_clear_all_retrans_hints(tp); + } tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; - cnt += tcp_skb_pcount(skb); + if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) tp->undo_marker = 0; TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; @@ -1515,22 +1790,17 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - } else { - tp->sacked_out += tcp_skb_pcount(skb); - tp->fackets_out = cnt; } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - /* Abort FRTO algorithm if one is in progress */ + /* Abort F-RTO algorithm if one is in progress */ tp->frto_counter = 0; - - clear_all_retrans_hints(tp); } static int tcp_check_sack_reneging(struct sock *sk) @@ -1560,7 +1830,7 @@ static int tcp_check_sack_reneging(struct sock *sk) static inline int tcp_fackets_out(struct tcp_sock *tp) { - return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; + return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out; } static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) @@ -1674,7 +1944,7 @@ static int tcp_time_to_recover(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; - /* Do not perform any recovery during FRTO algorithm */ + /* Do not perform any recovery during F-RTO algorithm */ if (tp->frto_counter) return 0; @@ -1708,60 +1978,22 @@ static int tcp_time_to_recover(struct sock *sk) return 0; } -/* If we receive more dupacks than we expected counting segments - * in assumption of absent reordering, interpret this as reordering. - * The only another reason could be bug in receiver TCP. +/* RFC: This is from the original, I doubt that this is necessary at all: + * clear xmit_retrans hint if seq of this skb is beyond hint. How could we + * retransmitted past LOST markings in the first place? I'm not fully sure + * about undo and end of connection cases, which can cause R without L? */ -static void tcp_check_reno_reordering(struct sock *sk, const int addend) +static void tcp_verify_retransmit_hint(struct tcp_sock *tp, + struct sk_buff *skb) { - struct tcp_sock *tp = tcp_sk(sk); - u32 holes; - - holes = max(tp->lost_out, 1U); - holes = min(holes, tp->packets_out); - - if ((tp->sacked_out + holes) > tp->packets_out) { - tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(sk, tp->packets_out + addend, 0); - } -} - -/* Emulate SACKs for SACKless connection: account for a new dupack. */ - -static void tcp_add_reno_sack(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - tp->sacked_out++; - tcp_check_reno_reordering(sk, 0); - tcp_sync_left_out(tp); -} - -/* Account for ACK, ACKing some data in Reno Recovery phase. */ - -static void tcp_remove_reno_sacks(struct sock *sk, int acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (acked > 0) { - /* One ACK acked hole. The rest eat duplicate ACKs. */ - if (acked-1 >= tp->sacked_out) - tp->sacked_out = 0; - else - tp->sacked_out -= acked-1; - } - tcp_check_reno_reordering(sk, acked); - tcp_sync_left_out(tp); -} - -static inline void tcp_reset_reno_sack(struct tcp_sock *tp) -{ - tp->sacked_out = 0; - tp->left_out = tp->lost_out; + if ((tp->retransmit_skb_hint != NULL) && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + tp->retransmit_skb_hint = NULL; } /* Mark head of queue up as lost. */ -static void tcp_mark_head_lost(struct sock *sk, - int packets, u32 high_seq) +static void tcp_mark_head_lost(struct sock *sk, int packets) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1784,22 +2016,15 @@ static void tcp_mark_head_lost(struct sock *sk, tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; cnt += tcp_skb_pcount(skb); - if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) break; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - - /* clear xmit_retransmit_queue hints - * if this is beyond hint */ - if (tp->retransmit_skb_hint != NULL && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - tp->retransmit_skb_hint = NULL; - + tcp_verify_retransmit_hint(tp, skb); } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); } /* Account newly detected lost packet(s) */ @@ -1808,13 +2033,13 @@ static void tcp_update_scoreboard(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (IsFack(tp)) { + if (tcp_is_fack(tp)) { int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; - tcp_mark_head_lost(sk, lost, tp->high_seq); + tcp_mark_head_lost(sk, lost); } else { - tcp_mark_head_lost(sk, 1, tp->high_seq); + tcp_mark_head_lost(sk, 1); } /* New heuristics: it is possible only after we switched @@ -1822,7 +2047,7 @@ static void tcp_update_scoreboard(struct sock *sk) * Hence, we can detect timed out packets during fast * retransmit without falling to slow start. */ - if (!IsReno(tp) && tcp_head_timedout(sk)) { + if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) { struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint @@ -1837,19 +2062,13 @@ static void tcp_update_scoreboard(struct sock *sk) if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - - /* clear xmit_retrans hint */ - if (tp->retransmit_skb_hint && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - - tp->retransmit_skb_hint = NULL; + tcp_verify_retransmit_hint(tp, skb); } } tp->scoreboard_skb_hint = skb; - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); } } @@ -1880,7 +2099,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag) int decr = tp->snd_cwnd_cnt + 1; if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) || - (IsReno(tp) && !(flag&FLAG_NOT_DUP))) { + (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) { tp->snd_cwnd_cnt = decr&1; decr >>= 1; @@ -1913,7 +2132,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, NIPQUAD(inet->daddr), ntohs(inet->dport), - tp->snd_cwnd, tp->left_out, + tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } @@ -1945,7 +2164,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) /* There is something screwy going on with the retrans hints after an undo */ - clear_all_retrans_hints(tp); + tcp_clear_all_retrans_hints(tp); } static inline int tcp_may_undo(struct tcp_sock *tp) @@ -1971,7 +2190,7 @@ static int tcp_try_undo_recovery(struct sock *sk) NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); tp->undo_marker = 0; } - if (tp->snd_una == tp->high_seq && IsReno(tp)) { + if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */ @@ -2001,7 +2220,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) { struct tcp_sock *tp = tcp_sk(sk); /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = IsReno(tp) || tp->fackets_out>tp->reordering; + int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering; if (tcp_may_undo(tp)) { /* Plain luck! Hole if filled with delayed @@ -2038,16 +2257,15 @@ static int tcp_try_undo_loss(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } - clear_all_retrans_hints(tp); + tcp_clear_all_retrans_hints(tp); DBGUNDO(sk, "partial loss"); tp->lost_out = 0; - tp->left_out = tp->sacked_out; tcp_undo_cwr(sk, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; - if (!IsReno(tp)) + if (tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return 1; } @@ -2066,7 +2284,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); if (tp->retrans_out == 0) tp->retrans_stamp = 0; @@ -2077,7 +2295,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; - if (tp->left_out || tp->retrans_out || tp->undo_marker) + if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) state = TCP_CA_Disorder; if (inet_csk(sk)->icsk_ca_state != state) { @@ -2130,7 +2348,7 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) * tcp_xmit_retransmit_queue(). */ static void -tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) +tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -2142,8 +2360,8 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) * 1. Reno does not count dupacks (sacked_out) automatically. */ if (!tp->packets_out) tp->sacked_out = 0; - /* 2. SACK counts snd_fack in packets inaccurately. */ - if (tp->sacked_out == 0) + + if (WARN_ON(!tp->sacked_out && tp->fackets_out)) tp->fackets_out = 0; /* Now state machine starts. @@ -2160,12 +2378,12 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) before(tp->snd_una, tp->high_seq) && icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq); + tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } - /* D. Synchronize left_out to current state. */ - tcp_sync_left_out(tp); + /* D. Check consistency of the current state. */ + tcp_verify_left_out(tp); /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ @@ -2194,14 +2412,14 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) if (!tp->undo_marker || /* For SACK case do not Open to allow to undo * catching for all duplicate ACKs. */ - IsReno(tp) || tp->snd_una != tp->high_seq) { + tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { tp->undo_marker = 0; tcp_set_ca_state(sk, TCP_CA_Open); } break; case TCP_CA_Recovery: - if (IsReno(tp)) + if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); if (tcp_try_undo_recovery(sk)) return; @@ -2214,14 +2432,10 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (!(flag & FLAG_SND_UNA_ADVANCED)) { - if (IsReno(tp) && is_dupack) + if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); - } else { - int acked = prior_packets - tp->packets_out; - if (IsReno(tp)) - tcp_remove_reno_sacks(sk, acked); - do_lost = tcp_try_undo_partial(sk, acked); - } + } else + do_lost = tcp_try_undo_partial(sk, pkts_acked); break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) @@ -2235,7 +2449,7 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) return; /* Loss is undone; fall through to processing in Open state. */ default: - if (IsReno(tp)) { + if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); if (is_dupack) @@ -2263,7 +2477,7 @@ tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) /* Otherwise enter Recovery state */ - if (IsReno(tp)) + if (tcp_is_reno(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY); @@ -2361,8 +2575,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, /* Restart timer after forward progress on connection. * RFC2988 recommends to restart timer to now+rto. */ - -static void tcp_ack_packets_out(struct sock *sk) +static void tcp_rearm_rto(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2373,158 +2586,143 @@ static void tcp_ack_packets_out(struct sock *sk) } } -static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, - __u32 now, __s32 *seq_rtt) +/* If we get here, the whole TSO packet has not been acked. */ +static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - __u32 seq = tp->snd_una; - __u32 packets_acked; - int acked = 0; + u32 packets_acked; - /* If we get here, the whole TSO packet has not been - * acked. - */ - BUG_ON(!after(scb->end_seq, seq)); + BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)); packets_acked = tcp_skb_pcount(skb); - if (tcp_trim_head(sk, skb, seq - scb->seq)) + if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return 0; packets_acked -= tcp_skb_pcount(skb); if (packets_acked) { - __u8 sacked = scb->sacked; - - acked |= FLAG_DATA_ACKED; - if (sacked) { - if (sacked & TCPCB_RETRANS) { - if (sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= packets_acked; - acked |= FLAG_RETRANS_DATA_ACKED; - *seq_rtt = -1; - } else if (*seq_rtt < 0) - *seq_rtt = now - scb->when; - if (sacked & TCPCB_SACKED_ACKED) - tp->sacked_out -= packets_acked; - if (sacked & TCPCB_LOST) - tp->lost_out -= packets_acked; - if (sacked & TCPCB_URG) { - if (tp->urg_mode && - !before(seq, tp->snd_up)) - tp->urg_mode = 0; - } - } else if (*seq_rtt < 0) - *seq_rtt = now - scb->when; - - if (tp->fackets_out) { - __u32 dval = min(tp->fackets_out, packets_acked); - tp->fackets_out -= dval; - } - /* hint's skb might be NULL but we don't need to care */ - tp->fastpath_cnt_hint -= min_t(u32, packets_acked, - tp->fastpath_cnt_hint); - tp->packets_out -= packets_acked; - BUG_ON(tcp_skb_pcount(skb) == 0); - BUG_ON(!before(scb->seq, scb->end_seq)); + BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)); } - return acked; + return packets_acked; } -/* Remove acknowledged frames from the retransmission queue. */ -static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) +/* Remove acknowledged frames from the retransmission queue. If our packet + * is before the ack sequence we can discard it as it's confirmed to have + * arrived at the other end. + */ +static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; - __u32 now = tcp_time_stamp; - int acked = 0; + u32 now = tcp_time_stamp; + int fully_acked = 1; + int flag = 0; int prior_packets = tp->packets_out; - __s32 seq_rtt = -1; + s32 seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); - while ((skb = tcp_write_queue_head(sk)) && - skb != tcp_send_head(sk)) { + while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - __u8 sacked = scb->sacked; + u32 end_seq; + u32 packets_acked; + u8 sacked = scb->sacked; - /* If our packet is before the ack sequence we can - * discard it as it's confirmed to have arrived at - * the other end. - */ if (after(scb->end_seq, tp->snd_una)) { - if (tcp_skb_pcount(skb) > 1 && - after(tp->snd_una, scb->seq)) - acked |= tcp_tso_acked(sk, skb, - now, &seq_rtt); - break; - } + if (tcp_skb_pcount(skb) == 1 || + !after(tp->snd_una, scb->seq)) + break; - /* Initial outgoing SYN's get put onto the write_queue - * just like anything else we transmit. It is not - * true data, and if we misinform our callers that - * this ACK acks real data, we will erroneously exit - * connection startup slow start one packet too - * quickly. This is severely frowned upon behavior. - */ - if (!(scb->flags & TCPCB_FLAG_SYN)) { - acked |= FLAG_DATA_ACKED; + packets_acked = tcp_tso_acked(sk, skb); + if (!packets_acked) + break; + + fully_acked = 0; + end_seq = tp->snd_una; } else { - acked |= FLAG_SYN_ACKED; - tp->retrans_stamp = 0; + packets_acked = tcp_skb_pcount(skb); + end_seq = scb->end_seq; } /* MTU probing checks */ - if (icsk->icsk_mtup.probe_size) { - if (!after(tp->mtu_probe.probe_seq_end, TCP_SKB_CB(skb)->end_seq)) { - tcp_mtup_probe_success(sk, skb); - } + if (fully_acked && icsk->icsk_mtup.probe_size && + !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) { + tcp_mtup_probe_success(sk, skb); } if (sacked) { if (sacked & TCPCB_RETRANS) { if (sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= tcp_skb_pcount(skb); - acked |= FLAG_RETRANS_DATA_ACKED; + tp->retrans_out -= packets_acked; + flag |= FLAG_RETRANS_DATA_ACKED; seq_rtt = -1; + if ((flag & FLAG_DATA_ACKED) || + (packets_acked > 1)) + flag |= FLAG_NONHEAD_RETRANS_ACKED; } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - last_ackt = skb->tstamp; + if (fully_acked) + last_ackt = skb->tstamp; } + if (sacked & TCPCB_SACKED_ACKED) - tp->sacked_out -= tcp_skb_pcount(skb); + tp->sacked_out -= packets_acked; if (sacked & TCPCB_LOST) - tp->lost_out -= tcp_skb_pcount(skb); - if (sacked & TCPCB_URG) { - if (tp->urg_mode && - !before(scb->end_seq, tp->snd_up)) - tp->urg_mode = 0; - } + tp->lost_out -= packets_acked; + + if ((sacked & TCPCB_URG) && tp->urg_mode && + !before(end_seq, tp->snd_up)) + tp->urg_mode = 0; } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - last_ackt = skb->tstamp; + if (fully_acked) + last_ackt = skb->tstamp; + } + tp->packets_out -= packets_acked; + + /* Initial outgoing SYN's get put onto the write_queue + * just like anything else we transmit. It is not + * true data, and if we misinform our callers that + * this ACK acks real data, we will erroneously exit + * connection startup slow start one packet too + * quickly. This is severely frowned upon behavior. + */ + if (!(scb->flags & TCPCB_FLAG_SYN)) { + flag |= FLAG_DATA_ACKED; + } else { + flag |= FLAG_SYN_ACKED; + tp->retrans_stamp = 0; } - tcp_dec_pcount_approx(&tp->fackets_out, skb); - tcp_packets_out_dec(tp, skb); + + if (!fully_acked) + break; + tcp_unlink_write_queue(skb, sk); sk_stream_free_skb(sk, skb); - clear_all_retrans_hints(tp); + tcp_clear_all_retrans_hints(tp); } - if (acked&FLAG_ACKED) { + if (flag & FLAG_ACKED) { u32 pkts_acked = prior_packets - tp->packets_out; const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - tcp_ack_update_rtt(sk, acked, seq_rtt); - tcp_ack_packets_out(sk); + tcp_ack_update_rtt(sk, flag, seq_rtt); + tcp_rearm_rto(sk); + + tp->fackets_out -= min(pkts_acked, tp->fackets_out); + /* hint's skb might be NULL but we don't need to care */ + tp->fastpath_cnt_hint -= min_t(u32, pkts_acked, + tp->fastpath_cnt_hint); + if (tcp_is_reno(tp)) + tcp_remove_reno_sacks(sk, pkts_acked); if (ca_ops->pkts_acked) { s32 rtt_us = -1; /* Is the ACK triggering packet unambiguous? */ - if (!(acked & FLAG_RETRANS_DATA_ACKED)) { + if (!(flag & FLAG_RETRANS_DATA_ACKED)) { /* High resolution needed and available? */ if (ca_ops->flags & TCP_CONG_RTT_STAMP && !ktime_equal(last_ackt, @@ -2543,8 +2741,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) BUG_TRAP((int)tp->sacked_out >= 0); BUG_TRAP((int)tp->lost_out >= 0); BUG_TRAP((int)tp->retrans_out >= 0); - if (!tp->packets_out && tp->rx_opt.sack_ok) { - const struct inet_connection_sock *icsk = inet_csk(sk); + if (!tp->packets_out && tcp_is_sack(tp)) { + icsk = inet_csk(sk); if (tp->lost_out) { printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, icsk->icsk_ca_state); @@ -2563,7 +2761,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } #endif *seq_rtt_p = seq_rtt; - return acked; + return flag; } static void tcp_ack_probe(struct sock *sk) @@ -2658,6 +2856,7 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_cnt = 0; + tp->bytes_acked = 0; TCP_ECN_queue_cwr(tp); tcp_moderate_cwnd(tp); } @@ -2712,18 +2911,22 @@ static int tcp_process_frto(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Duplicate the behavior from Loss state (fastretrans_alert) */ if (flag&FLAG_DATA_ACKED) inet_csk(sk)->icsk_retransmits = 0; + if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || + ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) + tp->undo_marker = 0; + if (!before(tp->snd_una, tp->frto_highmark)) { tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); return 1; } - if (!IsSackFrto() || IsReno(tp)) { + if (!IsSackFrto() || tcp_is_reno(tp)) { /* RFC4138 shortcoming in step 2; should also have case c): * ACK isn't duplicate nor advances window, e.g., opposite dir * data, winupdate @@ -2757,7 +2960,7 @@ static int tcp_process_frto(struct sock *sk, int flag) } if (tp->frto_counter == 1) { - /* Sending of the next skb must be allowed or no FRTO */ + /* Sending of the next skb must be allowed or no F-RTO */ if (!tcp_send_head(sk) || after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tp->snd_una + tp->snd_wnd)) { @@ -2782,6 +2985,8 @@ static int tcp_process_frto(struct sock *sk, int flag) break; } tp->frto_counter = 0; + tp->undo_marker = 0; + NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS); } return 0; } @@ -2862,6 +3067,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, &seq_rtt); + /* Guarantee sacktag reordering detection against wrap-arounds */ + if (before(tp->frto_highmark, tp->snd_una)) + tp->frto_highmark = 0; if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -2870,7 +3078,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight, 0); - tcp_fastretrans_alert(sk, prior_packets, flag); + tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight, 1); @@ -3207,7 +3415,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) * Probably, we should reset in this case. For now drop them. */ __skb_queue_purge(&tp->out_of_order_queue); - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_stream_mem_reclaim(sk); @@ -3237,7 +3445,7 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_se static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) { - if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { if (before(seq, tp->rcv_nxt)) NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); else @@ -3267,7 +3475,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -3583,7 +3791,7 @@ drop: if (!skb_peek(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ - if (tp->rx_opt.sack_ok) { + if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; tp->rx_opt.dsack = 0; tp->rx_opt.eff_sacks = 1; @@ -3648,7 +3856,7 @@ drop: } add_sack: - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_new_ofo_skb(sk, seq, end_seq); } } @@ -3699,7 +3907,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, while (before(start, end)) { struct sk_buff *nskb; - int header = skb_headroom(skb); + unsigned int header = skb_headroom(skb); int copy = SKB_MAX_ORDER(header, 0); /* Too big header? This can happen with IPv6. */ @@ -3837,7 +4045,7 @@ static int tcp_prune_queue(struct sock *sk) * is in a sad state like this, we care only about integrity * of the connection not performance. */ - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_stream_mem_reclaim(sk); } @@ -4538,8 +4746,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } - if (tp->rx_opt.sack_ok && sysctl_tcp_fack) - tp->rx_opt.sack_ok |= 2; + if (tcp_is_sack(tp) && sysctl_tcp_fack) + tcp_enable_fack(tp); tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e089a978e12..ad759f1c377 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -62,6 +62,7 @@ #include <linux/init.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <net/icmp.h> #include <net/inet_hashtables.h> #include <net/tcp.h> @@ -1054,6 +1055,9 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, bp->pad = 0; bp->protocol = protocol; bp->len = htons(tcplen); + + sg_init_table(sg, 4); + sg_set_buf(&sg[block++], bp, sizeof(*bp)); nbytes += sizeof(*bp); @@ -1079,6 +1083,8 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, sg_set_buf(&sg[block++], key->key, key->keylen); nbytes += key->keylen; + sg_mark_end(sg, block); + /* Now store the Hash into the packet */ err = crypto_hash_init(desc); if (err) @@ -2249,7 +2255,7 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo) afinfo->seq_fops->llseek = seq_lseek; afinfo->seq_fops->release = seq_release_private; - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -2261,7 +2267,7 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(&init_net, afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } @@ -2469,6 +2475,5 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock); EXPORT_SYMBOL(tcp_proc_register); EXPORT_SYMBOL(tcp_proc_unregister); #endif -EXPORT_SYMBOL(sysctl_local_port_range); EXPORT_SYMBOL(sysctl_tcp_low_latency); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a12b08fca5a..b61b76847ad 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -368,6 +368,12 @@ void tcp_twsk_destructor(struct sock *sk) EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, + struct request_sock *req) +{ + tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; +} + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -399,7 +405,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; - newtp->left_out = 0; newtp->retrans_out = 0; newtp->sacked_out = 0; newtp->fackets_out = 0; @@ -440,7 +445,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { if (sysctl_tcp_fack) - newtp->rx_opt.sack_ok |= 2; + tcp_enable_fack(newtp); } newtp->window_clamp = req->window_clamp; newtp->rcv_ssthresh = req->rcv_wnd; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 666d8a58d14..324b4207254 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -61,6 +61,18 @@ int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +static inline void tcp_packets_out_inc(struct sock *sk, + const struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + int orig = tp->packets_out; + + tp->packets_out += tcp_skb_pcount(skb); + if (!orig) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); +} + static void update_send_head(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -269,6 +281,56 @@ static u16 tcp_select_window(struct sock *sk) return new_win; } +static inline void TCP_ECN_send_synack(struct tcp_sock *tp, + struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; + if (!(tp->ecn_flags&TCP_ECN_OK)) + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; +} + +static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->ecn_flags = 0; + if (sysctl_tcp_ecn) { + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; + tp->ecn_flags = TCP_ECN_OK; + } +} + +static __inline__ void +TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) +{ + if (inet_rsk(req)->ecn_ok) + th->ece = 1; +} + +static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, + int tcp_header_len) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->ecn_flags & TCP_ECN_OK) { + /* Not-retransmitted data segment: set ECT and inject CWR. */ + if (skb->len != tcp_header_len && + !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { + INET_ECN_xmit(sk); + if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { + tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; + tcp_hdr(skb)->cwr = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + } + } else { + /* ACK or retransmitted segment: clear ECT|CE */ + INET_ECN_dontxmit(sk); + } + if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) + tcp_hdr(skb)->ece = 1; + } +} + static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, __u32 tstamp, __u8 **md5_hash) { @@ -584,16 +646,32 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_type = 0; } else { - unsigned int factor; - - factor = skb->len + (mss_now - 1); - factor /= mss_now; - skb_shinfo(skb)->gso_segs = factor; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; skb_shinfo(skb)->gso_type = sk->sk_gso_type; } } +/* When a modification to fackets out becomes necessary, we need to check + * skb is counted to fackets_out or not. Another important thing is to + * tweak SACK fastpath hint too as it would overwrite all changes unless + * hint is also changed. + */ +static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb, + int decr) +{ + if (!tp->sacked_out || tcp_is_reno(tp)) + return; + + if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq)) + tp->fackets_out -= decr; + + /* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */ + if (tp->fastpath_skb_hint != NULL && + after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) + tp->fastpath_cnt_hint -= decr; +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -609,7 +687,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss BUG_ON(len > skb->len); - clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -634,6 +712,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; + if (tcp_is_sack(tp) && tp->sacked_out && + (TCP_SKB_CB(skb)->seq == tp->highest_sack)) + tp->highest_sack = TCP_SKB_CB(buff)->seq; + /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); @@ -682,32 +764,15 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= diff; - if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) tp->lost_out -= diff; - tp->left_out -= diff; - } - - if (diff > 0) { - /* Adjust Reno SACK estimate. */ - if (!tp->rx_opt.sack_ok) { - tp->sacked_out -= diff; - if ((int)tp->sacked_out < 0) - tp->sacked_out = 0; - tcp_sync_left_out(tp); - } - tp->fackets_out -= diff; - if ((int)tp->fackets_out < 0) - tp->fackets_out = 0; - /* SACK fastpath might overwrite it unless dealt with */ - if (tp->fastpath_skb_hint != NULL && - after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, - TCP_SKB_CB(skb)->seq)) { - tp->fastpath_cnt_hint -= diff; - if ((int)tp->fastpath_cnt_hint < 0) - tp->fastpath_cnt_hint = 0; - } + /* Adjust Reno SACK estimate. */ + if (tcp_is_reno(tp) && diff > 0) { + tcp_dec_pcount_approx_int(&tp->sacked_out, diff); + tcp_verify_left_out(tp); } + tcp_adjust_fackets_out(tp, skb, diff); } /* Link BUFF into the send queue. */ @@ -1654,8 +1719,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); - /* changing transmit queue under us so clear hints */ - clear_all_retrans_hints(tp); + if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out && + (TCP_SKB_CB(next_skb)->seq == tp->highest_sack))) + return; /* Ok. We will be able to collapse the packet. */ tcp_unlink_write_queue(next_skb, sk); @@ -1683,21 +1749,23 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) tp->retrans_out -= tcp_skb_pcount(next_skb); - if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { + if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) tp->lost_out -= tcp_skb_pcount(next_skb); - tp->left_out -= tcp_skb_pcount(next_skb); - } /* Reno case is special. Sigh... */ - if (!tp->rx_opt.sack_ok && tp->sacked_out) { + if (tcp_is_reno(tp) && tp->sacked_out) tcp_dec_pcount_approx(&tp->sacked_out, next_skb); - tp->left_out -= tcp_skb_pcount(next_skb); + + tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb)); + tp->packets_out -= tcp_skb_pcount(next_skb); + + /* changed transmit queue under us so clear hints */ + tcp_clear_retrans_hints_partial(tp); + /* manually tune sacktag skb hint */ + if (tp->fastpath_skb_hint == next_skb) { + tp->fastpath_skb_hint = skb; + tp->fastpath_cnt_hint -= tcp_skb_pcount(skb); } - /* Not quite right: it can be > snd.fack, but - * it is better to underestimate fackets. - */ - tcp_dec_pcount_approx(&tp->fackets_out, next_skb); - tcp_packets_out_dec(tp, next_skb); sk_stream_free_skb(sk, next_skb); } } @@ -1731,12 +1799,12 @@ void tcp_simple_retransmit(struct sock *sk) } } - clear_all_retrans_hints(tp); + tcp_clear_all_retrans_hints(tp); if (!lost) return; - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Don't muck with the congestion window here. * Reason is that we do not increase amount of _data_ @@ -1846,6 +1914,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) printk(KERN_DEBUG "retrans_out leaked.\n"); } #endif + if (!tp->retrans_out) + tp->lost_retrans_low = tp->snd_nxt; TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); @@ -1938,40 +2008,35 @@ void tcp_xmit_retransmit_queue(struct sock *sk) return; /* No forward retransmissions in Reno are possible. */ - if (!tp->rx_opt.sack_ok) + if (tcp_is_reno(tp)) return; /* Yeah, we have to make difficult choice between forward transmission * and retransmission... Both ways have their merits... * * For now we do not retransmit anything, while we have some new - * segments to send. + * segments to send. In the other cases, follow rule 3 for + * NextSeg() specified in RFC3517. */ if (tcp_may_send_now(sk)) return; - if (tp->forward_skb_hint) { + /* If nothing is SACKed, highest_sack in the loop won't be valid */ + if (!tp->sacked_out) + return; + + if (tp->forward_skb_hint) skb = tp->forward_skb_hint; - packet_cnt = tp->forward_cnt_hint; - } else{ + else skb = tcp_write_queue_head(sk); - packet_cnt = 0; - } tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) break; - tp->forward_cnt_hint = packet_cnt; tp->forward_skb_hint = skb; - /* Similar to the retransmit loop above we - * can pretend that the retransmitted SKB - * we send out here will be composed of one - * real MSS sized packet because tcp_retransmit_skb() - * will fragment it if necessary. - */ - if (++packet_cnt > tp->fackets_out) + if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) break; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index b76398d1b45..87dd5bff315 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -26,6 +26,7 @@ #include <linux/module.h> #include <linux/ktime.h> #include <linux/time.h> +#include <net/net_namespace.h> #include <net/tcp.h> @@ -228,7 +229,7 @@ static __init int tcpprobe_init(void) if (!tcp_probe.log) goto err0; - if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops)) goto err0; ret = register_jprobe(&tcp_jprobe); @@ -238,7 +239,7 @@ static __init int tcpprobe_init(void) pr_info("TCP probe registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfree(tcp_probe.log); return ret; @@ -247,7 +248,7 @@ module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&tcp_jprobe); kfree(tcp_probe.log); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e9b151b3a59..d8970ecfcfc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -315,7 +315,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (icsk->icsk_retransmits == 0) { if (icsk->icsk_ca_state == TCP_CA_Disorder || icsk->icsk_ca_state == TCP_CA_Recovery) { - if (tp->rx_opt.sack_ok) { + if (tcp_is_sack(tp)) { if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 69d4bd10f9c..4bc25b46f33 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -98,6 +98,7 @@ #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/icmp.h> #include <net/route.h> #include <net/checksum.h> @@ -113,9 +114,8 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -static int udp_port_rover; - -static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) +static inline int __udp_lib_lport_inuse(__u16 num, + const struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; @@ -132,11 +132,10 @@ static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) * @sk: socket struct in question * @snum: port number to look up * @udptable: hash list table, must be of UDP_HTABLE_SIZE - * @port_rover: pointer to record of last unallocated port * @saddr_comp: AF-dependent comparison of bound local IP addresses */ int __udp_lib_get_port(struct sock *sk, unsigned short snum, - struct hlist_head udptable[], int *port_rover, + struct hlist_head udptable[], int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2 ) ) { @@ -146,49 +145,57 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, int error = 1; write_lock_bh(&udp_hash_lock); - if (snum == 0) { - int best_size_so_far, best, result, i; - - if (*port_rover > sysctl_local_port_range[1] || - *port_rover < sysctl_local_port_range[0]) - *port_rover = sysctl_local_port_range[0]; - best_size_so_far = 32767; - best = result = *port_rover; - for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - int size; - - head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(head)) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] + - ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); + + if (!snum) { + int i, low, high, remaining; + unsigned rover, best, best_size_so_far; + + inet_get_local_port_range(&low, &high); + remaining = (high - low) + 1; + + best_size_so_far = UINT_MAX; + best = rover = net_random() % remaining + low; + + /* 1st pass: look for empty (or shortest) hash chain */ + for (i = 0; i < UDP_HTABLE_SIZE; i++) { + int size = 0; + + head = &udptable[rover & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(head)) goto gotit; - } - size = 0; + sk_for_each(sk2, node, head) { if (++size >= best_size_so_far) goto next; } best_size_so_far = size; - best = result; + best = rover; next: - ; + /* fold back if end of range */ + if (++rover > high) + rover = low + ((rover - low) + & (UDP_HTABLE_SIZE - 1)); + + } - result = best; - for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; - i++, result += UDP_HTABLE_SIZE) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - if (! __udp_lib_lport_inuse(result, udptable)) - break; + + /* 2nd pass: find hole in shortest hash chain */ + rover = best; + for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { + if (! __udp_lib_lport_inuse(rover, udptable)) + goto gotit; + rover += UDP_HTABLE_SIZE; + if (rover > high) + rover = low + ((rover - low) + & (UDP_HTABLE_SIZE - 1)); } - if (i >= (1 << 16) / UDP_HTABLE_SIZE) - goto fail; + + + /* All ports in use! */ + goto fail; + gotit: - *port_rover = snum = result; + snum = rover; } else { head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; @@ -201,6 +208,7 @@ gotit: (*saddr_comp)(sk, sk2) ) goto fail; } + inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { @@ -217,7 +225,7 @@ fail: int udp_get_port(struct sock *sk, unsigned short snum, int (*scmp)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); + return __udp_lib_get_port(sk, snum, udp_hash, scmp); } int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) @@ -1144,7 +1152,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, - skb->dev->ifindex, udptable ); + inet_iif(skb), udptable); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); @@ -1560,7 +1568,7 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo) afinfo->seq_fops->llseek = seq_lseek; afinfo->seq_fops->release = seq_release_private; - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -1572,7 +1580,7 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(&init_net, afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 820a477cfaa..6c55828e41b 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -9,7 +9,7 @@ extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, - struct hlist_head udptable[], int *port_rover, + struct hlist_head udptable[], int (*)(const struct sock*,const struct sock*)); extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index f34fd686a8f..94977205abb 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -16,12 +16,11 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; -static int udplite_port_rover; int udplite_get_port(struct sock *sk, unsigned short p, int (*c)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c); + return __udp_lib_get_port(sk, p, udplite_hash, c); } static int udplite_v4_get_port(struct sock *sk, unsigned short snum) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 2fa10824541..5e95c8a07ef 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -16,19 +16,6 @@ #include <net/ip.h> #include <net/xfrm.h> -static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) -{ - switch (nexthdr) { - case IPPROTO_IPIP: - case IPPROTO_IPV6: - *spi = ip_hdr(skb)->saddr; - *seq = 0; - return 0; - } - - return xfrm_parse_spi(skb, nexthdr, spi, seq); -} - #ifdef CONFIG_NETFILTER static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) { @@ -46,16 +33,19 @@ drop: } #endif -static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) +int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { - __be32 spi, seq; + int err; + __be32 seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); + unsigned int nhoff = offsetof(struct iphdr, protocol); - if (err != 0) + seq = 0; + if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) goto drop; do { @@ -65,7 +55,7 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) goto drop; x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - iph->protocol != IPPROTO_IPV6 ? iph->protocol : IPPROTO_IPIP, AF_INET); + nexthdr, AF_INET); if (x == NULL) goto drop; @@ -82,9 +72,12 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (xfrm_state_check_expire(x)) goto drop_unlock; - if (x->type->input(x, skb)) + nexthdr = x->type->input(x, skb); + if (nexthdr <= 0) goto drop_unlock; + skb_network_header(skb)[nhoff] = nexthdr; + /* only the first xfrm gets the encap type */ encap_type = 0; @@ -98,15 +91,15 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } - err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); + err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) goto drop; } while (!err); @@ -160,6 +153,7 @@ drop: kfree_skb(skb); return 0; } +EXPORT_SYMBOL(xfrm4_rcv_encap); /* If it's a keepalive packet, then just eat it. * If it's an encapsulated packet, then pass it to the @@ -247,11 +241,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, len); skb_reset_transport_header(skb); - /* modify the protocol (it's ESP!) */ - iph->protocol = IPPROTO_ESP; - /* process ESP */ - ret = xfrm4_rcv_encap(skb, encap_type); + ret = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); return ret; drop: @@ -261,7 +252,7 @@ drop: int xfrm4_rcv(struct sk_buff *skb) { - return xfrm4_rcv_encap(skb, 0); + return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); } EXPORT_SYMBOL(xfrm4_rcv); diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index a73e710740c..e42e122414b 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -20,38 +20,33 @@ /* Add encapsulation header. * * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. - * The following fields in it shall be filled in by x->type->output: - * tot_len - * check - * - * On exit, skb->h will be set to the start of the payload to be processed - * by x->type->output and skb->nh will be set to the top IP header. */ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) { + struct ip_beet_phdr *ph; struct iphdr *iph, *top_iph; int hdrlen, optlen; iph = ip_hdr(skb); - skb->transport_header = skb->network_header; hdrlen = 0; optlen = iph->ihl * 4 - sizeof(*iph); if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); - skb_push(skb, x->props.header_len - IPV4_BEET_PHMAXLEN + hdrlen); - skb_reset_network_header(skb); - top_iph = ip_hdr(skb); - skb->transport_header += sizeof(*iph) - hdrlen; + skb_set_network_header(skb, IPV4_BEET_PHMAXLEN - x->props.header_len - + hdrlen); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*iph); + + ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen); + top_iph = ip_hdr(skb); memmove(top_iph, iph, sizeof(*iph)); if (unlikely(optlen)) { - struct ip_beet_phdr *ph; - BUG_ON(optlen < 0); - ph = (struct ip_beet_phdr *)skb_transport_header(skb); ph->padlen = 4 - (optlen & 4); ph->hdrlen = optlen / 8; ph->nexthdr = top_iph->protocol; @@ -119,6 +114,7 @@ static struct xfrm_mode xfrm4_beet_mode = { .output = xfrm4_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm4_beet_init(void) diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 601047161ea..fd840c7d75e 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -17,18 +17,17 @@ * * The IP header will be moved forward to make space for the encapsulation * header. - * - * On exit, skb->h will be set to the start of the payload to be processed - * by x->type->output and skb->nh will be set to the top IP header. */ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); skb->transport_header = skb->network_header + ihl; - skb_push(skb, x->props.header_len); - skb_reset_network_header(skb); + __skb_pull(skb, ihl); memmove(skb_network_header(skb), iph, ihl); return 0; } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 9963700e74c..e4deecba6dd 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -31,13 +31,7 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) /* Add encapsulation header. * - * The top IP header will be constructed per RFC 2401. The following fields - * in it shall be filled in by x->type->output: - * tot_len - * check - * - * On exit, skb->h will be set to the start of the payload to be processed - * by x->type->output and skb->nh will be set to the top IP header. + * The top IP header will be constructed per RFC 2401. */ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -47,10 +41,11 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) int flags; iph = ip_hdr(skb); - skb->transport_header = skb->network_header; - skb_push(skb, x->props.header_len); - skb_reset_network_header(skb); + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*iph); top_iph = ip_hdr(skb); top_iph->ihl = 5; @@ -144,6 +139,7 @@ static struct xfrm_mode xfrm4_tunnel_mode = { .output = xfrm4_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm4_tunnel_init(void) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 44ef208a75c..c4a7156962b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -12,7 +12,6 @@ #include <linux/if_ether.h> #include <linux/kernel.h> #include <linux/skbuff.h> -#include <linux/spinlock.h> #include <linux/netfilter_ipv4.h> #include <net/ip.h> #include <net/xfrm.h> @@ -41,58 +40,32 @@ out: return ret; } -static int xfrm4_output_one(struct sk_buff *skb) +static inline int xfrm4_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; + struct iphdr *iph; int err; - if (skb->ip_summed == CHECKSUM_PARTIAL) { - err = skb_checksum_help(skb); - if (err) - goto error_nolock; - } - - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; } - do { - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; - - err = x->mode->output(x, skb); - if (err) - goto error; - - err = x->type->output(x, skb); - if (err) - goto error; - - x->curlft.bytes += skb->len; - x->curlft.packets++; + err = xfrm_output(skb); + if (err) + goto error_nolock; - spin_unlock_bh(&x->lock); - - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - dst = skb->dst; - x = dst->xfrm; - } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len); + ip_send_check(iph); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; out_exit: return err; -error: - spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); goto out_exit; @@ -105,7 +78,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb) while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL, + err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (unlikely(err != 1)) break; @@ -113,7 +86,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb) if (!skb->dst->xfrm) return dst_output(skb); - err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm4_output_finish2); if (unlikely(err != 1)) break; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 4ff8ed30024..cc86fb110dd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -117,7 +117,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short encap_family = xfrm[i]->props.family; switch (encap_family) { case AF_INET: @@ -151,7 +151,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; x->u.rt.fl = *fl; dst_prev->xfrm = xfrm[i++]; @@ -169,27 +168,17 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv6 module can be unloaded, we should manage reference - * to xfrm6_output in afinfo->output. Miyazawa - * */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - err = -EAFNOSUPPORT; - goto error; - } - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); - if (dst_prev->xfrm->props.family == AF_INET && rt->peer) - atomic_inc(&rt->peer->refcnt); - x->u.rt.peer = rt->peer; + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; + if (rt0->peer) + atomic_inc(&rt0->peer->refcnt); + x->u.rt.peer = rt0->peer; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); - x->u.rt.rt_type = rt->rt_type; + x->u.rt.rt_type = rt0->rt_type; x->u.rt.rt_src = rt0->rt_src; x->u.rt.rt_dst = rt0->rt_dst; - x->u.rt.rt_gateway = rt->rt_gateway; + x->u.rt.rt_gateway = rt0->rt_gateway; x->u.rt.rt_spec_dst = rt0->rt_spec_dst; x->u.rt.idev = rt0->idev; in_dev_hold(rt0->idev); @@ -291,7 +280,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt.idev)) in_dev_put(xdst->u.rt.idev); - if (dst->xfrm && dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer)) + if (likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); xfrm_dst_destroy(xdst); } @@ -306,7 +295,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 93e2c061cdd..13d54a1c333 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -49,6 +49,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, + .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .output = xfrm4_output, diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 9275c79119b..32684519562 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -12,17 +12,13 @@ static int ipip_output(struct xfrm_state *x, struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(skb); - - iph->tot_len = htons(skb->len); - ip_send_check(iph); - + skb_push(skb, -skb_network_offset(skb)); return 0; } static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) { - return 0; + return ip_hdr(skb)->protocol; } static int ipip_init_state(struct xfrm_state *x) @@ -52,20 +48,25 @@ static struct xfrm_type ipip_type = { .output = ipip_output }; +static int xfrm_tunnel_rcv(struct sk_buff *skb) +{ + return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr); +} + static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) { return -ENOENT; } static struct xfrm_tunnel xfrm_tunnel_handler = { - .handler = xfrm4_rcv, + .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, .priority = 2, }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static struct xfrm_tunnel xfrm64_tunnel_handler = { - .handler = xfrm4_rcv, + .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, .priority = 2, }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 45b4c82148a..348bd8d0611 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -62,6 +62,7 @@ #include <linux/notifier.h> #include <linux/string.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> @@ -73,6 +74,7 @@ #include <net/tcp.h> #include <net/ip.h> #include <net/netlink.h> +#include <net/pkt_sched.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> @@ -212,6 +214,12 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +/* Check if a valid qdisc is available */ +static inline int addrconf_qdisc_ok(struct net_device *dev) +{ + return (dev->qdisc != &noop_qdisc); +} + static void addrconf_del_timer(struct inet6_ifaddr *ifp) { if (del_timer(&ifp->timer)) @@ -247,11 +255,6 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { - int err = -ENOMEM; - - if (!idev || !idev->dev) - return -EINVAL; - if (snmp_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) @@ -260,20 +263,26 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) goto err_icmp; + if (snmp_mib_init((void **)idev->stats.icmpv6msg, + sizeof(struct icmpv6msg_mib), + __alignof__(struct icmpv6msg_mib)) < 0) + goto err_icmpmsg; return 0; +err_icmpmsg: + snmp_mib_free((void **)idev->stats.icmpv6); err_icmp: snmp_mib_free((void **)idev->stats.ipv6); err_ip: - return err; + return -ENOMEM; } -static int snmp6_free_dev(struct inet6_dev *idev) +static void snmp6_free_dev(struct inet6_dev *idev) { + snmp_mib_free((void **)idev->stats.icmpv6msg); snmp_mib_free((void **)idev->stats.icmpv6); snmp_mib_free((void **)idev->stats.ipv6); - return 0; } /* Nobody refers to this device, we may destroy it. */ @@ -376,7 +385,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) } #endif - if (netif_running(dev) && netif_carrier_ok(dev)) + if (netif_running(dev) && addrconf_qdisc_ok(dev)) ndev->if_flags |= IF_READY; ipv6_mc_init_dev(ndev); @@ -449,7 +458,7 @@ static void addrconf_forward_change(void) struct inet6_dev *idev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { @@ -911,7 +920,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -1857,7 +1866,7 @@ int addrconf_set_dstaddr(void __user *arg) if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) goto err_exit; - dev = __dev_get_by_index(ireq.ifr6_ifindex); + dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex); err = -ENODEV; if (dev == NULL) @@ -1888,7 +1897,7 @@ int addrconf_set_dstaddr(void __user *arg) if (err == 0) { err = -ENOBUFS; - if ((dev = __dev_get_by_name(p.name)) == NULL) + if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL) goto err_exit; err = dev_open(dev); } @@ -1918,7 +1927,7 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) return -ENODEV; if ((idev = addrconf_add_dev(dev)) == NULL) @@ -1969,7 +1978,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) struct inet6_dev *idev; struct net_device *dev; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) return -ENODEV; if ((idev = __in6_dev_get(dev)) == NULL) @@ -2064,7 +2073,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) return; } - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2220,12 +2229,12 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev) /* first try to inherit the link-local address from the link device */ if (idev->dev->iflink && - (link_dev = __dev_get_by_index(idev->dev->iflink))) { + (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } /* then try to inherit it from any device */ - for_each_netdev(link_dev) { + for_each_netdev(&init_net, link_dev) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -2258,6 +2267,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, int run_pending = 0; int err; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch(event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { @@ -2272,7 +2284,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; if (event == NETDEV_UP) { - if (!netif_carrier_ok(dev)) { + if (!addrconf_qdisc_ok(dev)) { /* device is not ready yet. */ printk(KERN_INFO "ADDRCONF(NETDEV_UP): %s: " @@ -2284,7 +2296,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) idev->if_flags |= IF_READY; } else { - if (!netif_carrier_ok(dev)) { + if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ break; } @@ -2399,7 +2411,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - if (dev == &loopback_dev && how == 1) + if (dev == init_net.loopback_dev && how == 1) how = 0; rt6_ifdown(dev); @@ -2491,9 +2503,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) else ipv6_mc_down(idev); - /* Step 5: netlink notification of this interface */ idev->tstamp = jiffies; - inet6_ifinfo_notify(RTM_DELLINK, idev); /* Shot the device (if unregistered) */ @@ -2797,24 +2807,8 @@ static const struct seq_operations if6_seq_ops = { static int if6_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &if6_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &if6_seq_ops, + sizeof(struct if6_iter_state)); } static const struct file_operations if6_fops = { @@ -2827,14 +2821,14 @@ static const struct file_operations if6_fops = { int __init if6_proc_init(void) { - if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) + if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) return -ENOMEM; return 0; } void if6_proc_exit(void) { - proc_net_remove("if_inet6"); + proc_net_remove(&init_net, "if_inet6"); } #endif /* CONFIG_PROC_FS */ @@ -3080,7 +3074,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) valid_lft = INFINITY_LIFE_TIME; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if (dev == NULL) return -ENODEV; @@ -3264,7 +3258,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -3373,7 +3367,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; @@ -3585,7 +3579,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if ((idev = in6_dev_get(dev)) == NULL) @@ -4203,16 +4197,19 @@ int __init addrconf_init(void) * device and it being up should be removed. */ rtnl_lock(); - if (!ipv6_add_dev(&loopback_dev)) + if (!ipv6_add_dev(init_net.loopback_dev)) err = -ENOMEM; rtnl_unlock(); if (err) return err; - ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_null_entry.u.dst.dev = init_net.loopback_dev; + ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); - ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_prohibit_entry.u.dst.dev = init_net.loopback_dev; + ip6_prohibit_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); + ip6_blk_hole_entry.u.dst.dev = init_net.loopback_dev; + ip6_blk_hole_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif register_netdevice_notifier(&ipv6_dev_notf); @@ -4262,12 +4259,12 @@ void __exit addrconf_cleanup(void) * clean dev list. */ - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (__in6_dev_get(dev) == NULL) continue; addrconf_ifdown(dev, 1); } - addrconf_ifdown(&loopback_dev, 2); + addrconf_ifdown(init_net.loopback_dev, 2); /* * Check hash table. @@ -4293,6 +4290,6 @@ void __exit addrconf_cleanup(void) rtnl_unlock(); #ifdef CONFIG_PROC_FS - proc_net_remove("if_inet6"); + proc_net_remove(&init_net, "if_inet6"); #endif } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b5f96372ad7..1b1caf3aa1c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -81,7 +81,7 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct socket *sock, int protocol) +static int inet6_create(struct net *net, struct socket *sock, int protocol) { struct inet_sock *inet; struct ipv6_pinfo *np; @@ -94,6 +94,9 @@ static int inet6_create(struct socket *sock, int protocol) int try_loading_module = 0; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -159,7 +162,7 @@ lookup_protocol: BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; @@ -299,7 +302,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EINVAL; goto out; } - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -716,6 +719,9 @@ static int __init init_ipv6_mibs(void) if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) goto err_icmp_mib; + if (snmp_mib_init((void **)icmpv6msg_statistics, + sizeof (struct icmpv6msg_mib), __alignof__(struct icmpv6_mib)) < 0) + goto err_icmpmsg_mib; if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib), __alignof__(struct udp_mib)) < 0) goto err_udp_mib; @@ -727,6 +733,8 @@ static int __init init_ipv6_mibs(void) err_udplite_mib: snmp_mib_free((void **)udp_stats_in6); err_udp_mib: + snmp_mib_free((void **)icmpv6msg_statistics); +err_icmpmsg_mib: snmp_mib_free((void **)icmpv6_statistics); err_icmp_mib: snmp_mib_free((void **)ipv6_statistics); @@ -739,6 +747,7 @@ static void cleanup_ipv6_mibs(void) { snmp_mib_free((void **)ipv6_statistics); snmp_mib_free((void **)icmpv6_statistics); + snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); snmp_mib_free((void **)udplite_stats_in6); } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 53f46ab6af7..66a9139d46e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -29,6 +29,7 @@ #include <net/ah.h> #include <linux/crypto.h> #include <linux/pfkeyv2.h> +#include <linux/spinlock.h> #include <linux/string.h> #include <net/icmp.h> #include <net/ipv6.h> @@ -235,11 +236,12 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) char hdrs[0]; } *tmp_ext; - top_iph = (struct ipv6hdr *)skb->data; + skb_push(skb, -skb_network_offset(skb)); + top_iph = ipv6_hdr(skb); top_iph->payload_len = htons(skb->len - sizeof(*top_iph)); - nexthdr = *skb_network_header(skb); - *skb_network_header(skb) = IPPROTO_AH; + nexthdr = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_AH; /* When there are no extension headers, we only need to save the first * 8 bytes of the base IP header. @@ -268,7 +270,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) goto error_free_iph; } - ah = (struct ip_auth_hdr *)skb_transport_header(skb); + ah = ip_auth_hdr(skb); ah->nexthdr = nexthdr; top_iph->priority = 0; @@ -278,19 +280,19 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->hop_limit = 0; ahp = x->data; - ah->hdrlen = (XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + - ahp->icv_trunc_len) >> 2) - 2; + ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(++x->replay.oseq); - xfrm_aevent_doreplay(x); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + + spin_lock_bh(&x->lock); err = ah_mac_digest(ahp, skb, ah->auth_data); - if (err) - goto error_free_iph; memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); + spin_unlock_bh(&x->lock); - err = 0; + if (err) + goto error_free_iph; memcpy(top_iph, tmp_base, sizeof(tmp_base)); if (tmp_ext) { @@ -324,7 +326,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) * There is offset of AH before IPv6 header after the process. */ - struct ipv6_auth_hdr *ah; + struct ip_auth_hdr *ah; struct ipv6hdr *ip6h; struct ah_data *ahp; unsigned char *tmp_hdr = NULL; @@ -342,14 +344,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto out; + skb->ip_summed = CHECKSUM_NONE; + hdr_len = skb->data - skb_network_header(skb); - ah = (struct ipv6_auth_hdr*)skb->data; + ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; - if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) && - ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len)) + if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) goto out; if (!pskb_may_pull(skb, ah_hlen)) @@ -429,10 +433,6 @@ static int ah6_init_state(struct xfrm_state *x) if (!x->aalg) goto error; - /* null auth can use a zero length key */ - if (x->aalg->alg_key_len > 512) - goto error; - if (x->encap) goto error; @@ -440,14 +440,13 @@ static int ah6_init_state(struct xfrm_state *x) if (ahp == NULL) return -ENOMEM; - ahp->key = x->aalg->alg_key; - ahp->key_len = (x->aalg->alg_key_len+7)/8; tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto error; ahp->tfm = tfm; - if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) + if (crypto_hash_setkey(tfm, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; /* @@ -476,9 +475,18 @@ static int ah6_init_state(struct xfrm_state *x) if (!ahp->work_icv) goto error; - x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode == XFRM_MODE_TUNNEL) + x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + break; + default: + goto error; + } x->data = ahp; return 0; @@ -511,6 +519,7 @@ static struct xfrm_type ah6_type = .description = "AH6", .owner = THIS_MODULE, .proto = IPPROTO_AH, + .flags = XFRM_TYPE_REPLAY_PROT, .init_state = ah6_init_state, .destructor = ah6_destroy, .input = ah6_input, diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index b8c533fbdb6..f915c4df982 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -30,6 +30,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> @@ -111,10 +112,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -195,7 +196,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(&init_net, pac->acl_ifindex); if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); dev_put(dev); @@ -223,7 +224,7 @@ void ipv6_sock_ac_close(struct sock *sk) if (pac->acl_ifindex != prev_index) { if (dev) dev_put(dev); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(&init_net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -413,7 +414,7 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) break; read_unlock_bh(&idev->lock); in6_dev_put(idev); - return aca != 0; + return aca != NULL; } return 0; } @@ -428,7 +429,7 @@ int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) if (dev) return ipv6_chk_acast_dev(dev, addr); read_lock(&dev_base_lock); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; @@ -452,7 +453,7 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) struct ac6_iter_state *state = ac6_seq_private(seq); state->idev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -548,24 +549,8 @@ static const struct seq_operations ac6_seq_ops = { static int ac6_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ac6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ac6_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &ac6_seq_ops, + sizeof(struct ac6_iter_state)); } static const struct file_operations ac6_seq_fops = { @@ -578,7 +563,7 @@ static const struct file_operations ac6_seq_fops = { int __init ac6_proc_init(void) { - if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -586,7 +571,7 @@ int __init ac6_proc_init(void) void ac6_proc_exit(void) { - proc_net_remove("anycast6"); + proc_net_remove(&init_net, "anycast6"); } #endif diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fe0f49024a0..2ed689ac449 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -544,7 +544,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, if (!src_info->ipi6_ifindex) return -EINVAL; else { - dev = dev_get_by_index(src_info->ipi6_ifindex); + dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); if (!dev) return -ENODEV; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2db31ce3c7e..ab17b5e6235 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -29,11 +29,12 @@ #include <net/ip.h> #include <net/xfrm.h> #include <net/esp.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/random.h> +#include <linux/spinlock.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -42,8 +43,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - struct ipv6hdr *top_iph; - struct ipv6_esp_hdr *esph; + struct ip_esp_hdr *esph; struct crypto_blkcipher *tfm; struct blkcipher_desc desc; struct sk_buff *trailer; @@ -53,13 +53,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int nfrags; u8 *tail; struct esp_data *esp = x->data; - int hdr_len = (skb_transport_offset(skb) + - sizeof(*esph) + esp->conf.ivlen); - /* Strip IP+ESP header. */ - __skb_pull(skb, hdr_len); - - /* Now skb is pure payload to encrypt */ + /* skb is pure payload to encrypt */ err = -ENOMEM; /* Round to block size */ @@ -88,15 +83,15 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) tail[clen-skb->len - 2] = (clen - skb->len) - 2; pskb_put(skb, trailer, clen - skb->len); - top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len); - esph = (struct ipv6_esp_hdr *)skb_transport_header(skb); - top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph)); - *(skb_tail_pointer(trailer) - 1) = *skb_network_header(skb); - *skb_network_header(skb) = IPPROTO_ESP; + skb_push(skb, -skb_network_offset(skb)); + esph = ip_esp_hdr(skb); + *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; - esph->seq_no = htonl(++x->replay.oseq); - xfrm_aevent_doreplay(x); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + + spin_lock_bh(&x->lock); if (esp->conf.ivlen) { if (unlikely(!esp->conf.ivinitted)) { @@ -112,16 +107,19 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) if (unlikely(nfrags > ESP_NUM_FAST_SG)) { sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); if (!sg) - goto error; + goto unlock; } - skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); + sg_init_table(sg, nfrags); + sg_mark_end(sg, skb_to_sgvec(skb, sg, esph->enc_data + + esp->conf.ivlen - + skb->data, clen)); err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); if (unlikely(err)) - goto error; + goto unlock; if (esp->conf.ivlen) { memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); @@ -134,6 +132,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } +unlock: + spin_unlock_bh(&x->lock); + error: return err; } @@ -141,19 +142,19 @@ error: static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph; - struct ipv6_esp_hdr *esph; + struct ip_esp_hdr *esph; struct esp_data *esp = x->data; struct crypto_blkcipher *tfm = esp->conf.tfm; struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; - int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen; + int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen; int hdr_len = skb_network_header_len(skb); int nfrags; int ret = 0; - if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) { + if (!pskb_may_pull(skb, sizeof(*esph))) { ret = -EINVAL; goto out; } @@ -188,7 +189,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - esph = (struct ipv6_esp_hdr*)skb->data; + esph = (struct ip_esp_hdr *)skb->data; iph = ipv6_hdr(skb); /* Get ivec. This can be wrong, check against another impls. */ @@ -207,7 +208,10 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } } - skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen); + sg_init_table(sg, nfrags); + sg_mark_end(sg, skb_to_sgvec(skb, sg, + sizeof(*esph) + esp->conf.ivlen, + elen)); ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); @@ -259,7 +263,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; - struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset); + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && @@ -297,11 +301,6 @@ static int esp6_init_state(struct xfrm_state *x) struct esp_data *esp = NULL; struct crypto_blkcipher *tfm; - /* null auth and encryption can have zero length keys */ - if (x->aalg) { - if (x->aalg->alg_key_len > 512) - goto error; - } if (x->ealg == NULL) goto error; @@ -316,15 +315,14 @@ static int esp6_init_state(struct xfrm_state *x) struct xfrm_algo_desc *aalg_desc; struct crypto_hash *hash; - esp->auth.key = x->aalg->alg_key; - esp->auth.key_len = (x->aalg->alg_key_len+7)/8; hash = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(hash)) goto error; esp->auth.tfm = hash; - if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) + if (crypto_hash_setkey(hash, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); @@ -346,8 +344,6 @@ static int esp6_init_state(struct xfrm_state *x) if (!esp->auth.work_icv) goto error; } - esp->conf.key = x->ealg->alg_key; - esp->conf.key_len = (x->ealg->alg_key_len+7)/8; tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto error; @@ -360,11 +356,20 @@ static int esp6_init_state(struct xfrm_state *x) goto error; esp->conf.ivinitted = 0; } - if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, + (x->ealg->alg_key_len + 7) / 8)) goto error; - x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; - if (x->props.mode == XFRM_MODE_TUNNEL) + x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + break; + default: + goto error; + } x->data = esp; return 0; @@ -380,6 +385,7 @@ static struct xfrm_type esp6_type = .description = "ESP6", .owner = THIS_MODULE, .proto = IPPROTO_ESP, + .flags = XFRM_TYPE_REPLAY_PROT, .init_state = esp6_init_state, .destructor = esp6_destroy, .get_mtu = esp6_get_mtu, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index c82d4d49f71..1e89efd38a0 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv); struct tlvtype_proc { int type; - int (*func)(struct sk_buff **skbp, int offset); + int (*func)(struct sk_buff *skb, int offset); }; /********************* @@ -111,10 +111,8 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) +static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; - switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; @@ -139,9 +137,8 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct tlvtype_proc *curr; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); @@ -172,13 +169,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skbp, off) == 0) + if (curr->func(skb, off) == 0) return 0; break; } } if (curr->type < 0) { - if (ip6_tlvopt_unknown(skbp, off) == 0) + if (ip6_tlvopt_unknown(skb, off) == 0) return 0; } break; @@ -198,9 +195,8 @@ bad: *****************************/ #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) +static int ipv6_dest_hao(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -234,22 +230,13 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) goto discard; if (skb_cloned(skb)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); - struct inet6_skb_parm *opt2; - - if (skb2 == NULL) + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto discard; - opt2 = IP6CB(skb2); - memcpy(opt2, opt, sizeof(*opt2)); - - kfree_skb(skb); - /* update all variable using below by copied skbuff */ - *skbp = skb = skb2; - hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) + + hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); - ipv6h = ipv6_hdr(skb2); + ipv6h = ipv6_hdr(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -280,9 +267,8 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = { {-1, NULL} }; -static int ipv6_destopt_rcv(struct sk_buff **skbp) +static int ipv6_destopt_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; @@ -304,9 +290,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) #endif dst = dst_clone(skb->dst); - if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { + if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { dst_release(dst); - skb = *skbp; skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -337,10 +322,8 @@ void __init ipv6_destopt_init(void) NONE header. No data in packet. ********************************/ -static int ipv6_nodata_rcv(struct sk_buff **skbp) +static int ipv6_nodata_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; - kfree_skb(skb); return 0; } @@ -360,9 +343,8 @@ void __init ipv6_nodata_init(void) Routing header. ********************************/ -static int ipv6_rthdr_rcv(struct sk_buff **skbp) +static int ipv6_rthdr_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); struct in6_addr *addr = NULL; struct in6_addr daddr; @@ -464,18 +446,14 @@ looped_back: Do not damage packets queued somewhere. */ if (skb_cloned(skb)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); /* the copy is a forwarded packet */ - if (skb2 == NULL) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } - kfree_skb(skb); - *skbp = skb = skb2; - opt = IP6CB(skb2); - hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2); + hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -578,9 +556,8 @@ static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) +static int ipv6_hop_ra(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { @@ -595,9 +572,8 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) +static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; const unsigned char *nh = skb_network_header(skb); u32 pkt_len; @@ -648,9 +624,8 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { { -1, } }; -int ipv6_parse_hopopts(struct sk_buff **skbp) +int ipv6_parse_hopopts(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); /* @@ -667,8 +642,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp) } opt->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { - skb = *skbp; + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 53b3998a486..706622af206 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -50,8 +50,6 @@ static struct fib6_rule local_rule = { }, }; -static LIST_HEAD(fib6_rules); - struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, pol_lookup_t lookup) { @@ -268,14 +266,14 @@ static struct fib_rules_ops fib6_rules_ops = { .nlmsg_payload = fib6_rule_nlmsg_payload, .nlgroup = RTNLGRP_IPV6_RULE, .policy = fib6_rule_policy, - .rules_list = &fib6_rules, + .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list), .owner = THIS_MODULE, }; void __init fib6_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib6_rules); - list_add_tail(&main_rule.common.list, &fib6_rules); + list_add_tail(&local_rule.common.list, &fib6_rules_ops.rules_list); + list_add_tail(&main_rule.common.list, &fib6_rules_ops.rules_list); fib_rules_register(&fib6_rules_ops); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6a6714d154e..9bb031fa1c2 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -69,6 +69,8 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; EXPORT_SYMBOL(icmpv6_statistics); +DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly; +EXPORT_SYMBOL(icmpv6msg_statistics); /* * The ICMP socket(s). This is the most convenient way to flow control @@ -80,7 +82,7 @@ EXPORT_SYMBOL(icmpv6_statistics); static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; #define icmpv6_socket __get_cpu_var(__icmpv6_socket) -static int icmpv6_rcv(struct sk_buff **pskb); +static int icmpv6_rcv(struct sk_buff *skb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, @@ -456,8 +458,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, } err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr)); - if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) - ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH); ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); out_put: @@ -547,9 +547,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) } err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); - out_put: if (likely(idev != NULL)) in6_dev_put(idev); @@ -617,9 +614,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) * Handle icmp messages */ -static int icmpv6_rcv(struct sk_buff **pskb) +static int icmpv6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); struct in6_addr *saddr, *daddr; @@ -656,10 +652,7 @@ static int icmpv6_rcv(struct sk_buff **pskb) type = hdr->icmp6_type; - if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) - ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH); - else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT) - ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST); + ICMP6MSGIN_INC_STATS_BH(idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 116f94a4907..78de42ada84 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -139,6 +139,41 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); +static inline +void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + __ip6_dst_store(sk, dst, daddr, saddr); + +#ifdef CONFIG_XFRM + { + struct rt6_info *rt = (struct rt6_info *)dst; + rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); + } +#endif +} + +static inline +struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) +{ + struct dst_entry *dst; + + dst = __sk_dst_check(sk, cookie); + +#ifdef CONFIG_XFRM + if (dst) { + struct rt6_info *rt = (struct rt6_info *)dst; + if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { + sk->sk_dst_cache = NULL; + dst_release(dst); + dst = NULL; + } + } +#endif + + return dst; +} + int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) { struct sock *sk = skb->sk; @@ -166,7 +201,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) final_p = &final; } - dst = __sk_dst_check(sk, np->dst_cookie); + dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (dst == NULL) { int err = ip6_dst_lookup(sk, &dst, &fl); @@ -186,7 +221,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - __ip6_dst_store(sk, dst, NULL, NULL); + __inet6_csk_dst_store(sk, dst, NULL, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index ae6b0e7eb48..d6f1026f194 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -254,18 +254,18 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, int ret; if (snum == 0) { - const int low = sysctl_local_port_range[0]; - const int high = sysctl_local_port_range[1]; - const int range = high - low; - int i, port; + int i, port, low, high, remaining; static u32 hint; const u32 offset = hint + inet6_sk_port_offset(sk); struct hlist_node *node; struct inet_timewait_sock *tw = NULL; + inet_get_local_port_range(&low, &high); + remaining = (high - low) + 1; + local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; + for (i = 1; i <= remaining; i++) { + port = low + (i + offset) % remaining; head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; spin_lock(&head->lock); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6a612a701ea..946cf389ab9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1313,7 +1313,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) { int res; struct rt6_info *rt; - struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w; + struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { res = c->func(rt, c->arg); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 413a4ebb195..b12cc22e774 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -21,6 +21,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/ipv6.h> @@ -153,8 +154,10 @@ static void ip6_fl_gc(unsigned long dummy) write_unlock(&ip6_fl_lock); } -static int fl_intern(struct ip6_flowlabel *fl, __be32 label) +static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label) { + struct ip6_flowlabel *lfl; + fl->label = label & IPV6_FLOWLABEL_MASK; write_lock_bh(&ip6_fl_lock); @@ -162,12 +165,26 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label) for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; if (fl->label) { - struct ip6_flowlabel *lfl; lfl = __fl_lookup(fl->label); if (lfl == NULL) break; } } + } else { + /* + * we dropper the ip6_fl_lock, so this entry could reappear + * and we need to recheck with it. + * + * OTOH no need to search the active socket first, like it is + * done in ipv6_flowlabel_opt - sock is locked, so new entry + * with the same label can only appear on another sock + */ + lfl = __fl_lookup(fl->label); + if (lfl != NULL) { + atomic_inc(&lfl->users); + write_unlock_bh(&ip6_fl_lock); + return lfl; + } } fl->lastuse = jiffies; @@ -175,7 +192,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label) fl_ht[FL_HASH(fl->label)] = fl; atomic_inc(&fl_size); write_unlock_bh(&ip6_fl_lock); - return 0; + return NULL; } @@ -189,14 +206,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; + read_lock_bh(&ip6_sk_fl_lock); for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { fl->lastuse = jiffies; atomic_inc(&fl->users); + read_unlock_bh(&ip6_sk_fl_lock); return fl; } } + read_unlock_bh(&ip6_sk_fl_lock); return NULL; } @@ -408,6 +428,16 @@ static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) return 0; } +static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, + struct ip6_flowlabel *fl) +{ + write_lock_bh(&ip6_sk_fl_lock); + sfl->fl = fl; + sfl->next = np->ipv6_fl_list; + np->ipv6_fl_list = sfl; + write_unlock_bh(&ip6_sk_fl_lock); +} + int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { int err; @@ -415,7 +445,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; struct ipv6_fl_socklist *sfl, **sflp; - struct ip6_flowlabel *fl; + struct ip6_flowlabel *fl, *fl1 = NULL; + if (optlen < sizeof(freq)) return -EINVAL; @@ -471,8 +502,6 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); if (freq.flr_label) { - struct ip6_flowlabel *fl1 = NULL; - err = -EEXIST; read_lock_bh(&ip6_sk_fl_lock); for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { @@ -491,6 +520,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (fl1 == NULL) fl1 = fl_lookup(freq.flr_label); if (fl1) { +recheck: err = -EEXIST; if (freq.flr_flags&IPV6_FL_F_EXCL) goto release; @@ -512,11 +542,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) fl1->linger = fl->linger; if ((long)(fl->expires - fl1->expires) > 0) fl1->expires = fl->expires; - write_lock_bh(&ip6_sk_fl_lock); - sfl1->fl = fl1; - sfl1->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl1; - write_unlock_bh(&ip6_sk_fl_lock); + fl_link(np, sfl1, fl1); fl_free(fl); return 0; @@ -533,9 +559,9 @@ release: if (sfl1 == NULL || (err = mem_check(sk)) != 0) goto done; - err = fl_intern(fl, freq.flr_label); - if (err) - goto done; + fl1 = fl_intern(fl, freq.flr_label); + if (fl1 != NULL) + goto recheck; if (!freq.flr_label) { if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, @@ -544,9 +570,7 @@ release: } } - sfl1->fl = fl; - sfl1->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl1; + fl_link(np, sfl1, fl); return 0; default: @@ -657,24 +681,8 @@ static const struct seq_operations ip6fl_seq_ops = { static int ip6fl_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct ip6fl_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &ip6fl_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &ip6fl_seq_ops, + sizeof(struct ip6fl_iter_state)); } static const struct file_operations ip6fl_seq_fops = { @@ -690,7 +698,7 @@ static const struct file_operations ip6fl_seq_fops = { void ip6_flowlabel_init(void) { #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); + proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); #endif } @@ -698,6 +706,6 @@ void ip6_flowlabel_cleanup(void) { del_timer(&ip6_fl_gc_timer); #ifdef CONFIG_PROC_FS - proc_net_remove("ip6_flowlabel"); + proc_net_remove(&init_net, "ip6_flowlabel"); #endif } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 30a5cb1b203..fac6f7f9dd7 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -61,6 +61,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt u32 pkt_len; struct inet6_dev *idev; + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); return 0; @@ -86,7 +91,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt * * BTW, when we send a packet for our own local address on a * non-loopback interface (e.g. ethX), it is being delivered - * via the loopback interface (lo) here; skb->dev = &loopback_dev. + * via the loopback interface (lo) here; skb->dev = loopback_dev. * It, however, should be considered as if it is being * arrived via the sending interface (ethX), because of the * nature of scoping architecture. --yoshfuji @@ -120,7 +125,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(&skb) < 0) { + if (ipv6_parse_hopopts(skb) < 0) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return 0; @@ -144,7 +149,7 @@ out: */ -static inline int ip6_input_finish(struct sk_buff *skb) +static int ip6_input_finish(struct sk_buff *skb) { struct inet6_protocol *ipprot; struct sock *raw_sk; @@ -194,7 +199,7 @@ resubmit: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(&skb); + ret = ipprot->handler(skb); if (ret > 0) goto resubmit; else if (ret == 0) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 26de3c0ea31..653fc0a8235 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -70,7 +70,7 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f spin_unlock_bh(&ip6_id_lock); } -static inline int ip6_output_finish(struct sk_buff *skb) +static int ip6_output_finish(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; @@ -171,7 +171,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, u32 mtu; if (opt) { - int head_room; + unsigned int head_room; /* First: exthdrs may take lots of space (~8K for now) MAX_HEADER is not enough. @@ -441,8 +441,10 @@ int ip6_forward(struct sk_buff *skb) /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. + We don't send redirects to frames decapsulated from IPsec. */ - if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) { + if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && + !skb->sp) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; @@ -1397,6 +1399,13 @@ int ip6_push_pending_frames(struct sock *sk) skb->dst = dst_clone(&rt->u.dst); IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + if (proto == IPPROTO_ICMPV6) { + struct inet6_dev *idev = ip6_dst_idev(skb->dst); + + ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); + } + err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (err) { if (err > 0) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ca774d8e3be..5383b33db8c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -235,7 +235,7 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p) int i; for (i = 1; i < IP6_TNL_MAX; i++) { sprintf(name, "ip6tnl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i == IP6_TNL_MAX) @@ -650,7 +650,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(&init_net, p->link); if ((ipv6_addr_is_multicast(&p->laddr) || likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && @@ -786,7 +786,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(&init_net, p->link); if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) printk(KERN_WARNING @@ -838,7 +838,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, struct dst_entry *dst; struct net_device *tdev; int mtu; - int max_headroom = sizeof(struct ipv6hdr); + unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; int err = -1; int pkt_len; @@ -1313,7 +1313,6 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static void ip6_tnl_dev_setup(struct net_device *dev) { - SET_MODULE_OWNER(dev); dev->uninit = ip6_tnl_dev_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ip6_tnl_xmit; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 473f165310e..80ef2a1d39f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -65,8 +65,7 @@ static LIST_HEAD(ipcomp6_tfms_list); static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -ENOMEM; - struct ipv6hdr *iph; - struct ipv6_comp_hdr *ipch; + struct ip_comp_hdr *ipch; int plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; @@ -79,7 +78,6 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; /* Remove ipcomp header and decompress original payload */ - iph = ipv6_hdr(skb); ipch = (void *)skb->data; skb->transport_header = skb->network_header + sizeof(*ipch); __skb_pull(skb, sizeof(*ipch)); @@ -94,12 +92,10 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) tfm = *per_cpu_ptr(ipcd->tfms, cpu); err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - if (err) { - err = -EINVAL; + if (err) goto out_put_cpu; - } - if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) { + if (dlen < (plen + sizeof(*ipch))) { err = -EINVAL; goto out_put_cpu; } @@ -123,17 +119,15 @@ out: static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - struct ipv6hdr *top_iph; - struct ipv6_comp_hdr *ipch; + struct ip_comp_hdr *ipch; struct ipcomp_data *ipcd = x->data; int plen, dlen; u8 *start, *scratch; struct crypto_comp *tfm; int cpu; - int hdr_len = skb_transport_offset(skb); /* check whether datagram len is larger than threshold */ - if ((skb->len - hdr_len) < ipcd->threshold) { + if (skb->len < ipcd->threshold) { goto out_ok; } @@ -141,35 +135,33 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) goto out_ok; /* compression */ - plen = skb->len - hdr_len; + plen = skb->len; dlen = IPCOMP_SCRATCH_SIZE; - start = skb_transport_header(skb); + start = skb->data; cpu = get_cpu(); scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); tfm = *per_cpu_ptr(ipcd->tfms, cpu); err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - if (err || (dlen + sizeof(struct ipv6_comp_hdr)) >= plen) { + if (err || (dlen + sizeof(*ipch)) >= plen) { put_cpu(); goto out_ok; } memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); put_cpu(); - pskb_trim(skb, hdr_len + dlen + sizeof(struct ip_comp_hdr)); + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); /* insert ipcomp header and replace datagram */ - top_iph = (struct ipv6hdr *)skb->data; - - top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); - - ipch = (struct ipv6_comp_hdr *)start; - ipch->nexthdr = *skb_network_header(skb); + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); ipch->flags = 0; ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_network_header(skb) = IPPROTO_COMP; + *skb_mac_header(skb) = IPPROTO_COMP; out_ok: + skb_push(skb, -skb_network_offset(skb)); + return 0; } @@ -178,7 +170,8 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; - struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset); + struct ip_comp_hdr *ipcomph = + (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG) @@ -418,8 +411,15 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + default: + goto error; + } mutex_lock(&ipcomp6_resource_mutex); if (!ipcomp6_alloc_scratches()) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6b038aa72e8..1334fc174bc 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -249,7 +249,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } if (ipv6_only_sock(sk) || - !(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) { + !ipv6_addr_v4mapped(&np->daddr)) { retv = -EADDRNOTAVAIL; break; } @@ -539,12 +539,15 @@ done: case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) goto e_inval; - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) - goto e_inval; - if (__dev_get_by_index(val) == NULL) { - retv = -ENODEV; - break; + if (val) { + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) + goto e_inval; + + if (__dev_get_by_index(&init_net, val) == NULL) { + retv = -ENODEV; + break; + } } np->mcast_oif = val; retv = 0; @@ -663,7 +666,7 @@ done: break; } gsf = kmalloc(optlen,GFP_KERNEL); - if (gsf == 0) { + if (!gsf) { retv = -ENOBUFS; break; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ae9881832a7..331d728c203 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -49,6 +49,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> @@ -214,7 +215,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -265,7 +266,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { + if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) { struct inet6_dev *idev = in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -300,7 +301,7 @@ static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (!dev) return NULL; @@ -331,7 +332,7 @@ void ipv6_sock_mc_close(struct sock *sk) np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(mc_lst->ifindex); + dev = dev_get_by_index(&init_net, mc_lst->ifindex); if (dev) { struct inet6_dev *idev = in6_dev_get(dev); @@ -1406,7 +1407,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) /* we assume size > sizeof(ra) here */ skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err); - if (skb == 0) + if (!skb) return NULL; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -1437,17 +1438,12 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) static inline int mld_dev_queue_xmit2(struct sk_buff *skb) { struct net_device *dev = skb->dev; + unsigned char ha[MAX_ADDR_LEN]; - if (dev->hard_header) { - unsigned char ha[MAX_ADDR_LEN]; - int err; - - ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1); - err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len); - if (err < 0) { - kfree_skb(skb); - return err; - } + ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1); + if (dev_hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len) < 0) { + kfree_skb(skb); + return -EINVAL; } return dev_queue_xmit(skb); } @@ -1478,10 +1474,11 @@ static void mld_sendpack(struct sk_buff *skb) err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, mld_dev_queue_xmit); if (!err) { - ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); + ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); + ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); @@ -1821,10 +1818,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, mld_dev_queue_xmit); if (!err) { - if (type == ICMPV6_MGM_REDUCTION) - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS); - else - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES); + ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); } else @@ -2150,7 +2144,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, /* callers have the socket lock and a write lock on ipv6_sk_mc_lock, * so no other readers or writers of iml or its sflist */ - if (iml->sflist == 0) { + if (!iml->sflist) { /* any-source empty exclude case */ return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); } @@ -2332,7 +2326,7 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); state->idev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -2432,24 +2426,8 @@ static const struct seq_operations igmp6_mc_seq_ops = { static int igmp6_mc_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp6_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &igmp6_mc_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &igmp6_mc_seq_ops, + sizeof(struct igmp6_mc_iter_state)); } static const struct file_operations igmp6_mc_seq_fops = { @@ -2476,7 +2454,7 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) state->idev = NULL; state->im = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2606,24 +2584,8 @@ static const struct seq_operations igmp6_mcf_seq_ops = { static int igmp6_mcf_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp6_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &igmp6_mcf_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &igmp6_mcf_seq_ops, + sizeof(struct igmp6_mcf_iter_state)); } static const struct file_operations igmp6_mcf_seq_fops = { @@ -2658,8 +2620,8 @@ int __init igmp6_init(struct net_proto_family *ops) np->hop_limit = 1; #ifdef CONFIG_PROC_FS - proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops); - proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); + proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops); + proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); #endif return 0; @@ -2671,7 +2633,7 @@ void igmp6_cleanup(void) igmp6_socket = NULL; /* for safety */ #ifdef CONFIG_PROC_FS - proc_net_remove("mcfilter6"); - proc_net_remove("igmp6"); + proc_net_remove(&init_net, "mcfilter6"); + proc_net_remove(&init_net, "igmp6"); #endif } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 8a1399ce38c..7fd841d4101 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -153,11 +153,11 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr; int len; - iph = (struct ipv6hdr *)skb->data; - iph->payload_len = htons(skb->len - sizeof(*iph)); + skb_push(skb, -skb_network_offset(skb)); + iph = ipv6_hdr(skb); - nexthdr = *skb_network_header(skb); - *skb_network_header(skb) = IPPROTO_DSTOPTS; + nexthdr = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_DSTOPTS; dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb); dstopt->nexthdr = nexthdr; @@ -172,7 +172,9 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) len = ((char *)hao - (char *)dstopt) + sizeof(*hao); memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr)); + spin_lock_bh(&x->lock); memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); + spin_unlock_bh(&x->lock); BUG_TRAP(len == x->props.header_len); dstopt->hdrlen = (x->props.header_len >> 3) - 1; @@ -365,11 +367,11 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) struct rt2_hdr *rt2; u8 nexthdr; - iph = (struct ipv6hdr *)skb->data; - iph->payload_len = htons(skb->len - sizeof(*iph)); + skb_push(skb, -skb_network_offset(skb)); + iph = ipv6_hdr(skb); - nexthdr = *skb_network_header(skb); - *skb_network_header(skb) = IPPROTO_ROUTING; + nexthdr = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_ROUTING; rt2 = (struct rt2_hdr *)skb_transport_header(skb); rt2->rt_hdr.nexthdr = nexthdr; @@ -381,7 +383,9 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) BUG_TRAP(rt2->rt_hdr.hdrlen == 2); memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); + spin_lock_bh(&x->lock); memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr)); + spin_unlock_bh(&x->lock); return 0; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 5b596659177..20cfc90d559 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -15,9 +15,10 @@ /* * Changes: * + * Pierre Ynard : export userland ND options + * through netlink (RDNSS support) * Lars Fenneberg : fixed MTU setting on receipt * of an RA. - * * Janos Farkas : kmalloc failure checks * Alexey Kuznetsov : state machine reworked * and moved to net/core. @@ -78,6 +79,9 @@ #include <net/addrconf.h> #include <net/icmp.h> +#include <net/netlink.h> +#include <linux/rtnetlink.h> + #include <net/flow.h> #include <net/ip6_checksum.h> #include <linux/proc_fs.h> @@ -161,6 +165,8 @@ struct ndisc_options { struct nd_opt_hdr *nd_opts_ri; struct nd_opt_hdr *nd_opts_ri_end; #endif + struct nd_opt_hdr *nd_useropts; + struct nd_opt_hdr *nd_useropts_end; }; #define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] @@ -225,6 +231,22 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, return (cur <= end && cur->nd_opt_type == type ? cur : NULL); } +static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) +{ + return (opt->nd_opt_type == ND_OPT_RDNSS); +} + +static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, + struct nd_opt_hdr *end) +{ + if (!cur || !end || cur >= end) + return NULL; + do { + cur = ((void *)cur) + (cur->nd_opt_len << 3); + } while(cur < end && !ndisc_is_useropt(cur)); + return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL); +} + static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, struct ndisc_options *ndopts) { @@ -256,7 +278,7 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, break; case ND_OPT_PREFIX_INFO: ndopts->nd_opts_pi_end = nd_opt; - if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) + if (!ndopts->nd_opt_array[nd_opt->nd_opt_type]) ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; break; #ifdef CONFIG_IPV6_ROUTE_INFO @@ -267,14 +289,21 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, break; #endif default: - /* - * Unknown options must be silently ignored, - * to accommodate future extension to the protocol. - */ - ND_PRINTK2(KERN_NOTICE - "%s(): ignored unsupported option; type=%d, len=%d\n", - __FUNCTION__, - nd_opt->nd_opt_type, nd_opt->nd_opt_len); + if (ndisc_is_useropt(nd_opt)) { + ndopts->nd_useropts_end = nd_opt; + if (!ndopts->nd_useropts) + ndopts->nd_useropts = nd_opt; + } else { + /* + * Unknown options must be silently ignored, + * to accommodate future extension to the + * protocol. + */ + ND_PRINTK2(KERN_NOTICE + "%s(): ignored unsupported option; type=%d, len=%d\n", + __FUNCTION__, + nd_opt->nd_opt_type, nd_opt->nd_opt_len); + } } opt_len -= l; nd_opt = ((void *)nd_opt) + l; @@ -354,7 +383,7 @@ static int ndisc_constructor(struct neighbour *neigh) rcu_read_unlock(); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; - if (dev->hard_header == NULL) { + if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &ndisc_direct_ops; neigh->output = neigh->ops->queue_xmit; @@ -371,7 +400,7 @@ static int ndisc_constructor(struct neighbour *neigh) neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } - if (dev->hard_header_cache) + if (dev->header_ops->cache) neigh->ops = &ndisc_hh_ops; else neigh->ops = &ndisc_generic_ops; @@ -431,7 +460,7 @@ static void __ndisc_send(struct net_device *dev, struct neighbour *neigh, struct in6_addr *daddr, struct in6_addr *saddr, struct icmp6hdr *icmp6h, struct in6_addr *target, - int llinfo, int icmp6_mib_outnd) + int llinfo) { struct flowi fl; struct dst_entry *dst; @@ -441,9 +470,11 @@ static void __ndisc_send(struct net_device *dev, struct inet6_dev *idev; int len; int err; - u8 *opt; + u8 *opt, type; - ndisc_flow_init(&fl, icmp6h->icmp6_type, saddr, daddr, + type = icmp6h->icmp6_type; + + ndisc_flow_init(&fl, type, saddr, daddr, dev->ifindex); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); @@ -504,7 +535,7 @@ static void __ndisc_send(struct net_device *dev, err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { - ICMP6_INC_STATS(idev, icmp6_mib_outnd); + ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); } @@ -542,8 +573,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, __ndisc_send(dev, neigh, daddr, src_addr, &icmp6h, solicited_addr, - inc_opt ? ND_OPT_TARGET_LL_ADDR : 0, - ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS); + inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); } void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, @@ -564,8 +594,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, __ndisc_send(dev, neigh, daddr, saddr, &icmp6h, solicit, - !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0, - ICMP6_MIB_OUTNEIGHBORSOLICITS); + !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0); } void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, @@ -599,8 +628,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, #endif __ndisc_send(dev, NULL, daddr, saddr, &icmp6h, NULL, - send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0, - ICMP6_MIB_OUTROUTERSOLICITS); + send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0); } @@ -808,7 +836,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE); - if (neigh || !dev->hard_header) { + if (neigh || !dev->header_ops) { ndisc_send_na(dev, neigh, saddr, &msg->target, is_router, 1, (ifp != NULL && inc), inc); @@ -985,6 +1013,53 @@ out: in6_dev_put(idev); } +static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) +{ + struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra); + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct nduseroptmsg *ndmsg; + int err; + int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + + (opt->nd_opt_len << 3)); + size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); + + skb = nlmsg_new(msg_size, GFP_ATOMIC); + if (skb == NULL) { + err = -ENOBUFS; + goto errout; + } + + nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); + if (nlh == NULL) { + goto nla_put_failure; + } + + ndmsg = nlmsg_data(nlh); + ndmsg->nduseropt_family = AF_INET6; + ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; + ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; + ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; + + memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); + + NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), + &ipv6_hdr(ra)->saddr); + nlmsg_end(skb, nlh); + + err = rtnl_notify(skb, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); + if (err < 0) + goto errout; + + return; + +nla_put_failure: + nlmsg_free(skb); + err = -EMSGSIZE; +errout: + rtnl_set_sk_err(RTNLGRP_ND_USEROPT, err); +} + static void ndisc_router_discovery(struct sk_buff *skb) { struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); @@ -1217,6 +1292,15 @@ skip_defrtr: } } + if (ndopts.nd_useropts) { + struct nd_opt_hdr *opt; + for (opt = ndopts.nd_useropts; + opt; + opt = ndisc_next_useropt(opt, ndopts.nd_useropts_end)) { + ndisc_ra_useropt(skb, opt); + } + } + if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { ND_PRINTK2(KERN_WARNING "ICMPv6 RA: invalid RA options"); @@ -1455,7 +1539,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS); + ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); } @@ -1525,6 +1609,9 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); @@ -1571,30 +1658,26 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f struct inet6_dev *idev; int ret; - if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME || - ctl->ctl_name == NET_NEIGH_REACHABLE_TIME) + if ((strcmp(ctl->procname, "retrans_time") == 0) || + (strcmp(ctl->procname, "base_reachable_time") == 0)) ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); - switch (ctl->ctl_name) { - case NET_NEIGH_RETRANS_TIME: + if (strcmp(ctl->procname, "retrans_time") == 0) ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - break; - case NET_NEIGH_REACHABLE_TIME: + + else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); - break; - case NET_NEIGH_RETRANS_TIME_MS: - case NET_NEIGH_REACHABLE_TIME_MS: + + else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || + (strcmp(ctl->procname, "base_reacable_time_ms") == 0)) ret = proc_dointvec_ms_jiffies(ctl, write, filp, buffer, lenp, ppos); - break; - default: + else ret = -1; - } if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME || - ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS) + if (ctl->data == &idev->nd_parms->base_reachable_time) idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 38b14961391..b1326c2bf8a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -68,15 +68,15 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info) +static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) { struct ip6_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP6_LOCAL_OUT) { - struct ipv6hdr *iph = ipv6_hdr(*pskb); + struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) - return ip6_route_me_harder(*pskb); + return ip6_route_me_harder(skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 0004db38af6..6413a30d9f6 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -24,6 +24,7 @@ #include <linux/sysctl.h> #include <linux/proc_fs.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/ip6_route.h> @@ -247,10 +248,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) if (entry->info->indev && entry->skb->dev) { pmsg->hw_type = entry->skb->dev->type; - if (entry->skb->dev->hard_header_parse) - pmsg->hw_addrlen = - entry->skb->dev->hard_header_parse(entry->skb, - pmsg->hw_addr); + pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } if (data_len) @@ -334,6 +332,7 @@ static int ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; + int err; struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; if (v->data_len < sizeof(*user_iph)) @@ -346,25 +345,18 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { printk(KERN_WARNING "ip6_queue: OOM " "in mangle, dropping packet\n"); - return -ENOMEM; + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(e->skb, v->data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; @@ -466,7 +458,7 @@ ipq_dev_drop(int ifindex) #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) static inline void -ipq_rcv_skb(struct sk_buff *skb) +__ipq_rcv_skb(struct sk_buff *skb) { int status, type, pid, flags, nlmsglen, skblen; struct nlmsghdr *nlh; @@ -524,19 +516,10 @@ ipq_rcv_skb(struct sk_buff *skb) } static void -ipq_rcv_sk(struct sock *sk, int len) +ipq_rcv_skb(struct sk_buff *skb) { - struct sk_buff *skb; - unsigned int qlen; - mutex_lock(&ipqnl_mutex); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - ipq_rcv_skb(skb); - kfree_skb(skb); - } - + __ipq_rcv_skb(skb); mutex_unlock(&ipqnl_mutex); } @@ -546,6 +529,9 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); @@ -565,7 +551,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -657,14 +643,14 @@ static int __init ip6_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL, - THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, + ipq_rcv_skb, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -685,7 +671,7 @@ static int __init ip6_queue_init(void) cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); @@ -705,7 +691,7 @@ static void __exit ip6_queue_fini(void) unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cd9df02bb85..acaba153793 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -205,7 +205,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) } static unsigned int -ip6t_error(struct sk_buff **pskb, +ip6t_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -350,7 +350,7 @@ static void trace_packet(struct sk_buff *skb, /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ip6t_do_table(struct sk_buff **pskb, +ip6t_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -389,17 +389,17 @@ ip6t_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, + if (ip6_packet_match(skb, indev, outdev, &e->ipv6, &protoff, &offset, &hotdrop)) { struct ip6t_entry_target *t; if (IP6T_MATCH_ITERATE(e, do_match, - *pskb, in, out, + skb, in, out, offset, protoff, &hotdrop) != 0) goto no_match; ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(*pskb)->payload_len) + ntohs(ipv6_hdr(skb)->payload_len) + IPV6_HDR_LEN, 1); @@ -409,8 +409,8 @@ ip6t_do_table(struct sk_buff **pskb, #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* The packet is traced: log it */ - if (unlikely((*pskb)->nf_trace)) - trace_packet(*pskb, hook, in, out, + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, table->name, private, e); #endif /* Standard target? */ @@ -448,7 +448,7 @@ ip6t_do_table(struct sk_buff **pskb, ((struct ip6t_entry *)table_base)->comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index ad4d94310b8..9afc836fd45 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -18,7 +18,7 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); MODULE_LICENSE("GPL"); -static unsigned int ip6t_hl_target(struct sk_buff **pskb, +static unsigned int ip6t_hl_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -29,10 +29,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, const struct ip6t_HL_info *info = targinfo; int new_hl; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; - ip6h = ipv6_hdr(*pskb); + ip6h = ipv6_hdr(skb); switch (info->mode) { case IP6T_HL_SET: diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 6ab99001dcc..7a48c342df4 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -431,7 +431,7 @@ ip6t_log_packet(unsigned int pf, } static unsigned int -ip6t_log_target(struct sk_buff **pskb, +ip6t_log_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -445,8 +445,7 @@ ip6t_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, - loginfo->prefix); + ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix); return XT_CONTINUE; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 2f487cda3b6..1a7d2917545 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -167,12 +167,12 @@ static inline void send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) { if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = &loopback_dev; + skb_in->dev = init_net.loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } -static unsigned int reject6_target(struct sk_buff **pskb, +static unsigned int reject6_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -187,25 +187,25 @@ static unsigned int reject6_target(struct sk_buff **pskb, must return an absolute verdict. --RR */ switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(*pskb, ICMPV6_NOROUTE, hooknum); + send_unreach(skb, ICMPV6_NOROUTE, hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum); + send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum); + send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum); + send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum); + send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(*pskb); + send_reset(skb); break; default: if (net_ratelimit()) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 7e32e2aaf7f..1d26b202bf3 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -60,32 +60,32 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_filter); + return ip6t_do_table(skb, hook, in, out, &packet_filter); } static unsigned int ip6t_local_out_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { #if 0 /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; } #endif - return ip6t_do_table(pskb, hook, in, out, &packet_filter); + return ip6t_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index f0a9efa67fb..a0b6381f1e8 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -68,17 +68,17 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_route_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_mangler); + return ip6t_do_table(skb, hook, in, out, &packet_mangler); } static unsigned int ip6t_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -91,8 +91,8 @@ ip6t_local_hook(unsigned int hook, #if 0 /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; @@ -100,22 +100,22 @@ ip6t_local_hook(unsigned int hook, #endif /* save source/dest address, mark, hoplimit, flowlabel, priority, */ - memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr)); - memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr)); - mark = (*pskb)->mark; - hop_limit = ipv6_hdr(*pskb)->hop_limit; + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; /* flowlabel and prio (includes version, which shouldn't change either */ - flowlabel = *((u_int32_t *)ipv6_hdr(*pskb)); + flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); + ret = ip6t_do_table(skb, hook, in, out, &packet_mangler); if (ret != NF_DROP && ret != NF_STOLEN - && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr)) - || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr)) - || (*pskb)->mark != mark - || ipv6_hdr(*pskb)->hop_limit != hop_limit)) - return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; + && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) + || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) + || skb->mark != mark + || ipv6_hdr(skb)->hop_limit != hop_limit)) + return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; return ret; } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index ec290e4ebdd..8f7109f991e 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -46,12 +46,12 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_raw); + return ip6t_do_table(skb, hook, in, out, &packet_raw); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 3153e15e0f7..ad74bab0504 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -18,6 +18,7 @@ #include <linux/icmp.h> #include <linux/sysctl.h> #include <net/ipv6.h> +#include <net/inet_frag.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack.h> @@ -145,7 +146,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } static unsigned int ipv6_confirm(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -155,12 +156,12 @@ static unsigned int ipv6_confirm(unsigned int hooknum, struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; unsigned int ret, protoff; - unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; - unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; + unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; + unsigned char pnum = ipv6_hdr(skb)->nexthdr; /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) goto out; @@ -172,23 +173,23 @@ static unsigned int ipv6_confirm(unsigned int hooknum, if (!helper) goto out; - protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, - (*pskb)->len - extoff); - if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) { + protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, + skb->len - extoff); + if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { pr_debug("proto header not found\n"); return NF_ACCEPT; } - ret = helper->help(pskb, protoff, ct, ctinfo); + ret = helper->help(skb, protoff, ct, ctinfo); if (ret != NF_ACCEPT) return ret; out: /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(pskb); + return nf_conntrack_confirm(skb); } static unsigned int ipv6_defrag(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -196,17 +197,17 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if ((*pskb)->nfct) + if (skb->nfct) return NF_ACCEPT; - reasm = nf_ct_frag6_gather(*pskb); + reasm = nf_ct_frag6_gather(skb); /* queued */ if (reasm == NULL) return NF_STOLEN; /* error occured or not fragmented */ - if (reasm == *pskb) + if (reasm == skb) return NF_ACCEPT; nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, @@ -216,12 +217,12 @@ static unsigned int ipv6_defrag(unsigned int hooknum, } static unsigned int ipv6_conntrack_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *reasm = (*pskb)->nfct_reasm; + struct sk_buff *reasm = skb->nfct_reasm; /* This packet is fragmented and has reassembled packet. */ if (reasm) { @@ -229,32 +230,32 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, if (!reasm->nfct) { unsigned int ret; - ret = nf_conntrack_in(PF_INET6, hooknum, &reasm); + ret = nf_conntrack_in(PF_INET6, hooknum, reasm); if (ret != NF_ACCEPT) return ret; } nf_conntrack_get(reasm->nfct); - (*pskb)->nfct = reasm->nfct; - (*pskb)->nfctinfo = reasm->nfctinfo; + skb->nfct = reasm->nfct; + skb->nfctinfo = reasm->nfctinfo; return NF_ACCEPT; } - return nf_conntrack_in(PF_INET6, hooknum, pskb); + return nf_conntrack_in(PF_INET6, hooknum, skb); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct ipv6hdr)) { + if (skb->len < sizeof(struct ipv6hdr)) { if (net_ratelimit()) printk("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return ipv6_conntrack_in(hooknum, pskb, in, out, okfn); + return ipv6_conntrack_in(hooknum, skb, in, out, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] = { @@ -305,9 +306,8 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = { #ifdef CONFIG_SYSCTL static ctl_table nf_ct_ipv6_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT, .procname = "nf_conntrack_frag6_timeout", - .data = &nf_ct_frag6_timeout, + .data = &nf_frags_ctl.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -315,7 +315,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_ct_frag6_low_thresh, + .data = &nf_frags_ctl.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -323,7 +323,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_ct_frag6_high_thresh, + .data = &nf_frags_ctl.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -337,36 +337,33 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int ipv6_tuple_to_nfattr(struct sk_buff *skb, +static int ipv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, + NLA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, &tuple->src.u3.ip6); - NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, + NLA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, &tuple->dst.u3.ip6); return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V6_SRC-1] = sizeof(u_int32_t)*4, - [CTA_IP_V6_DST-1] = sizeof(u_int32_t)*4, +static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = { + [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 }, + [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 }, }; -static int ipv6_nfattr_to_tuple(struct nfattr *tb[], +static int ipv6_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t) { - if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1]) + if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) return -EINVAL; - if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) - return -EINVAL; - - memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]), + memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]), sizeof(u_int32_t) * 4); - memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]), + memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]), sizeof(u_int32_t) * 4); return 0; @@ -382,8 +379,9 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .print_conntrack = ipv6_print_conntrack, .get_l4proto = ipv6_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = ipv6_tuple_to_nfattr, - .nfattr_to_tuple = ipv6_nfattr_to_tuple, + .tuple_to_nlattr = ipv6_tuple_to_nlattr, + .nlattr_to_tuple = ipv6_nlattr_to_tuple, + .nla_policy = ipv6_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_path = nf_net_netfilter_sysctl_path, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index ab154fb9001..fd9123f3dc0 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -210,45 +210,42 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int icmpv6_tuple_to_nfattr(struct sk_buff *skb, +static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), + NLA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), &t->src.u.icmp.id); - NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), &t->dst.u.icmp.type); - NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), &t->dst.u.icmp.code); return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMPV6_ID-1] = sizeof(u_int16_t) +static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 }, }; -static int icmpv6_nfattr_to_tuple(struct nfattr *tb[], +static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple) { - if (!tb[CTA_PROTO_ICMPV6_TYPE-1] - || !tb[CTA_PROTO_ICMPV6_CODE-1] - || !tb[CTA_PROTO_ICMPV6_ID-1]) - return -EINVAL; - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + if (!tb[CTA_PROTO_ICMPV6_TYPE] + || !tb[CTA_PROTO_ICMPV6_CODE] + || !tb[CTA_PROTO_ICMPV6_ID]) return -EINVAL; tuple->dst.u.icmp.type = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_TYPE]); tuple->dst.u.icmp.code = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_CODE]); tuple->src.u.icmp.id = - *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]); + *(__be16 *)nla_data(tb[CTA_PROTO_ICMPV6_ID]); if (tuple->dst.u.icmp.type < 128 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) @@ -263,7 +260,6 @@ static int icmpv6_nfattr_to_tuple(struct nfattr *tb[], static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT, .procname = "nf_conntrack_icmpv6_timeout", .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), @@ -289,8 +285,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .new = icmpv6_new, .error = icmpv6_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = icmpv6_tuple_to_nfattr, - .nfattr_to_tuple = icmpv6_nfattr_to_tuple, + .tuple_to_nlattr = icmpv6_tuple_to_nlattr, + .nlattr_to_tuple = icmpv6_nlattr_to_tuple, + .nla_policy = icmpv6_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_header = &icmpv6_sysctl_header, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 25442a8c1ba..e170c67c47a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -31,6 +31,7 @@ #include <net/sock.h> #include <net/snmp.h> +#include <net/inet_frag.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -48,10 +49,6 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT -unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024; -unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024; -unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT; - struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; @@ -63,51 +60,24 @@ struct nf_ct_frag6_skb_cb struct nf_ct_frag6_queue { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* expire timer */ - struct sk_buff *fragments; - int len; - int meat; - ktime_t stamp; unsigned int csum; - __u8 last_in; /* has first/last segment arrived? */ -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 __u16 nhoffset; }; -/* Hash table. */ - -#define FRAG6Q_HASHSZ 64 - -static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ]; -static DEFINE_RWLOCK(nf_ct_frag6_lock); -static u32 nf_ct_frag6_hash_rnd; -static LIST_HEAD(nf_ct_frag6_lru_list); -int nf_ct_frag6_nqueues = 0; - -static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) -{ - hlist_del(&fq->list); - list_del(&fq->lru_list); - nf_ct_frag6_nqueues--; -} +struct inet_frags_ctl nf_frags_ctl __read_mostly = { + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, + .timeout = IPV6_FRAG_TIMEOUT, + .secret_interval = 10 * 60 * HZ, +}; -static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) -{ - write_lock(&nf_ct_frag6_lock); - __fq_unlink(fq); - write_unlock(&nf_ct_frag6_lock); -} +static struct inet_frags nf_frags; static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, struct in6_addr *daddr) @@ -120,7 +90,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += nf_ct_frag6_hash_rnd; + c += nf_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -133,100 +103,38 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, c += (__force u32)id; __jhash_mix(a, b, c); - return c & (FRAG6Q_HASHSZ - 1); + return c & (INETFRAGS_HASHSZ - 1); } -static struct timer_list nf_ct_frag6_secret_timer; -int nf_ct_frag6_secret_interval = 10 * 60 * HZ; - -static void nf_ct_frag6_secret_rebuild(unsigned long dummy) +static unsigned int nf_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; - - write_lock(&nf_ct_frag6_lock); - get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); - for (i = 0; i < FRAG6Q_HASHSZ; i++) { - struct nf_ct_frag6_queue *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) { - unsigned int hval = ip6qhashfn(q->id, - &q->saddr, - &q->daddr); - if (hval != i) { - hlist_del(&q->list); - /* Relink to new hash chain. */ - hlist_add_head(&q->list, - &nf_ct_frag6_hash[hval]); - } - } - } - write_unlock(&nf_ct_frag6_lock); + struct nf_ct_frag6_queue *nq; - mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval); + nq = container_of(q, struct nf_ct_frag6_queue, q); + return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr); } -atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); - -/* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) +static void nf_skb_free(struct sk_buff *skb) { - if (work) - *work -= skb->truesize; - atomic_sub(skb->truesize, &nf_ct_frag6_mem); if (NFCT_FRAG6_CB(skb)->orig) kfree_skb(NFCT_FRAG6_CB(skb)->orig); - - kfree_skb(skb); } -static inline void frag_free_queue(struct nf_ct_frag6_queue *fq, - unsigned int *work) +/* Memory Tracking Functions. */ +static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) { if (work) - *work -= sizeof(struct nf_ct_frag6_queue); - atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); - kfree(fq); -} - -static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) -{ - struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); - - if (!fq) - return NULL; - atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); - return fq; + *work -= skb->truesize; + atomic_sub(skb->truesize, &nf_frags.mem); + nf_skb_free(skb); + kfree_skb(skb); } /* Destruction primitives. */ -/* Complete destruction of fq. */ -static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq, - unsigned int *work) +static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) { - struct sk_buff *fp; - - BUG_TRAP(fq->last_in&COMPLETE); - BUG_TRAP(del_timer(&fq->timer) == 0); - - /* Release all fragment data. */ - fp = fq->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - - frag_free_queue(fq, work); -} - -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) -{ - if (atomic_dec_and_test(&fq->refcnt)) - nf_ct_frag6_destroy(fq, work); + inet_frag_put(&fq->q, &nf_frags); } /* Kill fq entry. It is not destroyed immediately, @@ -234,151 +142,58 @@ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) */ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) { - if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); - - if (!(fq->last_in & COMPLETE)) { - fq_unlink(fq); - atomic_dec(&fq->refcnt); - fq->last_in |= COMPLETE; - } + inet_frag_kill(&fq->q, &nf_frags); } static void nf_ct_frag6_evictor(void) { - struct nf_ct_frag6_queue *fq; - struct list_head *tmp; - unsigned int work; - - work = atomic_read(&nf_ct_frag6_mem); - if (work <= nf_ct_frag6_low_thresh) - return; - - work -= nf_ct_frag6_low_thresh; - while (work > 0) { - read_lock(&nf_ct_frag6_lock); - if (list_empty(&nf_ct_frag6_lru_list)) { - read_unlock(&nf_ct_frag6_lock); - return; - } - tmp = nf_ct_frag6_lru_list.next; - BUG_ON(tmp == NULL); - fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); - atomic_inc(&fq->refcnt); - read_unlock(&nf_ct_frag6_lock); - - spin_lock(&fq->lock); - if (!(fq->last_in&COMPLETE)) - fq_kill(fq); - spin_unlock(&fq->lock); - - fq_put(fq, &work); - } + inet_frag_evictor(&nf_frags); } static void nf_ct_frag6_expire(unsigned long data) { - struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; + struct nf_ct_frag6_queue *fq; + + fq = container_of((struct inet_frag_queue *)data, + struct nf_ct_frag6_queue, q); - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto out; fq_kill(fq); out: - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); } /* Creation primitives. */ -static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, - struct nf_ct_frag6_queue *fq_in) +static __inline__ struct nf_ct_frag6_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) { - struct nf_ct_frag6_queue *fq; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif - - write_lock(&nf_ct_frag6_lock); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->refcnt); - write_unlock(&nf_ct_frag6_lock); - fq_in->last_in |= COMPLETE; - fq_put(fq_in, NULL); - return fq; - } - } -#endif - fq = fq_in; - - if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout)) - atomic_inc(&fq->refcnt); - - atomic_inc(&fq->refcnt); - hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]); - INIT_LIST_HEAD(&fq->lru_list); - list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); - nf_ct_frag6_nqueues++; - write_unlock(&nf_ct_frag6_lock); - return fq; -} + struct inet_frag_queue *q; + struct ip6_create_arg arg; + unsigned int hash; + arg.id = id; + arg.src = src; + arg.dst = dst; + hash = ip6qhashfn(id, src, dst); -static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; - - if ((fq = frag_alloc_queue()) == NULL) { - pr_debug("Can't alloc new queue\n"); + q = inet_frag_find(&nf_frags, &arg, hash); + if (q == NULL) goto oom; - } - - memset(fq, 0, sizeof(struct nf_ct_frag6_queue)); - - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); - setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq); - spin_lock_init(&fq->lock); - atomic_set(&fq->refcnt, 1); - - return nf_ct_frag6_intern(hash, fq); + return container_of(q, struct nf_ct_frag6_queue, q); oom: + pr_debug("Can't alloc new queue\n"); return NULL; } -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; - struct hlist_node *n; - unsigned int hash = ip6qhashfn(id, src, dst); - - read_lock(&nf_ct_frag6_lock); - hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->refcnt); - read_unlock(&nf_ct_frag6_lock); - return fq; - } - } - read_unlock(&nf_ct_frag6_lock); - - return nf_ct_frag6_create(hash, id, src, dst); -} - static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) @@ -386,7 +201,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; int offset, end; - if (fq->last_in & COMPLETE) { + if (fq->q.last_in & COMPLETE) { pr_debug("Allready completed\n"); goto err; } @@ -412,13 +227,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ - if (end < fq->len || - ((fq->last_in & LAST_IN) && end != fq->len)) { + if (end < fq->q.len || + ((fq->q.last_in & LAST_IN) && end != fq->q.len)) { pr_debug("already received last fragment\n"); goto err; } - fq->last_in |= LAST_IN; - fq->len = end; + fq->q.last_in |= LAST_IN; + fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. @@ -430,13 +245,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, pr_debug("end of fragment not rounded to 8 bytes.\n"); return -1; } - if (end > fq->len) { + if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->last_in & LAST_IN) { + if (fq->q.last_in & LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } - fq->len = end; + fq->q.len = end; } } @@ -458,7 +273,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, * this fragment, right? */ prev = NULL; - for (next = fq->fragments; next != NULL; next = next->next) { + for (next = fq->q.fragments; next != NULL; next = next->next) { if (NFCT_FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -503,7 +318,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* next fragment */ NFCT_FRAG6_CB(next)->offset += i; - fq->meat -= i; + fq->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -518,9 +333,9 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (prev) prev->next = next; else - fq->fragments = next; + fq->q.fragments = next; - fq->meat -= free_it->len; + fq->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -532,23 +347,23 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (prev) prev->next = skb; else - fq->fragments = skb; + fq->q.fragments = skb; skb->dev = NULL; - fq->stamp = skb->tstamp; - fq->meat += skb->len; - atomic_add(skb->truesize, &nf_ct_frag6_mem); + fq->q.stamp = skb->tstamp; + fq->q.meat += skb->len; + atomic_add(skb->truesize, &nf_frags.mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; - fq->last_in |= FIRST_IN; + fq->q.last_in |= FIRST_IN; } - write_lock(&nf_ct_frag6_lock); - list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list); - write_unlock(&nf_ct_frag6_lock); + write_lock(&nf_frags.lock); + list_move_tail(&fq->q.lru_list, &nf_frags.lru_list); + write_unlock(&nf_frags.lock); return 0; err: @@ -567,7 +382,7 @@ err: static struct sk_buff * nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->fragments; + struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; fq_kill(fq); @@ -577,7 +392,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - - sizeof(struct ipv6hdr) + fq->len - + sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { pr_debug("payload len is too large.\n"); @@ -614,7 +429,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_ct_frag6_mem); + atomic_add(clone->truesize, &nf_frags.mem); } /* We have to remove fragment header from datagram and to relocate @@ -628,7 +443,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_ct_frag6_mem); + atomic_sub(head->truesize, &nf_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -638,12 +453,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &nf_ct_frag6_mem); + atomic_sub(fp->truesize, &nf_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = fq->stamp; + head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 8) */ @@ -652,7 +467,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_network_header_len(head), head->csum); - fq->fragments = NULL; + fq->q.fragments = NULL; /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; @@ -788,7 +603,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh) + if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh) nf_ct_frag6_evictor(); fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); @@ -797,23 +612,23 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->lock); + spin_unlock(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); - fq_put(fq, NULL); + fq_put(fq); goto ret_orig; } - if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { + if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->lock); + spin_unlock(&fq->q.lock); - fq_put(fq, NULL); + fq_put(fq); return ret_skb; ret_orig: @@ -859,20 +674,23 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb) int nf_ct_frag6_init(void) { - nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0); - nf_ct_frag6_secret_timer.expires = jiffies - + nf_ct_frag6_secret_interval; - add_timer(&nf_ct_frag6_secret_timer); + nf_frags.ctl = &nf_frags_ctl; + nf_frags.hashfn = nf_hashfn; + nf_frags.constructor = ip6_frag_init; + nf_frags.destructor = NULL; + nf_frags.skb_free = nf_skb_free; + nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.match = ip6_frag_match; + nf_frags.frag_expire = nf_ct_frag6_expire; + inet_frags_init(&nf_frags); return 0; } void nf_ct_frag6_cleanup(void) { - del_timer(&nf_ct_frag6_secret_timer); - nf_ct_frag6_low_thresh = 0; + inet_frags_fini(&nf_frags); + + nf_frags_ctl.low_thresh = 0; nf_ct_frag6_evictor(); } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 920dc9cf6a8..be526ad9254 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -23,6 +23,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stddef.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/sock.h> #include <net/tcp.h> @@ -53,7 +54,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "RAW6: inuse %d\n", fold_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues, atomic_read(&ip6_frag_mem)); + ip6_frag_nqueues(), ip6_frag_mem()); return 0; } @@ -85,47 +86,33 @@ static struct snmp_mib snmp6_ipstats_list[] = { }; static struct snmp_mib snmp6_icmp6_list[] = { -/* icmpv6 mib according to RFC 2466 - - Exceptions: {In|Out}AdminProhibs are removed, because I see - no good reasons to account them separately - of another dest.unreachs. - OutErrs is zero identically. - OutEchos too. - OutRouterAdvertisements too. - OutGroupMembQueries too. - */ +/* icmpv6 mib according to RFC 2466 */ SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), - SNMP_MIB_ITEM("Icmp6InDestUnreachs", ICMP6_MIB_INDESTUNREACHS), - SNMP_MIB_ITEM("Icmp6InPktTooBigs", ICMP6_MIB_INPKTTOOBIGS), - SNMP_MIB_ITEM("Icmp6InTimeExcds", ICMP6_MIB_INTIMEEXCDS), - SNMP_MIB_ITEM("Icmp6InParmProblems", ICMP6_MIB_INPARMPROBLEMS), - SNMP_MIB_ITEM("Icmp6InEchos", ICMP6_MIB_INECHOS), - SNMP_MIB_ITEM("Icmp6InEchoReplies", ICMP6_MIB_INECHOREPLIES), - SNMP_MIB_ITEM("Icmp6InGroupMembQueries", ICMP6_MIB_INGROUPMEMBQUERIES), - SNMP_MIB_ITEM("Icmp6InGroupMembResponses", ICMP6_MIB_INGROUPMEMBRESPONSES), - SNMP_MIB_ITEM("Icmp6InGroupMembReductions", ICMP6_MIB_INGROUPMEMBREDUCTIONS), - SNMP_MIB_ITEM("Icmp6InRouterSolicits", ICMP6_MIB_INROUTERSOLICITS), - SNMP_MIB_ITEM("Icmp6InRouterAdvertisements", ICMP6_MIB_INROUTERADVERTISEMENTS), - SNMP_MIB_ITEM("Icmp6InNeighborSolicits", ICMP6_MIB_INNEIGHBORSOLICITS), - SNMP_MIB_ITEM("Icmp6InNeighborAdvertisements", ICMP6_MIB_INNEIGHBORADVERTISEMENTS), - SNMP_MIB_ITEM("Icmp6InRedirects", ICMP6_MIB_INREDIRECTS), SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), - SNMP_MIB_ITEM("Icmp6OutDestUnreachs", ICMP6_MIB_OUTDESTUNREACHS), - SNMP_MIB_ITEM("Icmp6OutPktTooBigs", ICMP6_MIB_OUTPKTTOOBIGS), - SNMP_MIB_ITEM("Icmp6OutTimeExcds", ICMP6_MIB_OUTTIMEEXCDS), - SNMP_MIB_ITEM("Icmp6OutParmProblems", ICMP6_MIB_OUTPARMPROBLEMS), - SNMP_MIB_ITEM("Icmp6OutEchoReplies", ICMP6_MIB_OUTECHOREPLIES), - SNMP_MIB_ITEM("Icmp6OutRouterSolicits", ICMP6_MIB_OUTROUTERSOLICITS), - SNMP_MIB_ITEM("Icmp6OutNeighborSolicits", ICMP6_MIB_OUTNEIGHBORSOLICITS), - SNMP_MIB_ITEM("Icmp6OutNeighborAdvertisements", ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS), - SNMP_MIB_ITEM("Icmp6OutRedirects", ICMP6_MIB_OUTREDIRECTS), - SNMP_MIB_ITEM("Icmp6OutGroupMembResponses", ICMP6_MIB_OUTGROUPMEMBRESPONSES), - SNMP_MIB_ITEM("Icmp6OutGroupMembReductions", ICMP6_MIB_OUTGROUPMEMBREDUCTIONS), SNMP_MIB_SENTINEL }; +/* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */ +static char *icmp6type2name[256] = { + [ICMPV6_DEST_UNREACH] = "DestUnreachs", + [ICMPV6_PKT_TOOBIG] = "PktTooBigs", + [ICMPV6_TIME_EXCEED] = "TimeExcds", + [ICMPV6_PARAMPROB] = "ParmProblems", + [ICMPV6_ECHO_REQUEST] = "EchoRequest", + [ICMPV6_ECHO_REPLY] = "EchoReplies", + [ICMPV6_MGM_QUERY] = "GroupMembQueries", + [ICMPV6_MGM_REPORT] = "GroupMembResponses", + [ICMPV6_MGM_REDUCTION] = "GroupMembReductions", + [ICMPV6_MLD2_REPORT] = "MLDv2Reports", + [NDISC_ROUTER_ADVERTISEMENT] = "RouterAdvertisements", + [NDISC_ROUTER_SOLICITATION] = "RouterSolicits", + [NDISC_NEIGHBOUR_ADVERTISEMENT] = "NeighborAdvertisements", + [NDISC_NEIGHBOUR_SOLICITATION] = "NeighborSolicits", + [NDISC_REDIRECT] = "NeighborRedirects", +}; + + static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), @@ -142,6 +129,40 @@ static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_SENTINEL }; +static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) +{ + static char name[32]; + int i; + + /* print by name -- deprecated items */ + for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { + int icmptype; + char *p; + + icmptype = i & 0xff; + p = icmp6type2name[icmptype]; + if (!p) /* don't print un-named types here */ + continue; + (void) snprintf(name, sizeof(name)-1, "Icmp6%s%s", + i & 0x100 ? "Out" : "In", p); + seq_printf(seq, "%-32s\t%lu\n", name, + snmp_fold_field(mib, i)); + } + + /* print by number (nonzero only) - ICMPMsgStat format */ + for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { + unsigned long val; + + val = snmp_fold_field(mib, i); + if (!val) + continue; + (void) snprintf(name, sizeof(name)-1, "Icmp6%sType%u", + i & 0x100 ? "Out" : "In", i & 0xff); + seq_printf(seq, "%-32s\t%lu\n", name, val); + } + return; +} + static inline void snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist) { @@ -159,9 +180,11 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg); } else { snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); } @@ -231,22 +254,22 @@ int __init ipv6_misc_proc_init(void) { int rc = 0; - if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) goto proc_snmp6_fail; - proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net); + proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); if (!proc_net_devsnmp6) goto proc_dev_snmp6_fail; - if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops)) + if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) goto proc_sockstat6_fail; out: return rc; proc_sockstat6_fail: - proc_net_remove("dev_snmp6"); + proc_net_remove(&init_net, "dev_snmp6"); proc_dev_snmp6_fail: - proc_net_remove("snmp6"); + proc_net_remove(&init_net, "snmp6"); proc_snmp6_fail: rc = -ENOMEM; goto out; @@ -254,8 +277,8 @@ proc_snmp6_fail: void ipv6_misc_proc_exit(void) { - proc_net_remove("sockstat6"); - proc_net_remove("dev_snmp6"); - proc_net_remove("snmp6"); + proc_net_remove(&init_net, "sockstat6"); + proc_net_remove(&init_net, "dev_snmp6"); + proc_net_remove(&init_net, "snmp6"); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 77167afa345..ca24ef19cd8 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -35,6 +35,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/sock.h> #include <net/snmp.h> @@ -282,7 +283,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -1288,21 +1289,8 @@ static const struct seq_operations raw6_seq_ops = { static int raw6_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct raw6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - rc = seq_open(file, &raw6_seq_ops); - if (rc) - goto out_kfree; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &raw6_seq_ops, + sizeof(struct raw6_iter_state)); } static const struct file_operations raw6_seq_fops = { @@ -1315,13 +1303,13 @@ static const struct file_operations raw6_seq_fops = { int __init raw6_proc_init(void) { - if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops)) return -ENOMEM; return 0; } void raw6_proc_exit(void) { - proc_net_remove("raw6"); + proc_net_remove(&init_net, "raw6"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index de795c04e34..76c88a93b9b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -42,6 +42,7 @@ #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/skbuff.h> #include <net/sock.h> #include <net/snmp.h> @@ -53,11 +54,7 @@ #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> - -int sysctl_ip6frag_high_thresh __read_mostly = 256*1024; -int sysctl_ip6frag_low_thresh __read_mostly = 192*1024; - -int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT; +#include <net/inet_frag.h> struct ip6frag_skb_cb { @@ -74,53 +71,39 @@ struct ip6frag_skb_cb struct frag_queue { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* expire timer */ - struct sk_buff *fragments; - int len; - int meat; int iif; - ktime_t stamp; unsigned int csum; - __u8 last_in; /* has first/last segment arrived? */ -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 __u16 nhoffset; }; -/* Hash table. */ - -#define IP6Q_HASHSZ 64 +struct inet_frags_ctl ip6_frags_ctl __read_mostly = { + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, + .timeout = IPV6_FRAG_TIMEOUT, + .secret_interval = 10 * 60 * HZ, +}; -static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ]; -static DEFINE_RWLOCK(ip6_frag_lock); -static u32 ip6_frag_hash_rnd; -static LIST_HEAD(ip6_frag_lru_list); -int ip6_frag_nqueues = 0; +static struct inet_frags ip6_frags; -static __inline__ void __fq_unlink(struct frag_queue *fq) +int ip6_frag_nqueues(void) { - hlist_del(&fq->list); - list_del(&fq->lru_list); - ip6_frag_nqueues--; + return ip6_frags.nqueues; } -static __inline__ void fq_unlink(struct frag_queue *fq) +int ip6_frag_mem(void) { - write_lock(&ip6_frag_lock); - __fq_unlink(fq); - write_unlock(&ip6_frag_lock); + return atomic_read(&ip6_frags.mem); } +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, + struct net_device *dev); + /* * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. @@ -136,7 +119,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frag_hash_rnd; + c += ip6_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -149,98 +132,54 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, c += (__force u32)id; __jhash_mix(a, b, c); - return c & (IP6Q_HASHSZ - 1); + return c & (INETFRAGS_HASHSZ - 1); } -static struct timer_list ip6_frag_secret_timer; -int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ; - -static void ip6_frag_secret_rebuild(unsigned long dummy) +static unsigned int ip6_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; - - write_lock(&ip6_frag_lock); - get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32)); - for (i = 0; i < IP6Q_HASHSZ; i++) { - struct frag_queue *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) { - unsigned int hval = ip6qhashfn(q->id, - &q->saddr, - &q->daddr); - - if (hval != i) { - hlist_del(&q->list); + struct frag_queue *fq; - /* Relink to new hash chain. */ - hlist_add_head(&q->list, - &ip6_frag_hash[hval]); + fq = container_of(q, struct frag_queue, q); + return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); +} - } - } - } - write_unlock(&ip6_frag_lock); +int ip6_frag_match(struct inet_frag_queue *q, void *a) +{ + struct frag_queue *fq; + struct ip6_create_arg *arg = a; - mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval); + fq = container_of(q, struct frag_queue, q); + return (fq->id == arg->id && + ipv6_addr_equal(&fq->saddr, arg->src) && + ipv6_addr_equal(&fq->daddr, arg->dst)); } - -atomic_t ip6_frag_mem = ATOMIC_INIT(0); +EXPORT_SYMBOL(ip6_frag_match); /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip6_frag_mem); + atomic_sub(skb->truesize, &ip6_frags.mem); kfree_skb(skb); } -static inline void frag_free_queue(struct frag_queue *fq, int *work) +void ip6_frag_init(struct inet_frag_queue *q, void *a) { - if (work) - *work -= sizeof(struct frag_queue); - atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem); - kfree(fq); -} + struct frag_queue *fq = container_of(q, struct frag_queue, q); + struct ip6_create_arg *arg = a; -static inline struct frag_queue *frag_alloc_queue(void) -{ - struct frag_queue *fq = kzalloc(sizeof(struct frag_queue), GFP_ATOMIC); - - if(!fq) - return NULL; - atomic_add(sizeof(struct frag_queue), &ip6_frag_mem); - return fq; + fq->id = arg->id; + ipv6_addr_copy(&fq->saddr, arg->src); + ipv6_addr_copy(&fq->daddr, arg->dst); } +EXPORT_SYMBOL(ip6_frag_init); /* Destruction primitives. */ -/* Complete destruction of fq. */ -static void ip6_frag_destroy(struct frag_queue *fq, int *work) +static __inline__ void fq_put(struct frag_queue *fq) { - struct sk_buff *fp; - - BUG_TRAP(fq->last_in&COMPLETE); - BUG_TRAP(del_timer(&fq->timer) == 0); - - /* Release all fragment data. */ - fp = fq->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - - frag_free_queue(fq, work); -} - -static __inline__ void fq_put(struct frag_queue *fq, int *work) -{ - if (atomic_dec_and_test(&fq->refcnt)) - ip6_frag_destroy(fq, work); + inet_frag_put(&fq->q, &ip6_frags); } /* Kill fq entry. It is not destroyed immediately, @@ -248,60 +187,33 @@ static __inline__ void fq_put(struct frag_queue *fq, int *work) */ static __inline__ void fq_kill(struct frag_queue *fq) { - if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); - - if (!(fq->last_in & COMPLETE)) { - fq_unlink(fq); - atomic_dec(&fq->refcnt); - fq->last_in |= COMPLETE; - } + inet_frag_kill(&fq->q, &ip6_frags); } static void ip6_evictor(struct inet6_dev *idev) { - struct frag_queue *fq; - struct list_head *tmp; - int work; - - work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh; - if (work <= 0) - return; - - while(work > 0) { - read_lock(&ip6_frag_lock); - if (list_empty(&ip6_frag_lru_list)) { - read_unlock(&ip6_frag_lock); - return; - } - tmp = ip6_frag_lru_list.next; - fq = list_entry(tmp, struct frag_queue, lru_list); - atomic_inc(&fq->refcnt); - read_unlock(&ip6_frag_lock); - - spin_lock(&fq->lock); - if (!(fq->last_in&COMPLETE)) - fq_kill(fq); - spin_unlock(&fq->lock); - - fq_put(fq, &work); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); - } + int evicted; + + evicted = inet_frag_evictor(&ip6_frags); + if (evicted) + IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); } static void ip6_frag_expire(unsigned long data) { - struct frag_queue *fq = (struct frag_queue *) data; + struct frag_queue *fq; struct net_device *dev = NULL; - spin_lock(&fq->lock); + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + + spin_lock(&fq->q.lock); - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto out; fq_kill(fq); - dev = dev_get_by_index(fq->iif); + dev = dev_get_by_index(&init_net, fq->iif); if (!dev) goto out; @@ -311,7 +223,7 @@ static void ip6_frag_expire(unsigned long data) rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ - if (!(fq->last_in&FIRST_IN) || !fq->fragments) + if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments) goto out; /* @@ -319,114 +231,47 @@ static void ip6_frag_expire(unsigned long data) segment was received. And do not use fq->dev pointer directly, device might already disappeared. */ - fq->fragments->dev = dev; - icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); + fq->q.fragments->dev = dev; + icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); out: if (dev) dev_put(dev); - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); } -/* Creation primitives. */ - - -static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) +static __inline__ struct frag_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, + struct inet6_dev *idev) { - struct frag_queue *fq; + struct inet_frag_queue *q; + struct ip6_create_arg arg; unsigned int hash; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif - - write_lock(&ip6_frag_lock); - hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->refcnt); - write_unlock(&ip6_frag_lock); - fq_in->last_in |= COMPLETE; - fq_put(fq_in, NULL); - return fq; - } - } -#endif - fq = fq_in; - - if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) - atomic_inc(&fq->refcnt); - - atomic_inc(&fq->refcnt); - hlist_add_head(&fq->list, &ip6_frag_hash[hash]); - INIT_LIST_HEAD(&fq->lru_list); - list_add_tail(&fq->lru_list, &ip6_frag_lru_list); - ip6_frag_nqueues++; - write_unlock(&ip6_frag_lock); - return fq; -} - -static struct frag_queue * -ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) -{ - struct frag_queue *fq; + arg.id = id; + arg.src = src; + arg.dst = dst; + hash = ip6qhashfn(id, src, dst); - if ((fq = frag_alloc_queue()) == NULL) + q = inet_frag_find(&ip6_frags, &arg, hash); + if (q == NULL) goto oom; - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); - - init_timer(&fq->timer); - fq->timer.function = ip6_frag_expire; - fq->timer.data = (long) fq; - spin_lock_init(&fq->lock); - atomic_set(&fq->refcnt, 1); - - return ip6_frag_intern(fq); + return container_of(q, struct frag_queue, q); oom: IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); return NULL; } -static __inline__ struct frag_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) -{ - struct frag_queue *fq; - struct hlist_node *n; - unsigned int hash; - - read_lock(&ip6_frag_lock); - hash = ip6qhashfn(id, src, dst); - hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->refcnt); - read_unlock(&ip6_frag_lock); - return fq; - } - } - read_unlock(&ip6_frag_lock); - - return ip6_frag_create(id, src, dst, idev); -} - - -static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, +static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; + struct net_device *dev; int offset, end; - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -439,7 +284,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb))); - return; + return -1; } if (skb->ip_summed == CHECKSUM_COMPLETE) { @@ -454,11 +299,11 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ - if (end < fq->len || - ((fq->last_in & LAST_IN) && end != fq->len)) + if (end < fq->q.len || + ((fq->q.last_in & LAST_IN) && end != fq->q.len)) goto err; - fq->last_in |= LAST_IN; - fq->len = end; + fq->q.last_in |= LAST_IN; + fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. @@ -471,13 +316,13 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); - return; + return -1; } - if (end > fq->len) { + if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->last_in & LAST_IN) + if (fq->q.last_in & LAST_IN) goto err; - fq->len = end; + fq->q.len = end; } } @@ -496,7 +341,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * this fragment, right? */ prev = NULL; - for(next = fq->fragments; next != NULL; next = next->next) { + for(next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -533,7 +378,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (!pskb_pull(next, i)) goto err; FRAG6_CB(next)->offset += i; /* next fragment */ - fq->meat -= i; + fq->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -548,9 +393,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (prev) prev->next = next; else - fq->fragments = next; + fq->q.fragments = next; - fq->meat -= free_it->len; + fq->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -562,30 +407,37 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (prev) prev->next = skb; else - fq->fragments = skb; + fq->q.fragments = skb; - if (skb->dev) - fq->iif = skb->dev->ifindex; - skb->dev = NULL; - fq->stamp = skb->tstamp; - fq->meat += skb->len; - atomic_add(skb->truesize, &ip6_frag_mem); + dev = skb->dev; + if (dev) { + fq->iif = dev->ifindex; + skb->dev = NULL; + } + fq->q.stamp = skb->tstamp; + fq->q.meat += skb->len; + atomic_add(skb->truesize, &ip6_frags.mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; - fq->last_in |= FIRST_IN; + fq->q.last_in |= FIRST_IN; } - write_lock(&ip6_frag_lock); - list_move_tail(&fq->lru_list, &ip6_frag_lru_list); - write_unlock(&ip6_frag_lock); - return; + + if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len) + return ip6_frag_reasm(fq, prev, dev); + + write_lock(&ip6_frags.lock); + list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list); + write_unlock(&ip6_frags.lock); + return -1; err: IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); + return -1; } /* @@ -597,21 +449,39 @@ err: * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *head = fq->fragments; + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; unsigned int nhoff; fq_kill(fq); + /* Make the one we just received the head. */ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + + if (!fp) + goto out_oom; + + fp->next = head->next; + prev->next = fp; + + skb_morph(head, fq->q.fragments); + head->next = fq->q.fragments->next; + + kfree_skb(fq->q.fragments); + fq->q.fragments = head; + } + BUG_TRAP(head != NULL); BUG_TRAP(FRAG6_CB(head)->offset == 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - - sizeof(struct ipv6hdr) + fq->len - + sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; @@ -640,7 +510,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip6_frag_mem); + atomic_add(clone->truesize, &ip6_frags.mem); } /* We have to remove fragment header from datagram and to relocate @@ -655,7 +525,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip6_frag_mem); + atomic_sub(head->truesize, &ip6_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -665,17 +535,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip6_frag_mem); + atomic_sub(fp->truesize, &ip6_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = fq->stamp; + head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); IP6CB(head)->nhoff = nhoff; - *skb_in = head; - /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(skb_network_header(head), @@ -685,7 +553,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, rcu_read_lock(); IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); - fq->fragments = NULL; + fq->q.fragments = NULL; return 1; out_oversize: @@ -702,10 +570,8 @@ out_fail: return -1; } -static int ipv6_frag_rcv(struct sk_buff **skbp) +static int ipv6_frag_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; - struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -739,23 +605,19 @@ static int ipv6_frag_rcv(struct sk_buff **skbp) return 1; } - if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) + if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) ip6_evictor(ip6_dst_idev(skb->dst)); if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { - int ret = -1; + int ret; - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); - if (fq->last_in == (FIRST_IN|LAST_IN) && - fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp, dev); - - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); return ret; } @@ -775,11 +637,13 @@ void __init ipv6_frag_init(void) if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); - ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - init_timer(&ip6_frag_secret_timer); - ip6_frag_secret_timer.function = ip6_frag_secret_rebuild; - ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval; - add_timer(&ip6_frag_secret_timer); + ip6_frags.ctl = &ip6_frags_ctl; + ip6_frags.hashfn = ip6_hashfn; + ip6_frags.constructor = ip6_frag_init; + ip6_frags.destructor = NULL; + ip6_frags.skb_free = NULL; + ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.match = ip6_frag_match; + ip6_frags.frag_expire = ip6_frag_expire; + inet_frags_init(&ip6_frags); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 55ea80fac60..95f8e4a62f6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -44,6 +44,7 @@ #include <linux/seq_file.h> #endif +#include <net/net_namespace.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/ip6_fib.h> @@ -137,7 +138,6 @@ struct rt6_info ip6_null_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -ENETUNREACH, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -163,7 +163,6 @@ struct rt6_info ip6_prohibit_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -EACCES, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -183,7 +182,6 @@ struct rt6_info ip6_blk_hole_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -223,8 +221,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); @@ -665,7 +663,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, +static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, struct flowi *fl, int flags) { struct fib6_node *fn; @@ -684,7 +682,7 @@ restart_2: fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(fn, fl->iif, strict | reachable); + rt = rt6_select(fn, oif, strict | reachable); BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -737,6 +735,12 @@ out2: return rt; } +static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, + struct flowi *fl, int flags) +{ + return ip6_pol_route(table, fl->iif, fl, flags); +} + void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); @@ -763,72 +767,7 @@ void ip6_route_input(struct sk_buff *skb) static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, struct flowi *fl, int flags) { - struct fib6_node *fn; - struct rt6_info *rt, *nrt; - int strict = 0; - int attempts = 3; - int err; - int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; - - strict |= flags & RT6_LOOKUP_F_IFACE; - -relookup: - read_lock_bh(&table->tb6_lock); - -restart_2: - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); - -restart: - rt = rt6_select(fn, fl->oif, strict | reachable); - BACKTRACK(&fl->fl6_src); - if (rt == &ip6_null_entry || - rt->rt6i_flags & RTF_CACHE) - goto out; - - dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); - - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); - else { -#if CLONE_OFFLINK_ROUTE - nrt = rt6_alloc_clone(rt, &fl->fl6_dst); -#else - goto out2; -#endif - } - - dst_release(&rt->u.dst); - rt = nrt ? : &ip6_null_entry; - - dst_hold(&rt->u.dst); - if (nrt) { - err = ip6_ins_rt(nrt); - if (!err) - goto out2; - } - - if (--attempts <= 0) - goto out2; - - /* - * Race condition! In the gap, when table->tb6_lock was - * released someone could insert this route. Relookup. - */ - dst_release(&rt->u.dst); - goto relookup; - -out: - if (reachable) { - reachable = 0; - goto restart_2; - } - dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); -out2: - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; - return rt; + return ip6_pol_route(table, fl->oif, fl, flags); } struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) @@ -1129,7 +1068,7 @@ int ip6_route_add(struct fib6_config *cfg) #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(cfg->fc_ifindex); + dev = dev_get_by_index(&init_net, cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1187,12 +1126,12 @@ int ip6_route_add(struct fib6_config *cfg) if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ - if (dev != &loopback_dev) { + if (dev != init_net.loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = &loopback_dev; + dev = init_net.loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1278,7 +1217,7 @@ install_route: int remaining; nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla->nla_type; + int type = nla_type(nla); if (type) { if (type > RTAX_MAX) { @@ -1896,13 +1835,13 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(&loopback_dev); + dev_hold(init_net.loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = &loopback_dev; + rt->rt6i_dev = init_net.loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); @@ -2264,7 +2203,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(&init_net, iif); if (!dev) { err = -ENODEV; goto errout; @@ -2458,7 +2397,6 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, ctl_table ipv6_route_table[] = { { - .ctl_name = NET_IPV6_ROUTE_FLUSH, .procname = "flush", .data = &flush_delay, .maxlen = sizeof(int), @@ -2561,11 +2499,11 @@ void __init ip6_route_init(void) fib6_init(); #ifdef CONFIG_PROC_FS - p = proc_net_create("ipv6_route", 0, rt6_proc_info); + p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info); if (p) p->owner = THIS_MODULE; - proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); #endif #ifdef CONFIG_XFRM xfrm6_init(); @@ -2585,8 +2523,8 @@ void ip6_route_cleanup(void) fib6_rules_cleanup(); #endif #ifdef CONFIG_PROC_FS - proc_net_remove("ipv6_route"); - proc_net_remove("rt6_stats"); + proc_net_remove(&init_net, "ipv6_route"); + proc_net_remove(&init_net, "rt6_stats"); #endif #ifdef CONFIG_XFRM xfrm6_fini(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index eb20bb690ab..71433d29d88 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -167,7 +167,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int int i; for (i=1; i<100; i++) { sprintf(name, "sit%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -430,7 +430,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ - int max_headroom; /* The extra header space needed */ + unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; int mtu; struct in6_addr *addr6; @@ -714,7 +714,6 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) static void ipip6_tunnel_setup(struct net_device *dev) { - SET_MODULE_OWNER(dev); dev->uninit = ipip6_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipip6_tunnel_xmit; @@ -761,7 +760,7 @@ static int ipip6_tunnel_init(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3fb44277207..68bb2548e46 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -12,6 +12,7 @@ #include <net/ndisc.h> #include <net/ipv6.h> #include <net/addrconf.h> +#include <net/inet_frag.h> #ifdef CONFIG_SYSCTL @@ -41,7 +42,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", - .data = &sysctl_ip6frag_high_thresh, + .data = &ip6_frags_ctl.high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -49,7 +50,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, .procname = "ip6frag_low_thresh", - .data = &sysctl_ip6frag_low_thresh, + .data = &ip6_frags_ctl.low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -57,7 +58,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_TIME, .procname = "ip6frag_time", - .data = &sysctl_ip6frag_time, + .data = &ip6_frags_ctl.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -66,7 +67,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", - .data = &sysctl_ip6frag_secret_interval, + .data = &ip6_frags_ctl.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3e06799b37a..85208026278 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -59,6 +59,7 @@ #include <net/snmp.h> #include <net/dsfield.h> #include <net/timewait_sock.h> +#include <net/netdma.h> #include <asm/uaccess.h> @@ -697,7 +698,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, if (!cmd.tcpm_keylen) { if (!tcp_sk(sk)->md5sig_info) return -ENOENT; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]); return tcp_v6_md5_do_del(sk, &sin6->sin6_addr); } @@ -720,7 +721,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); if (!newkey) return -ENOMEM; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) { + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3], newkey, cmd.tcpm_keylen); } @@ -757,6 +758,8 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, bp->len = htonl(tcplen); bp->protocol = htonl(protocol); + sg_init_table(sg, 4); + sg_set_buf(&sg[block++], bp, sizeof(*bp)); nbytes += sizeof(*bp); @@ -778,6 +781,8 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, sg_set_buf(&sg[block++], key->key, key->keylen); nbytes += key->keylen; + sg_mark_end(sg, block); + /* Now store the hash into the packet */ err = crypto_hash_init(desc); if (err) { @@ -1668,9 +1673,8 @@ ipv6_pktoptions: return 0; } -static int tcp_v6_rcv(struct sk_buff **pskb) +static int tcp_v6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct tcphdr *th; struct sock *sk; int ret; @@ -1729,6 +1733,8 @@ process: if (!sock_owned_by_user(sk)) { #ifdef CONFIG_NET_DMA struct tcp_sock *tp = tcp_sk(sk); + if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) + tp->ucopy.dma_chan = get_softnet_dma(); if (tp->ucopy.dma_chan) ret = tcp_v6_do_rcv(sk, skb); else diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 23e2809878a..6323921b40b 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -87,9 +87,8 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) EXPORT_SYMBOL(xfrm6_tunnel_deregister); -static int tunnel6_rcv(struct sk_buff **pskb) +static int tunnel6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) @@ -106,9 +105,8 @@ drop: return 0; } -static int tunnel46_rcv(struct sk_buff **pskb) +static int tunnel46_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c347f3e30e2..caebad6ee51 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -405,10 +405,9 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, return 0; } -int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[], +int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], int proto) { - struct sk_buff *skb = *pskb; struct sock *sk; struct udphdr *uh; struct net_device *dev = skb->dev; @@ -494,9 +493,9 @@ discard: return 0; } -static __inline__ int udpv6_rcv(struct sk_buff **pskb) +static __inline__ int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP); + return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); } /* @@ -612,7 +611,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, daddr = NULL; if (daddr) { - if (ipv6_addr_type(daddr) == IPV6_ADDR_MAPPED) { + if (ipv6_addr_v4mapped(daddr)) { struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_port = sin6 ? sin6->sin6_port : inet->dport; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 6e252f318f7..2d3fda60123 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -6,7 +6,7 @@ #include <net/addrconf.h> #include <net/inet_common.h> -extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int ); +extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, int , int , int , __be32 , struct hlist_head []); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f54016a5500..766566f7de4 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -17,9 +17,9 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; -static int udplitev6_rcv(struct sk_buff **pskb) +static int udplitev6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE); + return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); } static void udplitev6_err(struct sk_buff *skb, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index c858537cec4..515783707e8 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,7 +16,7 @@ #include <net/ipv6.h> #include <net/xfrm.h> -int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { int err; __be32 seq; @@ -24,11 +24,9 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - int nexthdr; unsigned int nhoff; nhoff = IP6CB(skb)->nhoff; - nexthdr = skb_network_header(skb)[nhoff]; seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) @@ -41,7 +39,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) goto drop; x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - nexthdr != IPPROTO_IPIP ? nexthdr : IPPROTO_IPV6, AF_INET6); + nexthdr, AF_INET6); if (x == NULL) goto drop; spin_lock(&x->lock); @@ -70,10 +68,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } @@ -99,7 +97,6 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec, xfrm_nr * sizeof(xfrm_vec[0])); skb->sp->len += xfrm_nr; - skb->ip_summed = CHECKSUM_NONE; nf_reset(skb); @@ -133,9 +130,10 @@ drop: EXPORT_SYMBOL(xfrm6_rcv_spi); -int xfrm6_rcv(struct sk_buff **pskb) +int xfrm6_rcv(struct sk_buff *skb) { - return xfrm6_rcv_spi(*pskb, 0); + return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], + 0); } EXPORT_SYMBOL(xfrm6_rcv); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 2e61d6ddece..2bfb4f05c14 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -22,14 +22,6 @@ /* Add encapsulation header. * * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. - * The following fields in it shall be filled in by x->type->output: - * payload_len - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. */ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -37,19 +29,17 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) u8 *prevhdr; int hdr_len; - skb_push(skb, x->props.header_len); iph = ipv6_hdr(skb); hdr_len = ip6_find_1stfragopt(skb, &prevhdr); - skb_set_network_header(skb, - (prevhdr - x->props.header_len) - skb->data); - skb_set_transport_header(skb, hdr_len); - memmove(skb->data, iph, hdr_len); - skb_reset_network_header(skb); + skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + top_iph = ipv6_hdr(skb); - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb->network_header += offsetof(struct ipv6hdr, nexthdr); + memmove(top_iph, iph, hdr_len); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); @@ -89,6 +79,7 @@ static struct xfrm_mode xfrm6_beet_mode = { .output = xfrm6_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_beet_init(void) diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 6ad6d7ac6bd..a7bc8c62317 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/stringify.h> +#include <linux/time.h> #include <net/ipv6.h> #include <net/xfrm.h> @@ -36,12 +37,6 @@ * * The IP header and mutable extension headers will be moved forward to make * space for the route optimization header. - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. */ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -49,27 +44,21 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) u8 *prevhdr; int hdr_len; - skb_push(skb, x->props.header_len); iph = ipv6_hdr(skb); hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - skb_set_network_header(skb, - (prevhdr - x->props.header_len) - skb->data); - skb_set_transport_header(skb, hdr_len); - memmove(skb->data, iph, hdr_len); - return 0; -} + skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + memmove(ipv6_hdr(skb), iph, hdr_len); + + x->lastused = get_seconds(); -/* - * Do nothing about routing optimization header unlike IPsec. - */ -static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb) -{ return 0; } static struct xfrm_mode xfrm6_ro_mode = { - .input = xfrm6_ro_input, .output = xfrm6_ro_output, .owner = THIS_MODULE, .encap = XFRM_MODE_ROUTEOPTIMIZATION, diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index c026bfea820..4e344105b3f 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -18,12 +18,6 @@ * * The IP header and mutable extension headers will be moved forward to make * space for the encapsulation header. - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. */ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -31,14 +25,14 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) u8 *prevhdr; int hdr_len; - skb_push(skb, x->props.header_len); iph = ipv6_hdr(skb); hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - skb_set_network_header(skb, - (prevhdr - x->props.header_len) - skb->data); - skb_set_transport_header(skb, hdr_len); - memmove(skb->data, iph, hdr_len); + skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + memmove(ipv6_hdr(skb), iph, hdr_len); return 0; } diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9fc95bc6509..fd84e221727 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -33,15 +33,7 @@ static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) /* Add encapsulation header. * - * The top IP header will be constructed per RFC 2401. The following fields - * in it shall be filled in by x->type->output: - * payload_len - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. + * The top IP header will be constructed per RFC 2401. */ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -50,13 +42,13 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *iph, *top_iph; int dsfield; - skb_push(skb, x->props.header_len); iph = ipv6_hdr(skb); - skb_reset_network_header(skb); + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*iph); top_iph = ipv6_hdr(skb); - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb->network_header += offsetof(struct ipv6hdr, nexthdr); top_iph->version = 6; if (xdst->route->ops->family == AF_INET6) { @@ -126,6 +118,7 @@ static struct xfrm_mode xfrm6_tunnel_mode = { .output = xfrm6_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_tunnel_init(void) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 56364a5f676..656976760ad 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -9,9 +9,9 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/if_ether.h> #include <linux/compiler.h> #include <linux/skbuff.h> -#include <linux/spinlock.h> #include <linux/icmpv6.h> #include <linux/netfilter_ipv6.h> #include <net/ipv6.h> @@ -43,62 +43,31 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) return ret; } -static int xfrm6_output_one(struct sk_buff *skb) +static inline int xfrm6_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; + struct ipv6hdr *iph; int err; - if (skb->ip_summed == CHECKSUM_PARTIAL) { - err = skb_checksum_help(skb); - if (err) - goto error_nolock; - } - - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; } - do { - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; - - err = x->mode->output(x, skb); - if (err) - goto error; + err = xfrm_output(skb); + if (err) + goto error_nolock; - err = x->type->output(x, skb); - if (err) - goto error; - - x->curlft.bytes += skb->len; - x->curlft.packets++; - if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) - x->lastused = get_seconds(); - - spin_unlock_bh(&x->lock); - - skb_reset_network_header(skb); - - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - dst = skb->dst; - x = dst->xfrm; - } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + iph = ipv6_hdr(skb); + iph->payload_len = htons(skb->len - sizeof(*iph)); IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; out_exit: return err; -error: - spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); goto out_exit; @@ -111,7 +80,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) while (likely((err = xfrm6_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL, + err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (unlikely(err != 1)) break; @@ -119,7 +88,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) if (!skb->dst->xfrm) return dst_output(skb); - err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm6_output_finish2); if (unlikely(err != 1)) break; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 3ec0c4770ee..82e27b80d07 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -178,8 +178,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL || - xfrm[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short encap_family = xfrm[i]->props.family; switch(encap_family) { case AF_INET: @@ -215,7 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; @@ -232,18 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv4 is implemented as module and can be unloaded, - * we should manage reference to xfrm4_output in afinfo->output. - * Miyazawa - */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - goto error; - } - - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); @@ -375,7 +362,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index cdadb484746..b392bee396f 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -93,7 +93,8 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) /* Rule 4: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->props.mode == XFRM_MODE_TUNNEL) { + (src[i]->props.mode == XFRM_MODE_TUNNEL || + src[i]->props.mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } @@ -146,7 +147,8 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) /* Rule 3: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->mode == XFRM_MODE_TUNNEL) { + (src[i]->mode == XFRM_MODE_TUNNEL || + src[i]->mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } @@ -168,6 +170,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, + .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 30f3236c402..fae90ff3108 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -242,17 +242,13 @@ EXPORT_SYMBOL(xfrm6_tunnel_free_spi); static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { - struct ipv6hdr *top_iph; - - top_iph = (struct ipv6hdr *)skb->data; - top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); - + skb_push(skb, -skb_network_offset(skb)); return 0; } static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - return 0; + return skb_network_header(skb)[IP6CB(skb)->nhoff]; } static int xfrm6_tunnel_rcv(struct sk_buff *skb) @@ -261,7 +257,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, spi) > 0 ? : 0; + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0; } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 8400525177a..29b063d4312 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -347,6 +347,9 @@ static int ipxitf_device_event(struct notifier_block *notifier, struct net_device *dev = ptr; struct ipx_interface *i, *tmp; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN && event != NETDEV_UP) goto out; @@ -986,7 +989,7 @@ static int ipxitf_create(struct ipx_interface_definition *idef) if (intrfc) ipxitf_put(intrfc); - dev = dev_get_by_name(idef->ipx_device); + dev = dev_get_by_name(&init_net, idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1094,7 +1097,7 @@ static int ipxitf_delete(struct ipx_interface_definition *idef) if (!dlink_type) goto out; - dev = __dev_get_by_name(idef->ipx_device); + dev = __dev_get_by_name(&init_net, idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1189,7 +1192,7 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg) if (copy_from_user(&ifr, arg, sizeof(ifr))) break; sipx = (struct sockaddr_ipx *)&ifr.ifr_addr; - dev = __dev_get_by_name(ifr.ifr_name); + dev = __dev_get_by_name(&init_net, ifr.ifr_name); rc = -ENODEV; if (!dev) break; @@ -1360,11 +1363,14 @@ static struct proto ipx_proto = { .obj_size = sizeof(struct ipx_sock), }; -static int ipx_create(struct socket *sock, int protocol) +static int ipx_create(struct net *net, struct socket *sock, int protocol) { int rc = -ESOCKTNOSUPPORT; struct sock *sk; + if (net != &init_net) + return -EAFNOSUPPORT; + /* * SPX support is not anymore in the kernel sources. If you want to * ressurrect it, completing it and making it understand shared skbs, @@ -1375,7 +1381,7 @@ static int ipx_create(struct socket *sock, int protocol) goto out; rc = -ENOMEM; - sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1); if (!sk) goto out; #ifdef IPX_REFCNT_DEBUG @@ -1644,6 +1650,9 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty u16 ipx_pktsize; int rc = 0; + if (dev->nd_net != &init_net) + goto drop; + /* Not ours */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 4226e71ae1e..d483a00dc42 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -9,6 +9,7 @@ #include <linux/proc_fs.h> #include <linux/spinlock.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/tcp_states.h> #include <net/ipx.h> @@ -353,7 +354,7 @@ int __init ipx_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - ipx_proc_dir = proc_mkdir("ipx", proc_net); + ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net); if (!ipx_proc_dir) goto out; @@ -381,7 +382,7 @@ out_socket: out_route: remove_proc_entry("interface", ipx_proc_dir); out_interface: - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", init_net.proc_net); goto out; } @@ -390,7 +391,7 @@ void __exit ipx_proc_exit(void) remove_proc_entry("interface", ipx_proc_dir); remove_proc_entry("route", ipx_proc_dir); remove_proc_entry("socket", ipx_proc_dir); - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 4c670cf6aef..0328ae2654f 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -60,7 +60,7 @@ #include <net/irda/af_irda.h> -static int irda_create(struct socket *sock, int protocol); +static int irda_create(struct net *net, struct socket *sock, int protocol); static const struct proto_ops irda_stream_ops; static const struct proto_ops irda_seqpacket_ops; @@ -831,7 +831,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) IRDA_DEBUG(2, "%s()\n", __FUNCTION__); - err = irda_create(newsock, sk->sk_protocol); + err = irda_create(sk->sk_net, newsock, sk->sk_protocol); if (err) return err; @@ -1057,13 +1057,16 @@ static struct proto irda_proto = { * Create IrDA socket * */ -static int irda_create(struct socket *sock, int protocol) +static int irda_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct irda_sock *self; IRDA_DEBUG(2, "%s()\n", __FUNCTION__); + if (net != &init_net) + return -EAFNOSUPPORT; + /* Check for valid socket type */ switch (sock->type) { case SOCK_STREAM: /* For TTP connections with SAR disabled */ @@ -1075,7 +1078,7 @@ static int irda_create(struct socket *sock, int protocol) } /* Allocate networking socket */ - sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1); + sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1); if (sk == NULL) return -ENOMEM; @@ -1245,18 +1248,17 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct irda_sock *self; struct sk_buff *skb; - int err; + int err = -EPIPE; IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len); /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) + if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | + MSG_NOSIGNAL)) return -EINVAL; - if (sk->sk_shutdown & SEND_SHUTDOWN) { - send_sig(SIGPIPE, current, 0); - return -EPIPE; - } + if (sk->sk_shutdown & SEND_SHUTDOWN) + goto out_err; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; @@ -1283,7 +1285,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, skb = sock_alloc_send_skb(sk, len + self->max_header_size + 16, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) - return -ENOBUFS; + goto out_err; skb_reserve(skb, self->max_header_size + 16); skb_reset_transport_header(skb); @@ -1291,7 +1293,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len); if (err) { kfree_skb(skb); - return err; + goto out_err; } /* @@ -1301,10 +1303,14 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, err = irttp_data_request(self->tsap, skb); if (err) { IRDA_DEBUG(0, "%s(), err=%d\n", __FUNCTION__, err); - return err; + goto out_err; } /* Tell client how much data we actually sent */ return len; + + out_err: + return sk_stream_error(sk, msg->msg_flags, err); + } /* diff --git a/net/irda/discovery.c b/net/irda/discovery.c index af0cea721d2..80c33f408e3 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -202,7 +202,7 @@ void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force) /* Drop the spinlock before calling the higher layers, as * we can't guarantee they won't call us back and create a * deadlock. We will work on our own private data, so we - * don't care to be interupted. - Jean II */ + * don't care to be interrupted. - Jean II */ spin_unlock_irqrestore(&log->hb_spinlock, flags); if(buffer == NULL) diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 3d241e415a2..1120b150e21 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -77,7 +77,7 @@ static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, #endif /* CONFIG_PROC_FS */ static struct tty_driver *driver; -hashbin_t *ircomm_tty = NULL; +static hashbin_t *ircomm_tty = NULL; static const struct tty_operations ops = { .open = ircomm_tty_open, diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index 824309dabfe..b5a13882c92 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -381,18 +381,9 @@ static void ircomm_tty_discovery_indication(discinfo_t *discovery, info.daddr = discovery->daddr; info.saddr = discovery->saddr; - /* FIXME. We have a locking problem on the hashbin here. - * We probably need to use hashbin_find_next(), but we first - * need to ensure that "line" is unique. - Jean II */ - self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty); - while (self != NULL) { - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION, - NULL, &info); - - self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty); - } + self = (struct ircomm_tty_cb *) priv; + ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION, + NULL, &info); } /* diff --git a/net/irda/iriap.c b/net/irda/iriap.c index ee3889fa49a..dc5e34a0162 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c index 99b18dc7a0b..8fb9d7277ca 100644 --- a/net/irda/iriap_event.c +++ b/net/irda/iriap_event.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index cf302457097..cbcf04380f3 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index a4c1c954582..fff52d57a20 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -20,7 +20,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * @@ -436,6 +436,7 @@ static void irlan_check_response_param(struct irlan_cb *self, char *param, __u16 tmp_cpu; /* Temporary value in host order */ __u8 *bytes; int i; + DECLARE_MAC_BUF(mac); IRDA_DEBUG(4, "%s(), parm=%s\n", __FUNCTION__ , param); @@ -520,9 +521,8 @@ static void irlan_check_response_param(struct irlan_cb *self, char *param, /* FILTER_ENTRY, have we got an ethernet address? */ if (strcmp(param, "FILTER_ENTRY") == 0) { bytes = value; - IRDA_DEBUG(4, "Ethernet address = %02x:%02x:%02x:%02x:%02x:%02x\n", - bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], - bytes[5]); + IRDA_DEBUG(4, "Ethernet address = %s\n", + print_mac(mac, bytes)); for (i = 0; i < 6; i++) self->dev->dev_addr[i] = bytes[i]; } diff --git a/net/irda/irlan/irlan_client_event.c b/net/irda/irlan/irlan_client_event.c index 843ab6fbb39..6afcee59e90 100644 --- a/net/irda/irlan/irlan_client_event.c +++ b/net/irda/irlan/irlan_client_event.c @@ -17,7 +17,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index f5778ef3ccc..a4b56e25a91 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -17,7 +17,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index c421521c0a9..7f9c8542e5f 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -19,7 +19,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * @@ -60,8 +60,6 @@ static void irlan_eth_setup(struct net_device *dev) dev->set_multicast_list = irlan_eth_set_multicast_list; dev->destructor = free_netdev; - SET_MODULE_OWNER(dev); - ether_setup(dev); /* diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c index 623e0fd16c1..a9750a80138 100644 --- a/net/irda/irlan/irlan_event.c +++ b/net/irda/irlan/irlan_event.c @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c index e6346b88f93..4384be9a688 100644 --- a/net/irda/irlan/irlan_filter.c +++ b/net/irda/irlan/irlan_filter.c @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c index aac66434e47..13db942812e 100644 --- a/net/irda/irlan/irlan_provider.c +++ b/net/irda/irlan/irlan_provider.c @@ -20,7 +20,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_provider_event.c b/net/irda/irlan/irlan_provider_event.c index ef401bd6ea0..10ece5a4752 100644 --- a/net/irda/irlan/irlan_provider_event.c +++ b/net/irda/irlan/irlan_provider_event.c @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 3d76aafdb2e..f3236acc8d2 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -1219,29 +1219,11 @@ static const struct seq_operations irlap_seq_ops = { static int irlap_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct irlap_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); + if (irlap == NULL) + return -EINVAL; - if (!s) - goto out; - - if (irlap == NULL) { - rc = -EINVAL; - goto out_kfree; - } - - rc = seq_open(file, &irlap_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &irlap_seq_ops, + sizeof(struct irlap_iter_state)); } const struct file_operations irlap_seq_fops = { diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index a8b8873aa26..4c33bf5c835 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -19,7 +19,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 25a3444a923..4f3764546b2 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * @@ -1326,6 +1326,9 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, int command; __u8 control; + if (dev->nd_net != &init_net) + goto out; + /* FIXME: should we get our own field? */ self = (struct irlap_cb *) dev->atalk_ptr; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 7efa930ed68..cedff8068fb 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * @@ -2003,27 +2003,10 @@ static const struct seq_operations irlmp_seq_ops = { static int irlmp_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct irlmp_iter_state *s; - IRDA_ASSERT(irlmp != NULL, return -EINVAL;); - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - - rc = seq_open(file, &irlmp_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &irlmp_seq_ops, + sizeof(struct irlmp_iter_state)); } const struct file_operations irlmp_seq_fops = { diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c index 65ffa981510..1bba87e7860 100644 --- a/net/irda/irlmp_event.c +++ b/net/irda/irlmp_event.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c index 559302d3fe6..0a79d9aeb08 100644 --- a/net/irda/irlmp_frame.c +++ b/net/irda/irlmp_frame.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 8ba703da279..01554b996b9 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -17,7 +17,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 1e429c92973..cd9ff176ecd 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -15,6 +15,7 @@ #include <linux/socket.h> #include <linux/irda.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/irda/irda.h> #include <net/irda/irlap.h> @@ -30,7 +31,7 @@ static struct genl_family irda_nl_family = { .maxattr = IRDA_NL_CMD_MAX, }; -static struct net_device * ifname_to_netdev(struct genl_info *info) +static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info) { char * ifname; @@ -41,7 +42,7 @@ static struct net_device * ifname_to_netdev(struct genl_info *info) IRDA_DEBUG(5, "%s(): Looking for %s\n", __FUNCTION__, ifname); - return dev_get_by_name(ifname); + return dev_get_by_name(net, ifname); } static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info) @@ -57,7 +58,7 @@ static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info) IRDA_DEBUG(5, "%s(): Switching to mode: %d\n", __FUNCTION__, mode); - dev = ifname_to_netdev(info); + dev = ifname_to_netdev(&init_net, info); if (!dev) return -ENODEV; @@ -82,7 +83,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) void *hdr; int ret = -ENOBUFS; - dev = ifname_to_netdev(info); + dev = ifname_to_netdev(&init_net, info); if (!dev) return -ENODEV; diff --git a/net/irda/irproc.c b/net/irda/irproc.c index 181cb51b48a..cae24fbda96 100644 --- a/net/irda/irproc.c +++ b/net/irda/irproc.c @@ -28,6 +28,7 @@ #include <linux/seq_file.h> #include <linux/module.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/irda/irda.h> #include <net/irda/irlap.h> @@ -66,7 +67,7 @@ void __init irda_proc_register(void) int i; struct proc_dir_entry *d; - proc_irda = proc_mkdir("irda", proc_net); + proc_irda = proc_mkdir("irda", init_net.proc_net); if (proc_irda == NULL) return; proc_irda->owner = THIS_MODULE; @@ -92,7 +93,7 @@ void irda_proc_unregister(void) for (i=0; i<ARRAY_SIZE(irda_dirs); i++) remove_proc_entry(irda_dirs[i].name, proc_irda); - remove_proc_entry("irda", proc_net); + remove_proc_entry("irda", init_net.proc_net); proc_irda = NULL; } } diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index d058b467f9e..40c28efaed9 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -28,7 +28,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 957e04feb0f..565cbf0421c 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -17,7 +17,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * @@ -31,12 +31,6 @@ #include <net/irda/irda.h> /* irda_debug */ #include <net/irda/irias_object.h> -#define NET_IRDA 412 /* Random number */ -enum { DISCOVERY=1, DEVNAME, DEBUG, FAST_POLL, DISCOVERY_SLOTS, - DISCOVERY_TIMEOUT, SLOT_TIMEOUT, MAX_BAUD_RATE, MIN_TX_TURN_TIME, - MAX_TX_DATA_SIZE, MAX_TX_WINDOW, MAX_NOREPLY_TIME, WARN_NOREPLY_TIME, - LAP_KEEPALIVE_TIME }; - extern int sysctl_discovery; extern int sysctl_discovery_slots; extern int sysctl_discovery_timeout; @@ -94,7 +88,7 @@ static int do_devname(ctl_table *table, int write, struct file *filp, /* One file */ static ctl_table irda_table[] = { { - .ctl_name = DISCOVERY, + .ctl_name = NET_IRDA_DISCOVERY, .procname = "discovery", .data = &sysctl_discovery, .maxlen = sizeof(int), @@ -102,7 +96,7 @@ static ctl_table irda_table[] = { .proc_handler = &proc_dointvec }, { - .ctl_name = DEVNAME, + .ctl_name = NET_IRDA_DEVNAME, .procname = "devname", .data = sysctl_devname, .maxlen = 65, @@ -112,7 +106,7 @@ static ctl_table irda_table[] = { }, #ifdef CONFIG_IRDA_DEBUG { - .ctl_name = DEBUG, + .ctl_name = NET_IRDA_DEBUG, .procname = "debug", .data = &irda_debug, .maxlen = sizeof(int), @@ -122,7 +116,7 @@ static ctl_table irda_table[] = { #endif #ifdef CONFIG_IRDA_FAST_RR { - .ctl_name = FAST_POLL, + .ctl_name = NET_IRDA_FAST_POLL, .procname = "fast_poll_increase", .data = &sysctl_fast_poll_increase, .maxlen = sizeof(int), @@ -131,7 +125,7 @@ static ctl_table irda_table[] = { }, #endif { - .ctl_name = DISCOVERY_SLOTS, + .ctl_name = NET_IRDA_DISCOVERY_SLOTS, .procname = "discovery_slots", .data = &sysctl_discovery_slots, .maxlen = sizeof(int), @@ -142,7 +136,7 @@ static ctl_table irda_table[] = { .extra2 = &max_discovery_slots }, { - .ctl_name = DISCOVERY_TIMEOUT, + .ctl_name = NET_IRDA_DISCOVERY_TIMEOUT, .procname = "discovery_timeout", .data = &sysctl_discovery_timeout, .maxlen = sizeof(int), @@ -150,7 +144,7 @@ static ctl_table irda_table[] = { .proc_handler = &proc_dointvec }, { - .ctl_name = SLOT_TIMEOUT, + .ctl_name = NET_IRDA_SLOT_TIMEOUT, .procname = "slot_timeout", .data = &sysctl_slot_timeout, .maxlen = sizeof(int), @@ -161,7 +155,7 @@ static ctl_table irda_table[] = { .extra2 = &max_slot_timeout }, { - .ctl_name = MAX_BAUD_RATE, + .ctl_name = NET_IRDA_MAX_BAUD_RATE, .procname = "max_baud_rate", .data = &sysctl_max_baud_rate, .maxlen = sizeof(int), @@ -172,7 +166,7 @@ static ctl_table irda_table[] = { .extra2 = &max_max_baud_rate }, { - .ctl_name = MIN_TX_TURN_TIME, + .ctl_name = NET_IRDA_MIN_TX_TURN_TIME, .procname = "min_tx_turn_time", .data = &sysctl_min_tx_turn_time, .maxlen = sizeof(int), @@ -183,7 +177,7 @@ static ctl_table irda_table[] = { .extra2 = &max_min_tx_turn_time }, { - .ctl_name = MAX_TX_DATA_SIZE, + .ctl_name = NET_IRDA_MAX_TX_DATA_SIZE, .procname = "max_tx_data_size", .data = &sysctl_max_tx_data_size, .maxlen = sizeof(int), @@ -194,7 +188,7 @@ static ctl_table irda_table[] = { .extra2 = &max_max_tx_data_size }, { - .ctl_name = MAX_TX_WINDOW, + .ctl_name = NET_IRDA_MAX_TX_WINDOW, .procname = "max_tx_window", .data = &sysctl_max_tx_window, .maxlen = sizeof(int), @@ -205,7 +199,7 @@ static ctl_table irda_table[] = { .extra2 = &max_max_tx_window }, { - .ctl_name = MAX_NOREPLY_TIME, + .ctl_name = NET_IRDA_MAX_NOREPLY_TIME, .procname = "max_noreply_time", .data = &sysctl_max_noreply_time, .maxlen = sizeof(int), @@ -216,7 +210,7 @@ static ctl_table irda_table[] = { .extra2 = &max_max_noreply_time }, { - .ctl_name = WARN_NOREPLY_TIME, + .ctl_name = NET_IRDA_WARN_NOREPLY_TIME, .procname = "warn_noreply_time", .data = &sysctl_warn_noreply_time, .maxlen = sizeof(int), @@ -227,7 +221,7 @@ static ctl_table irda_table[] = { .extra2 = &max_warn_noreply_time }, { - .ctl_name = LAP_KEEPALIVE_TIME, + .ctl_name = NET_IRDA_LAP_KEEPALIVE_TIME, .procname = "lap_keepalive_time", .data = &sysctl_lap_keepalive_time, .maxlen = sizeof(int), diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 3d7ab03fb13..97db158c927 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * @@ -1884,25 +1884,8 @@ static const struct seq_operations irttp_seq_ops = { static int irttp_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct irttp_iter_state *s; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - goto out; - - rc = seq_open(file, &irttp_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_private(file, &irttp_seq_ops, + sizeof(struct irttp_iter_state)); } const struct file_operations irttp_seq_fops = { diff --git a/net/irda/timer.c b/net/irda/timer.c index d3a6ee8cc4a..d730099080a 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c index a7a7f191f1a..e71286768a4 100644 --- a/net/irda/wrapper.c +++ b/net/irda/wrapper.c @@ -20,7 +20,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 53ae14c35f7..43e01c8d382 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -41,6 +41,9 @@ static struct proto iucv_proto = { .obj_size = sizeof(struct iucv_sock), }; +static void iucv_sock_kill(struct sock *sk); +static void iucv_sock_close(struct sock *sk); + /* Call Back functions */ static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); @@ -213,7 +216,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_IUCV, prio, &iucv_proto, 1); + sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, 1); if (!sk) return NULL; @@ -221,6 +224,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); spin_lock_init(&iucv_sk(sk)->accept_q_lock); skb_queue_head_init(&iucv_sk(sk)->send_skb_q); + INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list); + spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; @@ -240,7 +245,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) } /* Create an IUCV socket */ -static int iucv_sock_create(struct socket *sock, int protocol) +static int iucv_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -670,6 +675,90 @@ out: return err; } +static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) +{ + int dataleft, size, copied = 0; + struct sk_buff *nskb; + + dataleft = len; + while (dataleft) { + if (dataleft >= sk->sk_rcvbuf / 4) + size = sk->sk_rcvbuf / 4; + else + size = dataleft; + + nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA); + if (!nskb) + return -ENOMEM; + + memcpy(nskb->data, skb->data + copied, size); + copied += size; + dataleft -= size; + + skb_reset_transport_header(nskb); + skb_reset_network_header(nskb); + nskb->len = size; + + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, nskb); + } + + return 0; +} + +static void iucv_process_message(struct sock *sk, struct sk_buff *skb, + struct iucv_path *path, + struct iucv_message *msg) +{ + int rc; + + if (msg->flags & IPRMDATA) { + skb->data = NULL; + skb->len = 0; + } else { + rc = iucv_message_receive(path, msg, 0, skb->data, + msg->length, NULL); + if (rc) { + kfree_skb(skb); + return; + } + if (skb->truesize >= sk->sk_rcvbuf / 4) { + rc = iucv_fragment_skb(sk, skb, msg->length); + kfree_skb(skb); + skb = NULL; + if (rc) { + iucv_path_sever(path, NULL); + return; + } + skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + } else { + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + skb->len = msg->length; + } + } + + if (sock_queue_rcv_skb(sk, skb)) + skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); +} + +static void iucv_process_message_q(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + struct sk_buff *skb; + struct sock_msg_q *p, *n; + + list_for_each_entry_safe(p, n, &iucv->message_q.list, list) { + skb = alloc_skb(p->msg.length, GFP_ATOMIC | GFP_DMA); + if (!skb) + break; + iucv_process_message(sk, skb, p->path, &p->msg); + list_del(&p->list); + kfree(p); + if (!skb_queue_empty(&iucv->backlog_skb_q)) + break; + } +} + static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { @@ -681,8 +770,9 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int err = 0; if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) && - skb_queue_empty(&iucv->backlog_skb_q) && - skb_queue_empty(&sk->sk_receive_queue)) + skb_queue_empty(&iucv->backlog_skb_q) && + skb_queue_empty(&sk->sk_receive_queue) && + list_empty(&iucv->message_q.list)) return 0; if (flags & (MSG_OOB)) @@ -721,16 +811,23 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, kfree_skb(skb); /* Queue backlog skbs */ - rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { if (sock_queue_rcv_skb(sk, rskb)) { - skb_queue_head(&iucv_sk(sk)->backlog_skb_q, + skb_queue_head(&iucv->backlog_skb_q, rskb); break; } else { - rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + rskb = skb_dequeue(&iucv->backlog_skb_q); } } + if (skb_queue_empty(&iucv->backlog_skb_q)) { + spin_lock_bh(&iucv->message_q.lock); + if (!list_empty(&iucv->message_q.list)) + iucv_process_message_q(sk); + spin_unlock_bh(&iucv->message_q.lock); + } + } else skb_queue_head(&sk->sk_receive_queue, skb); @@ -972,99 +1069,44 @@ static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16]) sk->sk_state_change(sk); } -static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len, - struct sk_buff_head *fragmented_skb_q) -{ - int dataleft, size, copied = 0; - struct sk_buff *nskb; - - dataleft = len; - while (dataleft) { - if (dataleft >= sk->sk_rcvbuf / 4) - size = sk->sk_rcvbuf / 4; - else - size = dataleft; - - nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA); - if (!nskb) - return -ENOMEM; - - memcpy(nskb->data, skb->data + copied, size); - copied += size; - dataleft -= size; - - skb_reset_transport_header(nskb); - skb_reset_network_header(nskb); - nskb->len = size; - - skb_queue_tail(fragmented_skb_q, nskb); - } - - return 0; -} - static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) { struct sock *sk = path->private; struct iucv_sock *iucv = iucv_sk(sk); - struct sk_buff *skb, *fskb; - struct sk_buff_head fragmented_skb_q; - int rc; - - skb_queue_head_init(&fragmented_skb_q); + struct sk_buff *skb; + struct sock_msg_q *save_msg; + int len; if (sk->sk_shutdown & RCV_SHUTDOWN) return; + if (!list_empty(&iucv->message_q.list) || + !skb_queue_empty(&iucv->backlog_skb_q)) + goto save_message; + + len = atomic_read(&sk->sk_rmem_alloc); + len += msg->length + sizeof(struct sk_buff); + if (len > sk->sk_rcvbuf) + goto save_message; + skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA); - if (!skb) { - iucv_path_sever(path, NULL); - return; - } + if (!skb) + goto save_message; - if (msg->flags & IPRMDATA) { - skb->data = NULL; - skb->len = 0; - } else { - rc = iucv_message_receive(path, msg, 0, skb->data, - msg->length, NULL); - if (rc) { - kfree_skb(skb); - return; - } - if (skb->truesize >= sk->sk_rcvbuf / 4) { - rc = iucv_fragment_skb(sk, skb, msg->length, - &fragmented_skb_q); - kfree_skb(skb); - skb = NULL; - if (rc) { - iucv_path_sever(path, NULL); - return; - } - } else { - skb_reset_transport_header(skb); - skb_reset_network_header(skb); - skb->len = msg->length; - } - } - /* Queue the fragmented skb */ - fskb = skb_dequeue(&fragmented_skb_q); - while (fskb) { - if (!skb_queue_empty(&iucv->backlog_skb_q)) - skb_queue_tail(&iucv->backlog_skb_q, fskb); - else if (sock_queue_rcv_skb(sk, fskb)) - skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, fskb); - fskb = skb_dequeue(&fragmented_skb_q); - } + spin_lock(&iucv->message_q.lock); + iucv_process_message(sk, skb, path, msg); + spin_unlock(&iucv->message_q.lock); - /* Queue the original skb if it exists (was not fragmented) */ - if (skb) { - if (!skb_queue_empty(&iucv->backlog_skb_q)) - skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); - else if (sock_queue_rcv_skb(sk, skb)) - skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); - } + return; + +save_message: + save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA); + save_msg->path = path; + save_msg->msg = *msg; + spin_lock(&iucv->message_q.lock); + list_add_tail(&save_msg->list, &iucv->message_q.list); + spin_unlock(&iucv->message_q.lock); } static void iucv_callback_txdone(struct iucv_path *path, diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 983058d432d..a2f5a6ea389 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -389,7 +389,7 @@ static void iucv_block_cpu(void *data) * iucv_declare_cpu * @data: unused * - * Declare a interupt buffer on this cpu. + * Declare a interrupt buffer on this cpu. */ static void iucv_declare_cpu(void *data) { diff --git a/net/key/af_key.c b/net/key/af_key.c index 5502df115a6..7969f8a716d 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -26,8 +26,8 @@ #include <linux/in6.h> #include <linux/proc_fs.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/xfrm.h> -#include <linux/audit.h> #include <net/sock.h> @@ -136,11 +136,14 @@ static struct proto key_proto = { .obj_size = sizeof(struct pfkey_sock), }; -static int pfkey_create(struct socket *sock, int protocol) +static int pfkey_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) @@ -149,7 +152,7 @@ static int pfkey_create(struct socket *sock, int protocol) return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1); if (sk == NULL) goto out; @@ -352,16 +355,14 @@ static int verify_address_len(void *p) switch (addr->sa_family) { case AF_INET: - len = sizeof(*sp) + sizeof(*sin) + (sizeof(uint64_t) - 1); - len /= sizeof(uint64_t); + len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 32) return -EINVAL; break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: - len = sizeof(*sp) + sizeof(*sin6) + (sizeof(uint64_t) - 1); - len /= sizeof(uint64_t); + len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 128) return -EINVAL; @@ -386,14 +387,9 @@ static int verify_address_len(void *p) static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) { - int len = 0; - - len += sizeof(struct sadb_x_sec_ctx); - len += sec_ctx->sadb_x_ctx_len; - len += sizeof(uint64_t) - 1; - len /= sizeof(uint64_t); - - return len; + return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + + sec_ctx->sadb_x_ctx_len, + sizeof(uint64_t)); } static inline int verify_sec_ctx_len(void *p) @@ -659,7 +655,8 @@ static inline int pfkey_mode_to_xfrm(int mode) } } -static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc) +static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, + int add_keys, int hsc) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -1013,6 +1010,24 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, return skb; } + +static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x) +{ + struct sk_buff *skb; + + spin_lock_bh(&x->lock); + skb = __pfkey_xfrm_state2msg(x, 1, 3); + spin_unlock_bh(&x->lock); + + return skb; +} + +static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x, + int hsc) +{ + return __pfkey_xfrm_state2msg(x, 0, hsc); +} + static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, void **ext_hdrs) { @@ -1257,8 +1272,11 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h struct sadb_x_sa2 *sa2; struct sadb_address *saddr, *daddr; struct sadb_msg *out_hdr; + struct sadb_spirange *range; struct xfrm_state *x = NULL; int mode; + int err; + u32 min_spi, max_spi; u32 reqid; u8 proto; unsigned short family; @@ -1313,25 +1331,17 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (x == NULL) return -ENOENT; - resp_skb = ERR_PTR(-ENOENT); - - spin_lock_bh(&x->lock); - if (x->km.state != XFRM_STATE_DEAD) { - struct sadb_spirange *range = ext_hdrs[SADB_EXT_SPIRANGE-1]; - u32 min_spi, max_spi; + min_spi = 0x100; + max_spi = 0x0fffffff; - if (range != NULL) { - min_spi = range->sadb_spirange_min; - max_spi = range->sadb_spirange_max; - } else { - min_spi = 0x100; - max_spi = 0x0fffffff; - } - xfrm_alloc_spi(x, htonl(min_spi), htonl(max_spi)); - if (x->id.spi) - resp_skb = pfkey_xfrm_state2msg(x, 0, 3); + range = ext_hdrs[SADB_EXT_SPIRANGE-1]; + if (range) { + min_spi = range->sadb_spirange_min; + max_spi = range->sadb_spirange_max; } - spin_unlock_bh(&x->lock); + + err = xfrm_alloc_spi(x, min_spi, max_spi); + resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x); if (IS_ERR(resp_skb)) { xfrm_state_put(x); @@ -1421,12 +1431,8 @@ static int key_notify_sa(struct xfrm_state *x, struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; - int hsc = 3; - - if (c->event == XFRM_MSG_DELSA) - hsc = 0; - skb = pfkey_xfrm_state2msg(x, 0, hsc); + skb = pfkey_xfrm_state2msg(x); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -1461,8 +1467,8 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, else err = xfrm_state_update(x); - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_add(x, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -1515,8 +1521,8 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_delete(x, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); xfrm_state_put(x); return err; @@ -1538,7 +1544,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, if (x == NULL) return -ESRCH; - out_skb = pfkey_xfrm_state2msg(x, 1, 3); + out_skb = pfkey_xfrm_state2msg(x); proto = x->id.proto; xfrm_state_put(x); if (IS_ERR(out_skb)) @@ -1718,7 +1724,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) struct sk_buff *out_skb; struct sadb_msg *out_hdr; - out_skb = pfkey_xfrm_state2msg(x, 1, 3); + out_skb = pfkey_xfrm_state2msg(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); @@ -2268,8 +2274,8 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_add(xp, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); if (err) goto out; @@ -2352,8 +2358,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg if (xp == NULL) return -ENOENT; - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_delete(xp, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); if (err) goto out; @@ -2613,8 +2619,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -ENOENT; if (delete) { - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_delete(xp, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); if (err) goto out; @@ -2919,7 +2925,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) else hsc = 1; - out_skb = pfkey_xfrm_state2msg(x, 0, hsc); + out_skb = pfkey_xfrm_state2msg_expire(x, hsc); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); @@ -3784,7 +3790,7 @@ static struct xfrm_mgr pfkeyv2_mgr = static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); - remove_proc_entry("net/pfkey", NULL); + remove_proc_entry("pfkey", init_net.proc_net); sock_unregister(PF_KEY); proto_unregister(&key_proto); } @@ -3801,7 +3807,7 @@ static int __init ipsec_pfkey_init(void) goto out_unregister_key_proto; #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL) + if (create_proc_read_entry("pfkey", 0, init_net.proc_net, pfkey_read_proc, NULL) == NULL) goto out_sock_unregister; #endif err = xfrm_register_km(&pfkeyv2_mgr); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6b8a103cf9e..46cf962f7f8 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -150,14 +150,17 @@ static struct proto llc_proto = { * socket type we have available. * Returns 0 upon success, negative upon failure. */ -static int llc_ui_create(struct socket *sock, int protocol) +static int llc_ui_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); @@ -249,7 +252,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) if (!sock_flag(sk, SOCK_ZAPPED)) goto out; rc = -ENODEV; - llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd); + llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); if (!llc->dev) goto out; rc = -EUSERS; @@ -300,7 +303,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out; rc = -ENODEV; rtnl_lock(); - llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac); + llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac); rtnl_unlock(); if (!llc->dev) goto out; @@ -759,7 +762,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, if (net_ratelimit()) printk(KERN_DEBUG "LLC(%s:%d): Application " "bug, race in MSG_PEEK.\n", - current->comm, current->pid); + current->comm, task_pid_nr(current)); peek_seq = llc->copied_seq; } continue; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 3b8cfbe029a..8ebc2769dfd 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -700,7 +700,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk, struct llc_addr *saddr, struct llc_addr *daddr) { - struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, + struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC, sk->sk_prot); struct llc_sock *newllc, *llc = llc_sk(sk); @@ -867,9 +867,9 @@ static void llc_sk_init(struct sock* sk) * Allocates a LLC sock and initializes it. Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) { - struct sock *sk = sk_alloc(family, priority, prot, 1); + struct sock *sk = sk_alloc(net, family, priority, prot, 1); if (!sk) goto out; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index d4b13a031fd..248b5903bb1 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/llc.h> LIST_HEAD(llc_sap_list); @@ -162,7 +163,7 @@ static int __init llc_init(void) { struct net_device *dev; - dev = first_net_device(); + dev = first_net_device(&init_net); if (dev != NULL) dev = next_net_device(dev); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 099ed8fec14..c40c9b2a345 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -12,6 +12,7 @@ * See the GNU General Public License for more details. */ #include <linux/netdevice.h> +#include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> #include <net/llc_sap.h> @@ -145,6 +146,9 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); + if (dev->nd_net != &init_net) + goto drop; + /* * When the interface is in promisc. mode, drop all the crap that it * receives, do not try to analyse it. diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 49be6c902c8..cb34bc0518e 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -17,6 +17,7 @@ #include <linux/proc_fs.h> #include <linux/errno.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/llc.h> #include <net/llc_c_ac.h> @@ -24,10 +25,10 @@ #include <net/llc_c_st.h> #include <net/llc_conn.h> -static void llc_ui_format_mac(struct seq_file *seq, unsigned char *mac) +static void llc_ui_format_mac(struct seq_file *seq, u8 *addr) { - seq_printf(seq, "%02X:%02X:%02X:%02X:%02X:%02X", - mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + DECLARE_MAC_BUF(mac); + seq_printf(seq, "%s", print_mac(mac, addr)); } static struct sock *llc_get_sk_idx(loff_t pos) @@ -127,8 +128,10 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) if (llc->dev) llc_ui_format_mac(seq, llc->dev->dev_addr); - else - seq_printf(seq, "00:00:00:00:00:00"); + else { + u8 addr[6] = {0,0,0,0,0,0}; + llc_ui_format_mac(seq, addr); + } seq_printf(seq, "@%02X ", llc->sap->laddr.lsap); llc_ui_format_mac(seq, llc->daddr.mac); seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, @@ -231,7 +234,7 @@ int __init llc_proc_init(void) int rc = -ENOMEM; struct proc_dir_entry *p; - llc_proc_dir = proc_mkdir("llc", proc_net); + llc_proc_dir = proc_mkdir("llc", init_net.proc_net); if (!llc_proc_dir) goto out; llc_proc_dir->owner = THIS_MODULE; @@ -254,7 +257,7 @@ out: out_core: remove_proc_entry("socket", llc_proc_dir); out_socket: - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", init_net.proc_net); goto out; } @@ -262,5 +265,5 @@ void llc_proc_exit(void) { remove_proc_entry("socket", llc_proc_dir); remove_proc_entry("core", llc_proc_dir); - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", init_net.proc_net); } diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index a9c2d0787d4..219cd9f9341 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_MAC80211) += mac80211.o rc80211_simple.o mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o +mac80211-objs-$(CONFIG_NET_SCHED) += wme.o mac80211-objs := \ ieee80211.o \ @@ -16,6 +17,10 @@ mac80211-objs := \ regdomain.o \ tkip.o \ aes_ccm.o \ - wme.o \ - ieee80211_cfg.o \ + cfg.o \ + rx.o \ + tx.o \ + key.o \ + util.o \ + event.o \ $(mac80211-objs-y) diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index e55569bee7d..bf7ba128b96 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -7,6 +7,7 @@ * published by the Free Software Foundation. */ +#include <linux/kernel.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> @@ -63,7 +64,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, s_0 = scratch + AES_BLOCK_LEN; e = scratch + 2 * AES_BLOCK_LEN; - num_blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; aes_ccm_prepare(tfm, b_0, aad, b, s_0, b); @@ -102,7 +103,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, s_0 = scratch + AES_BLOCK_LEN; a = scratch + 2 * AES_BLOCK_LEN; - num_blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; aes_ccm_prepare(tfm, b_0, aad, b, s_0, a); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c new file mode 100644 index 00000000000..9e2bc1fd023 --- /dev/null +++ b/net/mac80211/cfg.c @@ -0,0 +1,106 @@ +/* + * mac80211 configuration hooks for cfg80211 + * + * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> + * + * This file is GPLv2 as found in COPYING. + */ + +#include <linux/nl80211.h> +#include <linux/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/cfg80211.h> +#include "ieee80211_i.h" +#include "cfg.h" + +static enum ieee80211_if_types +nl80211_type_to_mac80211_type(enum nl80211_iftype type) +{ + switch (type) { + case NL80211_IFTYPE_UNSPECIFIED: + return IEEE80211_IF_TYPE_STA; + case NL80211_IFTYPE_ADHOC: + return IEEE80211_IF_TYPE_IBSS; + case NL80211_IFTYPE_STATION: + return IEEE80211_IF_TYPE_STA; + case NL80211_IFTYPE_MONITOR: + return IEEE80211_IF_TYPE_MNTR; + default: + return IEEE80211_IF_TYPE_INVALID; + } +} + +static int ieee80211_add_iface(struct wiphy *wiphy, char *name, + enum nl80211_iftype type) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + enum ieee80211_if_types itype; + + if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) + return -ENODEV; + + itype = nl80211_type_to_mac80211_type(type); + if (itype == IEEE80211_IF_TYPE_INVALID) + return -EINVAL; + + return ieee80211_if_add(local->mdev, name, NULL, itype); +} + +static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct net_device *dev; + char *name; + + if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) + return -ENODEV; + + /* we're under RTNL */ + dev = __dev_get_by_index(&init_net, ifindex); + if (!dev) + return 0; + + name = dev->name; + + return ieee80211_if_remove(local->mdev, name, -1); +} + +static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, + enum nl80211_iftype type) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct net_device *dev; + enum ieee80211_if_types itype; + struct ieee80211_sub_if_data *sdata; + + if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) + return -ENODEV; + + /* we're under RTNL */ + dev = __dev_get_by_index(&init_net, ifindex); + if (!dev) + return -ENODEV; + + if (netif_running(dev)) + return -EBUSY; + + itype = nl80211_type_to_mac80211_type(type); + if (itype == IEEE80211_IF_TYPE_INVALID) + return -EINVAL; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->type == IEEE80211_IF_TYPE_VLAN) + return -EOPNOTSUPP; + + ieee80211_if_reinit(dev); + ieee80211_if_set_type(dev, itype); + + return 0; +} + +struct cfg80211_ops mac80211_config_ops = { + .add_virtual_intf = ieee80211_add_iface, + .del_virtual_intf = ieee80211_del_iface, + .change_virtual_intf = ieee80211_change_iface, +}; diff --git a/net/mac80211/ieee80211_cfg.h b/net/mac80211/cfg.h index 85ed2c92487..7d7879f5b00 100644 --- a/net/mac80211/ieee80211_cfg.h +++ b/net/mac80211/cfg.h @@ -1,9 +1,9 @@ /* * mac80211 configuration hooks for cfg80211 */ -#ifndef __IEEE80211_CFG_H -#define __IEEE80211_CFG_H +#ifndef __CFG_H +#define __CFG_H extern struct cfg80211_ops mac80211_config_ops; -#endif /* __IEEE80211_CFG_H */ +#endif /* __CFG_H */ diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 476c8486f78..60514b2c97b 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -28,8 +28,6 @@ static const char *ieee80211_mode_str(int mode) return "IEEE 802.11b"; case MODE_IEEE80211G: return "IEEE 802.11g"; - case MODE_ATHEROS_TURBO: - return "Atheros Turbo (5 GHz)"; default: return "UNKNOWN"; } @@ -86,16 +84,12 @@ DEBUGFS_READONLY_FILE(channel, 20, "%d", local->hw.conf.channel); DEBUGFS_READONLY_FILE(frequency, 20, "%d", local->hw.conf.freq); -DEBUGFS_READONLY_FILE(radar_detect, 20, "%d", - local->hw.conf.radar_detect); DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d", local->hw.conf.antenna_sel_tx); DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d", local->hw.conf.antenna_sel_rx); DEBUGFS_READONLY_FILE(bridge_packets, 20, "%d", local->bridge_packets); -DEBUGFS_READONLY_FILE(key_tx_rx_threshold, 20, "%d", - local->key_tx_rx_threshold); DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", local->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", @@ -110,9 +104,6 @@ DEBUGFS_READONLY_FILE(mode, 20, "%s", ieee80211_mode_str(local->hw.conf.phymode)); DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", local->wep_iv & 0xffffff); -DEBUGFS_READONLY_FILE(tx_power_reduction, 20, "%d.%d dBm", - local->hw.conf.tx_power_reduction / 10, - local->hw.conf.tx_power_reduction % 10); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "<unset>"); @@ -305,11 +296,9 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(channel); DEBUGFS_ADD(frequency); - DEBUGFS_ADD(radar_detect); DEBUGFS_ADD(antenna_sel_tx); DEBUGFS_ADD(antenna_sel_rx); DEBUGFS_ADD(bridge_packets); - DEBUGFS_ADD(key_tx_rx_threshold); DEBUGFS_ADD(rts_threshold); DEBUGFS_ADD(fragmentation_threshold); DEBUGFS_ADD(short_retry_limit); @@ -317,7 +306,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(mode); DEBUGFS_ADD(wep_iv); - DEBUGFS_ADD(tx_power_reduction); DEBUGFS_ADD(modes); statsd = debugfs_create_dir("statistics", phyd); @@ -370,11 +358,9 @@ void debugfs_hw_del(struct ieee80211_local *local) { DEBUGFS_DEL(channel); DEBUGFS_DEL(frequency); - DEBUGFS_DEL(radar_detect); DEBUGFS_DEL(antenna_sel_tx); DEBUGFS_DEL(antenna_sel_rx); DEBUGFS_DEL(bridge_packets); - DEBUGFS_DEL(key_tx_rx_threshold); DEBUGFS_DEL(rts_threshold); DEBUGFS_DEL(fragmentation_threshold); DEBUGFS_DEL(short_retry_limit); @@ -382,7 +368,6 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(total_ps_buffered); DEBUGFS_DEL(mode); DEBUGFS_DEL(wep_iv); - DEBUGFS_DEL(tx_power_reduction); DEBUGFS_DEL(modes); DEBUGFS_STATS_DEL(transmitted_fragment_count); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 7d56dc9e732..c881524c872 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -14,17 +14,18 @@ #include "debugfs.h" #include "debugfs_key.h" -#define KEY_READ(name, buflen, format_string) \ +#define KEY_READ(name, prop, buflen, format_string) \ static ssize_t key_##name##_read(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ char buf[buflen]; \ struct ieee80211_key *key = file->private_data; \ - int res = scnprintf(buf, buflen, format_string, key->name); \ + int res = scnprintf(buf, buflen, format_string, key->prop); \ return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ } -#define KEY_READ_D(name) KEY_READ(name, 20, "%d\n") +#define KEY_READ_D(name) KEY_READ(name, name, 20, "%d\n") +#define KEY_READ_X(name) KEY_READ(name, name, 20, "0x%x\n") #define KEY_OPS(name) \ static const struct file_operations key_ ##name## _ops = { \ @@ -36,11 +37,27 @@ static const struct file_operations key_ ##name## _ops = { \ KEY_READ_##format(name) \ KEY_OPS(name) -KEY_FILE(keylen, D); -KEY_FILE(force_sw_encrypt, D); -KEY_FILE(keyidx, D); -KEY_FILE(hw_key_idx, D); +#define KEY_CONF_READ(name, buflen, format_string) \ + KEY_READ(conf_##name, conf.name, buflen, format_string) +#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, 20, "%d\n") + +#define KEY_CONF_OPS(name) \ +static const struct file_operations key_ ##name## _ops = { \ + .read = key_conf_##name##_read, \ + .open = mac80211_open_file_generic, \ +} + +#define KEY_CONF_FILE(name, format) \ + KEY_CONF_READ_##format(name) \ + KEY_CONF_OPS(name) + +KEY_CONF_FILE(keylen, D); +KEY_CONF_FILE(keyidx, D); +KEY_CONF_FILE(hw_key_idx, D); +KEY_FILE(flags, X); KEY_FILE(tx_rx_count, D); +KEY_READ(ifindex, sdata->dev->ifindex, 20, "%d\n"); +KEY_OPS(ifindex); static ssize_t key_algorithm_read(struct file *file, char __user *userbuf, @@ -49,7 +66,7 @@ static ssize_t key_algorithm_read(struct file *file, char *alg; struct ieee80211_key *key = file->private_data; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: alg = "WEP\n"; break; @@ -74,17 +91,20 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, int len; struct ieee80211_key *key = file->private_data; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: len = scnprintf(buf, sizeof(buf), "\n"); + break; case ALG_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", key->u.tkip.iv32, key->u.tkip.iv16); + break; case ALG_CCMP: tpn = key->u.ccmp.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); + break; default: return 0; } @@ -100,9 +120,10 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, int i, len; const u8 *rpn; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: len = scnprintf(buf, sizeof(buf), "\n"); + break; case ALG_TKIP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, @@ -110,6 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.iv32_rx[i], key->u.tkip.iv16_rx[i]); len = p - buf; + break; case ALG_CCMP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) { rpn = key->u.ccmp.rx_pn[i]; @@ -119,6 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, rpn[3], rpn[4], rpn[5]); } len = p - buf; + break; default: return 0; } @@ -133,7 +156,7 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, char buf[20]; int len; - if (key->alg != ALG_CCMP) + if (key->conf.alg != ALG_CCMP) return 0; len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); return simple_read_from_buffer(userbuf, count, ppos, buf, len); @@ -144,12 +167,12 @@ static ssize_t key_key_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct ieee80211_key *key = file->private_data; - int i, res, bufsize = 2*key->keylen+2; + int i, res, bufsize = 2 * key->conf.keylen + 2; char *buf = kmalloc(bufsize, GFP_KERNEL); char *p = buf; - for (i = 0; i < key->keylen; i++) - p += scnprintf(p, bufsize+buf-p, "%02x", key->key[i]); + for (i = 0; i < key->conf.keylen; i++) + p += scnprintf(p, bufsize + buf - p, "%02x", key->conf.key[i]); p += scnprintf(p, bufsize+buf-p, "\n"); res = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); kfree(buf); @@ -164,12 +187,14 @@ KEY_OPS(key); void ieee80211_debugfs_key_add(struct ieee80211_local *local, struct ieee80211_key *key) { + static int keycount; char buf[20]; if (!local->debugfs.keys) return; - sprintf(buf, "%d", key->keyidx); + sprintf(buf, "%d", keycount); + keycount++; key->debugfs.dir = debugfs_create_dir(buf, local->debugfs.keys); @@ -177,7 +202,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_local *local, return; DEBUGFS_ADD(keylen); - DEBUGFS_ADD(force_sw_encrypt); + DEBUGFS_ADD(flags); DEBUGFS_ADD(keyidx); DEBUGFS_ADD(hw_key_idx); DEBUGFS_ADD(tx_rx_count); @@ -186,6 +211,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_local *local, DEBUGFS_ADD(rx_spec); DEBUGFS_ADD(replays); DEBUGFS_ADD(key); + DEBUGFS_ADD(ifindex); }; #define DEBUGFS_DEL(name) \ @@ -197,7 +223,7 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key) return; DEBUGFS_DEL(keylen); - DEBUGFS_DEL(force_sw_encrypt); + DEBUGFS_DEL(flags); DEBUGFS_DEL(keyidx); DEBUGFS_DEL(hw_key_idx); DEBUGFS_DEL(tx_rx_count); @@ -206,6 +232,7 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key) DEBUGFS_DEL(rx_spec); DEBUGFS_DEL(replays); DEBUGFS_DEL(key); + DEBUGFS_DEL(ifindex); debugfs_remove(key->debugfs.stalink); key->debugfs.stalink = NULL; @@ -219,7 +246,7 @@ void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) if (!sdata->debugfsdir) return; - sprintf(buf, "../keys/%d", sdata->default_key->keyidx); + sprintf(buf, "../keys/%d", sdata->default_key->conf.keyidx); sdata->debugfs.default_key = debugfs_create_symlink("default_key", sdata->debugfsdir, buf); } @@ -235,11 +262,12 @@ void ieee80211_debugfs_key_sta_link(struct ieee80211_key *key, struct sta_info *sta) { char buf[50]; + DECLARE_MAC_BUF(mac); if (!key->debugfs.dir) return; - sprintf(buf, "../sta/" MAC_FMT, MAC_ARG(sta->addr)); + sprintf(buf, "../../stations/%s", print_mac(mac, sta->addr)); key->debugfs.stalink = debugfs_create_symlink("station", key->debugfs.dir, buf); } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 095be91829c..f0e6ab7eb62 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -66,7 +66,8 @@ static ssize_t ieee80211_if_fmt_##name( \ const struct ieee80211_sub_if_data *sdata, char *buf, \ int buflen) \ { \ - return scnprintf(buf, buflen, MAC_FMT "\n", MAC_ARG(sdata->field));\ + DECLARE_MAC_BUF(mac); \ + return scnprintf(buf, buflen, "%s\n", print_mac(mac, sdata->field));\ } #define __IEEE80211_IF_FILE(name) \ @@ -112,13 +113,13 @@ static ssize_t ieee80211_if_fmt_flags( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n", - sdata->u.sta.ssid_set ? "SSID\n" : "", - sdata->u.sta.bssid_set ? "BSSID\n" : "", - sdata->u.sta.prev_bssid_set ? "prev BSSID\n" : "", - sdata->u.sta.authenticated ? "AUTH\n" : "", - sdata->u.sta.associated ? "ASSOC\n" : "", - sdata->u.sta.probereq_poll ? "PROBEREQ POLL\n" : "", - sdata->use_protection ? "CTS prot\n" : ""); + sdata->u.sta.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", + sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", + sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", + sdata->flags & IEEE80211_SDATA_USE_PROTECTION ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); @@ -161,23 +162,6 @@ __IEEE80211_IF_FILE(beacon_tail_len); /* WDS attributes */ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); -/* VLAN attributes */ -IEEE80211_IF_FILE(vlan_id, u.vlan.id, DEC); - -/* MONITOR attributes */ -static ssize_t ieee80211_if_fmt_mode( - const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) -{ - struct ieee80211_local *local = sdata->local; - - return scnprintf(buf, buflen, "%s\n", - ((local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) || - local->open_count == local->monitors) ? - "hard" : "soft"); -} -__IEEE80211_IF_FILE(mode); - - #define DEBUGFS_ADD(name, type)\ sdata->debugfs.type.name = debugfs_create_file(#name, 0444,\ sdata->debugfsdir, sdata, &name##_ops); @@ -236,12 +220,10 @@ static void add_vlan_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(drop_unencrypted, vlan); DEBUGFS_ADD(eapol, vlan); DEBUGFS_ADD(ieee8021_x, vlan); - DEBUGFS_ADD(vlan_id, vlan); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(mode, monitor); } static void add_files(struct ieee80211_sub_if_data *sdata) @@ -331,12 +313,10 @@ static void del_vlan_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_DEL(drop_unencrypted, vlan); DEBUGFS_DEL(eapol, vlan); DEBUGFS_DEL(ieee8021_x, vlan); - DEBUGFS_DEL(vlan_id, vlan); } static void del_monitor_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(mode, monitor); } static void del_files(struct ieee80211_sub_if_data *sdata, int type) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index da34ea70276..8f5944c53d4 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -60,9 +60,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_OPS(name) STA_FILE(aid, aid, D); -STA_FILE(key_idx_compression, key_idx_compression, D); STA_FILE(dev, dev->name, S); -STA_FILE(vlan_id, vlan_id, D); STA_FILE(rx_packets, rx_packets, LU); STA_FILE(tx_packets, tx_packets, LU); STA_FILE(rx_bytes, rx_bytes, LU); @@ -204,15 +202,15 @@ STA_OPS(wme_tx_queue); void ieee80211_sta_debugfs_add(struct sta_info *sta) { - char buf[3*6]; struct dentry *stations_dir = sta->local->debugfs.stations; + DECLARE_MAC_BUF(mac); if (!stations_dir) return; - sprintf(buf, MAC_FMT, MAC_ARG(sta->addr)); + print_mac(mac, sta->addr); - sta->debugfs.dir = debugfs_create_dir(buf, stations_dir); + sta->debugfs.dir = debugfs_create_dir(mac, stations_dir); if (!sta->debugfs.dir) return; diff --git a/net/mac80211/event.c b/net/mac80211/event.c new file mode 100644 index 00000000000..2280f40b456 --- /dev/null +++ b/net/mac80211/event.c @@ -0,0 +1,43 @@ +/* + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * mac80211 - events + */ + +#include <linux/netdevice.h> +#include <net/iw_handler.h> +#include "ieee80211_i.h" + +/* + * indicate a failed Michael MIC to userspace; the passed packet + * (in the variable hdr) must be long enough to extract the TKIP + * fields like TSC + */ +void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, + struct ieee80211_hdr *hdr) +{ + union iwreq_data wrqu; + char *buf = kmalloc(128, GFP_ATOMIC); + DECLARE_MAC_BUF(mac); + + if (buf) { + /* TODO: needed parameters: count, key type, TSC */ + sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" + "keyid=%d %scast addr=%s)", + keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni", + print_mac(mac, hdr->addr2)); + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = strlen(buf); + wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); + kfree(buf); + } + + /* + * TODO: re-add support for sending MIC failure indication + * with all info via nl80211 + */ +} diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h deleted file mode 100644 index 52da513f060..00000000000 --- a/net/mac80211/hostapd_ioctl.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Host AP (software wireless LAN access point) user space daemon for - * Host AP kernel driver - * Copyright 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi> - * Copyright 2002-2004, Instant802 Networks, Inc. - * Copyright 2005, Devicescape Software, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef HOSTAPD_IOCTL_H -#define HOSTAPD_IOCTL_H - -#ifdef __KERNEL__ -#include <linux/types.h> -#endif /* __KERNEL__ */ - -#define PRISM2_IOCTL_PRISM2_PARAM (SIOCIWFIRSTPRIV + 0) -#define PRISM2_IOCTL_GET_PRISM2_PARAM (SIOCIWFIRSTPRIV + 1) -#define PRISM2_IOCTL_HOSTAPD (SIOCIWFIRSTPRIV + 3) - -/* PRISM2_IOCTL_PRISM2_PARAM ioctl() subtypes: - * This table is no longer added to, the whole sub-ioctl - * mess shall be deleted completely. */ -enum { - PRISM2_PARAM_IEEE_802_1X = 23, - - /* Instant802 additions */ - PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES = 1001, - PRISM2_PARAM_PREAMBLE = 1003, - PRISM2_PARAM_SHORT_SLOT_TIME = 1006, - PRISM2_PARAM_NEXT_MODE = 1008, - PRISM2_PARAM_RADIO_ENABLED = 1010, - PRISM2_PARAM_ANTENNA_MODE = 1013, - PRISM2_PARAM_STAT_TIME = 1016, - PRISM2_PARAM_STA_ANTENNA_SEL = 1017, - PRISM2_PARAM_TX_POWER_REDUCTION = 1022, - PRISM2_PARAM_KEY_TX_RX_THRESHOLD = 1024, - PRISM2_PARAM_DEFAULT_WEP_ONLY = 1026, - PRISM2_PARAM_WIFI_WME_NOACK_TEST = 1033, - PRISM2_PARAM_SCAN_FLAGS = 1035, - PRISM2_PARAM_HW_MODES = 1036, - PRISM2_PARAM_CREATE_IBSS = 1037, - PRISM2_PARAM_WMM_ENABLED = 1038, - PRISM2_PARAM_MIXED_CELL = 1039, - PRISM2_PARAM_RADAR_DETECT = 1043, - PRISM2_PARAM_SPECTRUM_MGMT = 1044, -}; - -enum { - IEEE80211_KEY_MGMT_NONE = 0, - IEEE80211_KEY_MGMT_IEEE8021X = 1, - IEEE80211_KEY_MGMT_WPA_PSK = 2, - IEEE80211_KEY_MGMT_WPA_EAP = 3, -}; - - -/* Data structures used for get_hw_features ioctl */ -struct hostapd_ioctl_hw_modes_hdr { - int mode; - int num_channels; - int num_rates; -}; - -struct ieee80211_channel_data { - short chan; /* channel number (IEEE 802.11) */ - short freq; /* frequency in MHz */ - int flag; /* flag for hostapd use (IEEE80211_CHAN_*) */ -}; - -struct ieee80211_rate_data { - int rate; /* rate in 100 kbps */ - int flags; /* IEEE80211_RATE_ flags */ -}; - - -/* ADD_IF, REMOVE_IF, and UPDATE_IF 'type' argument */ -enum { - HOSTAP_IF_WDS = 1, HOSTAP_IF_VLAN = 2, HOSTAP_IF_BSS = 3, - HOSTAP_IF_STA = 4 -}; - -struct hostapd_if_wds { - u8 remote_addr[ETH_ALEN]; -}; - -struct hostapd_if_vlan { - u8 id; -}; - -struct hostapd_if_bss { - u8 bssid[ETH_ALEN]; -}; - -struct hostapd_if_sta { -}; - -#endif /* HOSTAPD_IOCTL_H */ diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index ff2172ffd86..f484ca7ade9 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -20,42 +20,19 @@ #include <linux/if_arp.h> #include <linux/wireless.h> #include <linux/rtnetlink.h> -#include <net/iw_handler.h> -#include <linux/compiler.h> #include <linux/bitmap.h> +#include <net/net_namespace.h> #include <net/cfg80211.h> -#include <asm/unaligned.h> -#include "ieee80211_common.h" #include "ieee80211_i.h" #include "ieee80211_rate.h" #include "wep.h" -#include "wpa.h" -#include "tkip.h" #include "wme.h" #include "aes_ccm.h" #include "ieee80211_led.h" -#include "ieee80211_cfg.h" +#include "cfg.h" #include "debugfs.h" #include "debugfs_netdev.h" -#include "debugfs_key.h" - -/* privid for wiphys to determine whether they belong to us or not */ -void *mac80211_wiphy_privid = &mac80211_wiphy_privid; - -/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ -/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -static const unsigned char rfc1042_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; - -/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -static const unsigned char bridge_tunnel_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; - -/* No encapsulation header if EtherType < 0x600 (=length) */ -static const unsigned char eapol_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; - /* * For seeing transmitted packets on monitor interfaces @@ -67,2217 +44,408 @@ struct ieee80211_tx_status_rtap_hdr { u8 data_retries; } __attribute__ ((packed)); +/* common interface routines */ -static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, - struct ieee80211_hdr *hdr) -{ - /* Set the sequence number for this frame. */ - hdr->seq_ctrl = cpu_to_le16(sdata->sequence); - - /* Increase the sequence number. */ - sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ; -} - -struct ieee80211_key_conf * -ieee80211_key_data2conf(struct ieee80211_local *local, - const struct ieee80211_key *data) +static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr) { - struct ieee80211_key_conf *conf; - - conf = kmalloc(sizeof(*conf) + data->keylen, GFP_ATOMIC); - if (!conf) - return NULL; - - conf->hw_key_idx = data->hw_key_idx; - conf->alg = data->alg; - conf->keylen = data->keylen; - conf->flags = 0; - if (data->force_sw_encrypt) - conf->flags |= IEEE80211_KEY_FORCE_SW_ENCRYPT; - conf->keyidx = data->keyidx; - if (data->default_tx_key) - conf->flags |= IEEE80211_KEY_DEFAULT_TX_KEY; - if (local->default_wep_only) - conf->flags |= IEEE80211_KEY_DEFAULT_WEP_ONLY; - memcpy(conf->key, data->key, data->keylen); - - return conf; + memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ + return ETH_ALEN; } -struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, - int idx, size_t key_len, gfp_t flags) +/* must be called under mdev tx lock */ +static void ieee80211_configure_filter(struct ieee80211_local *local) { - struct ieee80211_key *key; + unsigned int changed_flags; + unsigned int new_flags = 0; - key = kzalloc(sizeof(struct ieee80211_key) + key_len, flags); - if (!key) - return NULL; - kref_init(&key->kref); - return key; -} + if (atomic_read(&local->iff_promiscs)) + new_flags |= FIF_PROMISC_IN_BSS; -static void ieee80211_key_release(struct kref *kref) -{ - struct ieee80211_key *key; + if (atomic_read(&local->iff_allmultis)) + new_flags |= FIF_ALLMULTI; - key = container_of(kref, struct ieee80211_key, kref); - if (key->alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - ieee80211_debugfs_key_remove(key); - kfree(key); -} + if (local->monitors) + new_flags |= FIF_CONTROL | + FIF_OTHER_BSS | + FIF_BCN_PRBRESP_PROMISC; -void ieee80211_key_free(struct ieee80211_key *key) -{ - if (key) - kref_put(&key->kref, ieee80211_key_release); -} + changed_flags = local->filter_flags ^ new_flags; -static int rate_list_match(const int *rate_list, int rate) -{ - int i; + /* be a bit nasty */ + new_flags |= (1<<31); - if (!rate_list) - return 0; + local->ops->configure_filter(local_to_hw(local), + changed_flags, &new_flags, + local->mdev->mc_count, + local->mdev->mc_list); - for (i = 0; rate_list[i] >= 0; i++) - if (rate_list[i] == rate) - return 1; + WARN_ON(new_flags & (1<<31)); - return 0; + local->filter_flags = new_flags & ~(1<<31); } +/* master interface */ -void ieee80211_prepare_rates(struct ieee80211_local *local, - struct ieee80211_hw_mode *mode) +static int ieee80211_master_open(struct net_device *dev) { - int i; - - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *rate = &mode->rates[i]; - - rate->flags &= ~(IEEE80211_RATE_SUPPORTED | - IEEE80211_RATE_BASIC); - - if (local->supp_rates[mode->mode]) { - if (!rate_list_match(local->supp_rates[mode->mode], - rate->rate)) - continue; - } - - rate->flags |= IEEE80211_RATE_SUPPORTED; - - /* Use configured basic rate set if it is available. If not, - * use defaults that are sane for most cases. */ - if (local->basic_rates[mode->mode]) { - if (rate_list_match(local->basic_rates[mode->mode], - rate->rate)) - rate->flags |= IEEE80211_RATE_BASIC; - } else switch (mode->mode) { - case MODE_IEEE80211A: - if (rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_BASIC; - break; - case MODE_IEEE80211B: - if (rate->rate == 10 || rate->rate == 20) - rate->flags |= IEEE80211_RATE_BASIC; - break; - case MODE_ATHEROS_TURBO: - if (rate->rate == 120 || rate->rate == 240 || - rate->rate == 480) - rate->flags |= IEEE80211_RATE_BASIC; - break; - case MODE_IEEE80211G: - if (rate->rate == 10 || rate->rate == 20 || - rate->rate == 55 || rate->rate == 110) - rate->flags |= IEEE80211_RATE_BASIC; - break; - } + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata; + int res = -EOPNOTSUPP; - /* Set ERP and MANDATORY flags based on phymode */ - switch (mode->mode) { - case MODE_IEEE80211A: - if (rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_MANDATORY; - break; - case MODE_IEEE80211B: - if (rate->rate == 10) - rate->flags |= IEEE80211_RATE_MANDATORY; - break; - case MODE_ATHEROS_TURBO: - break; - case MODE_IEEE80211G: - if (rate->rate == 10 || rate->rate == 20 || - rate->rate == 55 || rate->rate == 110 || - rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_MANDATORY; + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->dev != dev && netif_running(sdata->dev)) { + res = 0; break; } - if (ieee80211_is_erp_rate(mode->mode, rate->rate)) - rate->flags |= IEEE80211_RATE_ERP; } + return res; } - -static void ieee80211_key_threshold_notify(struct net_device *dev, - struct ieee80211_key *key, - struct sta_info *sta) +static int ieee80211_master_stop(struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_msg_key_notification *msg; - - /* if no one will get it anyway, don't even allocate it. - * unlikely because this is only relevant for APs - * where the device must be open... */ - if (unlikely(!local->apdev)) - return; - - skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) + - sizeof(struct ieee80211_msg_key_notification)); - if (!skb) - return; - - skb_reserve(skb, sizeof(struct ieee80211_frame_info)); - msg = (struct ieee80211_msg_key_notification *) - skb_put(skb, sizeof(struct ieee80211_msg_key_notification)); - msg->tx_rx_count = key->tx_rx_count; - memcpy(msg->ifname, dev->name, IFNAMSIZ); - if (sta) - memcpy(msg->addr, sta->addr, ETH_ALEN); - else - memset(msg->addr, 0xff, ETH_ALEN); - - key->tx_rx_count = 0; - - ieee80211_rx_mgmt(local, skb, NULL, - ieee80211_msg_key_threshold_notification); -} - - -static u8 * ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len) -{ - u16 fc; - - if (len < 24) - return NULL; - - fc = le16_to_cpu(hdr->frame_control); - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - return hdr->addr1; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - return NULL; - case IEEE80211_FCTL_FROMDS: - return hdr->addr2; - case 0: - return hdr->addr3; - } - break; - case IEEE80211_FTYPE_MGMT: - return hdr->addr3; - case IEEE80211_FTYPE_CTL: - if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) - return hdr->addr1; - else - return NULL; - } - - return NULL; -} - -int ieee80211_get_hdrlen(u16 fc) -{ - int hdrlen = 24; - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) - hdrlen = 30; /* Addr4 */ - /* - * The QoS Control field is two bytes and its presence is - * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to - * hdrlen if that bit is set. - * This works by masking out the bit and shifting it to - * bit position 1 so the result has the value 0 or 2. - */ - hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) - >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); - break; - case IEEE80211_FTYPE_CTL: - /* - * ACK and CTS are 10 bytes, all others 16. To see how - * to get this condition consider - * subtype mask: 0b0000000011110000 (0x00F0) - * ACK subtype: 0b0000000011010000 (0x00D0) - * CTS subtype: 0b0000000011000000 (0x00C0) - * bits that matter: ^^^ (0x00E0) - * value of those: 0b0000000011000000 (0x00C0) - */ - if ((fc & 0xE0) == 0xC0) - hdrlen = 10; - else - hdrlen = 16; - break; - } - - return hdrlen; -} -EXPORT_SYMBOL(ieee80211_get_hdrlen); - -int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data; - int hdrlen; - - if (unlikely(skb->len < 10)) - return 0; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); - if (unlikely(hdrlen > skb->len)) - return 0; - return hdrlen; -} -EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); - -static int ieee80211_get_radiotap_len(struct sk_buff *skb) -{ - struct ieee80211_radiotap_header *hdr = - (struct ieee80211_radiotap_header *) skb->data; - - return le16_to_cpu(hdr->it_len); -} - -#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP -static void ieee80211_dump_frame(const char *ifname, const char *title, - const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; - - printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); - if (skb->len < 4) { - printk("\n"); - return; - } - - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); - if (hdrlen > skb->len) - hdrlen = skb->len; - if (hdrlen >= 4) - printk(" FC=0x%04x DUR=0x%04x", - fc, le16_to_cpu(hdr->duration_id)); - if (hdrlen >= 10) - printk(" A1=" MAC_FMT, MAC_ARG(hdr->addr1)); - if (hdrlen >= 16) - printk(" A2=" MAC_FMT, MAC_ARG(hdr->addr2)); - if (hdrlen >= 24) - printk(" A3=" MAC_FMT, MAC_ARG(hdr->addr3)); - if (hdrlen >= 30) - printk(" A4=" MAC_FMT, MAC_ARG(hdr->addr4)); - printk("\n"); -} -#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ -static inline void ieee80211_dump_frame(const char *ifname, const char *title, - struct sk_buff *skb) -{ -} -#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ - - -static int ieee80211_is_eapol(const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr; - u16 fc; - int hdrlen; - - if (unlikely(skb->len < 10)) - return 0; - - hdr = (const struct ieee80211_hdr *) skb->data; - fc = le16_to_cpu(hdr->frame_control); - - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) - return 0; - - hdrlen = ieee80211_get_hdrlen(fc); + struct ieee80211_sub_if_data *sdata; - if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) && - memcmp(skb->data + hdrlen, eapol_header, - sizeof(eapol_header)) == 0)) - return 1; + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->dev != dev && netif_running(sdata->dev)) + dev_close(sdata->dev); return 0; } - -static ieee80211_txrx_result -ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) -{ - struct rate_control_extra extra; - - memset(&extra, 0, sizeof(extra)); - extra.mode = tx->u.tx.mode; - extra.mgmt_data = tx->sdata && - tx->sdata->type == IEEE80211_IF_TYPE_MGMT; - extra.ethertype = tx->ethertype; - - tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev, tx->skb, - &extra); - if (unlikely(extra.probe != NULL)) { - tx->u.tx.control->flags |= IEEE80211_TXCTL_RATE_CTRL_PROBE; - tx->u.tx.probe_last_frag = 1; - tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val; - tx->u.tx.rate = extra.probe; - } else { - tx->u.tx.control->alt_retry_rate = -1; - } - if (!tx->u.tx.rate) - return TXRX_DROP; - if (tx->u.tx.mode->mode == MODE_IEEE80211G && - tx->sdata->use_protection && tx->fragmented && - extra.nonerp) { - tx->u.tx.last_frag_rate = tx->u.tx.rate; - tx->u.tx.probe_last_frag = extra.probe ? 1 : 0; - - tx->u.tx.rate = extra.nonerp; - tx->u.tx.control->rate = extra.nonerp; - tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; - } else { - tx->u.tx.last_frag_rate = tx->u.tx.rate; - tx->u.tx.control->rate = tx->u.tx.rate; - } - tx->u.tx.control->tx_rate = tx->u.tx.rate->val; - if ((tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) && - tx->local->short_preamble && - (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { - tx->u.tx.short_preamble = 1; - tx->u.tx.control->tx_rate = tx->u.tx.rate->val2; - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx) +static void ieee80211_master_set_multicast_list(struct net_device *dev) { - if (tx->sta) - tx->u.tx.control->key_idx = tx->sta->key_idx_compression; - else - tx->u.tx.control->key_idx = HW_KEY_IDX_INVALID; - - if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) - tx->key = NULL; - else if (tx->sta && tx->sta->key) - tx->key = tx->sta->key; - else if (tx->sdata->default_key) - tx->key = tx->sdata->default_key; - else if (tx->sdata->drop_unencrypted && - !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) { - I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); - return TXRX_DROP; - } else - tx->key = NULL; - - if (tx->key) { - tx->key->tx_rx_count++; - if (unlikely(tx->local->key_tx_rx_threshold && - tx->key->tx_rx_count > - tx->local->key_tx_rx_threshold)) { - ieee80211_key_threshold_notify(tx->dev, tx->key, - tx->sta); - } - } + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - return TXRX_CONTINUE; + ieee80211_configure_filter(local); } +/* regular interfaces */ -static ieee80211_txrx_result -ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) +static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - size_t hdrlen, per_fragm, num_fragm, payload_len, left; - struct sk_buff **frags, *first, *frag; - int i; - u16 seq; - u8 *pos; - int frag_threshold = tx->local->fragmentation_threshold; - - if (!tx->fragmented) - return TXRX_CONTINUE; - - first = tx->skb; - - hdrlen = ieee80211_get_hdrlen(tx->fc); - payload_len = first->len - hdrlen; - per_fragm = frag_threshold - hdrlen - FCS_LEN; - num_fragm = (payload_len + per_fragm - 1) / per_fragm; - - frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC); - if (!frags) - goto fail; - - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); - seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ; - pos = first->data + hdrlen + per_fragm; - left = payload_len - per_fragm; - for (i = 0; i < num_fragm - 1; i++) { - struct ieee80211_hdr *fhdr; - size_t copylen; - - if (left <= 0) - goto fail; - - /* reserve enough extra head and tail room for possible - * encryption */ - frag = frags[i] = - dev_alloc_skb(tx->local->tx_headroom + - frag_threshold + - IEEE80211_ENCRYPT_HEADROOM + - IEEE80211_ENCRYPT_TAILROOM); - if (!frag) - goto fail; - /* Make sure that all fragments use the same priority so - * that they end up using the same TX queue */ - frag->priority = first->priority; - skb_reserve(frag, tx->local->tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); - fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); - memcpy(fhdr, first->data, hdrlen); - if (i == num_fragm - 2) - fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); - fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); - copylen = left > per_fragm ? per_fragm : left; - memcpy(skb_put(frag, copylen), pos, copylen); - - pos += copylen; - left -= copylen; - } - skb_trim(first, hdrlen + per_fragm); - - tx->u.tx.num_extra_frag = num_fragm - 1; - tx->u.tx.extra_frag = frags; - - return TXRX_CONTINUE; - - fail: - printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name); - if (frags) { - for (i = 0; i < num_fragm - 1; i++) - if (frags[i]) - dev_kfree_skb(frags[i]); - kfree(frags); + /* FIX: what would be proper limits for MTU? + * This interface uses 802.3 frames. */ + if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6) { + printk(KERN_WARNING "%s: invalid MTU %d\n", + dev->name, new_mtu); + return -EINVAL; } - I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment); - return TXRX_DROP; -} - -static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb) -{ - if (tx->key->force_sw_encrypt) { - if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) - return -1; - } else { - tx->u.tx.control->key_idx = tx->key->hw_key_idx; - if (tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { - if (ieee80211_wep_add_iv(tx->local, skb, tx->key) == - NULL) - return -1; - } - } +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + dev->mtu = new_mtu; return 0; } - -void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (tx->u.tx.extra_frag) { - struct ieee80211_hdr *fhdr; - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - fhdr = (struct ieee80211_hdr *) - tx->u.tx.extra_frag[i]->data; - fhdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - } - } -} - - -static ieee80211_txrx_result -ieee80211_tx_h_wep_encrypt(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - u16 fc; - - fc = le16_to_cpu(hdr->frame_control); - - if (!tx->key || tx->key->alg != ALG_WEP || - ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) - return TXRX_CONTINUE; - - tx->u.tx.control->iv_len = WEP_IV_LEN; - tx->u.tx.control->icv_len = WEP_ICV_LEN; - ieee80211_tx_set_iswep(tx); - - if (wep_encrypt_skb(tx, tx->skb) < 0) { - I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); - return TXRX_DROP; - } - - if (tx->u.tx.extra_frag) { - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - if (wep_encrypt_skb(tx, tx->u.tx.extra_frag[i]) < 0) { - I802_DEBUG_INC(tx->local-> - tx_handlers_drop_wep); - return TXRX_DROP; - } - } - } - - return TXRX_CONTINUE; -} - - -static int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, - int rate, int erp, int short_preamble) -{ - int dur; - - /* calculate duration (in microseconds, rounded up to next higher - * integer if it includes a fractional microsecond) to send frame of - * len bytes (does not include FCS) at the given rate. Duration will - * also include SIFS. - * - * rate is in 100 kbps, so divident is multiplied by 10 in the - * DIV_ROUND_UP() operations. - */ - - if (local->hw.conf.phymode == MODE_IEEE80211A || erp || - local->hw.conf.phymode == MODE_ATHEROS_TURBO) { - /* - * OFDM: - * - * N_DBPS = DATARATE x 4 - * N_SYM = Ceiling((16+8xLENGTH+6) / N_DBPS) - * (16 = SIGNAL time, 6 = tail bits) - * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext - * - * T_SYM = 4 usec - * 802.11a - 17.5.2: aSIFSTime = 16 usec - * 802.11g - 19.8.4: aSIFSTime = 10 usec + - * signal ext = 6 usec - */ - /* FIX: Atheros Turbo may have different (shorter) duration? */ - dur = 16; /* SIFS + signal ext */ - dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */ - dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */ - dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10, - 4 * rate); /* T_SYM x N_SYM */ - } else { - /* - * 802.11b or 802.11g with 802.11b compatibility: - * 18.3.4: TXTIME = PreambleLength + PLCPHeaderTime + - * Ceiling(((LENGTH+PBCC)x8)/DATARATE). PBCC=0. - * - * 802.11 (DS): 15.3.3, 802.11b: 18.3.4 - * aSIFSTime = 10 usec - * aPreambleLength = 144 usec or 72 usec with short preamble - * aPLCPHeaderLength = 48 usec or 24 usec with short preamble - */ - dur = 10; /* aSIFSTime = 10 usec */ - dur += short_preamble ? (72 + 24) : (144 + 48); - - dur += DIV_ROUND_UP(8 * (len + 4) * 10, rate); - } - - return dur; -} - - -/* Exported duration function for driver use */ -__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, - size_t frame_len, int rate) -{ - struct ieee80211_local *local = hw_to_local(hw); - u16 dur; - int erp; - - erp = ieee80211_is_erp_rate(hw->conf.phymode, rate); - dur = ieee80211_frame_duration(local, frame_len, rate, - erp, local->short_preamble); - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_generic_frame_duration); - - -static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr, - int next_frag_len) -{ - int rate, mrate, erp, dur, i; - struct ieee80211_rate *txrate = tx->u.tx.rate; - struct ieee80211_local *local = tx->local; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - - erp = txrate->flags & IEEE80211_RATE_ERP; - - /* - * data and mgmt (except PS Poll): - * - during CFP: 32768 - * - during contention period: - * if addr1 is group address: 0 - * if more fragments = 0 and addr1 is individual address: time to - * transmit one ACK plus SIFS - * if more fragments = 1 and addr1 is individual address: time to - * transmit next fragment plus 2 x ACK plus 3 x SIFS - * - * IEEE 802.11, 9.6: - * - control response frame (CTS or ACK) shall be transmitted using the - * same rate as the immediately previous frame in the frame exchange - * sequence, if this rate belongs to the PHY mandatory rates, or else - * at the highest possible rate belonging to the PHY rates in the - * BSSBasicRateSet - */ - - if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) { - /* TODO: These control frames are not currently sent by - * 80211.o, but should they be implemented, this function - * needs to be updated to support duration field calculation. - * - * RTS: time needed to transmit pending data/mgmt frame plus - * one CTS frame plus one ACK frame plus 3 x SIFS - * CTS: duration of immediately previous RTS minus time - * required to transmit CTS and its SIFS - * ACK: 0 if immediately previous directed data/mgmt had - * more=0, with more=1 duration in ACK frame is duration - * from previous frame minus time needed to transmit ACK - * and its SIFS - * PS Poll: BIT(15) | BIT(14) | aid - */ - return 0; - } - - /* data/mgmt */ - if (0 /* FIX: data/mgmt during CFP */) - return 32768; - - if (group_addr) /* Group address as the destination - no ACK */ - return 0; - - /* Individual destination address: - * IEEE 802.11, Ch. 9.6 (after IEEE 802.11g changes) - * CTS and ACK frames shall be transmitted using the highest rate in - * basic rate set that is less than or equal to the rate of the - * immediately previous frame and that is using the same modulation - * (CCK or OFDM). If no basic rate set matches with these requirements, - * the highest mandatory rate of the PHY that is less than or equal to - * the rate of the previous frame is used. - * Mandatory rates for IEEE 802.11g PHY: 1, 2, 5.5, 11, 6, 12, 24 Mbps - */ - rate = -1; - mrate = 10; /* use 1 Mbps if everything fails */ - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *r = &mode->rates[i]; - if (r->rate > txrate->rate) - break; - - if (IEEE80211_RATE_MODULATION(txrate->flags) != - IEEE80211_RATE_MODULATION(r->flags)) - continue; - - if (r->flags & IEEE80211_RATE_BASIC) - rate = r->rate; - else if (r->flags & IEEE80211_RATE_MANDATORY) - mrate = r->rate; - } - if (rate == -1) { - /* No matching basic rate found; use highest suitable mandatory - * PHY rate */ - rate = mrate; - } - - /* Time needed to transmit ACK - * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up - * to closest integer */ - - dur = ieee80211_frame_duration(local, 10, rate, erp, - local->short_preamble); - - if (next_frag_len) { - /* Frame is fragmented: duration increases with time needed to - * transmit next fragment plus ACK and 2 x SIFS. */ - dur *= 2; /* ACK + SIFS */ - /* next fragment */ - dur += ieee80211_frame_duration(local, next_frag_len, - txrate->rate, erp, - local->short_preamble); - } - - return dur; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) +static inline int identical_mac_addr_allowed(int type1, int type2) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - u16 dur; - struct ieee80211_tx_control *control = tx->u.tx.control; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - - if (!is_multicast_ether_addr(hdr->addr1)) { - if (tx->skb->len + FCS_LEN > tx->local->rts_threshold && - tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD) { - control->flags |= IEEE80211_TXCTL_USE_RTS_CTS; - control->retry_limit = - tx->local->long_retry_limit; - } else { - control->retry_limit = - tx->local->short_retry_limit; - } - } else { - control->retry_limit = 1; - } - - if (tx->fragmented) { - /* Do not use multiple retry rates when sending fragmented - * frames. - * TODO: The last fragment could still use multiple retry - * rates. */ - control->alt_retry_rate = -1; - } - - /* Use CTS protection for unicast frames sent using extended rates if - * there are associated non-ERP stations and RTS/CTS is not configured - * for the frame. */ - if (mode->mode == MODE_IEEE80211G && - (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && - tx->u.tx.unicast && tx->sdata->use_protection && - !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) - control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; - - /* Setup duration field for the first fragment of the frame. Duration - * for remaining fragments will be updated when they are being sent - * to low-level driver in ieee80211_tx(). */ - dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1), - tx->fragmented ? tx->u.tx.extra_frag[0]->len : - 0); - hdr->duration_id = cpu_to_le16(dur); - - if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) || - (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) { - struct ieee80211_rate *rate; - - /* Do not use multiple retry rates when using RTS/CTS */ - control->alt_retry_rate = -1; - - /* Use min(data rate, max base rate) as CTS/RTS rate */ - rate = tx->u.tx.rate; - while (rate > mode->rates && - !(rate->flags & IEEE80211_RATE_BASIC)) - rate--; - - control->rts_cts_rate = rate->val; - control->rts_rate = rate; - } - - if (tx->sta) { - tx->sta->tx_packets++; - tx->sta->tx_fragments++; - tx->sta->tx_bytes += tx->skb->len; - if (tx->u.tx.extra_frag) { - int i; - tx->sta->tx_fragments += tx->u.tx.num_extra_frag; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - tx->sta->tx_bytes += - tx->u.tx.extra_frag[i]->len; - } - } - } - - return TXRX_CONTINUE; + return (type1 == IEEE80211_IF_TYPE_MNTR || + type2 == IEEE80211_IF_TYPE_MNTR || + (type1 == IEEE80211_IF_TYPE_AP && + type2 == IEEE80211_IF_TYPE_WDS) || + (type1 == IEEE80211_IF_TYPE_WDS && + (type2 == IEEE80211_IF_TYPE_WDS || + type2 == IEEE80211_IF_TYPE_AP)) || + (type1 == IEEE80211_IF_TYPE_AP && + type2 == IEEE80211_IF_TYPE_VLAN) || + (type1 == IEEE80211_IF_TYPE_VLAN && + (type2 == IEEE80211_IF_TYPE_AP || + type2 == IEEE80211_IF_TYPE_VLAN))); } - -static ieee80211_txrx_result -ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx) +static int ieee80211_open(struct net_device *dev) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - u32 sta_flags; - - if (unlikely(tx->local->sta_scanning != 0) && - ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) - return TXRX_DROP; + struct ieee80211_sub_if_data *sdata, *nsdata; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_if_init_conf conf; + int res; - if (tx->u.tx.ps_buffered) - return TXRX_CONTINUE; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sta_flags = tx->sta ? tx->sta->flags : 0; + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(nsdata, &local->interfaces, list) { + struct net_device *ndev = nsdata->dev; - if (likely(tx->u.tx.unicast)) { - if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && - tx->sdata->type != IEEE80211_IF_TYPE_IBSS && - (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: dropped data frame to not " - "associated station " MAC_FMT "\n", - tx->dev->name, MAC_ARG(hdr->addr1)); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); - return TXRX_DROP; - } - } else { - if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - tx->local->num_sta == 0 && - !tx->local->allow_broadcast_always && - tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) { + if (ndev != dev && ndev != local->mdev && netif_running(ndev) && + compare_ether_addr(dev->dev_addr, ndev->dev_addr) == 0) { /* - * No associated STAs - no need to send multicast - * frames. + * check whether it may have the same address */ - return TXRX_DROP; - } - return TXRX_CONTINUE; - } - - if (unlikely(!tx->u.tx.mgmt_interface && tx->sdata->ieee802_1x && - !(sta_flags & WLAN_STA_AUTHORIZED))) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: dropped frame to " MAC_FMT - " (unauthorized port)\n", tx->dev->name, - MAC_ARG(hdr->addr1)); -#endif - I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port); - return TXRX_DROP; - } - - return TXRX_CONTINUE; -} - -static ieee80211_txrx_result -ieee80211_tx_h_sequence(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + if (!identical_mac_addr_allowed(sdata->type, + nsdata->type)) + return -ENOTUNIQ; - if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24) - ieee80211_include_sequence(tx->sdata, hdr); - - return TXRX_CONTINUE; -} - -/* This function is called whenever the AP is about to exceed the maximum limit - * of buffered frames for power saving STAs. This situation should not really - * happen often during normal operation, so dropping the oldest buffered packet - * from each queue should be OK to make some room for new frames. */ -static void purge_old_ps_buffers(struct ieee80211_local *local) -{ - int total = 0, purged = 0; - struct sk_buff *skb; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - struct ieee80211_if_ap *ap; - if (sdata->dev == local->mdev || - sdata->type != IEEE80211_IF_TYPE_AP) - continue; - ap = &sdata->u.ap; - skb = skb_dequeue(&ap->ps_bc_buf); - if (skb) { - purged++; - dev_kfree_skb(skb); + /* + * can only add VLANs to enabled APs + */ + if (sdata->type == IEEE80211_IF_TYPE_VLAN && + nsdata->type == IEEE80211_IF_TYPE_AP && + netif_running(nsdata->dev)) + sdata->u.vlan.ap = nsdata; } - total += skb_queue_len(&ap->ps_bc_buf); } - read_unlock(&local->sub_if_lock); - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - skb = skb_dequeue(&sta->ps_tx_buf); - if (skb) { - purged++; - dev_kfree_skb(skb); - } - total += skb_queue_len(&sta->ps_tx_buf); + switch (sdata->type) { + case IEEE80211_IF_TYPE_WDS: + if (is_zero_ether_addr(sdata->u.wds.remote_addr)) + return -ENOLINK; + break; + case IEEE80211_IF_TYPE_VLAN: + if (!sdata->u.vlan.ap) + return -ENOLINK; + break; + case IEEE80211_IF_TYPE_AP: + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_MNTR: + case IEEE80211_IF_TYPE_IBSS: + /* no special treatment */ + break; + case IEEE80211_IF_TYPE_INVALID: + /* cannot happen */ + WARN_ON(1); + break; } - spin_unlock_bh(&local->sta_lock); - - local->total_ps_buffered = total; - printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", - local->mdev->name, purged); -} - -static inline ieee80211_txrx_result -ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) -{ - /* broadcast/multicast frame */ - /* If any of the associated stations is in power save mode, - * the frame is buffered to be sent after DTIM beacon frame */ - if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) && - tx->sdata->type != IEEE80211_IF_TYPE_WDS && - tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) && - !(tx->fc & IEEE80211_FCTL_ORDER)) { - if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) - purge_old_ps_buffers(tx->local); - if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= - AP_MAX_BC_BUFFER) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: BC TX buffer full - " - "dropping the oldest frame\n", - tx->dev->name); - } - dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); - } else - tx->local->total_ps_buffered++; - skb_queue_tail(&tx->sdata->bss->ps_bc_buf, tx->skb); - return TXRX_QUEUED; + if (local->open_count == 0) { + res = 0; + if (local->ops->start) + res = local->ops->start(local_to_hw(local)); + if (res) + return res; } - return TXRX_CONTINUE; -} - + switch (sdata->type) { + case IEEE80211_IF_TYPE_VLAN: + list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans); + /* no need to tell driver */ + break; + case IEEE80211_IF_TYPE_MNTR: + /* must be before the call to ieee80211_configure_filter */ + local->monitors++; + if (local->monitors == 1) { + netif_tx_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_tx_unlock_bh(local->mdev); -static inline ieee80211_txrx_result -ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx) -{ - struct sta_info *sta = tx->sta; - - if (unlikely(!sta || - ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && - (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) - return TXRX_CONTINUE; - - if (unlikely((sta->flags & WLAN_STA_PS) && !sta->pspoll)) { - struct ieee80211_tx_packet_data *pkt_data; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS buffer (entries " - "before %d)\n", - MAC_ARG(sta->addr), sta->aid, - skb_queue_len(&sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - sta->flags |= WLAN_STA_TIM; - if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) - purge_old_ps_buffers(tx->local); - if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { - struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " TX " - "buffer full - dropping oldest frame\n", - tx->dev->name, MAC_ARG(sta->addr)); - } - dev_kfree_skb(old); - } else - tx->local->total_ps_buffered++; - /* Queue frame to be sent after STA sends an PS Poll frame */ - if (skb_queue_empty(&sta->ps_tx_buf)) { - if (tx->local->ops->set_tim) - tx->local->ops->set_tim(local_to_hw(tx->local), - sta->aid, 1); - if (tx->sdata->bss) - bss_tim_set(tx->local, tx->sdata->bss, sta->aid); + local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + ieee80211_hw_config(local); } - pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb; - pkt_data->jiffies = jiffies; - skb_queue_tail(&sta->ps_tx_buf, tx->skb); - return TXRX_QUEUED; - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(sta->flags & WLAN_STA_PS)) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " in PS mode, but pspoll " - "set -> send frame\n", tx->dev->name, - MAC_ARG(sta->addr)); - } -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - sta->pspoll = 0; - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) -{ - if (unlikely(tx->u.tx.ps_buffered)) - return TXRX_CONTINUE; - - if (tx->u.tx.unicast) - return ieee80211_tx_h_unicast_ps_buf(tx); - else - return ieee80211_tx_h_multicast_ps_buf(tx); -} - - -/* - * deal with packet injection down monitor interface - * with Radiotap Header -- only called for monitor mode interface - */ - -static ieee80211_txrx_result -__ieee80211_parse_tx_radiotap( - struct ieee80211_txrx_data *tx, - struct sk_buff *skb, struct ieee80211_tx_control *control) -{ - /* - * this is the moment to interpret and discard the radiotap header that - * must be at the start of the packet injected in Monitor mode - * - * Need to take some care with endian-ness since radiotap - * args are little-endian - */ - - struct ieee80211_radiotap_iterator iterator; - struct ieee80211_radiotap_header *rthdr = - (struct ieee80211_radiotap_header *) skb->data; - struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; - int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); - - /* - * default control situation for all injected packets - * FIXME: this does not suit all usage cases, expand to allow control - */ - - control->retry_limit = 1; /* no retry */ - control->key_idx = -1; /* no encryption key */ - control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | - IEEE80211_TXCTL_USE_CTS_PROTECT); - control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT | - IEEE80211_TXCTL_NO_ACK; - control->antenna_sel_tx = 0; /* default to default antenna */ - - /* - * for every radiotap entry that is present - * (ieee80211_radiotap_iterator_next returns -ENOENT when no more - * entries present, or -EINVAL on error) - */ - - while (!ret) { - int i, target_rate; - - ret = ieee80211_radiotap_iterator_next(&iterator); - - if (ret) - continue; - - /* see if this argument is something we can use */ - switch (iterator.this_arg_index) { - /* - * You must take care when dereferencing iterator.this_arg - * for multibyte types... the pointer is not aligned. Use - * get_unaligned((type *)iterator.this_arg) to dereference - * iterator.this_arg for type "type" safely on all arches. - */ - case IEEE80211_RADIOTAP_RATE: - /* - * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps - * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps - */ - target_rate = (*iterator.this_arg) * 5; - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *r = &mode->rates[i]; - - if (r->rate > target_rate) - continue; - - control->rate = r; - - if (r->flags & IEEE80211_RATE_PREAMBLE2) - control->tx_rate = r->val2; - else - control->tx_rate = r->val; - - /* end on exact match */ - if (r->rate == target_rate) - i = mode->num_rates; - } - break; - - case IEEE80211_RADIOTAP_ANTENNA: - /* - * radiotap uses 0 for 1st ant, mac80211 is 1 for - * 1st ant - */ - control->antenna_sel_tx = (*iterator.this_arg) + 1; - break; - - case IEEE80211_RADIOTAP_DBM_TX_POWER: - control->power_level = *iterator.this_arg; - break; + break; + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + /* fall through */ + default: + conf.if_id = dev->ifindex; + conf.type = sdata->type; + conf.mac_addr = dev->dev_addr; + res = local->ops->add_interface(local_to_hw(local), &conf); + if (res && !local->open_count && local->ops->stop) + local->ops->stop(local_to_hw(local)); + if (res) + return res; - case IEEE80211_RADIOTAP_FLAGS: - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { - /* - * this indicates that the skb we have been - * handed has the 32-bit FCS CRC at the end... - * we should react to that by snipping it off - * because it will be recomputed and added - * on transmission - */ - if (skb->len < (iterator.max_length + FCS_LEN)) - return TXRX_DROP; + ieee80211_if_config(dev); + ieee80211_reset_erp_info(dev); + ieee80211_enable_keys(sdata); - skb_trim(skb, skb->len - FCS_LEN); - } - break; + if (sdata->type == IEEE80211_IF_TYPE_STA && + !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) + netif_carrier_off(dev); + else + netif_carrier_on(dev); + } - default: - break; - } + if (local->open_count == 0) { + res = dev_open(local->mdev); + WARN_ON(res); + tasklet_enable(&local->tx_pending_tasklet); + tasklet_enable(&local->tasklet); } - if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ - return TXRX_DROP; + local->open_count++; - /* - * remove the radiotap header - * iterator->max_length was sanity-checked against - * skb->len by iterator init - */ - skb_pull(skb, iterator.max_length); + netif_start_queue(dev); - return TXRX_CONTINUE; + return 0; } - -static ieee80211_txrx_result inline -__ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, - struct sk_buff *skb, - struct net_device *dev, - struct ieee80211_tx_control *control) +static int ieee80211_stop(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_sub_if_data *sdata; - ieee80211_txrx_result res = TXRX_CONTINUE; - - int hdrlen; - - memset(tx, 0, sizeof(*tx)); - tx->skb = skb; - tx->dev = dev; /* use original interface */ - tx->local = local; - tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); - tx->sta = sta_info_get(local, hdr->addr1); - tx->fc = le16_to_cpu(hdr->frame_control); - - /* - * set defaults for things that can be set by - * injected radiotap headers - */ - control->power_level = local->hw.conf.power_level; - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) - control->antenna_sel_tx = tx->sta->antenna_sel_tx; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_if_init_conf conf; - /* process and remove the injection radiotap header */ sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { - if (__ieee80211_parse_tx_radiotap(tx, skb, control) == - TXRX_DROP) { - return TXRX_DROP; - } - /* - * we removed the radiotap header after this point, - * we filled control with what we could use - * set to the actual ieee header now - */ - hdr = (struct ieee80211_hdr *) skb->data; - res = TXRX_QUEUED; /* indication it was monitor packet */ - } - - tx->u.tx.control = control; - tx->u.tx.unicast = !is_multicast_ether_addr(hdr->addr1); - if (is_multicast_ether_addr(hdr->addr1)) - control->flags |= IEEE80211_TXCTL_NO_ACK; - else - control->flags &= ~IEEE80211_TXCTL_NO_ACK; - tx->fragmented = local->fragmentation_threshold < - IEEE80211_MAX_FRAG_THRESHOLD && tx->u.tx.unicast && - skb->len + FCS_LEN > local->fragmentation_threshold && - (!local->ops->set_frag_threshold); - if (!tx->sta) - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - else if (tx->sta->clear_dst_mask) { - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - tx->sta->clear_dst_mask = 0; - } - hdrlen = ieee80211_get_hdrlen(tx->fc); - if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { - u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; - tx->ethertype = (pos[0] << 8) | pos[1]; - } - control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; - return res; -} + netif_stop_queue(dev); -static int inline is_ieee80211_device(struct net_device *dev, - struct net_device *master) -{ - return (wdev_priv(dev->ieee80211_ptr) == - wdev_priv(master->ieee80211_ptr)); -} + dev_mc_unsync(local->mdev, dev); -/* Device in tx->dev has a reference added; use dev_put(tx->dev) when - * finished with it. */ -static int inline ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, - struct sk_buff *skb, - struct net_device *mdev, - struct ieee80211_tx_control *control) -{ - struct ieee80211_tx_packet_data *pkt_data; - struct net_device *dev; + /* down all dependent devices, that is VLANs */ + if (sdata->type == IEEE80211_IF_TYPE_AP) { + struct ieee80211_sub_if_data *vlan, *tmp; - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - dev = dev_get_by_index(pkt_data->ifindex); - if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { - dev_put(dev); - dev = NULL; + list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans, + u.vlan.list) + dev_close(vlan->dev); + WARN_ON(!list_empty(&sdata->u.ap.vlans)); } - if (unlikely(!dev)) - return -ENODEV; - __ieee80211_tx_prepare(tx, skb, dev, control); - return 0; -} - -static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); -} - -static inline int __ieee80211_queue_pending(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]); -} - -#define IEEE80211_TX_OK 0 -#define IEEE80211_TX_AGAIN 1 -#define IEEE80211_TX_FRAG_AGAIN 2 -static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_txrx_data *tx) -{ - struct ieee80211_tx_control *control = tx->u.tx.control; - int ret, i; + local->open_count--; - if (!ieee80211_qdisc_installed(local->mdev) && - __ieee80211_queue_stopped(local, 0)) { - netif_stop_queue(local->mdev); - return IEEE80211_TX_AGAIN; - } - if (skb) { - ieee80211_dump_frame(local->mdev->name, "TX to low-level driver", skb); - ret = local->ops->tx(local_to_hw(local), skb, control); - if (ret) - return IEEE80211_TX_AGAIN; - local->mdev->trans_start = jiffies; - ieee80211_led_tx(local, 1); - } - if (tx->u.tx.extra_frag) { - control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | - IEEE80211_TXCTL_USE_CTS_PROTECT | - IEEE80211_TXCTL_CLEAR_DST_MASK | - IEEE80211_TXCTL_FIRST_FRAGMENT); - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - if (!tx->u.tx.extra_frag[i]) - continue; - if (__ieee80211_queue_stopped(local, control->queue)) - return IEEE80211_TX_FRAG_AGAIN; - if (i == tx->u.tx.num_extra_frag) { - control->tx_rate = tx->u.tx.last_frag_hwrate; - control->rate = tx->u.tx.last_frag_rate; - if (tx->u.tx.probe_last_frag) - control->flags |= - IEEE80211_TXCTL_RATE_CTRL_PROBE; - else - control->flags &= - ~IEEE80211_TXCTL_RATE_CTRL_PROBE; - } + switch (sdata->type) { + case IEEE80211_IF_TYPE_VLAN: + list_del(&sdata->u.vlan.list); + sdata->u.vlan.ap = NULL; + /* no need to tell driver */ + break; + case IEEE80211_IF_TYPE_MNTR: + local->monitors--; + if (local->monitors == 0) { + netif_tx_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_tx_unlock_bh(local->mdev); - ieee80211_dump_frame(local->mdev->name, - "TX to low-level driver", - tx->u.tx.extra_frag[i]); - ret = local->ops->tx(local_to_hw(local), - tx->u.tx.extra_frag[i], - control); - if (ret) - return IEEE80211_TX_FRAG_AGAIN; - local->mdev->trans_start = jiffies; - ieee80211_led_tx(local, 1); - tx->u.tx.extra_frag[i] = NULL; + local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + ieee80211_hw_config(local); } - kfree(tx->u.tx.extra_frag); - tx->u.tx.extra_frag = NULL; - } - return IEEE80211_TX_OK; -} - -static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_tx_control *control, int mgmt) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - ieee80211_tx_handler *handler; - struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP, res_prepare; - int ret, i; - - WARN_ON(__ieee80211_queue_pending(local, control->queue)); - - if (unlikely(skb->len < 10)) { - dev_kfree_skb(skb); - return 0; - } - - res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); - - if (res_prepare == TXRX_DROP) { - dev_kfree_skb(skb); - return 0; - } - - sta = tx.sta; - tx.u.tx.mgmt_interface = mgmt; - tx.u.tx.mode = local->hw.conf.mode; + break; + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + sdata->u.sta.state = IEEE80211_DISABLED; + del_timer_sync(&sdata->u.sta.timer); + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. + */ + synchronize_rcu(); + skb_queue_purge(&sdata->u.sta.skb_queue); - if (res_prepare == TXRX_QUEUED) { /* if it was an injected packet */ - res = TXRX_CONTINUE; - } else { - for (handler = local->tx_handlers; *handler != NULL; - handler++) { - res = (*handler)(&tx); - if (res != TXRX_CONTINUE) - break; + if (!local->ops->hw_scan && + local->scan_dev == sdata->dev) { + local->sta_scanning = 0; + cancel_delayed_work(&local->scan_work); } + flush_workqueue(local->hw.workqueue); + /* fall through */ + default: + conf.if_id = dev->ifindex; + conf.type = sdata->type; + conf.mac_addr = dev->dev_addr; + /* disable all keys for as long as this netdev is down */ + ieee80211_disable_keys(sdata); + local->ops->remove_interface(local_to_hw(local), &conf); } - skb = tx.skb; /* handlers are allowed to change skb */ - - if (sta) - sta_info_put(sta); - - if (unlikely(res == TXRX_DROP)) { - I802_DEBUG_INC(local->tx_handlers_drop); - goto drop; - } - - if (unlikely(res == TXRX_QUEUED)) { - I802_DEBUG_INC(local->tx_handlers_queued); - return 0; - } - - if (tx.u.tx.extra_frag) { - for (i = 0; i < tx.u.tx.num_extra_frag; i++) { - int next_len, dur; - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) - tx.u.tx.extra_frag[i]->data; + if (local->open_count == 0) { + if (netif_running(local->mdev)) + dev_close(local->mdev); - if (i + 1 < tx.u.tx.num_extra_frag) { - next_len = tx.u.tx.extra_frag[i + 1]->len; - } else { - next_len = 0; - tx.u.tx.rate = tx.u.tx.last_frag_rate; - tx.u.tx.last_frag_hwrate = tx.u.tx.rate->val; - } - dur = ieee80211_duration(&tx, 0, next_len); - hdr->duration_id = cpu_to_le16(dur); - } - } + if (local->ops->stop) + local->ops->stop(local_to_hw(local)); -retry: - ret = __ieee80211_tx(local, skb, &tx); - if (ret) { - struct ieee80211_tx_stored_packet *store = - &local->pending_packet[control->queue]; - - if (ret == IEEE80211_TX_FRAG_AGAIN) - skb = NULL; - set_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); - smp_mb(); - /* When the driver gets out of buffers during sending of - * fragments and calls ieee80211_stop_queue, there is - * a small window between IEEE80211_LINK_STATE_XOFF and - * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer - * gets available in that window (i.e. driver calls - * ieee80211_wake_queue), we would end up with ieee80211_tx - * called with IEEE80211_LINK_STATE_PENDING. Prevent this by - * continuing transmitting here when that situation is - * possible to have happened. */ - if (!__ieee80211_queue_stopped(local, control->queue)) { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); - goto retry; - } - memcpy(&store->control, control, - sizeof(struct ieee80211_tx_control)); - store->skb = skb; - store->extra_frag = tx.u.tx.extra_frag; - store->num_extra_frag = tx.u.tx.num_extra_frag; - store->last_frag_hwrate = tx.u.tx.last_frag_hwrate; - store->last_frag_rate = tx.u.tx.last_frag_rate; - store->last_frag_rate_ctrl_probe = tx.u.tx.probe_last_frag; + tasklet_disable(&local->tx_pending_tasklet); + tasklet_disable(&local->tasklet); } - return 0; - drop: - if (skb) - dev_kfree_skb(skb); - for (i = 0; i < tx.u.tx.num_extra_frag; i++) - if (tx.u.tx.extra_frag[i]) - dev_kfree_skb(tx.u.tx.extra_frag[i]); - kfree(tx.u.tx.extra_frag); return 0; } -static void ieee80211_tx_pending(unsigned long data) -{ - struct ieee80211_local *local = (struct ieee80211_local *)data; - struct net_device *dev = local->mdev; - struct ieee80211_tx_stored_packet *store; - struct ieee80211_txrx_data tx; - int i, ret, reschedule = 0; - - netif_tx_lock_bh(dev); - for (i = 0; i < local->hw.queues; i++) { - if (__ieee80211_queue_stopped(local, i)) - continue; - if (!__ieee80211_queue_pending(local, i)) { - reschedule = 1; - continue; - } - store = &local->pending_packet[i]; - tx.u.tx.control = &store->control; - tx.u.tx.extra_frag = store->extra_frag; - tx.u.tx.num_extra_frag = store->num_extra_frag; - tx.u.tx.last_frag_hwrate = store->last_frag_hwrate; - tx.u.tx.last_frag_rate = store->last_frag_rate; - tx.u.tx.probe_last_frag = store->last_frag_rate_ctrl_probe; - ret = __ieee80211_tx(local, store->skb, &tx); - if (ret) { - if (ret == IEEE80211_TX_FRAG_AGAIN) - store->skb = NULL; - } else { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[i]); - reschedule = 1; - } - } - netif_tx_unlock_bh(dev); - if (reschedule) { - if (!ieee80211_qdisc_installed(dev)) { - if (!__ieee80211_queue_stopped(local, 0)) - netif_wake_queue(dev); - } else - netif_schedule(dev); - } -} - -static void ieee80211_clear_tx_pending(struct ieee80211_local *local) -{ - int i, j; - struct ieee80211_tx_stored_packet *store; - - for (i = 0; i < local->hw.queues; i++) { - if (!__ieee80211_queue_pending(local, i)) - continue; - store = &local->pending_packet[i]; - kfree_skb(store->skb); - for (j = 0; j < store->num_extra_frag; j++) - kfree_skb(store->extra_frag[j]); - kfree(store->extra_frag); - clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]); - } -} - -static int ieee80211_master_start_xmit(struct sk_buff *skb, - struct net_device *dev) +static void ieee80211_set_multicast_list(struct net_device *dev) { - struct ieee80211_tx_control control; - struct ieee80211_tx_packet_data *pkt_data; - struct net_device *odev = NULL; - struct ieee80211_sub_if_data *osdata; - int headroom; - int ret; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int allmulti, promisc, sdata_allmulti, sdata_promisc; - /* - * copy control out of the skb so other people can use skb->cb - */ - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(&control, 0, sizeof(struct ieee80211_tx_control)); + allmulti = !!(dev->flags & IFF_ALLMULTI); + promisc = !!(dev->flags & IFF_PROMISC); + sdata_allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + sdata_promisc = sdata->flags & IEEE80211_SDATA_PROMISC; - if (pkt_data->ifindex) - odev = dev_get_by_index(pkt_data->ifindex); - if (unlikely(odev && !is_ieee80211_device(odev, dev))) { - dev_put(odev); - odev = NULL; - } - if (unlikely(!odev)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Discarded packet with nonexistent " - "originating device\n", dev->name); -#endif - dev_kfree_skb(skb); - return 0; + if (allmulti != sdata_allmulti) { + if (dev->flags & IFF_ALLMULTI) + atomic_inc(&local->iff_allmultis); + else + atomic_dec(&local->iff_allmultis); + sdata->flags ^= IEEE80211_SDATA_ALLMULTI; } - osdata = IEEE80211_DEV_TO_SUB_IF(odev); - headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; - if (skb_headroom(skb) < headroom) { - if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - dev_put(odev); - return 0; - } + if (promisc != sdata_promisc) { + if (dev->flags & IFF_PROMISC) + atomic_inc(&local->iff_promiscs); + else + atomic_dec(&local->iff_promiscs); + sdata->flags ^= IEEE80211_SDATA_PROMISC; } - control.ifindex = odev->ifindex; - control.type = osdata->type; - if (pkt_data->req_tx_status) - control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; - if (pkt_data->do_not_encrypt) - control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; - if (pkt_data->requeue) - control.flags |= IEEE80211_TXCTL_REQUEUE; - control.queue = pkt_data->queue; - - ret = ieee80211_tx(odev, skb, &control, - control.type == IEEE80211_IF_TYPE_MGMT); - dev_put(odev); - - return ret; + dev_mc_sync(local->mdev, dev); } +static const struct header_ops ieee80211_header_ops = { + .create = eth_header, + .parse = header_parse_80211, + .rebuild = eth_rebuild_header, + .cache = eth_header_cache, + .cache_update = eth_header_cache_update, +}; -int ieee80211_monitor_start_xmit(struct sk_buff *skb, - struct net_device *dev) +/* Must not be called for mdev */ +void ieee80211_if_setup(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_radiotap_header *prthdr = - (struct ieee80211_radiotap_header *)skb->data; - u16 len; - - /* - * there must be a radiotap header at the - * start in this case - */ - if (unlikely(prthdr->it_version)) { - /* only version 0 is supported */ - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - - skb->dev = local->mdev; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(*pkt_data)); - pkt_data->ifindex = dev->ifindex; - pkt_data->mgmt_iface = 0; - pkt_data->do_not_encrypt = 1; - - /* above needed because we set skb device to master */ - - /* - * fix up the pointers accounting for the radiotap - * header still being in there. We are being given - * a precooked IEEE80211 header so no need for - * normal processing - */ - len = le16_to_cpu(get_unaligned(&prthdr->it_len)); - skb_set_mac_header(skb, len); - skb_set_network_header(skb, len + sizeof(struct ieee80211_hdr)); - skb_set_transport_header(skb, len + sizeof(struct ieee80211_hdr)); - - /* - * pass the radiotap header up to - * the next stage intact - */ - dev_queue_xmit(skb); - - return NETDEV_TX_OK; + ether_setup(dev); + dev->header_ops = &ieee80211_header_ops; + dev->hard_start_xmit = ieee80211_subif_start_xmit; + dev->wireless_handlers = &ieee80211_iw_handler_def; + dev->set_multicast_list = ieee80211_set_multicast_list; + dev->change_mtu = ieee80211_change_mtu; + dev->open = ieee80211_open; + dev->stop = ieee80211_stop; + dev->destructor = ieee80211_if_free; } +/* WDS specialties */ -/** - * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type - * subinterfaces (wlan#, WDS, and VLAN interfaces) - * @skb: packet to be sent - * @dev: incoming interface - * - * Returns: 0 on success (and frees skb in this case) or 1 on failure (skb will - * not be freed, and caller is responsible for either retrying later or freeing - * skb). - * - * This function takes in an Ethernet header and encapsulates it with suitable - * IEEE 802.11 header based on which interface the packet is coming in. The - * encapsulated packet will then be passed to master interface, wlan#.11, for - * transmission (through low-level driver). - */ -int ieee80211_subif_start_xmit(struct sk_buff *skb, - struct net_device *dev) +int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_sub_if_data *sdata; - int ret = 1, head_need; - u16 ethertype, hdrlen, fc; - struct ieee80211_hdr hdr; - const u8 *encaps_data; - int encaps_len, skip_header_bytes; - int nh_pos, h_pos, no_encrypt = 0; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; + DECLARE_MAC_BUF(mac); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(skb->len < ETH_HLEN)) { - printk(KERN_DEBUG "%s: short skb (len=%d)\n", - dev->name, skb->len); - ret = 0; - goto fail; - } + if (compare_ether_addr(remote_addr, sdata->u.wds.remote_addr) == 0) + return 0; - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - - /* convert Ethernet header to proper 802.11 header (based on - * operation mode) */ - ethertype = (skb->data[12] << 8) | skb->data[13]; - /* TODO: handling for 802.1x authorized/unauthorized port */ - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; - - if (likely(sdata->type == IEEE80211_IF_TYPE_AP || - sdata->type == IEEE80211_IF_TYPE_VLAN)) { - fc |= IEEE80211_FCTL_FROMDS; - /* DA BSSID SA */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 24; - } else if (sdata->type == IEEE80211_IF_TYPE_WDS) { - fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; - /* RA TA DA SA */ - memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 30; - } else if (sdata->type == IEEE80211_IF_TYPE_STA) { - fc |= IEEE80211_FCTL_TODS; - /* BSSID SA DA */ - memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - hdrlen = 24; - } else if (sdata->type == IEEE80211_IF_TYPE_IBSS) { - /* DA SA BSSID */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); - hdrlen = 24; - } else { - ret = 0; - goto fail; - } + /* Create STA entry for the new peer */ + sta = sta_info_add(local, dev, remote_addr, GFP_KERNEL); + if (!sta) + return -ENOMEM; + sta_info_put(sta); - /* receiver is QoS enabled, use a QoS type frame */ - sta = sta_info_get(local, hdr.addr1); + /* Remove STA entry for the old peer */ + sta = sta_info_get(local, sdata->u.wds.remote_addr); if (sta) { - if (sta->flags & WLAN_STA_WME) { - fc |= IEEE80211_STYPE_QOS_DATA; - hdrlen += 2; - } + sta_info_free(sta); sta_info_put(sta); - } - - hdr.frame_control = cpu_to_le16(fc); - hdr.duration_id = 0; - hdr.seq_ctrl = 0; - - skip_header_bytes = ETH_HLEN; - if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { - encaps_data = bridge_tunnel_header; - encaps_len = sizeof(bridge_tunnel_header); - skip_header_bytes -= 2; - } else if (ethertype >= 0x600) { - encaps_data = rfc1042_header; - encaps_len = sizeof(rfc1042_header); - skip_header_bytes -= 2; } else { - encaps_data = NULL; - encaps_len = 0; - } - - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - - /* TODO: implement support for fragments so that there is no need to - * reallocate and copy payload; it might be enough to support one - * extra fragment that would be copied in the beginning of the frame - * data.. anyway, it would be nice to include this into skb structure - * somehow - * - * There are few options for this: - * use skb->cb as an extra space for 802.11 header - * allocate new buffer if not enough headroom - * make sure that there is enough headroom in every skb by increasing - * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and - * alloc_skb() (net/core/skbuff.c) - */ - head_need = hdrlen + encaps_len + local->tx_headroom; - head_need -= skb_headroom(skb); - - /* We are going to modify skb data, so make a copy of it if happens to - * be cloned. This could happen, e.g., with Linux bridge code passing - * us broadcast frames. */ - - if (head_need > 0 || skb_cloned(skb)) { -#if 0 - printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " - "of headroom\n", dev->name, head_need); -#endif - - if (skb_cloned(skb)) - I802_DEBUG_INC(local->tx_expand_skb_head_cloned); - else - I802_DEBUG_INC(local->tx_expand_skb_head); - /* Since we have to reallocate the buffer, make sure that there - * is enough room for possible WEP IV/ICV and TKIP (8 bytes - * before payload and 12 after). */ - if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8), - 12, GFP_ATOMIC)) { - printk(KERN_DEBUG "%s: failed to reallocate TX buffer" - "\n", dev->name); - goto fail; - } - } - - if (encaps_data) { - memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } - memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - nh_pos += hdrlen; - h_pos += hdrlen; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); - pkt_data->do_not_encrypt = no_encrypt; - - skb->dev = local->mdev; - sdata->stats.tx_packets++; - sdata->stats.tx_bytes += skb->len; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. */ - skb_set_mac_header(skb, 0); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); - - dev->trans_start = jiffies; - dev_queue_xmit(skb); - - return 0; - - fail: - if (!ret) - dev_kfree_skb(skb); - - return ret; -} - - -/* - * This is the transmit routine for the 802.11 type interfaces - * called by upper layers of the linux networking - * stack when it has a frame to transmit - */ -static int -ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_hdr *hdr; - u16 fc; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (skb->len < 10) { - dev_kfree_skb(skb); - return 0; - } - - if (skb_headroom(skb) < sdata->local->tx_headroom) { - if (pskb_expand_head(skb, sdata->local->tx_headroom, - 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return 0; - } - } - - hdr = (struct ieee80211_hdr *) skb->data; - fc = le16_to_cpu(hdr->frame_control); - - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = sdata->dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); - - skb->priority = 20; /* use hardcoded priority for mgmt TX queue */ - skb->dev = sdata->local->mdev; - - /* - * We're using the protocol field of the the frame control header - * to request TX callback for hostapd. BIT(1) is checked. - */ - if ((fc & BIT(1)) == BIT(1)) { - pkt_data->req_tx_status = 1; - fc &= ~BIT(1); - hdr->frame_control = cpu_to_le16(fc); + printk(KERN_DEBUG "%s: could not find STA entry for WDS link " + "peer %s\n", + dev->name, print_mac(mac, sdata->u.wds.remote_addr)); } - pkt_data->do_not_encrypt = !(fc & IEEE80211_FCTL_PROTECTED); - - sdata->stats.tx_packets++; - sdata->stats.tx_bytes += skb->len; - - dev_queue_xmit(skb); + /* Update WDS link data */ + memcpy(&sdata->u.wds.remote_addr, remote_addr, ETH_ALEN); return 0; } - -static void ieee80211_beacon_add_tim(struct ieee80211_local *local, - struct ieee80211_if_ap *bss, - struct sk_buff *skb) -{ - u8 *pos, *tim; - int aid0 = 0; - int i, have_bits = 0, n1, n2; - - /* Generate bitmap for TIM only if there are any STAs in power save - * mode. */ - spin_lock_bh(&local->sta_lock); - if (atomic_read(&bss->num_sta_ps) > 0) - /* in the hope that this is faster than - * checking byte-for-byte */ - have_bits = !bitmap_empty((unsigned long*)bss->tim, - IEEE80211_MAX_AID+1); - - if (bss->dtim_count == 0) - bss->dtim_count = bss->dtim_period - 1; - else - bss->dtim_count--; - - tim = pos = (u8 *) skb_put(skb, 6); - *pos++ = WLAN_EID_TIM; - *pos++ = 4; - *pos++ = bss->dtim_count; - *pos++ = bss->dtim_period; - - if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) - aid0 = 1; - - if (have_bits) { - /* Find largest even number N1 so that bits numbered 1 through - * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits - * (N2 + 1) x 8 through 2007 are 0. */ - n1 = 0; - for (i = 0; i < IEEE80211_MAX_TIM_LEN; i++) { - if (bss->tim[i]) { - n1 = i & 0xfe; - break; - } - } - n2 = n1; - for (i = IEEE80211_MAX_TIM_LEN - 1; i >= n1; i--) { - if (bss->tim[i]) { - n2 = i; - break; - } - } - - /* Bitmap control */ - *pos++ = n1 | aid0; - /* Part Virt Bitmap */ - memcpy(pos, bss->tim + n1, n2 - n1 + 1); - - tim[1] = n2 - n1 + 4; - skb_put(skb, n2 - n1); - } else { - *pos++ = aid0; /* Bitmap control */ - *pos++ = 0; /* Part Virt Bitmap */ - } - spin_unlock_bh(&local->sta_lock); -} - - -struct sk_buff * ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, - struct ieee80211_tx_control *control) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; - struct net_device *bdev; - struct ieee80211_sub_if_data *sdata = NULL; - struct ieee80211_if_ap *ap = NULL; - struct ieee80211_rate *rate; - struct rate_control_extra extra; - u8 *b_head, *b_tail; - int bh_len, bt_len; - - bdev = dev_get_by_index(if_id); - if (bdev) { - sdata = IEEE80211_DEV_TO_SUB_IF(bdev); - ap = &sdata->u.ap; - dev_put(bdev); - } - - if (!ap || sdata->type != IEEE80211_IF_TYPE_AP || - !ap->beacon_head) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "no beacon data avail for idx=%d " - "(%s)\n", if_id, bdev ? bdev->name : "N/A"); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - return NULL; - } - - /* Assume we are generating the normal beacon locally */ - b_head = ap->beacon_head; - b_tail = ap->beacon_tail; - bh_len = ap->beacon_head_len; - bt_len = ap->beacon_tail_len; - - skb = dev_alloc_skb(local->tx_headroom + - bh_len + bt_len + 256 /* maximum TIM len */); - if (!skb) - return NULL; - - skb_reserve(skb, local->tx_headroom); - memcpy(skb_put(skb, bh_len), b_head, bh_len); - - ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); - - ieee80211_beacon_add_tim(local, ap, skb); - - if (b_tail) { - memcpy(skb_put(skb, bt_len), b_tail, bt_len); - } - - if (control) { - memset(&extra, 0, sizeof(extra)); - extra.mode = local->oper_hw_mode; - - rate = rate_control_get_rate(local, local->mdev, skb, &extra); - if (!rate) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate " - "found\n", local->mdev->name); - } - dev_kfree_skb(skb); - return NULL; - } - - control->tx_rate = (local->short_preamble && - (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? - rate->val2 : rate->val; - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - control->power_level = local->hw.conf.power_level; - control->flags |= IEEE80211_TXCTL_NO_ACK; - control->retry_limit = 1; - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - } - - ap->num_beacons++; - return skb; -} -EXPORT_SYMBOL(ieee80211_beacon_get); - -__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, - size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_rate *rate; - int short_preamble = local->short_preamble; - int erp; - u16 dur; - - rate = frame_txctl->rts_rate; - erp = !!(rate->flags & IEEE80211_RATE_ERP); - - /* CTS duration */ - dur = ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - /* Data frame duration */ - dur += ieee80211_frame_duration(local, frame_len, rate->rate, - erp, short_preamble); - /* ACK duration */ - dur += ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_rts_duration); - - -__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, - size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_rate *rate; - int short_preamble = local->short_preamble; - int erp; - u16 dur; - - rate = frame_txctl->rts_rate; - erp = !!(rate->flags & IEEE80211_RATE_ERP); - - /* Data frame duration */ - dur = ieee80211_frame_duration(local, frame_len, rate->rate, - erp, short_preamble); - if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) { - /* ACK duration */ - dur += ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - } - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_ctstoself_duration); - -void ieee80211_rts_get(struct ieee80211_hw *hw, - const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, - struct ieee80211_rts *rts) -{ - const struct ieee80211_hdr *hdr = frame; - u16 fctl; - - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; - rts->frame_control = cpu_to_le16(fctl); - rts->duration = ieee80211_rts_duration(hw, frame_len, frame_txctl); - memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); - memcpy(rts->ta, hdr->addr2, sizeof(rts->ta)); -} -EXPORT_SYMBOL(ieee80211_rts_get); - -void ieee80211_ctstoself_get(struct ieee80211_hw *hw, - const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, - struct ieee80211_cts *cts) -{ - const struct ieee80211_hdr *hdr = frame; - u16 fctl; - - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; - cts->frame_control = cpu_to_le16(fctl); - cts->duration = ieee80211_ctstoself_duration(hw, frame_len, frame_txctl); - memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); -} -EXPORT_SYMBOL(ieee80211_ctstoself_get); - -struct sk_buff * -ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, - struct ieee80211_tx_control *control) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; - struct sta_info *sta; - ieee80211_tx_handler *handler; - struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP; - struct net_device *bdev; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_if_ap *bss = NULL; - - bdev = dev_get_by_index(if_id); - if (bdev) { - sdata = IEEE80211_DEV_TO_SUB_IF(bdev); - bss = &sdata->u.ap; - dev_put(bdev); - } - if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head) - return NULL; - - if (bss->dtim_count != 0) - return NULL; /* send buffered bc/mc only after DTIM beacon */ - memset(control, 0, sizeof(*control)); - while (1) { - skb = skb_dequeue(&bss->ps_bc_buf); - if (!skb) - return NULL; - local->total_ps_buffered--; - - if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) { - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; - /* more buffered multicast/broadcast frames ==> set - * MoreData flag in IEEE 802.11 header to inform PS - * STAs */ - hdr->frame_control |= - cpu_to_le16(IEEE80211_FCTL_MOREDATA); - } - - if (ieee80211_tx_prepare(&tx, skb, local->mdev, control) == 0) - break; - dev_kfree_skb_any(skb); - } - sta = tx.sta; - tx.u.tx.ps_buffered = 1; - - for (handler = local->tx_handlers; *handler != NULL; handler++) { - res = (*handler)(&tx); - if (res == TXRX_DROP || res == TXRX_QUEUED) - break; - } - dev_put(tx.dev); - skb = tx.skb; /* handlers are allowed to change skb */ - - if (res == TXRX_DROP) { - I802_DEBUG_INC(local->tx_handlers_drop); - dev_kfree_skb(skb); - skb = NULL; - } else if (res == TXRX_QUEUED) { - I802_DEBUG_INC(local->tx_handlers_queued); - skb = NULL; - } - - if (sta) - sta_info_put(sta); - - return skb; -} -EXPORT_SYMBOL(ieee80211_get_buffered_bc); +/* everything else */ static int __ieee80211_if_config(struct net_device *dev, struct sk_buff *beacon, @@ -2286,7 +454,6 @@ static int __ieee80211_if_config(struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_if_conf conf; - static u8 scan_bssid[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; if (!local->ops->config_interface || !netif_running(dev)) return 0; @@ -2295,20 +462,12 @@ static int __ieee80211_if_config(struct net_device *dev, conf.type = sdata->type; if (sdata->type == IEEE80211_IF_TYPE_STA || sdata->type == IEEE80211_IF_TYPE_IBSS) { - if (local->sta_scanning && - local->scan_dev == dev) - conf.bssid = scan_bssid; - else - conf.bssid = sdata->u.sta.bssid; + conf.bssid = sdata->u.sta.bssid; conf.ssid = sdata->u.sta.ssid; conf.ssid_len = sdata->u.sta.ssid_len; - conf.generic_elem = sdata->u.sta.extra_ie; - conf.generic_elem_len = sdata->u.sta.extra_ie_len; } else if (sdata->type == IEEE80211_IF_TYPE_AP) { conf.ssid = sdata->u.ap.ssid; conf.ssid_len = sdata->u.ap.ssid_len; - conf.generic_elem = sdata->u.ap.generic_elem; - conf.generic_elem_len = sdata->u.ap.generic_elem_len; conf.beacon = beacon; conf.beacon_control = control; } @@ -2351,7 +510,12 @@ int ieee80211_hw_config(struct ieee80211_local *local) local->hw.conf.channel = chan->chan; local->hw.conf.channel_val = chan->val; - local->hw.conf.power_level = chan->power_level; + if (!local->hw.conf.power_level) { + local->hw.conf.power_level = chan->power_level; + } else { + local->hw.conf.power_level = min(chan->power_level, + local->hw.conf.power_level); + } local->hw.conf.freq = chan->freq; local->hw.conf.phymode = mode->mode; local->hw.conf.antenna_max = chan->antenna_max; @@ -2364,1971 +528,33 @@ int ieee80211_hw_config(struct ieee80211_local *local) local->hw.conf.phymode); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - if (local->ops->config) + if (local->open_count) ret = local->ops->config(local_to_hw(local), &local->hw.conf); return ret; } - -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - /* FIX: what would be proper limits for MTU? - * This interface uses 802.3 frames. */ - if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6) { - printk(KERN_WARNING "%s: invalid MTU %d\n", - dev->name, new_mtu); - return -EINVAL; - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - dev->mtu = new_mtu; - return 0; -} - - -static int ieee80211_change_mtu_apdev(struct net_device *dev, int new_mtu) -{ - /* FIX: what would be proper limits for MTU? - * This interface uses 802.11 frames. */ - if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN) { - printk(KERN_WARNING "%s: invalid MTU %d\n", - dev->name, new_mtu); - return -EINVAL; - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - dev->mtu = new_mtu; - return 0; -} - -enum netif_tx_lock_class { - TX_LOCK_NORMAL, - TX_LOCK_MASTER, -}; - -static inline void netif_tx_lock_nested(struct net_device *dev, int subclass) -{ - spin_lock_nested(&dev->_xmit_lock, subclass); - dev->xmit_lock_owner = smp_processor_id(); -} - -static void ieee80211_set_multicast_list(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - unsigned short flags; - - netif_tx_lock_nested(local->mdev, TX_LOCK_MASTER); - if (((dev->flags & IFF_ALLMULTI) != 0) ^ (sdata->allmulti != 0)) { - if (sdata->allmulti) { - sdata->allmulti = 0; - local->iff_allmultis--; - } else { - sdata->allmulti = 1; - local->iff_allmultis++; - } - } - if (((dev->flags & IFF_PROMISC) != 0) ^ (sdata->promisc != 0)) { - if (sdata->promisc) { - sdata->promisc = 0; - local->iff_promiscs--; - } else { - sdata->promisc = 1; - local->iff_promiscs++; - } - } - if (dev->mc_count != sdata->mc_count) { - local->mc_count = local->mc_count - sdata->mc_count + - dev->mc_count; - sdata->mc_count = dev->mc_count; - } - if (local->ops->set_multicast_list) { - flags = local->mdev->flags; - if (local->iff_allmultis) - flags |= IFF_ALLMULTI; - if (local->iff_promiscs) - flags |= IFF_PROMISC; - read_lock(&local->sub_if_lock); - local->ops->set_multicast_list(local_to_hw(local), flags, - local->mc_count); - read_unlock(&local->sub_if_lock); - } - netif_tx_unlock(local->mdev); -} - -struct dev_mc_list *ieee80211_get_mc_list_item(struct ieee80211_hw *hw, - struct dev_mc_list *prev, - void **ptr) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata = *ptr; - struct dev_mc_list *mc; - - if (!prev) { - WARN_ON(sdata); - sdata = NULL; - } - if (!prev || !prev->next) { - if (sdata) - sdata = list_entry(sdata->list.next, - struct ieee80211_sub_if_data, list); - else - sdata = list_entry(local->sub_if_list.next, - struct ieee80211_sub_if_data, list); - if (&sdata->list != &local->sub_if_list) - mc = sdata->dev->mc_list; - else - mc = NULL; - } else - mc = prev->next; - - *ptr = sdata; - return mc; -} -EXPORT_SYMBOL(ieee80211_get_mc_list_item); - -static struct net_device_stats *ieee80211_get_stats(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - return &(sdata->stats); -} - -static void ieee80211_if_shutdown(struct net_device *dev) +void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - ASSERT_RTNL(); - switch (sdata->type) { - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - sdata->u.sta.state = IEEE80211_DISABLED; - del_timer_sync(&sdata->u.sta.timer); - skb_queue_purge(&sdata->u.sta.skb_queue); - if (!local->ops->hw_scan && - local->scan_dev == sdata->dev) { - local->sta_scanning = 0; - cancel_delayed_work(&local->scan_work); - } - flush_workqueue(local->hw.workqueue); - break; - } -} - -static inline int identical_mac_addr_allowed(int type1, int type2) -{ - return (type1 == IEEE80211_IF_TYPE_MNTR || - type2 == IEEE80211_IF_TYPE_MNTR || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_WDS) || - (type1 == IEEE80211_IF_TYPE_WDS && - (type2 == IEEE80211_IF_TYPE_WDS || - type2 == IEEE80211_IF_TYPE_AP)) || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_VLAN) || - (type1 == IEEE80211_IF_TYPE_VLAN && - (type2 == IEEE80211_IF_TYPE_AP || - type2 == IEEE80211_IF_TYPE_VLAN))); -} - -static int ieee80211_master_open(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - int res = -EOPNOTSUPP; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - if (sdata->dev != dev && netif_running(sdata->dev)) { - res = 0; - break; - } - } - read_unlock(&local->sub_if_lock); - return res; -} - -static int ieee80211_master_stop(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) - if (sdata->dev != dev && netif_running(sdata->dev)) - dev_close(sdata->dev); - read_unlock(&local->sub_if_lock); - - return 0; -} - -static int ieee80211_mgmt_open(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (!netif_running(local->mdev)) - return -EOPNOTSUPP; - return 0; -} - -static int ieee80211_mgmt_stop(struct net_device *dev) -{ - return 0; -} - -/* Check if running monitor interfaces should go to a "soft monitor" mode - * and switch them if necessary. */ -static inline void ieee80211_start_soft_monitor(struct ieee80211_local *local) -{ - struct ieee80211_if_init_conf conf; - - if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) && - local->ops->remove_interface) { - conf.if_id = -1; - conf.type = IEEE80211_IF_TYPE_MNTR; - conf.mac_addr = NULL; - local->ops->remove_interface(local_to_hw(local), &conf); - } -} - -/* Check if running monitor interfaces should go to a "hard monitor" mode - * and switch them if necessary. */ -static void ieee80211_start_hard_monitor(struct ieee80211_local *local) -{ - struct ieee80211_if_init_conf conf; - - if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - conf.if_id = -1; - conf.type = IEEE80211_IF_TYPE_MNTR; - conf.mac_addr = NULL; - local->ops->add_interface(local_to_hw(local), &conf); - } -} - -static int ieee80211_open(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata, *nsdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_if_init_conf conf; - int res; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - read_lock(&local->sub_if_lock); - list_for_each_entry(nsdata, &local->sub_if_list, list) { - struct net_device *ndev = nsdata->dev; - - if (ndev != dev && ndev != local->mdev && netif_running(ndev) && - compare_ether_addr(dev->dev_addr, ndev->dev_addr) == 0 && - !identical_mac_addr_allowed(sdata->type, nsdata->type)) { - read_unlock(&local->sub_if_lock); - return -ENOTUNIQ; - } - } - read_unlock(&local->sub_if_lock); - - if (sdata->type == IEEE80211_IF_TYPE_WDS && - is_zero_ether_addr(sdata->u.wds.remote_addr)) - return -ENOLINK; - - if (sdata->type == IEEE80211_IF_TYPE_MNTR && local->open_count && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - /* run the interface in a "soft monitor" mode */ - local->monitors++; - local->open_count++; - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - return 0; - } - ieee80211_start_soft_monitor(local); - - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - res = local->ops->add_interface(local_to_hw(local), &conf); - if (res) { - if (sdata->type == IEEE80211_IF_TYPE_MNTR) - ieee80211_start_hard_monitor(local); - return res; - } - - if (local->open_count == 0) { - res = 0; - tasklet_enable(&local->tx_pending_tasklet); - tasklet_enable(&local->tasklet); - if (local->ops->open) - res = local->ops->open(local_to_hw(local)); - if (res == 0) { - res = dev_open(local->mdev); - if (res) { - if (local->ops->stop) - local->ops->stop(local_to_hw(local)); - } else { - res = ieee80211_hw_config(local); - if (res && local->ops->stop) - local->ops->stop(local_to_hw(local)); - else if (!res && local->apdev) - dev_open(local->apdev); - } - } - if (res) { - if (local->ops->remove_interface) - local->ops->remove_interface(local_to_hw(local), - &conf); - return res; - } - } - local->open_count++; - - if (sdata->type == IEEE80211_IF_TYPE_MNTR) { - local->monitors++; - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - } else - ieee80211_if_config(dev); - - if (sdata->type == IEEE80211_IF_TYPE_STA && - !local->user_space_mlme) - netif_carrier_off(dev); - else - netif_carrier_on(dev); - - netif_start_queue(dev); - return 0; -} - - -static int ieee80211_stop(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->type == IEEE80211_IF_TYPE_MNTR && - local->open_count > 1 && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - /* remove "soft monitor" interface */ - local->open_count--; - local->monitors--; - if (!local->monitors) - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - return 0; - } - - netif_stop_queue(dev); - ieee80211_if_shutdown(dev); - - if (sdata->type == IEEE80211_IF_TYPE_MNTR) { - local->monitors--; - if (!local->monitors) - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - } - - local->open_count--; - if (local->open_count == 0) { - if (netif_running(local->mdev)) - dev_close(local->mdev); - if (local->apdev) - dev_close(local->apdev); - if (local->ops->stop) - local->ops->stop(local_to_hw(local)); - tasklet_disable(&local->tx_pending_tasklet); - tasklet_disable(&local->tasklet); - } - if (local->ops->remove_interface) { - struct ieee80211_if_init_conf conf; - - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - local->ops->remove_interface(local_to_hw(local), &conf); - } - - ieee80211_start_hard_monitor(local); - - return 0; -} - - -static int header_parse_80211(struct sk_buff *skb, unsigned char *haddr) -{ - memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ - return ETH_ALEN; -} - -static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) -{ - return compare_ether_addr(raddr, addr) == 0 || - is_broadcast_ether_addr(raddr); + if (local->ops->erp_ie_changed) + local->ops->erp_ie_changed(local_to_hw(local), changes, + !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION), + !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)); } - -static ieee80211_txrx_result -ieee80211_rx_h_data(struct ieee80211_txrx_data *rx) +void ieee80211_reset_erp_info(struct net_device *dev) { - struct net_device *dev = rx->dev; - struct ieee80211_local *local = rx->local; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc, hdrlen, ethertype; - u8 *payload; - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; - struct sk_buff *skb = rx->skb, *skb2; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - fc = rx->fc; - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) - return TXRX_CONTINUE; - - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) - return TXRX_DROP; - - hdrlen = ieee80211_get_hdrlen(fc); - - /* convert IEEE 802.11 header + possible LLC headers into Ethernet - * header - * IEEE 802.11 address fields: - * ToDS FromDS Addr1 Addr2 Addr3 Addr4 - * 0 0 DA SA BSSID n/a - * 0 1 DA BSSID SA n/a - * 1 0 BSSID SA DA n/a - * 1 1 RA TA DA SA - */ - - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - /* BSSID SA DA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP && - sdata->type != IEEE80211_IF_TYPE_VLAN)) { - printk(KERN_DEBUG "%s: dropped ToDS frame (BSSID=" - MAC_FMT " SA=" MAC_FMT " DA=" MAC_FMT ")\n", - dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr3)); - return TXRX_DROP; - } - break; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - /* RA TA DA SA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr4, ETH_ALEN); - - if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) { - printk(KERN_DEBUG "%s: dropped FromDS&ToDS frame (RA=" - MAC_FMT " TA=" MAC_FMT " DA=" MAC_FMT " SA=" - MAC_FMT ")\n", - rx->dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr3), - MAC_ARG(hdr->addr4)); - return TXRX_DROP; - } - break; - case IEEE80211_FCTL_FROMDS: - /* DA BSSID SA */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr3, ETH_ALEN); - - if (sdata->type != IEEE80211_IF_TYPE_STA) { - return TXRX_DROP; - } - break; - case 0: - /* DA SA BSSID */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (sdata->type != IEEE80211_IF_TYPE_IBSS) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: dropped IBSS frame (DA=" - MAC_FMT " SA=" MAC_FMT " BSSID=" MAC_FMT - ")\n", - dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), - MAC_ARG(hdr->addr3)); - } - return TXRX_DROP; - } - break; - } - - payload = skb->data + hdrlen; - - if (unlikely(skb->len - hdrlen < 8)) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: RX too short data frame " - "payload\n", dev->name); - } - return TXRX_DROP; - } - - ethertype = (payload[6] << 8) | payload[7]; - - if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && - ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - compare_ether_addr(payload, bridge_tunnel_header) == 0)) { - /* remove RFC1042 or Bridge-Tunnel encapsulation and - * replace EtherType */ - skb_pull(skb, hdrlen + 6); - memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); - } else { - struct ethhdr *ehdr; - __be16 len; - skb_pull(skb, hdrlen); - len = htons(skb->len); - ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); - memcpy(ehdr->h_dest, dst, ETH_ALEN); - memcpy(ehdr->h_source, src, ETH_ALEN); - ehdr->h_proto = len; - } - skb->dev = dev; - - skb2 = NULL; - - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP - || sdata->type == IEEE80211_IF_TYPE_VLAN) && rx->u.rx.ra_match) { - if (is_multicast_ether_addr(skb->data)) { - /* send multicast frames both to higher layers in - * local net stack and back to the wireless media */ - skb2 = skb_copy(skb, GFP_ATOMIC); - if (!skb2) - printk(KERN_DEBUG "%s: failed to clone " - "multicast frame\n", dev->name); - } else { - struct sta_info *dsta; - dsta = sta_info_get(local, skb->data); - if (dsta && !dsta->dev) { - printk(KERN_DEBUG "Station with null dev " - "structure!\n"); - } else if (dsta && dsta->dev == dev) { - /* Destination station is associated to this - * AP, so send the frame directly to it and - * do not pass the frame to local net stack. - */ - skb2 = skb; - skb = NULL; - } - if (dsta) - sta_info_put(dsta); - } - } - - if (skb) { - /* deliver to local stack */ - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); - } - - if (skb2) { - /* send to wireless media */ - skb2->protocol = __constant_htons(ETH_P_802_3); - skb_set_network_header(skb2, 0); - skb_set_mac_header(skb2, 0); - dev_queue_xmit(skb2); - } - - return TXRX_QUEUED; -} - - -static struct ieee80211_rate * -ieee80211_get_rate(struct ieee80211_local *local, int phymode, int hw_rate) -{ - struct ieee80211_hw_mode *mode; - int r; - - list_for_each_entry(mode, &local->modes_list, list) { - if (mode->mode != phymode) - continue; - for (r = 0; r < mode->num_rates; r++) { - struct ieee80211_rate *rate = &mode->rates[r]; - if (rate->val == hw_rate || - (rate->flags & IEEE80211_RATE_PREAMBLE2 && - rate->val2 == hw_rate)) - return rate; - } - } - - return NULL; -} - -static void -ieee80211_fill_frame_info(struct ieee80211_local *local, - struct ieee80211_frame_info *fi, - struct ieee80211_rx_status *status) -{ - if (status) { - struct timespec ts; - struct ieee80211_rate *rate; - - jiffies_to_timespec(jiffies, &ts); - fi->hosttime = cpu_to_be64((u64) ts.tv_sec * 1000000 + - ts.tv_nsec / 1000); - fi->mactime = cpu_to_be64(status->mactime); - switch (status->phymode) { - case MODE_IEEE80211A: - fi->phytype = htonl(ieee80211_phytype_ofdm_dot11_a); - break; - case MODE_IEEE80211B: - fi->phytype = htonl(ieee80211_phytype_dsss_dot11_b); - break; - case MODE_IEEE80211G: - fi->phytype = htonl(ieee80211_phytype_pbcc_dot11_g); - break; - case MODE_ATHEROS_TURBO: - fi->phytype = - htonl(ieee80211_phytype_dsss_dot11_turbo); - break; - default: - fi->phytype = htonl(0xAAAAAAAA); - break; - } - fi->channel = htonl(status->channel); - rate = ieee80211_get_rate(local, status->phymode, - status->rate); - if (rate) { - fi->datarate = htonl(rate->rate); - if (rate->flags & IEEE80211_RATE_PREAMBLE2) { - if (status->rate == rate->val) - fi->preamble = htonl(2); /* long */ - else if (status->rate == rate->val2) - fi->preamble = htonl(1); /* short */ - } else - fi->preamble = htonl(0); - } else { - fi->datarate = htonl(0); - fi->preamble = htonl(0); - } - - fi->antenna = htonl(status->antenna); - fi->priority = htonl(0xffffffff); /* no clue */ - fi->ssi_type = htonl(ieee80211_ssi_raw); - fi->ssi_signal = htonl(status->ssi); - fi->ssi_noise = 0x00000000; - fi->encoding = 0; - } else { - /* clear everything because we really don't know. - * the msg_type field isn't present on monitor frames - * so we don't know whether it will be present or not, - * but it's ok to not clear it since it'll be assigned - * anyway */ - memset(fi, 0, sizeof(*fi) - sizeof(fi->msg_type)); - - fi->ssi_type = htonl(ieee80211_ssi_none); - } - fi->version = htonl(IEEE80211_FI_VERSION); - fi->length = cpu_to_be32(sizeof(*fi) - sizeof(fi->msg_type)); -} - -/* this routine is actually not just for this, but also - * for pushing fake 'management' frames into userspace. - * it shall be replaced by a netlink-based system. */ -void -ieee80211_rx_mgmt(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_rx_status *status, u32 msg_type) -{ - struct ieee80211_frame_info *fi; - const size_t hlen = sizeof(struct ieee80211_frame_info); - struct ieee80211_sub_if_data *sdata; - - skb->dev = local->apdev; - - sdata = IEEE80211_DEV_TO_SUB_IF(local->apdev); - - if (skb_headroom(skb) < hlen) { - I802_DEBUG_INC(local->rx_expand_skb_head); - if (pskb_expand_head(skb, hlen, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return; - } - } - - fi = (struct ieee80211_frame_info *) skb_push(skb, hlen); - - ieee80211_fill_frame_info(local, fi, status); - fi->msg_type = htonl(msg_type); - - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - skb_set_mac_header(skb, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); -} - -static void -ieee80211_rx_monitor(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_rx_status *status) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - struct ieee80211_rate *rate; - struct ieee80211_rtap_hdr { - struct ieee80211_radiotap_header hdr; - u8 flags; - u8 rate; - __le16 chan_freq; - __le16 chan_flags; - u8 antsignal; - } __attribute__ ((packed)) *rthdr; - - skb->dev = dev; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (status->flag & RX_FLAG_RADIOTAP) - goto out; - - if (skb_headroom(skb) < sizeof(*rthdr)) { - I802_DEBUG_INC(local->rx_expand_skb_head); - if (pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return; - } - } - - rthdr = (struct ieee80211_rtap_hdr *) skb_push(skb, sizeof(*rthdr)); - memset(rthdr, 0, sizeof(*rthdr)); - rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); - rthdr->hdr.it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | - (1 << IEEE80211_RADIOTAP_RATE) | - (1 << IEEE80211_RADIOTAP_CHANNEL) | - (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL)); - rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ? - IEEE80211_RADIOTAP_F_FCS : 0; - rate = ieee80211_get_rate(local, status->phymode, status->rate); - if (rate) - rthdr->rate = rate->rate / 5; - rthdr->chan_freq = cpu_to_le16(status->freq); - rthdr->chan_flags = - status->phymode == MODE_IEEE80211A ? - cpu_to_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ) : - cpu_to_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ); - rthdr->antsignal = status->ssi; - - out: - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - skb_set_mac_header(skb, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); -} - -int ieee80211_radar_status(struct ieee80211_hw *hw, int channel, - int radar, int radar_type) -{ - struct sk_buff *skb; - struct ieee80211_radar_info *msg; - struct ieee80211_local *local = hw_to_local(hw); - - if (!local->apdev) - return 0; - - skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) + - sizeof(struct ieee80211_radar_info)); - - if (!skb) - return -ENOMEM; - skb_reserve(skb, sizeof(struct ieee80211_frame_info)); - - msg = (struct ieee80211_radar_info *) - skb_put(skb, sizeof(struct ieee80211_radar_info)); - msg->channel = channel; - msg->radar = radar; - msg->radar_type = radar_type; - - ieee80211_rx_mgmt(local, skb, NULL, ieee80211_msg_radar); - return 0; -} -EXPORT_SYMBOL(ieee80211_radar_status); - - -static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) -{ - struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); - - if (sdata->bss) - atomic_inc(&sdata->bss->num_sta_ps); - sta->flags |= WLAN_STA_PS; - sta->pspoll = 0; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d enters power " - "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ -} - - -static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - int sent = 0; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; - - sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); - if (sdata->bss) - atomic_dec(&sdata->bss->num_sta_ps); - sta->flags &= ~(WLAN_STA_PS | WLAN_STA_TIM); - sta->pspoll = 0; - if (!skb_queue_empty(&sta->ps_tx_buf)) { - if (local->ops->set_tim) - local->ops->set_tim(local_to_hw(local), sta->aid, 0); - if (sdata->bss) - bss_tim_clear(local, sdata->bss, sta->aid); - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d exits power " - "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - /* Send all buffered frames to the station */ - while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - sent++; - pkt_data->requeue = 1; - dev_queue_xmit(skb); - } - while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - local->total_ps_buffered--; - sent++; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d send PS frame " - "since STA not sleeping anymore\n", dev->name, - MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - pkt_data->requeue = 1; - dev_queue_xmit(skb); - } - - return sent; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx) -{ - struct sk_buff *skb; - int no_pending_pkts; - - if (likely(!rx->sta || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL || - !rx->u.rx.ra_match)) - return TXRX_CONTINUE; - - skb = skb_dequeue(&rx->sta->tx_filtered); - if (!skb) { - skb = skb_dequeue(&rx->sta->ps_tx_buf); - if (skb) - rx->local->total_ps_buffered--; - } - no_pending_pkts = skb_queue_empty(&rx->sta->tx_filtered) && - skb_queue_empty(&rx->sta->ps_tx_buf); - - if (skb) { - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; - - /* tell TX path to send one frame even though the STA may - * still remain is PS mode after this frame exchange */ - rx->sta->pspoll = 1; - -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS Poll (entries " - "after %d)\n", - MAC_ARG(rx->sta->addr), rx->sta->aid, - skb_queue_len(&rx->sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - - /* Use MoreData flag to indicate whether there are more - * buffered frames for this STA */ - if (no_pending_pkts) { - hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); - rx->sta->flags &= ~WLAN_STA_TIM; - } else - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); - - dev_queue_xmit(skb); - - if (no_pending_pkts) { - if (rx->local->ops->set_tim) - rx->local->ops->set_tim(local_to_hw(rx->local), - rx->sta->aid, 0); - if (rx->sdata->bss) - bss_tim_clear(rx->local, rx->sdata->bss, rx->sta->aid); - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - } else if (!rx->u.rx.sent_ps_buffered) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " sent PS Poll even " - "though there is no buffered frames for it\n", - rx->dev->name, MAC_ARG(rx->sta->addr)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - - } - - /* Free PS Poll skb here instead of returning TXRX_DROP that would - * count as an dropped frame. */ - dev_kfree_skb(rx->skb); - - return TXRX_QUEUED; -} - - -static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, - unsigned int frag, unsigned int seq, int rx_queue, - struct sk_buff **skb) -{ - struct ieee80211_fragment_entry *entry; - int idx; - - idx = sdata->fragment_next; - entry = &sdata->fragments[sdata->fragment_next++]; - if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) - sdata->fragment_next = 0; - - if (!skb_queue_empty(&entry->skb_list)) { -#ifdef CONFIG_MAC80211_DEBUG - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) entry->skb_list.next->data; - printk(KERN_DEBUG "%s: RX reassembly removed oldest " - "fragment entry (idx=%d age=%lu seq=%d last_frag=%d " - "addr1=" MAC_FMT " addr2=" MAC_FMT "\n", - sdata->dev->name, idx, - jiffies - entry->first_frag_time, entry->seq, - entry->last_frag, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2)); -#endif /* CONFIG_MAC80211_DEBUG */ - __skb_queue_purge(&entry->skb_list); - } - - __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ - *skb = NULL; - entry->first_frag_time = jiffies; - entry->seq = seq; - entry->rx_queue = rx_queue; - entry->last_frag = frag; - entry->ccmp = 0; - entry->extra_len = 0; - - return entry; -} - - -static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, - u16 fc, unsigned int frag, unsigned int seq, - int rx_queue, struct ieee80211_hdr *hdr) -{ - struct ieee80211_fragment_entry *entry; - int i, idx; - - idx = sdata->fragment_next; - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { - struct ieee80211_hdr *f_hdr; - u16 f_fc; - - idx--; - if (idx < 0) - idx = IEEE80211_FRAGMENT_MAX - 1; - - entry = &sdata->fragments[idx]; - if (skb_queue_empty(&entry->skb_list) || entry->seq != seq || - entry->rx_queue != rx_queue || - entry->last_frag + 1 != frag) - continue; - - f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; - f_fc = le16_to_cpu(f_hdr->frame_control); - - if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) || - compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 || - compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0) - continue; - - if (entry->first_frag_time + 2 * HZ < jiffies) { - __skb_queue_purge(&entry->skb_list); - continue; - } - return entry; - } - - return NULL; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_hdr *hdr; - u16 sc; - unsigned int frag, seq; - struct ieee80211_fragment_entry *entry; - struct sk_buff *skb; - - hdr = (struct ieee80211_hdr *) rx->skb->data; - sc = le16_to_cpu(hdr->seq_ctrl); - frag = sc & IEEE80211_SCTL_FRAG; - - if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) || - (rx->skb)->len < 24 || - is_multicast_ether_addr(hdr->addr1))) { - /* not fragmented */ - goto out; - } - I802_DEBUG_INC(rx->local->rx_handlers_fragments); - - seq = (sc & IEEE80211_SCTL_SEQ) >> 4; - - if (frag == 0) { - /* This is the first fragment of a new frame. */ - entry = ieee80211_reassemble_add(rx->sdata, frag, seq, - rx->u.rx.queue, &(rx->skb)); - if (rx->key && rx->key->alg == ALG_CCMP && - (rx->fc & IEEE80211_FCTL_PROTECTED)) { - /* Store CCMP PN so that we can verify that the next - * fragment has a sequential PN value. */ - entry->ccmp = 1; - memcpy(entry->last_pn, - rx->key->u.ccmp.rx_pn[rx->u.rx.queue], - CCMP_PN_LEN); - } - return TXRX_QUEUED; - } - - /* This is a fragment for a frame that should already be pending in - * fragment cache. Add this fragment to the end of the pending entry. - */ - entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq, - rx->u.rx.queue, hdr); - if (!entry) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); - return TXRX_DROP; - } - - /* Verify that MPDUs within one MSDU have sequential PN values. - * (IEEE 802.11i, 8.3.3.4.5) */ - if (entry->ccmp) { - int i; - u8 pn[CCMP_PN_LEN], *rpn; - if (!rx->key || rx->key->alg != ALG_CCMP) - return TXRX_DROP; - memcpy(pn, entry->last_pn, CCMP_PN_LEN); - for (i = CCMP_PN_LEN - 1; i >= 0; i--) { - pn[i]++; - if (pn[i]) - break; - } - rpn = rx->key->u.ccmp.rx_pn[rx->u.rx.queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) { - printk(KERN_DEBUG "%s: defrag: CCMP PN not sequential" - " A2=" MAC_FMT " PN=%02x%02x%02x%02x%02x%02x " - "(expected %02x%02x%02x%02x%02x%02x)\n", - rx->dev->name, MAC_ARG(hdr->addr2), - rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], rpn[5], - pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); - return TXRX_DROP; - } - memcpy(entry->last_pn, pn, CCMP_PN_LEN); - } - - skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc)); - __skb_queue_tail(&entry->skb_list, rx->skb); - entry->last_frag = frag; - entry->extra_len += rx->skb->len; - if (rx->fc & IEEE80211_FCTL_MOREFRAGS) { - rx->skb = NULL; - return TXRX_QUEUED; - } - - rx->skb = __skb_dequeue(&entry->skb_list); - if (skb_tailroom(rx->skb) < entry->extra_len) { - I802_DEBUG_INC(rx->local->rx_expand_skb_head2); - if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len, - GFP_ATOMIC))) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); - __skb_queue_purge(&entry->skb_list); - return TXRX_DROP; - } - } - while ((skb = __skb_dequeue(&entry->skb_list))) { - memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len); - dev_kfree_skb(skb); - } - - /* Complete frame has been reassembled - process it now */ - rx->fragmented = 1; - - out: - if (rx->sta) - rx->sta->rx_packets++; - if (is_multicast_ether_addr(hdr->addr1)) - rx->local->dot11MulticastReceivedFrameCount++; - else - ieee80211_led_rx(rx->local); - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_monitor(struct ieee80211_txrx_data *rx) -{ - if (rx->sdata->type == IEEE80211_IF_TYPE_MNTR) { - ieee80211_rx_monitor(rx->dev, rx->skb, rx->u.rx.status); - return TXRX_QUEUED; - } - - if (rx->u.rx.status->flag & RX_FLAG_RADIOTAP) - skb_pull(rx->skb, ieee80211_get_radiotap_len(rx->skb)); - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_check(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_hdr *hdr; - int always_sta_key; - hdr = (struct ieee80211_hdr *) rx->skb->data; - - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { - if (unlikely(rx->fc & IEEE80211_FCTL_RETRY && - rx->sta->last_seq_ctrl[rx->u.rx.queue] == - hdr->seq_ctrl)) { - if (rx->u.rx.ra_match) { - rx->local->dot11FrameDuplicateCount++; - rx->sta->num_duplicates++; - } - return TXRX_DROP; - } else - rx->sta->last_seq_ctrl[rx->u.rx.queue] = hdr->seq_ctrl; - } - - if ((rx->local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) && - rx->skb->len > FCS_LEN) - skb_trim(rx->skb, rx->skb->len - FCS_LEN); - - if (unlikely(rx->skb->len < 16)) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_short); - return TXRX_DROP; - } - - if (!rx->u.rx.ra_match) - rx->skb->pkt_type = PACKET_OTHERHOST; - else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0) - rx->skb->pkt_type = PACKET_HOST; - else if (is_multicast_ether_addr(hdr->addr1)) { - if (is_broadcast_ether_addr(hdr->addr1)) - rx->skb->pkt_type = PACKET_BROADCAST; - else - rx->skb->pkt_type = PACKET_MULTICAST; - } else - rx->skb->pkt_type = PACKET_OTHERHOST; - - /* Drop disallowed frame classes based on STA auth/assoc state; - * IEEE 802.11, Chap 5.5. - * - * 80211.o does filtering only based on association state, i.e., it - * drops Class 3 frames from not associated stations. hostapd sends - * deauth/disassoc frames when needed. In addition, hostapd is - * responsible for filtering on both auth and assoc states. - */ - if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || - ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && - rx->sdata->type != IEEE80211_IF_TYPE_IBSS && - (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { - if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && - !(rx->fc & IEEE80211_FCTL_TODS) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) - || !rx->u.rx.ra_match) { - /* Drop IBSS frames and frames for other hosts - * silently. */ - return TXRX_DROP; - } - - if (!rx->local->apdev) - return TXRX_DROP; - - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_sta_not_assoc); - return TXRX_QUEUED; - } - - if (rx->sdata->type == IEEE80211_IF_TYPE_STA) - always_sta_key = 0; - else - always_sta_key = 1; - - if (rx->sta && rx->sta->key && always_sta_key) { - rx->key = rx->sta->key; - } else { - if (rx->sta && rx->sta->key) - rx->key = rx->sta->key; - else - rx->key = rx->sdata->default_key; - - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && - rx->fc & IEEE80211_FCTL_PROTECTED) { - int keyidx = ieee80211_wep_get_keyidx(rx->skb); - - if (keyidx >= 0 && keyidx < NUM_DEFAULT_KEYS && - (!rx->sta || !rx->sta->key || keyidx > 0)) - rx->key = rx->sdata->keys[keyidx]; - - if (!rx->key) { - if (!rx->u.rx.ra_match) - return TXRX_DROP; - printk(KERN_DEBUG "%s: RX WEP frame with " - "unknown keyidx %d (A1=" MAC_FMT " A2=" - MAC_FMT " A3=" MAC_FMT ")\n", - rx->dev->name, keyidx, - MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), - MAC_ARG(hdr->addr3)); - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt( - rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_wep_frame_unknown_key); - return TXRX_QUEUED; - } - } - } - - if (rx->fc & IEEE80211_FCTL_PROTECTED && rx->key && rx->u.rx.ra_match) { - rx->key->tx_rx_count++; - if (unlikely(rx->local->key_tx_rx_threshold && - rx->key->tx_rx_count > - rx->local->key_tx_rx_threshold)) { - ieee80211_key_threshold_notify(rx->dev, rx->key, - rx->sta); - } - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx) -{ - struct sta_info *sta = rx->sta; - struct net_device *dev = rx->dev; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - - if (!sta) - return TXRX_CONTINUE; - - /* Update last_rx only for IBSS packets which are for the current - * BSSID to avoid keeping the current IBSS network alive in cases where - * other STAs are using different BSSID. */ - if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) { - u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len); - if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) - sta->last_rx = jiffies; - } else - if (!is_multicast_ether_addr(hdr->addr1) || - rx->sdata->type == IEEE80211_IF_TYPE_STA) { - /* Update last_rx only for unicast frames in order to prevent - * the Probe Request frames (the only broadcast frames from a - * STA in infrastructure mode) from keeping a connection alive. - */ - sta->last_rx = jiffies; - } - - if (!rx->u.rx.ra_match) - return TXRX_CONTINUE; - - sta->rx_fragments++; - sta->rx_bytes += rx->skb->len; - sta->last_rssi = (sta->last_rssi * 15 + - rx->u.rx.status->ssi) / 16; - sta->last_signal = (sta->last_signal * 15 + - rx->u.rx.status->signal) / 16; - sta->last_noise = (sta->last_noise * 15 + - rx->u.rx.status->noise) / 16; - - if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) { - /* Change STA power saving mode only in the end of a frame - * exchange sequence */ - if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM)) - rx->u.rx.sent_ps_buffered += ap_sta_ps_end(dev, sta); - else if (!(sta->flags & WLAN_STA_PS) && - (rx->fc & IEEE80211_FCTL_PM)) - ap_sta_ps_start(dev, sta); - } - - /* Drop data::nullfunc frames silently, since they are used only to - * control station power saving mode. */ - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); - /* Update counter and free packet here to avoid counting this - * as a dropped packed. */ - sta->rx_packets++; - dev_kfree_skb(rx->skb); - return TXRX_QUEUED; - } - - return TXRX_CONTINUE; -} /* ieee80211_rx_h_sta_process */ - - -static ieee80211_txrx_result -ieee80211_rx_h_wep_weak_iv_detection(struct ieee80211_txrx_data *rx) -{ - if (!rx->sta || !(rx->fc & IEEE80211_FCTL_PROTECTED) || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || - !rx->key || rx->key->alg != ALG_WEP || !rx->u.rx.ra_match) - return TXRX_CONTINUE; - - /* Check for weak IVs, if hwaccel did not remove IV from the frame */ - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) || - rx->key->force_sw_encrypt) { - u8 *iv = ieee80211_wep_is_weak_iv(rx->skb, rx->key); - if (iv) { - rx->sta->wep_weak_iv_count++; - } - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_wep_decrypt(struct ieee80211_txrx_data *rx) -{ - /* If the device handles decryption totally, skip this test */ - if (rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) - return TXRX_CONTINUE; - - if ((rx->key && rx->key->alg != ALG_WEP) || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) - return TXRX_CONTINUE; - - if (!rx->key) { - printk(KERN_DEBUG "%s: RX WEP frame, but no key set\n", - rx->dev->name); - return TXRX_DROP; - } - - if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED) || - rx->key->force_sw_encrypt) { - if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { - printk(KERN_DEBUG "%s: RX WEP frame, decrypt " - "failed\n", rx->dev->name); - return TXRX_DROP; - } - } else if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { - ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); - /* remove ICV */ - skb_trim(rx->skb, rx->skb->len - 4); - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx) -{ - if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) && - rx->sdata->type != IEEE80211_IF_TYPE_STA && rx->u.rx.ra_match) { - /* Pass both encrypted and unencrypted EAPOL frames to user - * space for processing. */ - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_normal); - return TXRX_QUEUED; - } - - if (unlikely(rx->sdata->ieee802_1x && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && - (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) && - !ieee80211_is_eapol(rx->skb))) { -#ifdef CONFIG_MAC80211_DEBUG - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) rx->skb->data; - printk(KERN_DEBUG "%s: dropped frame from " MAC_FMT - " (unauthorized port)\n", rx->dev->name, - MAC_ARG(hdr->addr2)); -#endif /* CONFIG_MAC80211_DEBUG */ - return TXRX_DROP; - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) -{ - /* If the device handles decryption totally, skip this test */ - if (rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) - return TXRX_CONTINUE; - - /* Drop unencrypted frames if key is set. */ - if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && - (rx->key || rx->sdata->drop_unencrypted) && - (rx->sdata->eapol == 0 || - !ieee80211_is_eapol(rx->skb)))) { - printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " - "encryption\n", rx->dev->name); - return TXRX_DROP; - } - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_sub_if_data *sdata; - - if (!rx->u.rx.ra_match) - return TXRX_DROP; - - sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); - if ((sdata->type == IEEE80211_IF_TYPE_STA || - sdata->type == IEEE80211_IF_TYPE_IBSS) && - !rx->local->user_space_mlme) { - ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status); - } else { - /* Management frames are sent to hostapd for processing */ - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_normal); - } - return TXRX_QUEUED; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_local *local = rx->local; - struct sk_buff *skb = rx->skb; - - if (unlikely(local->sta_scanning != 0)) { - ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status); - return TXRX_QUEUED; - } - - if (unlikely(rx->u.rx.in_scan)) { - /* scanning finished during invoking of handlers */ - I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); - return TXRX_DROP; - } - - return TXRX_CONTINUE; -} - - -static void ieee80211_rx_michael_mic_report(struct net_device *dev, - struct ieee80211_hdr *hdr, - struct sta_info *sta, - struct ieee80211_txrx_data *rx) -{ - int keyidx, hdrlen; - - hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb); - if (rx->skb->len >= hdrlen + 4) - keyidx = rx->skb->data[hdrlen + 3] >> 6; - else - keyidx = -1; - - /* TODO: verify that this is not triggered by fragmented - * frames (hw does not verify MIC for them). */ - printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC " - "failure from " MAC_FMT " to " MAC_FMT " keyidx=%d\n", - dev->name, MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr1), keyidx); - - if (!sta) { - /* Some hardware versions seem to generate incorrect - * Michael MIC reports; ignore them to avoid triggering - * countermeasures. */ - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for unknown address " MAC_FMT "\n", - dev->name, MAC_ARG(hdr->addr2)); - goto ignore; - } - - if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) { - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame with no ISWEP flag (src " - MAC_FMT ")\n", dev->name, MAC_ARG(hdr->addr2)); - goto ignore; - } - - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && - rx->sdata->type == IEEE80211_IF_TYPE_AP) { - keyidx = ieee80211_wep_get_keyidx(rx->skb); - /* AP with Pairwise keys support should never receive Michael - * MIC errors for non-zero keyidx because these are reserved - * for group keys and only the AP is sending real multicast - * frames in BSS. */ - if (keyidx) { - printk(KERN_DEBUG "%s: ignored Michael MIC error for " - "a frame with non-zero keyidx (%d) (src " MAC_FMT - ")\n", dev->name, keyidx, MAC_ARG(hdr->addr2)); - goto ignore; - } - } - - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) { - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame that cannot be encrypted " - "(fc=0x%04x) (src " MAC_FMT ")\n", - dev->name, rx->fc, MAC_ARG(hdr->addr2)); - goto ignore; - } - - do { - union iwreq_data wrqu; - char *buf = kmalloc(128, GFP_ATOMIC); - if (!buf) - break; - - /* TODO: needed parameters: count, key type, TSC */ - sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" - "keyid=%d %scast addr=" MAC_FMT ")", - keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni", - MAC_ARG(hdr->addr2)); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = strlen(buf); - wireless_send_event(rx->dev, IWEVCUSTOM, &wrqu, buf); - kfree(buf); - } while (0); - - /* TODO: consider verifying the MIC error report with software - * implementation if we get too many spurious reports from the - * hardware. */ - if (!rx->local->apdev) - goto ignore; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_michael_mic_failure); - return; - - ignore: - dev_kfree_skb(rx->skb); - rx->skb = NULL; -} - -static inline ieee80211_txrx_result __ieee80211_invoke_rx_handlers( - struct ieee80211_local *local, - ieee80211_rx_handler *handlers, - struct ieee80211_txrx_data *rx, - struct sta_info *sta) -{ - ieee80211_rx_handler *handler; - ieee80211_txrx_result res = TXRX_DROP; - - for (handler = handlers; *handler != NULL; handler++) { - res = (*handler)(rx); - if (res != TXRX_CONTINUE) { - if (res == TXRX_DROP) { - I802_DEBUG_INC(local->rx_handlers_drop); - if (sta) - sta->rx_dropped++; - } - if (res == TXRX_QUEUED) - I802_DEBUG_INC(local->rx_handlers_queued); - break; - } - } - - if (res == TXRX_DROP) { - dev_kfree_skb(rx->skb); - } - return res; -} - -static inline void ieee80211_invoke_rx_handlers(struct ieee80211_local *local, - ieee80211_rx_handler *handlers, - struct ieee80211_txrx_data *rx, - struct sta_info *sta) -{ - if (__ieee80211_invoke_rx_handlers(local, handlers, rx, sta) == - TXRX_CONTINUE) - dev_kfree_skb(rx->skb); -} - -/* - * This is the receive path handler. It is called by a low level driver when an - * 802.11 MPDU is received from the hardware. - */ -void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - struct ieee80211_hdr *hdr; - struct ieee80211_txrx_data rx; - u16 type; - int multicast; - int radiotap_len = 0; - - if (status->flag & RX_FLAG_RADIOTAP) { - radiotap_len = ieee80211_get_radiotap_len(skb); - skb_pull(skb, radiotap_len); - } - - hdr = (struct ieee80211_hdr *) skb->data; - memset(&rx, 0, sizeof(rx)); - rx.skb = skb; - rx.local = local; - - rx.u.rx.status = status; - rx.fc = skb->len >= 2 ? le16_to_cpu(hdr->frame_control) : 0; - type = rx.fc & IEEE80211_FCTL_FTYPE; - if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) - local->dot11ReceivedFragmentCount++; - multicast = is_multicast_ether_addr(hdr->addr1); - - if (skb->len >= 16) - sta = rx.sta = sta_info_get(local, hdr->addr2); - else - sta = rx.sta = NULL; - - if (sta) { - rx.dev = sta->dev; - rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev); - } - - if ((status->flag & RX_FLAG_MMIC_ERROR)) { - ieee80211_rx_michael_mic_report(local->mdev, hdr, sta, &rx); - goto end; - } - - if (unlikely(local->sta_scanning)) - rx.u.rx.in_scan = 1; - - if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx, - sta) != TXRX_CONTINUE) - goto end; - skb = rx.skb; - - skb_push(skb, radiotap_len); - if (sta && !sta->assoc_ap && !(sta->flags & WLAN_STA_WDS) && - !local->iff_promiscs && !multicast) { - rx.u.rx.ra_match = 1; - ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx, - sta); - } else { - struct ieee80211_sub_if_data *prev = NULL; - struct sk_buff *skb_new; - u8 *bssid = ieee80211_get_bssid(hdr, skb->len - radiotap_len); - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - rx.u.rx.ra_match = 1; - switch (sdata->type) { - case IEEE80211_IF_TYPE_STA: - if (!bssid) - continue; - if (!ieee80211_bssid_match(bssid, - sdata->u.sta.bssid)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } else if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) { - if (!sdata->promisc) - continue; - rx.u.rx.ra_match = 0; - } - break; - case IEEE80211_IF_TYPE_IBSS: - if (!bssid) - continue; - if (!ieee80211_bssid_match(bssid, - sdata->u.sta.bssid)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } else if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) { - if (!sdata->promisc) - continue; - rx.u.rx.ra_match = 0; - } else if (!sta) - sta = rx.sta = - ieee80211_ibss_add_sta(sdata->dev, - skb, bssid, - hdr->addr2); - break; - case IEEE80211_IF_TYPE_AP: - if (!bssid) { - if (compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) - continue; - } else if (!ieee80211_bssid_match(bssid, - sdata->dev->dev_addr)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } - if (sdata->dev == local->mdev && - !rx.u.rx.in_scan) - /* do not receive anything via - * master device when not scanning */ - continue; - break; - case IEEE80211_IF_TYPE_WDS: - if (bssid || - (rx.fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) - continue; - if (compare_ether_addr(sdata->u.wds.remote_addr, - hdr->addr2) != 0) - continue; - break; - } - - if (prev) { - skb_new = skb_copy(skb, GFP_ATOMIC); - if (!skb_new) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: failed to copy " - "multicast frame for %s", - local->mdev->name, prev->dev->name); - continue; - } - rx.skb = skb_new; - rx.dev = prev->dev; - rx.sdata = prev; - ieee80211_invoke_rx_handlers(local, - local->rx_handlers, - &rx, sta); - } - prev = sdata; - } - if (prev) { - rx.skb = skb; - rx.dev = prev->dev; - rx.sdata = prev; - ieee80211_invoke_rx_handlers(local, local->rx_handlers, - &rx, sta); - } else - dev_kfree_skb(skb); - read_unlock(&local->sub_if_lock); - } - - end: - if (sta) - sta_info_put(sta); -} -EXPORT_SYMBOL(__ieee80211_rx); - -static ieee80211_txrx_result -ieee80211_tx_h_load_stats(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_local *local = tx->local; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - - /* TODO: this could be part of tx_status handling, so that the number - * of retries would be known; TX rate should in that case be stored - * somewhere with the packet */ - - /* Estimate total channel use caused by this frame */ - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (mode->mode == MODE_IEEE80211A || - mode->mode == MODE_ATHEROS_TURBO || - mode->mode == MODE_ATHEROS_TURBOG || - (mode->mode == MODE_IEEE80211G && - tx->u.tx.rate->flags & IEEE80211_RATE_ERP)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_RTS_CTS) - load += 2 * hdrtime; - else if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) - load += hdrtime; - - load += skb->len * tx->u.tx.rate->rate_inv; - - if (tx->u.tx.extra_frag) { - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - load += 2 * hdrtime; - load += tx->u.tx.extra_frag[i]->len * - tx->u.tx.rate->rate; - } - } - - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - local->channel_use_raw += load; - if (tx->sta) - tx->sta->channel_use_raw += load; - tx->sdata->channel_use_raw += load; - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_local *local = rx->local; - struct sk_buff *skb = rx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - struct ieee80211_rate *rate; - struct ieee80211_hw_mode *mode = local->hw.conf.mode; - int i; - - /* Estimate total channel use caused by this frame */ - - if (unlikely(mode->num_rates < 0)) - return TXRX_CONTINUE; - - rate = &mode->rates[0]; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].val == rx->u.rx.status->rate) { - rate = &mode->rates[i]; - break; - } - } - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (mode->mode == MODE_IEEE80211A || - mode->mode == MODE_ATHEROS_TURBO || - mode->mode == MODE_ATHEROS_TURBOG || - (mode->mode == MODE_IEEE80211G && - rate->flags & IEEE80211_RATE_ERP)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - load += skb->len * rate->rate_inv; - - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - local->channel_use_raw += load; - if (rx->sta) - rx->sta->channel_use_raw += load; - rx->u.rx.load = load; - - return TXRX_CONTINUE; -} - -static ieee80211_txrx_result -ieee80211_rx_h_if_stats(struct ieee80211_txrx_data *rx) -{ - rx->sdata->channel_use_raw += rx->u.rx.load; - return TXRX_CONTINUE; -} - -static void ieee80211_stat_refresh(unsigned long data) -{ - struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; - - if (!local->stat_time) - return; - - /* go through all stations */ - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - sta->channel_use = (sta->channel_use_raw / local->stat_time) / - CHAN_UTIL_PER_10MS; - sta->channel_use_raw = 0; - } - spin_unlock_bh(&local->sta_lock); - - /* go through all subinterfaces */ - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - sdata->channel_use = (sdata->channel_use_raw / - local->stat_time) / CHAN_UTIL_PER_10MS; - sdata->channel_use_raw = 0; - } - read_unlock(&local->sub_if_lock); - - /* hardware interface */ - local->channel_use = (local->channel_use_raw / - local->stat_time) / CHAN_UTIL_PER_10MS; - local->channel_use_raw = 0; - - local->stat_timer.expires = jiffies + HZ * local->stat_time / 100; - add_timer(&local->stat_timer); + sdata->flags &= ~(IEEE80211_SDATA_USE_PROTECTION | + IEEE80211_SDATA_SHORT_PREAMBLE); + ieee80211_erp_info_change_notify(dev, + IEEE80211_ERP_CHANGE_PROTECTION | + IEEE80211_ERP_CHANGE_PREAMBLE); } - -/* This is a version of the rx handler that can be called from hard irq - * context. Post the skb on the queue and schedule the tasklet */ -void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) -{ - struct ieee80211_local *local = hw_to_local(hw); - - BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb)); - - skb->dev = local->mdev; - /* copy status into skb->cb for use by tasklet */ - memcpy(skb->cb, status, sizeof(*status)); - skb->pkt_type = IEEE80211_RX_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_rx_irqsafe); - void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_tx_status *status) @@ -4398,14 +624,13 @@ static void ieee80211_tasklet_handler(unsigned long data) break; default: /* should never get here! */ printk(KERN_ERR "%s: Unknown message type (%d)\n", - local->mdev->name, skb->pkt_type); + wiphy_name(local->hw.wiphy), skb->pkt_type); dev_kfree_skb(skb); break; } } } - /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to * make a prepared TX frame (one that has been given to hw) to look like brand * new IEEE 802.11 frame that is ready to go through TX processing again. @@ -4420,10 +645,13 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; pkt_data->ifindex = control->ifindex; - pkt_data->mgmt_iface = (control->type == IEEE80211_IF_TYPE_MGMT); - pkt_data->req_tx_status = !!(control->flags & IEEE80211_TXCTL_REQ_TX_STATUS); - pkt_data->do_not_encrypt = !!(control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT); - pkt_data->requeue = !!(control->flags & IEEE80211_TXCTL_REQUEUE); + pkt_data->flags = 0; + if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS) + pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; + if (control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + if (control->flags & IEEE80211_TXCTL_REQUEUE) + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; pkt_data->queue = control->queue; hdrlen = ieee80211_get_hdrlen_from_skb(skb); @@ -4431,7 +659,7 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, if (!key) goto no_key; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: iv_len = WEP_IV_LEN; mic_len = WEP_ICV_LEN; @@ -4448,7 +676,8 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, goto no_key; } - if (skb->len >= mic_len && key->force_sw_encrypt) + if (skb->len >= mic_len && + !(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) skb_trim(skb, skb->len - mic_len); if (skb->len >= iv_len && skb->len > hdrlen) { memmove(skb->data + iv_len, skb->data, hdrlen); @@ -4468,7 +697,6 @@ no_key: } } - void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_tx_status *status) { @@ -4476,7 +704,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); u16 frag, type; - u32 msg_type; struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; int monitors; @@ -4484,7 +711,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, if (!status) { printk(KERN_ERR "%s: ieee80211_tx_status called with NULL status\n", - local->mdev->name); + wiphy_name(local->hw.wiphy)); dev_kfree_skb(skb); return; } @@ -4541,7 +768,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, printk(KERN_DEBUG "%s: dropped TX " "filtered frame queue_len=%d " "PS=%d @%lu\n", - local->mdev->name, + wiphy_name(local->hw.wiphy), skb_queue_len( &sta->tx_filtered), !!(sta->flags & WLAN_STA_PS), @@ -4591,29 +818,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, local->dot11FailedCount++; } - msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ? - ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail; - /* this was a transmitted frame, but now we want to reuse it */ skb_orphan(skb); - if ((status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS) && - local->apdev) { - if (local->monitors) { - skb2 = skb_clone(skb, GFP_ATOMIC); - } else { - skb2 = skb; - skb = NULL; - } - - if (skb2) - /* Send frame to hostapd */ - ieee80211_rx_mgmt(local, skb2, NULL, msg_type); - - if (!skb) - return; - } - if (!local->monitors) { dev_kfree_skb(skb); return; @@ -4648,9 +855,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, rthdr->data_retries = status->retry_count; - read_lock(&local->sub_if_lock); + rcu_read_lock(); monitors = local->monitors; - list_for_each_entry(sdata, &local->sub_if_list, list) { + list_for_each_entry_rcu(sdata, &local->interfaces, list) { /* * Using the monitors counter is possibly racy, but * if the value is wrong we simply either clone the skb @@ -4666,7 +873,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, continue; monitors--; if (monitors) - skb2 = skb_clone(skb, GFP_KERNEL); + skb2 = skb_clone(skb, GFP_ATOMIC); else skb2 = NULL; skb->dev = sdata->dev; @@ -4681,170 +888,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, } } out: - read_unlock(&local->sub_if_lock); + rcu_read_unlock(); if (skb) dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_tx_status); -/* TODO: implement register/unregister functions for adding TX/RX handlers - * into ordered list */ - -/* rx_pre handlers don't have dev and sdata fields available in - * ieee80211_txrx_data */ -static ieee80211_rx_handler ieee80211_rx_pre_handlers[] = -{ - ieee80211_rx_h_parse_qos, - ieee80211_rx_h_load_stats, - NULL -}; - -static ieee80211_rx_handler ieee80211_rx_handlers[] = -{ - ieee80211_rx_h_if_stats, - ieee80211_rx_h_monitor, - ieee80211_rx_h_passive_scan, - ieee80211_rx_h_check, - ieee80211_rx_h_sta_process, - ieee80211_rx_h_ccmp_decrypt, - ieee80211_rx_h_tkip_decrypt, - ieee80211_rx_h_wep_weak_iv_detection, - ieee80211_rx_h_wep_decrypt, - ieee80211_rx_h_defragment, - ieee80211_rx_h_ps_poll, - ieee80211_rx_h_michael_mic_verify, - /* this must be after decryption - so header is counted in MPDU mic - * must be before pae and data, so QOS_DATA format frames - * are not passed to user space by these functions - */ - ieee80211_rx_h_remove_qos_control, - ieee80211_rx_h_802_1x_pae, - ieee80211_rx_h_drop_unencrypted, - ieee80211_rx_h_data, - ieee80211_rx_h_mgmt, - NULL -}; - -static ieee80211_tx_handler ieee80211_tx_handlers[] = -{ - ieee80211_tx_h_check_assoc, - ieee80211_tx_h_sequence, - ieee80211_tx_h_ps_buf, - ieee80211_tx_h_select_key, - ieee80211_tx_h_michael_mic_add, - ieee80211_tx_h_fragment, - ieee80211_tx_h_tkip_encrypt, - ieee80211_tx_h_ccmp_encrypt, - ieee80211_tx_h_wep_encrypt, - ieee80211_tx_h_rate_ctrl, - ieee80211_tx_h_misc, - ieee80211_tx_h_load_stats, - NULL -}; - - -int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct sta_info *sta; - - if (compare_ether_addr(remote_addr, sdata->u.wds.remote_addr) == 0) - return 0; - - /* Create STA entry for the new peer */ - sta = sta_info_add(local, dev, remote_addr, GFP_KERNEL); - if (!sta) - return -ENOMEM; - sta_info_put(sta); - - /* Remove STA entry for the old peer */ - sta = sta_info_get(local, sdata->u.wds.remote_addr); - if (sta) { - sta_info_put(sta); - sta_info_free(sta, 0); - } else { - printk(KERN_DEBUG "%s: could not find STA entry for WDS link " - "peer " MAC_FMT "\n", - dev->name, MAC_ARG(sdata->u.wds.remote_addr)); - } - - /* Update WDS link data */ - memcpy(&sdata->u.wds.remote_addr, remote_addr, ETH_ALEN); - - return 0; -} - -/* Must not be called for mdev and apdev */ -void ieee80211_if_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_subif_start_xmit; - dev->wireless_handlers = &ieee80211_iw_handler_def; - dev->set_multicast_list = ieee80211_set_multicast_list; - dev->change_mtu = ieee80211_change_mtu; - dev->get_stats = ieee80211_get_stats; - dev->open = ieee80211_open; - dev->stop = ieee80211_stop; - dev->uninit = ieee80211_if_reinit; - dev->destructor = ieee80211_if_free; -} - -void ieee80211_if_mgmt_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_mgmt_start_xmit; - dev->change_mtu = ieee80211_change_mtu_apdev; - dev->get_stats = ieee80211_get_stats; - dev->open = ieee80211_mgmt_open; - dev->stop = ieee80211_mgmt_stop; - dev->type = ARPHRD_IEEE80211_PRISM; - dev->hard_header_parse = header_parse_80211; - dev->uninit = ieee80211_if_reinit; - dev->destructor = ieee80211_if_free; -} - -int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, - const char *name) -{ - struct rate_control_ref *ref, *old; - - ASSERT_RTNL(); - if (local->open_count || netif_running(local->mdev) || - (local->apdev && netif_running(local->apdev))) - return -EBUSY; - - ref = rate_control_alloc(name, local); - if (!ref) { - printk(KERN_WARNING "%s: Failed to select rate control " - "algorithm\n", local->mdev->name); - return -ENOENT; - } - - old = local->rate_ctrl; - local->rate_ctrl = ref; - if (old) { - rate_control_put(old); - sta_info_flush(local, NULL); - } - - printk(KERN_DEBUG "%s: Selected rate control " - "algorithm '%s'\n", local->mdev->name, - ref->ops->name); - - - return 0; -} - -static void rate_control_deinitialize(struct ieee80211_local *local) -{ - struct rate_control_ref *ref; - - ref = local->rate_ctrl; - local->rate_ctrl = NULL; - rate_control_put(ref); -} - struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -4888,8 +937,12 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); BUG_ON(!ops->tx); + BUG_ON(!ops->start); + BUG_ON(!ops->stop); BUG_ON(!ops->config); BUG_ON(!ops->add_interface); + BUG_ON(!ops->remove_interface); + BUG_ON(!ops->configure_filter); local->ops = ops; /* for now, mdev needs sub_if_data :/ */ @@ -4919,17 +972,13 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->long_retry_limit = 4; local->hw.conf.radio_enabled = 1; - local->enabled_modes = (unsigned int) -1; + local->enabled_modes = ~0; INIT_LIST_HEAD(&local->modes_list); - rwlock_init(&local->sub_if_lock); - INIT_LIST_HEAD(&local->sub_if_list); + INIT_LIST_HEAD(&local->interfaces); INIT_DELAYED_WORK(&local->scan_work, ieee80211_sta_scan_work); - init_timer(&local->stat_timer); - local->stat_timer.function = ieee80211_stat_refresh; - local->stat_timer.data = (unsigned long) local; ieee80211_rx_bss_list_init(mdev); sta_info_init(local); @@ -4938,7 +987,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mdev->open = ieee80211_master_open; mdev->stop = ieee80211_master_stop; mdev->type = ARPHRD_IEEE80211; - mdev->hard_header_parse = header_parse_80211; + mdev->header_ops = &ieee80211_header_ops; + mdev->set_multicast_list = ieee80211_master_set_multicast_list; sdata->type = IEEE80211_IF_TYPE_AP; sdata->dev = mdev; @@ -4946,7 +996,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, sdata->u.ap.force_unicast_rateidx = -1; sdata->u.ap.max_ratectrl_rateidx = -1; ieee80211_if_sdata_init(sdata); - list_add_tail(&sdata->list, &local->sub_if_list); + /* no RCU needed since we're still during init phase */ + list_add_tail(&sdata->list, &local->interfaces); tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, (unsigned long)local); @@ -5019,11 +1070,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_dev; ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); + ieee80211_if_set_type(local->mdev, IEEE80211_IF_TYPE_AP); result = ieee80211_init_rate_ctrl_alg(local, NULL); if (result < 0) { printk(KERN_DEBUG "%s: Failed to initialize rate control " - "algorithm\n", local->mdev->name); + "algorithm\n", wiphy_name(local->hw.wiphy)); goto fail_rate; } @@ -5031,7 +1083,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) { printk(KERN_DEBUG "%s: Failed to initialize wep\n", - local->mdev->name); + wiphy_name(local->hw.wiphy)); goto fail_wep; } @@ -5042,7 +1094,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) IEEE80211_IF_TYPE_STA); if (result) printk(KERN_WARNING "%s: Failed to add default virtual iface\n", - local->mdev->name); + wiphy_name(local->hw.wiphy)); local->reg_state = IEEE80211_DEV_REGISTERED; rtnl_unlock(); @@ -5105,7 +1157,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata, *tmp; - struct list_head tmp_list; int i; tasklet_kill(&local->tx_pending_tasklet); @@ -5116,20 +1167,30 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) BUG_ON(local->reg_state != IEEE80211_DEV_REGISTERED); local->reg_state = IEEE80211_DEV_UNREGISTERED; - if (local->apdev) - ieee80211_if_del_mgmt(local); - write_lock_bh(&local->sub_if_lock); - list_replace_init(&local->sub_if_list, &tmp_list); - write_unlock_bh(&local->sub_if_lock); + /* + * At this point, interface list manipulations are fine + * because the driver cannot be handing us frames any + * more and the tasklet is killed. + */ - list_for_each_entry_safe(sdata, tmp, &tmp_list, list) + /* + * First, we remove all non-master interfaces. Do this because they + * may have bss pointer dependency on the master, and when we free + * the master these would be freed as well, breaking our list + * iteration completely. + */ + list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { + if (sdata->dev == local->mdev) + continue; + list_del(&sdata->list); __ieee80211_if_del(local, sdata); + } - rtnl_unlock(); + /* then, finally, remove the master interface */ + __ieee80211_if_del(local, IEEE80211_DEV_TO_SUB_IF(local->mdev)); - if (local->stat_time) - del_timer_sync(&local->stat_timer); + rtnl_unlock(); ieee80211_rx_bss_list_deinit(local->mdev); ieee80211_clear_tx_pending(local); @@ -5145,7 +1206,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) if (skb_queue_len(&local->skb_queue) || skb_queue_len(&local->skb_queue_unreliable)) printk(KERN_WARNING "%s: skb_queue not empty\n", - local->mdev->name); + wiphy_name(local->hw.wiphy)); skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); @@ -5165,72 +1226,6 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_free_hw); -void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) -{ - struct ieee80211_local *local = hw_to_local(hw); - - if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF, - &local->state[queue])) { - if (test_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[queue])) - tasklet_schedule(&local->tx_pending_tasklet); - else - if (!ieee80211_qdisc_installed(local->mdev)) { - if (queue == 0) - netif_wake_queue(local->mdev); - } else - __netif_schedule(local->mdev); - } -} -EXPORT_SYMBOL(ieee80211_wake_queue); - -void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) -{ - struct ieee80211_local *local = hw_to_local(hw); - - if (!ieee80211_qdisc_installed(local->mdev) && queue == 0) - netif_stop_queue(local->mdev); - set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); -} -EXPORT_SYMBOL(ieee80211_stop_queue); - -void ieee80211_start_queues(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); - int i; - - for (i = 0; i < local->hw.queues; i++) - clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]); - if (!ieee80211_qdisc_installed(local->mdev)) - netif_start_queue(local->mdev); -} -EXPORT_SYMBOL(ieee80211_start_queues); - -void ieee80211_stop_queues(struct ieee80211_hw *hw) -{ - int i; - - for (i = 0; i < hw->queues; i++) - ieee80211_stop_queue(hw, i); -} -EXPORT_SYMBOL(ieee80211_stop_queues); - -void ieee80211_wake_queues(struct ieee80211_hw *hw) -{ - int i; - - for (i = 0; i < hw->queues; i++) - ieee80211_wake_queue(hw, i); -} -EXPORT_SYMBOL(ieee80211_wake_queues); - -struct net_device_stats *ieee80211_dev_stats(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - return &sdata->stats; -} - static int __init ieee80211_init(void) { struct sk_buff *skb; @@ -5251,7 +1246,6 @@ static int __init ieee80211_init(void) return 0; } - static void __exit ieee80211_exit(void) { ieee80211_wme_unregister(); diff --git a/net/mac80211/ieee80211_cfg.c b/net/mac80211/ieee80211_cfg.c deleted file mode 100644 index 509096edb32..00000000000 --- a/net/mac80211/ieee80211_cfg.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * mac80211 configuration hooks for cfg80211 - * - * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> - * - * This file is GPLv2 as found in COPYING. - */ - -#include <linux/nl80211.h> -#include <linux/rtnetlink.h> -#include <net/cfg80211.h> -#include "ieee80211_i.h" -#include "ieee80211_cfg.h" - -static int ieee80211_add_iface(struct wiphy *wiphy, char *name, - unsigned int type) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - int itype; - - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; - - switch (type) { - case NL80211_IFTYPE_UNSPECIFIED: - itype = IEEE80211_IF_TYPE_STA; - break; - case NL80211_IFTYPE_ADHOC: - itype = IEEE80211_IF_TYPE_IBSS; - break; - case NL80211_IFTYPE_STATION: - itype = IEEE80211_IF_TYPE_STA; - break; - case NL80211_IFTYPE_MONITOR: - itype = IEEE80211_IF_TYPE_MNTR; - break; - default: - return -EINVAL; - } - - return ieee80211_if_add(local->mdev, name, NULL, itype); -} - -static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - struct net_device *dev; - char *name; - - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; - - dev = dev_get_by_index(ifindex); - if (!dev) - return 0; - - name = dev->name; - dev_put(dev); - - return ieee80211_if_remove(local->mdev, name, -1); -} - -struct cfg80211_ops mac80211_config_ops = { - .add_virtual_intf = ieee80211_add_iface, - .del_virtual_intf = ieee80211_del_iface, -}; diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h index 77c6afb7f6a..c15295d43d8 100644 --- a/net/mac80211/ieee80211_common.h +++ b/net/mac80211/ieee80211_common.h @@ -48,13 +48,13 @@ enum ieee80211_msg_type { ieee80211_msg_tx_callback_ack = 1, ieee80211_msg_tx_callback_fail = 2, /* hole at 3, was ieee80211_msg_passive_scan but unused */ - ieee80211_msg_wep_frame_unknown_key = 4, + /* hole at 4, was ieee80211_msg_wep_frame_unknown_key but now unused */ ieee80211_msg_michael_mic_failure = 5, /* hole at 6, was monitor but never sent to userspace */ ieee80211_msg_sta_not_assoc = 7, /* 8 was ieee80211_msg_set_aid_for_sta */ - ieee80211_msg_key_threshold_notification = 9, - ieee80211_msg_radar = 11, + /* 9 was ieee80211_msg_key_threshold_notification */ + /* 11 was ieee80211_msg_radar */ }; struct ieee80211_msg_key_notification { @@ -73,8 +73,6 @@ enum ieee80211_phytype { ieee80211_phytype_ofdm_dot11_g = 6, ieee80211_phytype_pbcc_dot11_g = 7, ieee80211_phytype_ofdm_dot11_a = 8, - ieee80211_phytype_dsss_dot11_turbog = 255, - ieee80211_phytype_dsss_dot11_turbo = 256, }; enum ieee80211_ssi_type { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6f7bae7ef9c..4b4ed2a5803 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -21,6 +21,7 @@ #include <linux/workqueue.h> #include <linux/types.h> #include <linux/spinlock.h> +#include <linux/etherdevice.h> #include <net/wireless.h> #include "ieee80211_key.h" #include "sta_info.h" @@ -36,8 +37,6 @@ struct ieee80211_local; -#define BIT(x) (1 << (x)) - #define IEEE80211_ALIGN32_PAD(a) ((4 - ((a) & 3)) & 3) /* Maximum number of broadcast/multicast frames to buffer when some of the @@ -112,6 +111,16 @@ typedef enum { TXRX_CONTINUE, TXRX_DROP, TXRX_QUEUED } ieee80211_txrx_result; +/* flags used in struct ieee80211_txrx_data.flags */ +/* whether the MSDU was fragmented */ +#define IEEE80211_TXRXD_FRAGMENTED BIT(0) +#define IEEE80211_TXRXD_TXUNICAST BIT(1) +#define IEEE80211_TXRXD_TXPS_BUFFERED BIT(2) +#define IEEE80211_TXRXD_TXPROBE_LAST_FRAG BIT(3) +#define IEEE80211_TXRXD_RXIN_SCAN BIT(4) +/* frame is destined to interface currently processed (incl. multicast frames) */ +#define IEEE80211_TXRXD_RXRA_MATCH BIT(5) +#define IEEE80211_TXRXD_TX_INJECTED BIT(6) struct ieee80211_txrx_data { struct sk_buff *skb; struct net_device *dev; @@ -120,14 +129,10 @@ struct ieee80211_txrx_data { struct sta_info *sta; u16 fc, ethertype; struct ieee80211_key *key; - unsigned int fragmented:1; /* whether the MSDU was fragmented */ + unsigned int flags; union { struct { struct ieee80211_tx_control *control; - unsigned int unicast:1; - unsigned int ps_buffered:1; - unsigned int short_preamble:1; - unsigned int probe_last_frag:1; struct ieee80211_hw_mode *mode; struct ieee80211_rate *rate; /* use this rate (if set) for last fragment; rate can @@ -135,7 +140,6 @@ struct ieee80211_txrx_data { * when using CTS protection with IEEE 802.11g. */ struct ieee80211_rate *last_frag_rate; int last_frag_hwrate; - int mgmt_interface; /* Extra fragments (in addition to the first fragment * in skb) */ @@ -147,23 +151,22 @@ struct ieee80211_txrx_data { int sent_ps_buffered; int queue; int load; - unsigned int in_scan:1; - /* frame is destined to interface currently processed - * (including multicast frames) */ - unsigned int ra_match:1; + u32 tkip_iv32; + u16 tkip_iv16; } rx; } u; }; +/* flags used in struct ieee80211_tx_packet_data.flags */ +#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0) +#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1) +#define IEEE80211_TXPD_REQUEUE BIT(2) /* Stored in sk_buff->cb */ struct ieee80211_tx_packet_data { int ifindex; unsigned long jiffies; - unsigned int req_tx_status:1; - unsigned int do_not_encrypt:1; - unsigned int requeue:1; - unsigned int mgmt_iface:1; - unsigned int queue:4; + unsigned int flags; + u8 queue; }; struct ieee80211_tx_stored_packet { @@ -174,7 +177,7 @@ struct ieee80211_tx_stored_packet { int last_frag_rateidx; int last_frag_hwrate; struct ieee80211_rate *last_frag_rate; - unsigned int last_frag_rate_ctrl_probe:1; + unsigned int last_frag_rate_ctrl_probe; }; typedef ieee80211_txrx_result (*ieee80211_tx_handler) @@ -187,10 +190,10 @@ struct ieee80211_if_ap { u8 *beacon_head, *beacon_tail; int beacon_head_len, beacon_tail_len; + struct list_head vlans; + u8 ssid[IEEE80211_MAX_SSID_LEN]; size_t ssid_len; - u8 *generic_elem; - size_t generic_elem_len; /* yes, this looks ugly, but guarantees that we can later use * bitmap_empty :) @@ -210,9 +213,23 @@ struct ieee80211_if_wds { }; struct ieee80211_if_vlan { - u8 id; + struct ieee80211_sub_if_data *ap; + struct list_head list; }; +/* flags used in struct ieee80211_if_sta.flags */ +#define IEEE80211_STA_SSID_SET BIT(0) +#define IEEE80211_STA_BSSID_SET BIT(1) +#define IEEE80211_STA_PREV_BSSID_SET BIT(2) +#define IEEE80211_STA_AUTHENTICATED BIT(3) +#define IEEE80211_STA_ASSOCIATED BIT(4) +#define IEEE80211_STA_PROBEREQ_POLL BIT(5) +#define IEEE80211_STA_CREATE_IBSS BIT(6) +#define IEEE80211_STA_MIXED_CELL BIT(7) +#define IEEE80211_STA_WMM_ENABLED BIT(8) +#define IEEE80211_STA_AUTO_SSID_SEL BIT(10) +#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) +#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) struct ieee80211_if_sta { enum { IEEE80211_DISABLED, IEEE80211_AUTHENTICATE, @@ -235,25 +252,14 @@ struct ieee80211_if_sta { int auth_tries, assoc_tries; - unsigned int ssid_set:1; - unsigned int bssid_set:1; - unsigned int prev_bssid_set:1; - unsigned int authenticated:1; - unsigned int associated:1; - unsigned int probereq_poll:1; - unsigned int create_ibss:1; - unsigned int mixed_cell:1; - unsigned int wmm_enabled:1; - unsigned int auto_ssid_sel:1; - unsigned int auto_bssid_sel:1; - unsigned int auto_channel_sel:1; + unsigned int flags; #define IEEE80211_STA_REQ_SCAN 0 #define IEEE80211_STA_REQ_AUTH 1 #define IEEE80211_STA_REQ_RUN 2 unsigned long request; struct sk_buff_head skb_queue; - int key_mgmt; + int key_management_enabled; unsigned long last_probe; #define IEEE80211_AUTH_ALG_OPEN BIT(0) @@ -271,21 +277,29 @@ struct ieee80211_if_sta { }; +/* flags used in struct ieee80211_sub_if_data.flags */ +#define IEEE80211_SDATA_ALLMULTI BIT(0) +#define IEEE80211_SDATA_PROMISC BIT(1) +#define IEEE80211_SDATA_USE_PROTECTION BIT(2) /* CTS protect ERP frames */ +/* use short preamble with IEEE 802.11b: this flag is set when the AP or beacon + * generator reports that there are no present stations that cannot support short + * preambles */ +#define IEEE80211_SDATA_SHORT_PREAMBLE BIT(3) +#define IEEE80211_SDATA_USERSPACE_MLME BIT(4) struct ieee80211_sub_if_data { struct list_head list; - unsigned int type; + enum ieee80211_if_types type; struct wireless_dev wdev; + /* keys */ + struct list_head key_list; + struct net_device *dev; struct ieee80211_local *local; - int mc_count; - unsigned int allmulti:1; - unsigned int promisc:1; - unsigned int use_protection:1; /* CTS protect ERP frames */ + unsigned int flags; - struct net_device_stats stats; int drop_unencrypted; int eapol; /* 0 = process EAPOL frames as normal data frames, * 1 = send EAPOL frames through wlan#ap to hostapd @@ -364,7 +378,6 @@ struct ieee80211_sub_if_data { struct dentry *drop_unencrypted; struct dentry *eapol; struct dentry *ieee8021_x; - struct dentry *vlan_id; } vlan; struct { struct dentry *mode; @@ -393,9 +406,9 @@ struct ieee80211_local { struct list_head modes_list; struct net_device *mdev; /* wmaster# - "master" 802.11 device */ - struct net_device *apdev; /* wlan#ap - management frames (hostapd) */ int open_count; int monitors; + unsigned int filter_flags; /* FIF_* */ struct iw_statistics wstats; u8 wstats_flags; int tx_headroom; /* required headroom for hardware/radiotap */ @@ -416,10 +429,9 @@ struct ieee80211_local { struct sk_buff_head skb_queue_unreliable; /* Station data structures */ - spinlock_t sta_lock; /* mutex for STA data structures */ + rwlock_t sta_lock; /* protects STA data structures */ int num_sta; /* number of stations in sta_list */ struct list_head sta_list; - struct list_head deleted_sta_list; struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; @@ -427,17 +439,11 @@ struct ieee80211_local { struct ieee80211_tx_stored_packet pending_packet[NUM_TX_DATA_QUEUES]; struct tasklet_struct tx_pending_tasklet; - int mc_count; /* total count of multicast entries in all interfaces */ - int iff_allmultis, iff_promiscs; - /* number of interfaces with corresponding IFF_ flags */ + /* number of interfaces with corresponding IFF_ flags */ + atomic_t iff_allmultis, iff_promiscs; struct rate_control_ref *rate_ctrl; - int next_mode; /* MODE_IEEE80211* - * The mode preference for next channel change. This is - * used to select .11g vs. .11b channels (or 4.9 GHz vs. - * .11a) when the channel number is not unique. */ - /* Supported and basic rate filters for different modes. These are * pointers to -1 terminated lists and rates in 100 kbps units. */ int *supp_rates[NUM_IEEE80211_MODES]; @@ -447,14 +453,10 @@ struct ieee80211_local { int fragmentation_threshold; int short_retry_limit; /* dot11ShortRetryLimit */ int long_retry_limit; /* dot11LongRetryLimit */ - int short_preamble; /* use short preamble with IEEE 802.11b */ struct crypto_blkcipher *wep_tx_tfm; struct crypto_blkcipher *wep_rx_tfm; u32 wep_iv; - int key_tx_rx_threshold; /* number of times any key can be used in TX - * or RX before generating a rekey - * notification; 0 = notification disabled. */ int bridge_packets; /* bridge packets between associated stations and * deliver multicast frames both back to wireless @@ -464,9 +466,8 @@ struct ieee80211_local { ieee80211_rx_handler *rx_handlers; ieee80211_tx_handler *tx_handlers; - rwlock_t sub_if_lock; /* Protects sub_if_list. Cannot be taken under - * sta_bss_lock or sta_lock. */ - struct list_head sub_if_list; + struct list_head interfaces; + int sta_scanning; int scan_channel_idx; enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; @@ -500,25 +501,17 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_LEDS int tx_led_counter, rx_led_counter; - struct led_trigger *tx_led, *rx_led; - char tx_led_name[32], rx_led_name[32]; + struct led_trigger *tx_led, *rx_led, *assoc_led; + char tx_led_name[32], rx_led_name[32], assoc_led_name[32]; #endif u32 channel_use; u32 channel_use_raw; - u32 stat_time; - struct timer_list stat_timer; #ifdef CONFIG_MAC80211_DEBUGFS struct work_struct sta_debugfs_add; #endif - enum { - STA_ANTENNA_SEL_AUTO = 0, - STA_ANTENNA_SEL_SW_CTRL = 1, - STA_ANTENNA_SEL_SW_CTRL_DEBUG = 2 - } sta_antenna_sel; - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; @@ -548,16 +541,9 @@ struct ieee80211_local { #endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ - int default_wep_only; /* only default WEP keys are used with this - * interface; this is used to decide when hwaccel - * can be used with default keys */ int total_ps_buffered; /* total number of all buffered unicast and * multicast packets for power saving stations */ - int allow_broadcast_always; /* whether to allow TX of broadcast frames - * even when there are no associated STAs - */ - int wifi_wme_noack_test; unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ @@ -566,17 +552,13 @@ struct ieee80211_local { unsigned int hw_modes; /* bitfield of supported hardware modes; * (1 << MODE_*) */ - int user_space_mlme; - #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { struct dentry *channel; struct dentry *frequency; - struct dentry *radar_detect; struct dentry *antenna_sel_tx; struct dentry *antenna_sel_rx; struct dentry *bridge_packets; - struct dentry *key_tx_rx_threshold; struct dentry *rts_threshold; struct dentry *fragmentation_threshold; struct dentry *short_retry_limit; @@ -584,7 +566,6 @@ struct ieee80211_local { struct dentry *total_ps_buffered; struct dentry *mode; struct dentry *wep_iv; - struct dentry *tx_power_reduction; struct dentry *modes; struct dentry *statistics; struct local_debugfsdentries_statsdentries { @@ -656,38 +637,38 @@ struct sta_attribute { ssize_t (*store)(struct sta_info *, const char *buf, size_t count); }; -static inline void __bss_tim_set(struct ieee80211_if_ap *bss, int aid) +static inline void __bss_tim_set(struct ieee80211_if_ap *bss, u16 aid) { /* - * This format has ben mandated by the IEEE specifications, + * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __set_bit() format. */ - bss->tim[(aid)/8] |= 1<<((aid) % 8); + bss->tim[aid / 8] |= (1 << (aid % 8)); } static inline void bss_tim_set(struct ieee80211_local *local, - struct ieee80211_if_ap *bss, int aid) + struct ieee80211_if_ap *bss, u16 aid) { - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); __bss_tim_set(bss, aid); - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); } -static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, int aid) +static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, u16 aid) { /* - * This format has ben mandated by the IEEE specifications, + * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __clear_bit() format. */ - bss->tim[(aid)/8] &= !(1<<((aid) % 8)); + bss->tim[aid / 8] &= ~(1 << (aid % 8)); } static inline void bss_tim_clear(struct ieee80211_local *local, - struct ieee80211_if_ap *bss, int aid) + struct ieee80211_if_ap *bss, u16 aid) { - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); __bss_tim_clear(bss, aid); - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); } /** @@ -707,35 +688,28 @@ static inline int ieee80211_is_erp_rate(int phymode, int rate) return 0; } +static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) +{ + return compare_ether_addr(raddr, addr) == 0 || + is_broadcast_ether_addr(raddr); +} + + /* ieee80211.c */ int ieee80211_hw_config(struct ieee80211_local *local); int ieee80211_if_config(struct net_device *dev); int ieee80211_if_config_beacon(struct net_device *dev); -struct ieee80211_key_conf * -ieee80211_key_data2conf(struct ieee80211_local *local, - const struct ieee80211_key *data); -struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, - int idx, size_t key_len, gfp_t flags); -void ieee80211_key_free(struct ieee80211_key *key); -void ieee80211_rx_mgmt(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_rx_status *status, u32 msg_type); void ieee80211_prepare_rates(struct ieee80211_local *local, struct ieee80211_hw_mode *mode); void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx); int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr); -int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); -int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); void ieee80211_if_setup(struct net_device *dev); -void ieee80211_if_mgmt_setup(struct net_device *dev); -int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, - const char *name); -struct net_device_stats *ieee80211_dev_stats(struct net_device *dev); +struct ieee80211_rate *ieee80211_get_rate(struct ieee80211_local *local, + int phymode, int hwrate); /* ieee80211_ioctl.c */ extern const struct iw_handler_def ieee80211_iw_handler_def; -void ieee80211_update_default_wep_only(struct ieee80211_local *local); - /* Least common multiple of the used rates (in 100 kbps). This is used to * calculate rate_inv values for each rate so that only integers are needed. */ @@ -783,6 +757,8 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, u8 *addr); int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason); int ieee80211_sta_disassociate(struct net_device *dev, u16 reason); +void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes); +void ieee80211_reset_erp_info(struct net_device *dev); /* ieee80211_iface.c */ int ieee80211_if_add(struct net_device *dev, const char *name, @@ -794,14 +770,32 @@ void __ieee80211_if_del(struct ieee80211_local *local, int ieee80211_if_remove(struct net_device *dev, const char *name, int id); void ieee80211_if_free(struct net_device *dev); void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata); -int ieee80211_if_add_mgmt(struct ieee80211_local *local); -void ieee80211_if_del_mgmt(struct ieee80211_local *local); /* regdomain.c */ void ieee80211_regdomain_init(void); void ieee80211_set_default_regdomain(struct ieee80211_hw_mode *mode); -/* for wiphy privid */ -extern void *mac80211_wiphy_privid; +/* rx handling */ +extern ieee80211_rx_handler ieee80211_rx_pre_handlers[]; +extern ieee80211_rx_handler ieee80211_rx_handlers[]; + +/* tx handling */ +extern ieee80211_tx_handler ieee80211_tx_handlers[]; +void ieee80211_clear_tx_pending(struct ieee80211_local *local); +void ieee80211_tx_pending(unsigned long data); +int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); + +/* utility functions/constants */ +extern void *mac80211_wiphy_privid; /* for wiphy privid */ +extern const unsigned char rfc1042_header[6]; +extern const unsigned char bridge_tunnel_header[6]; +u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len); +int ieee80211_is_eapol(const struct sk_buff *skb); +int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, + int rate, int erp, int short_preamble); +void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, + struct ieee80211_hdr *hdr); #endif /* IEEE80211_I_H */ diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c index 8532a5ccdd1..43e505d2945 100644 --- a/net/mac80211/ieee80211_iface.c +++ b/net/mac80211/ieee80211_iface.c @@ -25,6 +25,8 @@ void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata) sdata->eapol = 1; for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); + + INIT_LIST_HEAD(&sdata->key_list); } static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata) @@ -77,18 +79,15 @@ int ieee80211_if_add(struct net_device *dev, const char *name, ieee80211_debugfs_add_netdev(sdata); ieee80211_if_set_type(ndev, type); - write_lock_bh(&local->sub_if_lock); + /* we're under RTNL so all this is fine */ if (unlikely(local->reg_state == IEEE80211_DEV_UNREGISTERED)) { - write_unlock_bh(&local->sub_if_lock); __ieee80211_if_del(local, sdata); return -ENODEV; } - list_add(&sdata->list, &local->sub_if_list); + list_add_tail_rcu(&sdata->list, &local->interfaces); + if (new_dev) *new_dev = ndev; - write_unlock_bh(&local->sub_if_lock); - - ieee80211_update_default_wep_only(local); return 0; @@ -97,74 +96,35 @@ fail: return ret; } -int ieee80211_if_add_mgmt(struct ieee80211_local *local) -{ - struct net_device *ndev; - struct ieee80211_sub_if_data *nsdata; - int ret; - - ASSERT_RTNL(); - - ndev = alloc_netdev(sizeof(struct ieee80211_sub_if_data), "wmgmt%d", - ieee80211_if_mgmt_setup); - if (!ndev) - return -ENOMEM; - ret = dev_alloc_name(ndev, ndev->name); - if (ret < 0) - goto fail; - - memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); - SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); - - nsdata = IEEE80211_DEV_TO_SUB_IF(ndev); - ndev->ieee80211_ptr = &nsdata->wdev; - nsdata->wdev.wiphy = local->hw.wiphy; - nsdata->type = IEEE80211_IF_TYPE_MGMT; - nsdata->dev = ndev; - nsdata->local = local; - ieee80211_if_sdata_init(nsdata); - - ret = register_netdevice(ndev); - if (ret) - goto fail; - - ieee80211_debugfs_add_netdev(nsdata); - - if (local->open_count > 0) - dev_open(ndev); - local->apdev = ndev; - return 0; - -fail: - free_netdev(ndev); - return ret; -} - -void ieee80211_if_del_mgmt(struct ieee80211_local *local) -{ - struct net_device *apdev; - - ASSERT_RTNL(); - apdev = local->apdev; - ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(apdev)); - local->apdev = NULL; - unregister_netdevice(apdev); -} - void ieee80211_if_set_type(struct net_device *dev, int type) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int oldtype = sdata->type; - dev->hard_start_xmit = ieee80211_subif_start_xmit; - + /* + * We need to call this function on the master interface + * which already has a hard_start_xmit routine assigned + * which must not be changed. + */ + if (dev != sdata->local->mdev) + dev->hard_start_xmit = ieee80211_subif_start_xmit; + + /* + * Called even when register_netdevice fails, it would + * oops if assigned before initialising the rest. + */ + dev->uninit = ieee80211_if_reinit; + + /* most have no BSS pointer */ + sdata->bss = NULL; sdata->type = type; + switch (type) { case IEEE80211_IF_TYPE_WDS: - sdata->bss = NULL; + /* nothing special */ break; case IEEE80211_IF_TYPE_VLAN: + sdata->u.vlan.ap = NULL; break; case IEEE80211_IF_TYPE_AP: sdata->u.ap.dtim_period = 2; @@ -172,6 +132,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type) sdata->u.ap.max_ratectrl_rateidx = -1; skb_queue_head_init(&sdata->u.ap.ps_bc_buf); sdata->bss = &sdata->u.ap; + INIT_LIST_HEAD(&sdata->u.ap.vlans); break; case IEEE80211_IF_TYPE_STA: case IEEE80211_IF_TYPE_IBSS: { @@ -187,10 +148,10 @@ void ieee80211_if_set_type(struct net_device *dev, int type) ifsta->capab = WLAN_CAPABILITY_ESS; ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | IEEE80211_AUTH_ALG_SHARED_KEY; - ifsta->create_ibss = 1; - ifsta->wmm_enabled = 1; - ifsta->auto_channel_sel = 1; - ifsta->auto_bssid_sel = 1; + ifsta->flags |= IEEE80211_STA_CREATE_IBSS | + IEEE80211_STA_WMM_ENABLED | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL; msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev); sdata->bss = &msdata->u.ap; @@ -205,7 +166,6 @@ void ieee80211_if_set_type(struct net_device *dev, int type) dev->name, __FUNCTION__, type); } ieee80211_debugfs_change_if_type(sdata, oldtype); - ieee80211_update_default_wep_only(local); } /* Must be called with rtnl lock held. */ @@ -214,57 +174,46 @@ void ieee80211_if_reinit(struct net_device *dev) struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; - int i; + struct sk_buff *skb; ASSERT_RTNL(); + + ieee80211_free_keys(sdata); + ieee80211_if_sdata_deinit(sdata); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - if (!sdata->keys[i]) - continue; -#if 0 - /* The interface is down at the moment, so there is not - * really much point in disabling the keys at this point. */ - memset(addr, 0xff, ETH_ALEN); - if (local->ops->set_key) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, addr, - local->keys[i], 0); -#endif - ieee80211_key_free(sdata->keys[i]); - sdata->keys[i] = NULL; - } switch (sdata->type) { + case IEEE80211_IF_TYPE_INVALID: + /* cannot happen */ + WARN_ON(1); + break; case IEEE80211_IF_TYPE_AP: { /* Remove all virtual interfaces that use this BSS * as their sdata->bss */ struct ieee80211_sub_if_data *tsdata, *n; - LIST_HEAD(tmp_list); - write_lock_bh(&local->sub_if_lock); - list_for_each_entry_safe(tsdata, n, &local->sub_if_list, list) { + list_for_each_entry_safe(tsdata, n, &local->interfaces, list) { if (tsdata != sdata && tsdata->bss == &sdata->u.ap) { printk(KERN_DEBUG "%s: removing virtual " "interface %s because its BSS interface" " is being removed\n", sdata->dev->name, tsdata->dev->name); - list_move_tail(&tsdata->list, &tmp_list); + list_del_rcu(&tsdata->list); + /* + * We have lots of time and can afford + * to sync for each interface + */ + synchronize_rcu(); + __ieee80211_if_del(local, tsdata); } } - write_unlock_bh(&local->sub_if_lock); - - list_for_each_entry_safe(tsdata, n, &tmp_list, list) - __ieee80211_if_del(local, tsdata); kfree(sdata->u.ap.beacon_head); kfree(sdata->u.ap.beacon_tail); - kfree(sdata->u.ap.generic_elem); - if (dev != local->mdev) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { - local->total_ps_buffered--; - dev_kfree_skb(skb); - } + while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { + local->total_ps_buffered--; + dev_kfree_skb(skb); } break; @@ -272,8 +221,8 @@ void ieee80211_if_reinit(struct net_device *dev) case IEEE80211_IF_TYPE_WDS: sta = sta_info_get(local, sdata->u.wds.remote_addr); if (sta) { + sta_info_free(sta); sta_info_put(sta); - sta_info_free(sta, 0); } else { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Someone had deleted my STA " @@ -298,6 +247,9 @@ void ieee80211_if_reinit(struct net_device *dev) case IEEE80211_IF_TYPE_MNTR: dev->type = ARPHRD_ETHER; break; + case IEEE80211_IF_TYPE_VLAN: + sdata->u.vlan.ap = NULL; + break; } /* remove all STAs that are bound to this virtual interface */ @@ -327,29 +279,23 @@ int ieee80211_if_remove(struct net_device *dev, const char *name, int id) ASSERT_RTNL(); - write_lock_bh(&local->sub_if_lock); - list_for_each_entry_safe(sdata, n, &local->sub_if_list, list) { + list_for_each_entry_safe(sdata, n, &local->interfaces, list) { if ((sdata->type == id || id == -1) && strcmp(name, sdata->dev->name) == 0 && sdata->dev != local->mdev) { - list_del(&sdata->list); - write_unlock_bh(&local->sub_if_lock); + list_del_rcu(&sdata->list); + synchronize_rcu(); __ieee80211_if_del(local, sdata); - ieee80211_update_default_wep_only(local); return 0; } } - write_unlock_bh(&local->sub_if_lock); return -ENODEV; } void ieee80211_if_free(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* local->apdev must be NULL when freeing management interface */ - BUG_ON(dev == local->apdev); ieee80211_if_sdata_deinit(sdata); free_netdev(dev); } diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index e7904db5532..6caa3ec2cff 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -21,77 +21,41 @@ #include <net/mac80211.h> #include "ieee80211_i.h" -#include "hostapd_ioctl.h" #include "ieee80211_rate.h" #include "wpa.h" #include "aes_ccm.h" -#include "debugfs_key.h" - -static void ieee80211_set_hw_encryption(struct net_device *dev, - struct sta_info *sta, u8 addr[ETH_ALEN], - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf = NULL; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - /* default to sw encryption; this will be cleared by low-level - * driver if the hw supports requested encryption */ - if (key) - key->force_sw_encrypt = 1; - - if (key && local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, key))) { - if (local->ops->set_key(local_to_hw(local), SET_KEY, addr, - keyconf, sta ? sta->aid : 0)) { - key->force_sw_encrypt = 1; - key->hw_key_idx = HW_KEY_IDX_INVALID; - } else { - key->force_sw_encrypt = - !!(keyconf->flags & IEEE80211_KEY_FORCE_SW_ENCRYPT); - key->hw_key_idx = - keyconf->hw_key_idx; - - } - } - kfree(keyconf); -} static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, - int idx, int alg, int set_tx_key, - const u8 *_key, size_t key_len) + int idx, int alg, int remove, + int set_tx_key, const u8 *_key, + size_t key_len) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int ret = 0; struct sta_info *sta; - struct ieee80211_key *key, *old_key; - int try_hwaccel = 1; - struct ieee80211_key_conf *keyconf; + struct ieee80211_key *key; struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { + printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", + dev->name, idx); + return -EINVAL; + } + if (is_broadcast_ether_addr(sta_addr)) { sta = NULL; - if (idx >= NUM_DEFAULT_KEYS) { - printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", - dev->name, idx); - return -EINVAL; - } key = sdata->keys[idx]; - - /* TODO: consider adding hwaccel support for these; at least - * Atheros key cache should be able to handle this since AP is - * only transmitting frames with default keys. */ - /* FIX: hw key cache can be used when only one virtual - * STA is associated with each AP. If more than one STA - * is associated to the same AP, software encryption - * must be used. This should be done automatically - * based on configured station devices. For the time - * being, this can be only set at compile time. */ } else { set_tx_key = 0; - if (idx != 0) { + /* + * According to the standard, the key index of a pairwise + * key must be zero. However, some AP are broken when it + * comes to WEP key indices, so we work around this. + */ + if (idx != 0 && alg != ALG_WEP) { printk(KERN_DEBUG "%s: set_encrypt - non-zero idx for " "individual key\n", dev->name); return -EINVAL; @@ -100,9 +64,10 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, sta = sta_info_get(local, sta_addr); if (!sta) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG + DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "%s: set_encrypt - unknown addr " - MAC_FMT "\n", - dev->name, MAC_ARG(sta_addr)); + "%s\n", + dev->name, print_mac(mac, sta_addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ return -ENOENT; @@ -111,144 +76,25 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, key = sta->key; } - /* FIX: - * Cannot configure default hwaccel keys with WEP algorithm, if - * any of the virtual interfaces is using static WEP - * configuration because hwaccel would otherwise try to decrypt - * these frames. - * - * For now, just disable WEP hwaccel for broadcast when there is - * possibility of conflict with default keys. This can maybe later be - * optimized by using non-default keys (at least with Atheros ar521x). - */ - if (!sta && alg == ALG_WEP && !local->default_wep_only && - sdata->type != IEEE80211_IF_TYPE_IBSS && - sdata->type != IEEE80211_IF_TYPE_AP) { - try_hwaccel = 0; - } - - if (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) { - /* Software encryption cannot be used with devices that hide - * encryption from the host system, so always try to use - * hardware acceleration with such devices. */ - try_hwaccel = 1; - } - - if ((local->hw.flags & IEEE80211_HW_NO_TKIP_WMM_HWACCEL) && - alg == ALG_TKIP) { - if (sta && (sta->flags & WLAN_STA_WME)) { - /* Hardware does not support hwaccel with TKIP when using WMM. - */ - try_hwaccel = 0; - } - else if (sdata->type == IEEE80211_IF_TYPE_STA) { - sta = sta_info_get(local, sdata->u.sta.bssid); - if (sta) { - if (sta->flags & WLAN_STA_WME) { - try_hwaccel = 0; - } - sta_info_put(sta); - sta = NULL; - } - } - } - - if (alg == ALG_NONE) { - keyconf = NULL; - if (try_hwaccel && key && - key->hw_key_idx != HW_KEY_IDX_INVALID && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, key)) != NULL && - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta_addr, keyconf, sta ? sta->aid : 0)) { - printk(KERN_DEBUG "%s: set_encrypt - low-level disable" - " failed\n", dev->name); - ret = -EINVAL; - } - kfree(keyconf); - - if (set_tx_key || sdata->default_key == key) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = NULL; - } - ieee80211_debugfs_key_remove(key); - if (sta) - sta->key = NULL; - else - sdata->keys[idx] = NULL; + if (remove) { ieee80211_key_free(key); key = NULL; } else { - old_key = key; - key = ieee80211_key_alloc(sta ? NULL : sdata, idx, key_len, - GFP_KERNEL); + /* + * Automatically frees any old key if present. + */ + key = ieee80211_key_alloc(sdata, sta, alg, idx, key_len, _key); if (!key) { ret = -ENOMEM; goto err_out; } - - /* default to sw encryption; low-level driver sets these if the - * requested encryption is supported */ - key->hw_key_idx = HW_KEY_IDX_INVALID; - key->force_sw_encrypt = 1; - - key->alg = alg; - key->keyidx = idx; - key->keylen = key_len; - memcpy(key->key, _key, key_len); - if (set_tx_key) - key->default_tx_key = 1; - - if (alg == ALG_CCMP) { - /* Initialize AES key state here as an optimization - * so that it does not need to be initialized for every - * packet. */ - key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( - key->key); - if (!key->u.ccmp.tfm) { - ret = -ENOMEM; - goto err_free; - } - } - - if (set_tx_key || sdata->default_key == old_key) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = NULL; - } - ieee80211_debugfs_key_remove(old_key); - if (sta) - sta->key = key; - else - sdata->keys[idx] = key; - ieee80211_key_free(old_key); - ieee80211_debugfs_key_add(local, key); - if (sta) - ieee80211_debugfs_key_sta_link(key, sta); - - if (try_hwaccel && - (alg == ALG_WEP || alg == ALG_TKIP || alg == ALG_CCMP)) - ieee80211_set_hw_encryption(dev, sta, sta_addr, key); - } - - if (set_tx_key || (!sta && !sdata->default_key && key)) { - sdata->default_key = key; - if (key) - ieee80211_debugfs_key_add_default(sdata); - - if (local->ops->set_key_idx && - local->ops->set_key_idx(local_to_hw(local), idx)) - printk(KERN_DEBUG "%s: failed to set TX key idx for " - "low-level driver\n", dev->name); } - if (sta) - sta_info_put(sta); - - return 0; + if (set_tx_key || (!sta && !sdata->default_key && key)) + ieee80211_set_default_key(sdata, idx); -err_free: - ieee80211_key_free(key); -err_out: + ret = 0; + err_out: if (sta) sta_info_put(sta); return ret; @@ -259,44 +105,25 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, struct iw_point *data, char *extra) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (local->user_space_mlme) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_STA || sdata->type == IEEE80211_IF_TYPE_IBSS) { int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length); if (ret) return ret; - sdata->u.sta.auto_bssid_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; ieee80211_sta_req_auth(dev, &sdata->u.sta); return 0; } - if (sdata->type == IEEE80211_IF_TYPE_AP) { - kfree(sdata->u.ap.generic_elem); - sdata->u.ap.generic_elem = kmalloc(data->length, GFP_KERNEL); - if (!sdata->u.ap.generic_elem) - return -ENOMEM; - memcpy(sdata->u.ap.generic_elem, extra, data->length); - sdata->u.ap.generic_elem_len = data->length; - return ieee80211_if_config(dev); - } return -EOPNOTSUPP; } -static int ieee80211_ioctl_set_radio_enabled(struct net_device *dev, - int val) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_conf *conf = &local->hw.conf; - - conf->radio_enabled = val; - return ieee80211_hw_config(wdev_priv(dev->ieee80211_ptr)); -} - static int ieee80211_ioctl_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra) @@ -313,9 +140,6 @@ static int ieee80211_ioctl_giwname(struct net_device *dev, case MODE_IEEE80211G: strcpy(name, "IEEE 802.11g"); break; - case MODE_ATHEROS_TURBO: - strcpy(name, "5GHz Turbo"); - break; default: strcpy(name, "IEEE 802.11"); break; @@ -480,16 +304,14 @@ int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq) struct ieee80211_channel *chan = &mode->channels[c]; if (chan->flag & IEEE80211_CHAN_W_SCAN && ((chan->chan == channel) || (chan->freq == freq))) { - /* Use next_mode as the mode preference to - * resolve non-unique channel numbers. */ - if (set && mode->mode != local->next_mode) - continue; - local->oper_channel = chan; local->oper_hw_mode = mode; - set++; + set = 1; + break; } } + if (set) + break; } if (set) { @@ -512,13 +334,14 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_STA) - sdata->u.sta.auto_channel_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { if (freq->m < 0) { if (sdata->type == IEEE80211_IF_TYPE_STA) - sdata->u.sta.auto_channel_sel = 1; + sdata->u.sta.flags |= + IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else return ieee80211_set_channel(local, freq->m, -1); @@ -554,7 +377,6 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata; size_t len = data->length; @@ -566,14 +388,17 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, if (sdata->type == IEEE80211_IF_TYPE_STA || sdata->type == IEEE80211_IF_TYPE_IBSS) { int ret; - if (local->user_space_mlme) { + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; memcpy(sdata->u.sta.ssid, ssid, len); sdata->u.sta.ssid_len = len; return 0; } - sdata->u.sta.auto_ssid_sel = !data->flags; + if (data->flags) + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; + else + sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; ret = ieee80211_sta_set_ssid(dev, ssid, len); if (ret) return ret; @@ -628,25 +453,24 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_STA || sdata->type == IEEE80211_IF_TYPE_IBSS) { int ret; - if (local->user_space_mlme) { + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, ETH_ALEN); return 0; } - if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) { - sdata->u.sta.auto_bssid_sel = 1; - sdata->u.sta.auto_channel_sel = 1; - } else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) - sdata->u.sta.auto_bssid_sel = 1; + if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) + sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL; + else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) + sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; else - sdata->u.sta.auto_bssid_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; ret = ieee80211_sta_set_bssid(dev, (u8 *) &ap_addr->sa_data); if (ret) return ret; @@ -687,10 +511,11 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, static int ieee80211_ioctl_siwscan(struct net_device *dev, struct iw_request_info *info, - struct iw_point *data, char *extra) + union iwreq_data *wrqu, char *extra) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct iw_scan_req *req = NULL; u8 *ssid = NULL; size_t ssid_len = 0; @@ -715,6 +540,14 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, return -EOPNOTSUPP; } + /* if SSID was specified explicitly then use that */ + if (wrqu->data.length == sizeof(struct iw_scan_req) && + wrqu->data.flags & IW_SCAN_THIS_ESSID) { + req = (struct iw_scan_req *)extra; + ssid = req->essid; + ssid_len = req->essid_len; + } + return ieee80211_sta_req_scan(dev, ssid, ssid_len); } @@ -762,9 +595,6 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev, struct ieee80211_rate *rates = &mode->rates[i]; int this_rate = rates->rate; - if (mode->mode == MODE_ATHEROS_TURBO || - mode->mode == MODE_ATHEROS_TURBOG) - this_rate *= 2; if (target_rate == this_rate) { sdata->bss->max_ratectrl_rateidx = i; if (rate->fixed) @@ -798,6 +628,52 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, return 0; } +static int ieee80211_ioctl_siwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + bool need_reconfig = 0; + + if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) + return -EINVAL; + if (data->txpower.flags & IW_TXPOW_RANGE) + return -EINVAL; + if (!data->txpower.fixed) + return -EINVAL; + + if (local->hw.conf.power_level != data->txpower.value) { + local->hw.conf.power_level = data->txpower.value; + need_reconfig = 1; + } + if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { + local->hw.conf.radio_enabled = !(data->txpower.disabled); + need_reconfig = 1; + } + if (need_reconfig) { + ieee80211_hw_config(local); + /* The return value of hw_config is not of big interest here, + * as it doesn't say that it failed because of _this_ config + * change or something else. Ignore it. */ + } + + return 0; +} + +static int ieee80211_ioctl_giwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + + data->txpower.fixed = 1; + data->txpower.disabled = !(local->hw.conf.radio_enabled); + data->txpower.value = local->hw.conf.power_level; + data->txpower.flags = IW_TXPOW_DBM; + + return 0; +} + static int ieee80211_ioctl_siwrts(struct net_device *dev, struct iw_request_info *info, struct iw_param *rts, char *extra) @@ -930,335 +806,6 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev, return 0; } -static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local, - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf; - u8 addr[ETH_ALEN]; - - if (!key || key->alg != ALG_WEP || !key->force_sw_encrypt || - (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)) - return; - - memset(addr, 0xff, ETH_ALEN); - keyconf = ieee80211_key_data2conf(local, key); - if (keyconf && local->ops->set_key && - local->ops->set_key(local_to_hw(local), - SET_KEY, addr, keyconf, 0) == 0) { - key->force_sw_encrypt = - !!(keyconf->flags & IEEE80211_KEY_FORCE_SW_ENCRYPT); - key->hw_key_idx = keyconf->hw_key_idx; - } - kfree(keyconf); -} - - -static void ieee80211_key_disable_hwaccel(struct ieee80211_local *local, - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf; - u8 addr[ETH_ALEN]; - - if (!key || key->alg != ALG_WEP || key->force_sw_encrypt || - (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)) - return; - - memset(addr, 0xff, ETH_ALEN); - keyconf = ieee80211_key_data2conf(local, key); - if (keyconf && local->ops->set_key) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - addr, keyconf, 0); - kfree(keyconf); - key->force_sw_encrypt = 1; -} - - -static int ieee80211_ioctl_default_wep_only(struct ieee80211_local *local, - int value) -{ - int i; - struct ieee80211_sub_if_data *sdata; - - local->default_wep_only = value; - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) - for (i = 0; i < NUM_DEFAULT_KEYS; i++) - if (value) - ieee80211_key_enable_hwaccel(local, - sdata->keys[i]); - else - ieee80211_key_disable_hwaccel(local, - sdata->keys[i]); - read_unlock(&local->sub_if_lock); - - return 0; -} - - -void ieee80211_update_default_wep_only(struct ieee80211_local *local) -{ - int i = 0; - struct ieee80211_sub_if_data *sdata; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - - if (sdata->dev == local->mdev) - continue; - - /* If there is an AP interface then depend on userspace to - set default_wep_only correctly. */ - if (sdata->type == IEEE80211_IF_TYPE_AP) { - read_unlock(&local->sub_if_lock); - return; - } - - i++; - } - - read_unlock(&local->sub_if_lock); - - if (i <= 1) - ieee80211_ioctl_default_wep_only(local, 1); - else - ieee80211_ioctl_default_wep_only(local, 0); -} - - -static int ieee80211_ioctl_prism2_param(struct net_device *dev, - struct iw_request_info *info, - void *wrqu, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - int *i = (int *) extra; - int param = *i; - int value = *(i + 1); - int ret = 0; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - switch (param) { - case PRISM2_PARAM_IEEE_802_1X: - if (local->ops->set_ieee8021x) - ret = local->ops->set_ieee8021x(local_to_hw(local), - value); - if (ret) - printk(KERN_DEBUG "%s: failed to set IEEE 802.1X (%d) " - "for low-level driver\n", dev->name, value); - else - sdata->ieee802_1x = value; - break; - - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - if (sdata->type != IEEE80211_IF_TYPE_AP) - ret = -ENOENT; - else - sdata->use_protection = value; - break; - - case PRISM2_PARAM_PREAMBLE: - local->short_preamble = value; - break; - - case PRISM2_PARAM_STAT_TIME: - if (!local->stat_time && value) { - local->stat_timer.expires = jiffies + HZ * value / 100; - add_timer(&local->stat_timer); - } else if (local->stat_time && !value) { - del_timer_sync(&local->stat_timer); - } - local->stat_time = value; - break; - case PRISM2_PARAM_SHORT_SLOT_TIME: - if (value) - local->hw.conf.flags |= IEEE80211_CONF_SHORT_SLOT_TIME; - else - local->hw.conf.flags &= ~IEEE80211_CONF_SHORT_SLOT_TIME; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - - case PRISM2_PARAM_NEXT_MODE: - local->next_mode = value; - break; - - case PRISM2_PARAM_RADIO_ENABLED: - ret = ieee80211_ioctl_set_radio_enabled(dev, value); - break; - - case PRISM2_PARAM_ANTENNA_MODE: - local->hw.conf.antenna_mode = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - - case PRISM2_PARAM_STA_ANTENNA_SEL: - local->sta_antenna_sel = value; - break; - - case PRISM2_PARAM_TX_POWER_REDUCTION: - if (value < 0) - ret = -EINVAL; - else - local->hw.conf.tx_power_reduction = value; - break; - - case PRISM2_PARAM_KEY_TX_RX_THRESHOLD: - local->key_tx_rx_threshold = value; - break; - - case PRISM2_PARAM_DEFAULT_WEP_ONLY: - ret = ieee80211_ioctl_default_wep_only(local, value); - break; - - case PRISM2_PARAM_WIFI_WME_NOACK_TEST: - local->wifi_wme_noack_test = value; - break; - - case PRISM2_PARAM_SCAN_FLAGS: - local->scan_flags = value; - break; - - case PRISM2_PARAM_MIXED_CELL: - if (sdata->type != IEEE80211_IF_TYPE_STA && - sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - sdata->u.sta.mixed_cell = !!value; - break; - - case PRISM2_PARAM_HW_MODES: - local->enabled_modes = value; - break; - - case PRISM2_PARAM_CREATE_IBSS: - if (sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - sdata->u.sta.create_ibss = !!value; - break; - case PRISM2_PARAM_WMM_ENABLED: - if (sdata->type != IEEE80211_IF_TYPE_STA && - sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - sdata->u.sta.wmm_enabled = !!value; - break; - case PRISM2_PARAM_RADAR_DETECT: - local->hw.conf.radar_detect = value; - break; - case PRISM2_PARAM_SPECTRUM_MGMT: - local->hw.conf.spect_mgmt = value; - break; - default: - ret = -EOPNOTSUPP; - break; - } - - return ret; -} - - -static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, - struct iw_request_info *info, - void *wrqu, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - int *param = (int *) extra; - int ret = 0; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - switch (*param) { - case PRISM2_PARAM_IEEE_802_1X: - *param = sdata->ieee802_1x; - break; - - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - *param = sdata->use_protection; - break; - - case PRISM2_PARAM_PREAMBLE: - *param = local->short_preamble; - break; - - case PRISM2_PARAM_STAT_TIME: - *param = local->stat_time; - break; - case PRISM2_PARAM_SHORT_SLOT_TIME: - *param = !!(local->hw.conf.flags & IEEE80211_CONF_SHORT_SLOT_TIME); - break; - - case PRISM2_PARAM_NEXT_MODE: - *param = local->next_mode; - break; - - case PRISM2_PARAM_ANTENNA_MODE: - *param = local->hw.conf.antenna_mode; - break; - - case PRISM2_PARAM_STA_ANTENNA_SEL: - *param = local->sta_antenna_sel; - break; - - case PRISM2_PARAM_TX_POWER_REDUCTION: - *param = local->hw.conf.tx_power_reduction; - break; - - case PRISM2_PARAM_KEY_TX_RX_THRESHOLD: - *param = local->key_tx_rx_threshold; - break; - - case PRISM2_PARAM_DEFAULT_WEP_ONLY: - *param = local->default_wep_only; - break; - - case PRISM2_PARAM_WIFI_WME_NOACK_TEST: - *param = local->wifi_wme_noack_test; - break; - - case PRISM2_PARAM_SCAN_FLAGS: - *param = local->scan_flags; - break; - - case PRISM2_PARAM_HW_MODES: - *param = local->enabled_modes; - break; - - case PRISM2_PARAM_CREATE_IBSS: - if (sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - *param = !!sdata->u.sta.create_ibss; - break; - - case PRISM2_PARAM_MIXED_CELL: - if (sdata->type != IEEE80211_IF_TYPE_STA && - sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - *param = !!sdata->u.sta.mixed_cell; - break; - case PRISM2_PARAM_WMM_ENABLED: - if (sdata->type != IEEE80211_IF_TYPE_STA && - sdata->type != IEEE80211_IF_TYPE_IBSS) - ret = -EINVAL; - else - *param = !!sdata->u.sta.wmm_enabled; - break; - default: - ret = -EOPNOTSUPP; - break; - } - - return ret; -} - static int ieee80211_ioctl_siwmlme(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra) @@ -1291,6 +838,7 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev, struct ieee80211_sub_if_data *sdata; int idx, i, alg = ALG_WEP; u8 bcaddr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + int remove = 0; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1309,21 +857,16 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev, idx--; if (erq->flags & IW_ENCODE_DISABLED) - alg = ALG_NONE; + remove = 1; else if (erq->length == 0) { /* No key data - just set the default TX key index */ - if (sdata->default_key != sdata->keys[idx]) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = sdata->keys[idx]; - if (sdata->default_key) - ieee80211_debugfs_key_add_default(sdata); - } + ieee80211_set_default_key(sdata, idx); return 0; } return ieee80211_set_encryption( dev, bcaddr, - idx, alg, + idx, alg, remove, !sdata->default_key, keybuf, erq->length); } @@ -1362,9 +905,9 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev, return 0; } - memcpy(key, sdata->keys[idx]->key, - min((int)erq->length, sdata->keys[idx]->keylen)); - erq->length = sdata->keys[idx]->keylen; + memcpy(key, sdata->keys[idx]->conf.key, + min_t(int, erq->length, sdata->keys[idx]->conf.keylen)); + erq->length = sdata->keys[idx]->conf.keylen; erq->flags |= IW_ENCODE_ENABLED; return 0; @@ -1390,22 +933,12 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, ret = -EINVAL; else { /* - * TODO: sdata->u.sta.key_mgmt does not match with WE18 - * value completely; could consider modifying this to - * be closer to WE18. For now, this value is not really - * used for anything else than Privacy matching, so the - * current code here should be more or less OK. + * Key management was set by wpa_supplicant, + * we only need this to associate to a network + * that has privacy enabled regardless of not + * having a key. */ - if (data->value & IW_AUTH_KEY_MGMT_802_1X) { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_WPA_EAP; - } else if (data->value & IW_AUTH_KEY_MGMT_PSK) { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_WPA_PSK; - } else { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_NONE; - } + sdata->u.sta.key_management_enabled = !!data->value; } break; case IW_AUTH_80211_AUTH_ALG: @@ -1484,11 +1017,11 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; - int alg, idx, i; + int uninitialized_var(alg), idx, i, remove = 0; switch (ext->alg) { case IW_ENCODE_ALG_NONE: - alg = ALG_NONE; + remove = 1; break; case IW_ENCODE_ALG_WEP: alg = ALG_WEP; @@ -1504,7 +1037,7 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, } if (erq->flags & IW_ENCODE_DISABLED) - alg = ALG_NONE; + remove = 1; idx = erq->flags & IW_ENCODE_INDEX; if (idx < 1 || idx > 4) { @@ -1523,20 +1056,13 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, idx--; return ieee80211_set_encryption(dev, ext->addr.sa_data, idx, alg, + remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, ext->key, ext->key_len); } -static const struct iw_priv_args ieee80211_ioctl_priv[] = { - { PRISM2_IOCTL_PRISM2_PARAM, - IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2, 0, "param" }, - { PRISM2_IOCTL_GET_PRISM2_PARAM, - IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, - IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, "get_param" }, -}; - /* Structures to export the Wireless Handlers */ static const iw_handler ieee80211_handler[] = @@ -1557,10 +1083,10 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */ (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */ (iw_handler) NULL /* kernel code */, /* SIOCGIWSTATS */ - iw_handler_set_spy, /* SIOCSIWSPY */ - iw_handler_get_spy, /* SIOCGIWSPY */ - iw_handler_set_thrspy, /* SIOCSIWTHRSPY */ - iw_handler_get_thrspy, /* SIOCGIWTHRSPY */ + (iw_handler) NULL, /* SIOCSIWSPY */ + (iw_handler) NULL, /* SIOCGIWSPY */ + (iw_handler) NULL, /* SIOCSIWTHRSPY */ + (iw_handler) NULL, /* SIOCGIWTHRSPY */ (iw_handler) ieee80211_ioctl_siwap, /* SIOCSIWAP */ (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */ (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */ @@ -1579,8 +1105,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */ (iw_handler) ieee80211_ioctl_giwfrag, /* SIOCGIWFRAG */ - (iw_handler) NULL, /* SIOCSIWTXPOW */ - (iw_handler) NULL, /* SIOCGIWTXPOW */ + (iw_handler) ieee80211_ioctl_siwtxpower, /* SIOCSIWTXPOW */ + (iw_handler) ieee80211_ioctl_giwtxpower, /* SIOCGIWTXPOW */ (iw_handler) ieee80211_ioctl_siwretry, /* SIOCSIWRETRY */ (iw_handler) ieee80211_ioctl_giwretry, /* SIOCGIWRETRY */ (iw_handler) ieee80211_ioctl_siwencode, /* SIOCSIWENCODE */ @@ -1599,19 +1125,9 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* -- hole -- */ }; -static const iw_handler ieee80211_private_handler[] = -{ /* SIOCIWFIRSTPRIV + */ - (iw_handler) ieee80211_ioctl_prism2_param, /* 0 */ - (iw_handler) ieee80211_ioctl_get_prism2_param, /* 1 */ -}; - const struct iw_handler_def ieee80211_iw_handler_def = { .num_standard = ARRAY_SIZE(ieee80211_handler), - .num_private = ARRAY_SIZE(ieee80211_private_handler), - .num_private_args = ARRAY_SIZE(ieee80211_ioctl_priv), .standard = (iw_handler *) ieee80211_handler, - .private = (iw_handler *) ieee80211_private_handler, - .private_args = (struct iw_priv_args *) ieee80211_ioctl_priv, .get_wireless_stats = ieee80211_get_wireless_stats, }; diff --git a/net/mac80211/ieee80211_key.h b/net/mac80211/ieee80211_key.h index c3338491278..fc770e98d47 100644 --- a/net/mac80211/ieee80211_key.h +++ b/net/mac80211/ieee80211_key.h @@ -11,7 +11,7 @@ #define IEEE80211_KEY_H #include <linux/types.h> -#include <linux/kref.h> +#include <linux/list.h> #include <linux/crypto.h> #include <net/mac80211.h> @@ -41,11 +41,21 @@ #define NUM_RX_DATA_QUEUES 17 +struct ieee80211_local; +struct ieee80211_sub_if_data; +struct sta_info; + +#define KEY_FLAG_UPLOADED_TO_HARDWARE (1<<0) + struct ieee80211_key { - struct kref kref; + struct ieee80211_local *local; + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + + struct list_head list; + + unsigned int flags; - int hw_key_idx; /* filled and used by low-level driver */ - ieee80211_key_alg alg; union { struct { /* last used TSC */ @@ -73,22 +83,16 @@ struct ieee80211_key { u8 rx_crypto_buf[6 * AES_BLOCK_LEN]; } ccmp; } u; - int tx_rx_count; /* number of times this key has been used */ - int keylen; - /* if the low level driver can provide hardware acceleration it should - * clear this flag */ - unsigned int force_sw_encrypt:1; - unsigned int default_tx_key:1; /* This key is the new default TX key - * (used only for broadcast keys). */ - s8 keyidx; /* WEP key index */ + /* number of times this key has been used */ + int tx_rx_count; #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *stalink; struct dentry *dir; struct dentry *keylen; - struct dentry *force_sw_encrypt; + struct dentry *flags; struct dentry *keyidx; struct dentry *hw_key_idx; struct dentry *tx_rx_count; @@ -97,10 +101,27 @@ struct ieee80211_key { struct dentry *rx_spec; struct dentry *replays; struct dentry *key; + struct dentry *ifindex; } debugfs; #endif - u8 key[0]; + /* + * key config, must be last because it contains key + * material as variable length member + */ + struct ieee80211_key_conf conf; }; +struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + enum ieee80211_key_alg alg, + int idx, + size_t key_len, + const u8 *key_data); +void ieee80211_key_free(struct ieee80211_key *key); +void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); + #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/ieee80211_led.c b/net/mac80211/ieee80211_led.c index 719d75b2070..4cf89af9d10 100644 --- a/net/mac80211/ieee80211_led.c +++ b/net/mac80211/ieee80211_led.c @@ -33,33 +33,58 @@ void ieee80211_led_tx(struct ieee80211_local *local, int q) led_trigger_event(local->tx_led, LED_FULL); } +void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) +{ + if (unlikely(!local->assoc_led)) + return; + if (associated) + led_trigger_event(local->assoc_led, LED_FULL); + else + led_trigger_event(local->assoc_led, LED_OFF); +} + void ieee80211_led_init(struct ieee80211_local *local) { local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (!local->rx_led) - return; - snprintf(local->rx_led_name, sizeof(local->rx_led_name), - "%srx", wiphy_name(local->hw.wiphy)); - local->rx_led->name = local->rx_led_name; - if (led_trigger_register(local->rx_led)) { - kfree(local->rx_led); - local->rx_led = NULL; + if (local->rx_led) { + snprintf(local->rx_led_name, sizeof(local->rx_led_name), + "%srx", wiphy_name(local->hw.wiphy)); + local->rx_led->name = local->rx_led_name; + if (led_trigger_register(local->rx_led)) { + kfree(local->rx_led); + local->rx_led = NULL; + } } local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (!local->tx_led) - return; - snprintf(local->tx_led_name, sizeof(local->tx_led_name), - "%stx", wiphy_name(local->hw.wiphy)); - local->tx_led->name = local->tx_led_name; - if (led_trigger_register(local->tx_led)) { - kfree(local->tx_led); - local->tx_led = NULL; + if (local->tx_led) { + snprintf(local->tx_led_name, sizeof(local->tx_led_name), + "%stx", wiphy_name(local->hw.wiphy)); + local->tx_led->name = local->tx_led_name; + if (led_trigger_register(local->tx_led)) { + kfree(local->tx_led); + local->tx_led = NULL; + } + } + + local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); + if (local->assoc_led) { + snprintf(local->assoc_led_name, sizeof(local->assoc_led_name), + "%sassoc", wiphy_name(local->hw.wiphy)); + local->assoc_led->name = local->assoc_led_name; + if (led_trigger_register(local->assoc_led)) { + kfree(local->assoc_led); + local->assoc_led = NULL; + } } } void ieee80211_led_exit(struct ieee80211_local *local) { + if (local->assoc_led) { + led_trigger_unregister(local->assoc_led); + kfree(local->assoc_led); + } if (local->tx_led) { led_trigger_unregister(local->tx_led); kfree(local->tx_led); @@ -70,6 +95,16 @@ void ieee80211_led_exit(struct ieee80211_local *local) } } +char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (local->assoc_led) + return local->assoc_led_name; + return NULL; +} +EXPORT_SYMBOL(__ieee80211_get_assoc_led_name); + char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); diff --git a/net/mac80211/ieee80211_led.h b/net/mac80211/ieee80211_led.h index 5c8ab826387..0feb2261983 100644 --- a/net/mac80211/ieee80211_led.h +++ b/net/mac80211/ieee80211_led.h @@ -14,6 +14,8 @@ #ifdef CONFIG_MAC80211_LEDS extern void ieee80211_led_rx(struct ieee80211_local *local); extern void ieee80211_led_tx(struct ieee80211_local *local, int q); +extern void ieee80211_led_assoc(struct ieee80211_local *local, + bool associated); extern void ieee80211_led_init(struct ieee80211_local *local); extern void ieee80211_led_exit(struct ieee80211_local *local); #else @@ -23,6 +25,10 @@ static inline void ieee80211_led_rx(struct ieee80211_local *local) static inline void ieee80211_led_tx(struct ieee80211_local *local, int q) { } +static inline void ieee80211_led_assoc(struct ieee80211_local *local, + bool associated) +{ +} static inline void ieee80211_led_init(struct ieee80211_local *local) { } diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c index 2118de04fc3..93abb8fff14 100644 --- a/net/mac80211/ieee80211_rate.c +++ b/net/mac80211/ieee80211_rate.c @@ -9,6 +9,7 @@ */ #include <linux/kernel.h> +#include <linux/rtnetlink.h> #include "ieee80211_rate.h" #include "ieee80211_i.h" @@ -137,3 +138,43 @@ void rate_control_put(struct rate_control_ref *ref) { kref_put(&ref->kref, rate_control_release); } + +int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, + const char *name) +{ + struct rate_control_ref *ref, *old; + + ASSERT_RTNL(); + if (local->open_count || netif_running(local->mdev)) + return -EBUSY; + + ref = rate_control_alloc(name, local); + if (!ref) { + printk(KERN_WARNING "%s: Failed to select rate control " + "algorithm\n", wiphy_name(local->hw.wiphy)); + return -ENOENT; + } + + old = local->rate_ctrl; + local->rate_ctrl = ref; + if (old) { + rate_control_put(old); + sta_info_flush(local, NULL); + } + + printk(KERN_DEBUG "%s: Selected rate control " + "algorithm '%s'\n", wiphy_name(local->hw.wiphy), + ref->ops->name); + + + return 0; +} + +void rate_control_deinitialize(struct ieee80211_local *local) +{ + struct rate_control_ref *ref; + + ref = local->rate_ctrl; + local->rate_ctrl = NULL; + rate_control_put(ref); +} diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h index f021a028d9d..7cd1ebab4f8 100644 --- a/net/mac80211/ieee80211_rate.h +++ b/net/mac80211/ieee80211_rate.h @@ -30,8 +30,6 @@ struct rate_control_extra { /* parameters from the caller to rate_control_get_rate(): */ struct ieee80211_hw_mode *mode; - int mgmt_data; /* this is data frame that is used for management - * (e.g., IEEE 802.1X EAPOL) */ u16 ethertype; }; @@ -141,4 +139,10 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) #endif } + +/* functions for rate control related to a device */ +int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, + const char *name); +void rate_control_deinitialize(struct ieee80211_local *local); + #endif /* IEEE80211_RATE_H */ diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index 0d99b685df5..fda0e06453e 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -12,7 +12,6 @@ */ /* TODO: - * BSS table: use <BSSID,SSID> as the key to support multi-SSID APs * order BSS list by RSSI(?) ("quality of AP") * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, * SSID) @@ -31,7 +30,7 @@ #include <net/mac80211.h> #include "ieee80211_i.h" #include "ieee80211_rate.h" -#include "hostapd_ioctl.h" +#include "ieee80211_led.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_MAX_TRIES 3 @@ -61,7 +60,8 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, u8 *ssid, size_t ssid_len); static struct ieee80211_sta_bss * -ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid); +ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel, + u8 *ssid, u8 ssid_len); static void ieee80211_rx_bss_put(struct net_device *dev, struct ieee80211_sta_bss *bss); static int ieee80211_sta_find_ibss(struct net_device *dev, @@ -108,15 +108,11 @@ struct ieee802_11_elems { u8 wmm_param_len; }; -typedef enum { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 } ParseRes; - - -static ParseRes ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems) +static void ieee802_11_parse_elems(u8 *start, size_t len, + struct ieee802_11_elems *elems) { size_t left = len; u8 *pos = start; - int unknown = 0; memset(elems, 0, sizeof(*elems)); @@ -127,15 +123,8 @@ static ParseRes ieee802_11_parse_elems(u8 *start, size_t len, elen = *pos++; left -= 2; - if (elen > left) { -#if 0 - if (net_ratelimit()) - printk(KERN_DEBUG "IEEE 802.11 element parse " - "failed (id=%d elen=%d left=%d)\n", - id, elen, left); -#endif - return ParseFailed; - } + if (elen > left) + return; switch (id) { case WLAN_EID_SSID: @@ -202,28 +191,15 @@ static ParseRes ieee802_11_parse_elems(u8 *start, size_t len, elems->ext_supp_rates_len = elen; break; default: -#if 0 - printk(KERN_DEBUG "IEEE 802.11 element parse ignored " - "unknown element (id=%d elen=%d)\n", - id, elen); -#endif - unknown++; break; } left -= elen; pos += elen; } - - /* Do not trigger error if left == 1 as Apple Airport base stations - * send AssocResps that are one spurious byte too long. */ - - return unknown ? ParseUnknown : ParseOK; } - - static int ecw2cw(int ecw) { int cw = 1; @@ -234,7 +210,6 @@ static int ecw2cw(int ecw) return cw - 1; } - static void ieee80211_sta_wmm_params(struct net_device *dev, struct ieee80211_if_sta *ifsta, u8 *wmm_param, size_t wmm_param_len) @@ -318,17 +293,43 @@ static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + int preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0; + u8 changes = 0; + DECLARE_MAC_BUF(mac); - if (use_protection != sdata->use_protection) { + if (use_protection != !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION)) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" - MAC_FMT ")\n", + "%s)\n", dev->name, use_protection ? "enabled" : "disabled", - MAC_ARG(ifsta->bssid)); + print_mac(mac, ifsta->bssid)); + } + if (use_protection) + sdata->flags |= IEEE80211_SDATA_USE_PROTECTION; + else + sdata->flags &= ~IEEE80211_SDATA_USE_PROTECTION; + changes |= IEEE80211_ERP_CHANGE_PROTECTION; + } + + if (preamble_mode != !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: switched to %s barker preamble" + " (BSSID=%s)\n", + dev->name, + (preamble_mode == WLAN_ERP_PREAMBLE_SHORT) ? + "short" : "long", + print_mac(mac, ifsta->bssid)); } - sdata->use_protection = use_protection; + if (preamble_mode) + sdata->flags &= ~IEEE80211_SDATA_SHORT_PREAMBLE; + else + sdata->flags |= IEEE80211_SDATA_SHORT_PREAMBLE; + changes |= IEEE80211_ERP_CHANGE_PREAMBLE; } + + if (changes) + ieee80211_erp_info_change_notify(dev, changes); } @@ -344,7 +345,7 @@ static void ieee80211_sta_send_associnfo(struct net_device *dev, return; buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + - ifsta->assocresp_ies_len), GFP_ATOMIC); + ifsta->assocresp_ies_len), GFP_KERNEL); if (!buf) return; @@ -384,24 +385,28 @@ static void ieee80211_sta_send_associnfo(struct net_device *dev, static void ieee80211_set_associated(struct net_device *dev, - struct ieee80211_if_sta *ifsta, int assoc) + struct ieee80211_if_sta *ifsta, + bool assoc) { + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); union iwreq_data wrqu; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (ifsta->associated == assoc) + if (!!(ifsta->flags & IEEE80211_STA_ASSOCIATED) == assoc) return; - ifsta->associated = assoc; - if (assoc) { struct ieee80211_sub_if_data *sdata; struct ieee80211_sta_bss *bss; + + ifsta->flags |= IEEE80211_STA_ASSOCIATED; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type != IEEE80211_IF_TYPE_STA) return; - bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + bss = ieee80211_rx_bss_get(dev, ifsta->bssid, + local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (bss) { if (bss->has_erp_value) ieee80211_handle_erp_ie(dev, bss->erp_value); @@ -409,18 +414,21 @@ static void ieee80211_set_associated(struct net_device *dev, } netif_carrier_on(dev); - ifsta->prev_bssid_set = 1; + ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); ieee80211_sta_send_associnfo(dev, ifsta); } else { + ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + netif_carrier_off(dev); - sdata->use_protection = 0; + ieee80211_reset_erp_info(dev); memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); } wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); ifsta->last_probe = jiffies; + ieee80211_led_assoc(local, assoc); } static void ieee80211_set_disassoc(struct net_device *dev, @@ -447,8 +455,8 @@ static void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); pkt_data->ifindex = sdata->dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); - pkt_data->do_not_encrypt = !encrypt; + if (!encrypt) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; dev_queue_xmit(skb); } @@ -495,18 +503,20 @@ static void ieee80211_send_auth(struct net_device *dev, static void ieee80211_authenticate(struct net_device *dev, struct ieee80211_if_sta *ifsta) { + DECLARE_MAC_BUF(mac); + ifsta->auth_tries++; if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: authentication with AP " MAC_FMT + printk(KERN_DEBUG "%s: authentication with AP %s" " timed out\n", - dev->name, MAC_ARG(ifsta->bssid)); + dev->name, print_mac(mac, ifsta->bssid)); ifsta->state = IEEE80211_DISABLED; return; } ifsta->state = IEEE80211_AUTHENTICATE; - printk(KERN_DEBUG "%s: authenticate with AP " MAC_FMT "\n", - dev->name, MAC_ARG(ifsta->bssid)); + printk(KERN_DEBUG "%s: authenticate with AP %s\n", + dev->name, print_mac(mac, ifsta->bssid)); ieee80211_send_auth(dev, ifsta, 1, NULL, 0, 0); @@ -543,7 +553,8 @@ static void ieee80211_send_assoc(struct net_device *dev, capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME | WLAN_CAPABILITY_SHORT_PREAMBLE; } - bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (bss) { if (bss->capability & WLAN_CAPABILITY_PRIVACY) capab |= WLAN_CAPABILITY_PRIVACY; @@ -559,7 +570,7 @@ static void ieee80211_send_assoc(struct net_device *dev, memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - if (ifsta->prev_bssid_set) { + if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { skb_put(skb, 10); mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, IEEE80211_STYPE_REASSOC_REQ); @@ -589,8 +600,6 @@ static void ieee80211_send_assoc(struct net_device *dev, *pos++ = len; for (i = 0; i < len; i++) { int rate = mode->rates[i].rate; - if (mode->mode == MODE_ATHEROS_TURBO) - rate /= 2; *pos++ = (u8) (rate / 5); } @@ -600,8 +609,6 @@ static void ieee80211_send_assoc(struct net_device *dev, *pos++ = mode->num_rates - len; for (i = len; i < mode->num_rates; i++) { int rate = mode->rates[i].rate; - if (mode->mode == MODE_ATHEROS_TURBO) - rate /= 2; *pos++ = (u8) (rate / 5); } } @@ -611,7 +618,7 @@ static void ieee80211_send_assoc(struct net_device *dev, memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); } - if (wmm && ifsta->wmm_enabled) { + if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ @@ -626,7 +633,7 @@ static void ieee80211_send_assoc(struct net_device *dev, kfree(ifsta->assocreq_ies); ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; - ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_ATOMIC); + ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); if (ifsta->assocreq_ies) memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); @@ -695,14 +702,16 @@ static void ieee80211_send_disassoc(struct net_device *dev, static int ieee80211_privacy_mismatch(struct net_device *dev, struct ieee80211_if_sta *ifsta) { + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; int res = 0; - if (!ifsta || ifsta->mixed_cell || - ifsta->key_mgmt != IEEE80211_KEY_MGMT_NONE) + if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL) || + ifsta->key_management_enabled) return 0; - bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (!bss) return 0; @@ -719,18 +728,20 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, static void ieee80211_associate(struct net_device *dev, struct ieee80211_if_sta *ifsta) { + DECLARE_MAC_BUF(mac); + ifsta->assoc_tries++; if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { - printk(KERN_DEBUG "%s: association with AP " MAC_FMT + printk(KERN_DEBUG "%s: association with AP %s" " timed out\n", - dev->name, MAC_ARG(ifsta->bssid)); + dev->name, print_mac(mac, ifsta->bssid)); ifsta->state = IEEE80211_DISABLED; return; } ifsta->state = IEEE80211_ASSOCIATE; - printk(KERN_DEBUG "%s: associate with AP " MAC_FMT "\n", - dev->name, MAC_ARG(ifsta->bssid)); + printk(KERN_DEBUG "%s: associate with AP %s\n", + dev->name, print_mac(mac, ifsta->bssid)); if (ieee80211_privacy_mismatch(dev, ifsta)) { printk(KERN_DEBUG "%s: mismatch in privacy configuration and " "mixed-cell disabled - abort association\n", dev->name); @@ -750,6 +761,7 @@ static void ieee80211_associated(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; int disassoc; + DECLARE_MAC_BUF(mac); /* TODO: start monitoring current AP signal quality and number of * missed beacons. Scan other channels every now and then and search @@ -760,29 +772,27 @@ static void ieee80211_associated(struct net_device *dev, sta = sta_info_get(local, ifsta->bssid); if (!sta) { - printk(KERN_DEBUG "%s: No STA entry for own AP " MAC_FMT "\n", - dev->name, MAC_ARG(ifsta->bssid)); + printk(KERN_DEBUG "%s: No STA entry for own AP %s\n", + dev->name, print_mac(mac, ifsta->bssid)); disassoc = 1; } else { disassoc = 0; if (time_after(jiffies, sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { - if (ifsta->probereq_poll) { + if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { printk(KERN_DEBUG "%s: No ProbeResp from " - "current AP " MAC_FMT " - assume out of " + "current AP %s - assume out of " "range\n", - dev->name, MAC_ARG(ifsta->bssid)); + dev->name, print_mac(mac, ifsta->bssid)); disassoc = 1; - sta_info_free(sta, 0); - ifsta->probereq_poll = 0; - } else { + sta_info_free(sta); + } else ieee80211_send_probe_req(dev, ifsta->bssid, local->scan_ssid, local->scan_ssid_len); - ifsta->probereq_poll = 1; - } + ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; } else { - ifsta->probereq_poll = 0; + ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; if (time_after(jiffies, ifsta->last_probe + IEEE80211_PROBE_INTERVAL)) { ifsta->last_probe = jiffies; @@ -862,10 +872,7 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, pos = skb_put(skb, 1); supp_rates[1]++; } - if (mode->mode == MODE_ATHEROS_TURBO) - *pos = rate->rate / 10; - else - *pos = rate->rate / 5; + *pos = rate->rate / 5; } ieee80211_sta_tx(dev, skb, 0); @@ -876,7 +883,7 @@ static int ieee80211_sta_wep_configured(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!sdata || !sdata->default_key || - sdata->default_key->alg != ALG_WEP) + sdata->default_key->conf.alg != ALG_WEP) return 0; return 1; } @@ -886,7 +893,7 @@ static void ieee80211_auth_completed(struct net_device *dev, struct ieee80211_if_sta *ifsta) { printk(KERN_DEBUG "%s: authenticated\n", dev->name); - ifsta->authenticated = 1; + ifsta->flags |= IEEE80211_STA_AUTHENTICATED; ieee80211_associate(dev, ifsta); } @@ -901,12 +908,7 @@ static void ieee80211_auth_challenge(struct net_device *dev, printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name); pos = mgmt->u.auth.variable; - if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems) - == ParseFailed) { - printk(KERN_DEBUG "%s: failed to parse Auth(challenge)\n", - dev->name); - return; - } + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (!elems.challenge) { printk(KERN_DEBUG "%s: no challenge IE in shared key auth " "frame\n", dev->name); @@ -924,37 +926,38 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); u16 auth_alg, auth_transaction, status_code; + DECLARE_MAC_BUF(mac); if (ifsta->state != IEEE80211_AUTHENTICATE && sdata->type != IEEE80211_IF_TYPE_IBSS) { printk(KERN_DEBUG "%s: authentication frame received from " - MAC_FMT ", but not in authenticate state - ignored\n", - dev->name, MAC_ARG(mgmt->sa)); + "%s, but not in authenticate state - ignored\n", + dev->name, print_mac(mac, mgmt->sa)); return; } if (len < 24 + 6) { printk(KERN_DEBUG "%s: too short (%zd) authentication frame " - "received from " MAC_FMT " - ignored\n", - dev->name, len, MAC_ARG(mgmt->sa)); + "received from %s - ignored\n", + dev->name, len, print_mac(mac, mgmt->sa)); return; } if (sdata->type != IEEE80211_IF_TYPE_IBSS && memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: authentication frame received from " - "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown AP (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } if (sdata->type != IEEE80211_IF_TYPE_IBSS && memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: authentication frame received from " - "unknown BSSID (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown BSSID (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } @@ -962,9 +965,9 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - printk(KERN_DEBUG "%s: RX authentication from " MAC_FMT " (alg=%d " + printk(KERN_DEBUG "%s: RX authentication from %s (alg=%d " "transaction=%d status=%d)\n", - dev->name, MAC_ARG(mgmt->sa), auth_alg, + dev->name, print_mac(mac, mgmt->sa), auth_alg, auth_transaction, status_code); if (sdata->type == IEEE80211_IF_TYPE_IBSS) { @@ -1051,29 +1054,30 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev, size_t len) { u16 reason_code; + DECLARE_MAC_BUF(mac); if (len < 24 + 2) { printk(KERN_DEBUG "%s: too short (%zd) deauthentication frame " - "received from " MAC_FMT " - ignored\n", - dev->name, len, MAC_ARG(mgmt->sa)); + "received from %s - ignored\n", + dev->name, len, print_mac(mac, mgmt->sa)); return; } if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: deauthentication frame received from " - "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown AP (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - printk(KERN_DEBUG "%s: RX deauthentication from " MAC_FMT + printk(KERN_DEBUG "%s: RX deauthentication from %s" " (reason=%d)\n", - dev->name, MAC_ARG(mgmt->sa), reason_code); + dev->name, print_mac(mac, mgmt->sa), reason_code); - if (ifsta->authenticated) { + if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) { printk(KERN_DEBUG "%s: deauthenticated\n", dev->name); } @@ -1086,7 +1090,7 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev, } ieee80211_set_disassoc(dev, ifsta, 1); - ifsta->authenticated = 0; + ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; } @@ -1096,29 +1100,30 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev, size_t len) { u16 reason_code; + DECLARE_MAC_BUF(mac); if (len < 24 + 2) { printk(KERN_DEBUG "%s: too short (%zd) disassociation frame " - "received from " MAC_FMT " - ignored\n", - dev->name, len, MAC_ARG(mgmt->sa)); + "received from %s - ignored\n", + dev->name, len, print_mac(mac, mgmt->sa)); return; } if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: disassociation frame received from " - "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown AP (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - printk(KERN_DEBUG "%s: RX disassociation from " MAC_FMT + printk(KERN_DEBUG "%s: RX disassociation from %s" " (reason=%d)\n", - dev->name, MAC_ARG(mgmt->sa), reason_code); + dev->name, print_mac(mac, mgmt->sa), reason_code); - if (ifsta->associated) + if (ifsta->flags & IEEE80211_STA_ASSOCIATED) printk(KERN_DEBUG "%s: disassociated\n", dev->name); if (ifsta->state == IEEE80211_ASSOCIATED) { @@ -1145,60 +1150,59 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, struct ieee802_11_elems elems; u8 *pos; int i, j; + DECLARE_MAC_BUF(mac); /* AssocResp and ReassocResp have identical structure, so process both * of them in this function. */ if (ifsta->state != IEEE80211_ASSOCIATE) { printk(KERN_DEBUG "%s: association frame received from " - MAC_FMT ", but not in associate state - ignored\n", - dev->name, MAC_ARG(mgmt->sa)); + "%s, but not in associate state - ignored\n", + dev->name, print_mac(mac, mgmt->sa)); return; } if (len < 24 + 6) { printk(KERN_DEBUG "%s: too short (%zd) association frame " - "received from " MAC_FMT " - ignored\n", - dev->name, len, MAC_ARG(mgmt->sa)); + "received from %s - ignored\n", + dev->name, len, print_mac(mac, mgmt->sa)); return; } if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { printk(KERN_DEBUG "%s: association frame received from " - "unknown AP (SA=" MAC_FMT " BSSID=" MAC_FMT ") - " - "ignored\n", dev->name, MAC_ARG(mgmt->sa), - MAC_ARG(mgmt->bssid)); + "unknown AP (SA=%s BSSID=%s) - " + "ignored\n", dev->name, print_mac(mac, mgmt->sa), + print_mac(mac, mgmt->bssid)); return; } capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); aid = le16_to_cpu(mgmt->u.assoc_resp.aid); - if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) - printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " - "set\n", dev->name, aid); - aid &= ~(BIT(15) | BIT(14)); - printk(KERN_DEBUG "%s: RX %sssocResp from " MAC_FMT " (capab=0x%x " + printk(KERN_DEBUG "%s: RX %sssocResp from %s (capab=0x%x " "status=%d aid=%d)\n", - dev->name, reassoc ? "Rea" : "A", MAC_ARG(mgmt->sa), - capab_info, status_code, aid); + dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa), + capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", dev->name, status_code); - if (status_code == WLAN_STATUS_REASSOC_NO_ASSOC) - ifsta->prev_bssid_set = 0; + /* if this was a reassociation, ensure we try a "full" + * association next time. This works around some broken APs + * which do not correctly reject reassociation requests. */ + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; return; } + if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) + printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " + "set\n", dev->name, aid); + aid &= ~(BIT(15) | BIT(14)); + pos = mgmt->u.assoc_resp.variable; - if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems) - == ParseFailed) { - printk(KERN_DEBUG "%s: failed to parse AssocResp\n", - dev->name); - return; - } + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (!elems.supp_rates) { printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", @@ -1210,7 +1214,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, * update our stored copy */ if (elems.erp_info && elems.erp_info_len >= 1) { struct ieee80211_sta_bss *bss - = ieee80211_rx_bss_get(dev, ifsta->bssid); + = ieee80211_rx_bss_get(dev, ifsta->bssid, + local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (bss) { bss->erp_value = elems.erp_info[0]; bss->has_erp_value = 1; @@ -1224,7 +1230,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, kfree(ifsta->assocresp_ies); ifsta->assocresp_ies_len = len - (pos - (u8 *) mgmt); - ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_ATOMIC); + ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_KERNEL); if (ifsta->assocresp_ies) memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); @@ -1234,13 +1240,15 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, sta = sta_info_get(local, ifsta->bssid); if (!sta) { struct ieee80211_sta_bss *bss; - sta = sta_info_add(local, dev, ifsta->bssid, GFP_ATOMIC); + sta = sta_info_add(local, dev, ifsta->bssid, GFP_KERNEL); if (!sta) { printk(KERN_DEBUG "%s: failed to add STA entry for the" " AP\n", dev->name); return; } - bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + bss = ieee80211_rx_bss_get(dev, ifsta->bssid, + local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (bss) { sta->last_rssi = bss->rssi; sta->last_signal = bss->signal; @@ -1250,23 +1258,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, } sta->dev = dev; - sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC; - sta->assoc_ap = 1; + sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP; rates = 0; mode = local->oper_hw_mode; for (i = 0; i < elems.supp_rates_len; i++) { int rate = (elems.supp_rates[i] & 0x7f) * 5; - if (mode->mode == MODE_ATHEROS_TURBO) - rate *= 2; for (j = 0; j < mode->num_rates; j++) if (mode->rates[j].rate == rate) rates |= BIT(j); } for (i = 0; i < elems.ext_supp_rates_len; i++) { int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; - if (mode->mode == MODE_ATHEROS_TURBO) - rate *= 2; for (j = 0; j < mode->num_rates; j++) if (mode->rates[j].rate == rate) rates |= BIT(j); @@ -1275,7 +1278,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, rate_control_rate_init(sta, local); - if (elems.wmm_param && ifsta->wmm_enabled) { + if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { sta->flags |= WLAN_STA_WME; ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, elems.wmm_param_len); @@ -1321,7 +1324,8 @@ static void __ieee80211_rx_bss_hash_del(struct net_device *dev, static struct ieee80211_sta_bss * -ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid) +ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int channel, + u8 *ssid, u8 ssid_len) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; @@ -1332,6 +1336,11 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid) atomic_inc(&bss->users); atomic_inc(&bss->users); memcpy(bss->bssid, bssid, ETH_ALEN); + bss->channel = channel; + if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) { + memcpy(bss->ssid, ssid, ssid_len); + bss->ssid_len = ssid_len; + } spin_lock_bh(&local->sta_bss_lock); /* TODO: order by RSSI? */ @@ -1343,7 +1352,8 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid) static struct ieee80211_sta_bss * -ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid) +ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel, + u8 *ssid, u8 ssid_len) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; @@ -1351,7 +1361,10 @@ ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid) spin_lock_bh(&local->sta_bss_lock); bss = local->sta_bss_hash[STA_HASH(bssid)]; while (bss) { - if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0) { + if (!memcmp(bss->bssid, bssid, ETH_ALEN) && + bss->channel == channel && + bss->ssid_len == ssid_len && + (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) { atomic_inc(&bss->users); break; } @@ -1413,19 +1426,21 @@ static void ieee80211_rx_bss_info(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee802_11_elems elems; size_t baselen; - int channel, invalid = 0, clen; + int channel, clen; struct ieee80211_sta_bss *bss; struct sta_info *sta; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); u64 timestamp; + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); if (!beacon && memcmp(mgmt->da, dev->dev_addr, ETH_ALEN)) return; /* ignore ProbeResp to foreign address */ #if 0 - printk(KERN_DEBUG "%s: RX %s from " MAC_FMT " to " MAC_FMT "\n", + printk(KERN_DEBUG "%s: RX %s from %s to %s\n", dev->name, beacon ? "Beacon" : "Probe Response", - MAC_ARG(mgmt->sa), MAC_ARG(mgmt->da)); + print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da)); #endif baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; @@ -1444,10 +1459,10 @@ static void ieee80211_rx_bss_info(struct net_device *dev, else tsf = -1LLU; if (time_after(jiffies, last_tsf_debug + 5 * HZ)) { - printk(KERN_DEBUG "RX beacon SA=" MAC_FMT " BSSID=" - MAC_FMT " TSF=0x%llx BCN=0x%llx diff=%lld " + printk(KERN_DEBUG "RX beacon SA=%s BSSID=" + "%s TSF=0x%llx BCN=0x%llx diff=%lld " "@%lu\n", - MAC_ARG(mgmt->sa), MAC_ARG(mgmt->bssid), + print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->bssid), (unsigned long long)tsf, (unsigned long long)timestamp, (unsigned long long)(tsf - timestamp), @@ -1457,9 +1472,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev, #endif /* CONFIG_MAC80211_IBSS_DEBUG */ } - if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, - &elems) == ParseFailed) - invalid = 1; + ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates && memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 && @@ -1486,8 +1499,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev, rate = elems.ext_supp_rates [i - elems.supp_rates_len]; own_rate = 5 * (rate & 0x7f); - if (mode->mode == MODE_ATHEROS_TURBO) - own_rate *= 2; for (j = 0; j < num_rates; j++) if (rates[j].rate == own_rate) supp_rates |= BIT(j); @@ -1503,9 +1514,9 @@ static void ieee80211_rx_bss_info(struct net_device *dev, } if (sta->supp_rates != prev_rates) { printk(KERN_DEBUG "%s: updated supp_rates set for " - MAC_FMT " based on beacon info (0x%x & 0x%x -> " + "%s based on beacon info (0x%x & 0x%x -> " "0x%x)\n", - dev->name, MAC_ARG(sta->addr), prev_rates, + dev->name, print_mac(mac, sta->addr), prev_rates, supp_rates, sta->supp_rates); } sta_info_put(sta); @@ -1519,9 +1530,11 @@ static void ieee80211_rx_bss_info(struct net_device *dev, else channel = rx_status->channel; - bss = ieee80211_rx_bss_get(dev, mgmt->bssid); + bss = ieee80211_rx_bss_get(dev, mgmt->bssid, channel, + elems.ssid, elems.ssid_len); if (!bss) { - bss = ieee80211_rx_bss_add(dev, mgmt->bssid); + bss = ieee80211_rx_bss_add(dev, mgmt->bssid, channel, + elems.ssid, elems.ssid_len); if (!bss) return; } else { @@ -1547,10 +1560,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev, bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); - if (elems.ssid && elems.ssid_len <= IEEE80211_MAX_SSID_LEN) { - memcpy(bss->ssid, elems.ssid, elems.ssid_len); - bss->ssid_len = elems.ssid_len; - } bss->supp_rates_len = 0; if (elems.supp_rates) { @@ -1621,7 +1630,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev, bss->hw_mode = rx_status->phymode; - bss->channel = channel; bss->freq = rx_status->freq; if (channel != rx_status->channel && (bss->hw_mode == MODE_IEEE80211G || @@ -1672,7 +1680,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, return; ifsta = &sdata->u.sta; - if (!ifsta->associated || + if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; @@ -1681,14 +1689,12 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, if (baselen > len) return; - if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, - &elems) == ParseFailed) - return; + ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); if (elems.erp_info && elems.erp_info_len >= 1) ieee80211_handle_erp_ie(dev, elems.erp_info[0]); - if (elems.wmm_param && ifsta->wmm_enabled) { + if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, elems.wmm_param_len); } @@ -1707,6 +1713,11 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, struct sk_buff *skb; struct ieee80211_mgmt *resp; u8 *pos, *end; + DECLARE_MAC_BUF(mac); +#ifdef CONFIG_MAC80211_IBSS_DEBUG + DECLARE_MAC_BUF(mac2); + DECLARE_MAC_BUF(mac3); +#endif if (sdata->type != IEEE80211_IF_TYPE_IBSS || ifsta->state != IEEE80211_IBSS_JOINED || @@ -1719,10 +1730,10 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, tx_last_beacon = 1; #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: RX ProbeReq SA=" MAC_FMT " DA=" MAC_FMT " BSSID=" - MAC_FMT " (tx_last_beacon=%d)\n", - dev->name, MAC_ARG(mgmt->sa), MAC_ARG(mgmt->da), - MAC_ARG(mgmt->bssid), tx_last_beacon); + printk(KERN_DEBUG "%s: RX ProbeReq SA=%s DA=%s BSSID=" + "%s (tx_last_beacon=%d)\n", + dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da), + print_mac(mac3, mgmt->bssid), tx_last_beacon); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (!tx_last_beacon) @@ -1738,8 +1749,8 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, pos + 2 + pos[1] > end) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " - "from " MAC_FMT "\n", - dev->name, MAC_ARG(mgmt->sa)); + "from %s\n", + dev->name, print_mac(mac, mgmt->sa)); } return; } @@ -1751,15 +1762,15 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, } /* Reply with ProbeResp */ - skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); + skb = skb_copy(ifsta->probe_resp, GFP_KERNEL); if (!skb) return; resp = (struct ieee80211_mgmt *) skb->data; memcpy(resp->da, mgmt->sa, ETH_ALEN); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: Sending ProbeResp to " MAC_FMT "\n", - dev->name, MAC_ARG(resp->da)); + printk(KERN_DEBUG "%s: Sending ProbeResp to %s\n", + dev->name, print_mac(mac, resp->da)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ ieee80211_sta_tx(dev, skb, 0); } @@ -1890,7 +1901,7 @@ static int ieee80211_sta_active_ibss(struct net_device *dev) int active = 0; struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); list_for_each_entry(sta, &local->sta_list, list) { if (sta->dev == dev && time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, @@ -1899,7 +1910,7 @@ static int ieee80211_sta_active_ibss(struct net_device *dev) break; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); return active; } @@ -1909,16 +1920,25 @@ static void ieee80211_sta_expire(struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta, *tmp; + LIST_HEAD(tmp_list); + DECLARE_MAC_BUF(mac); - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) if (time_after(jiffies, sta->last_rx + IEEE80211_IBSS_INACTIVITY_LIMIT)) { - printk(KERN_DEBUG "%s: expiring inactive STA " MAC_FMT - "\n", dev->name, MAC_ARG(sta->addr)); - sta_info_free(sta, 1); + printk(KERN_DEBUG "%s: expiring inactive STA %s\n", + dev->name, print_mac(mac, sta->addr)); + __sta_info_get(sta); + sta_info_remove(sta); + list_add(&sta->list, &tmp_list); } - spin_unlock_bh(&local->sta_lock); + write_unlock_bh(&local->sta_lock); + + list_for_each_entry_safe(sta, tmp, &tmp_list, list) { + sta_info_free(sta); + sta_info_put(sta); + } } @@ -2047,7 +2067,8 @@ static void ieee80211_sta_reset_auth(struct net_device *dev, printk(KERN_DEBUG "%s: Initial auth_alg=%d\n", dev->name, ifsta->auth_alg); ifsta->auth_transaction = -1; - ifsta->associated = ifsta->auth_tries = ifsta->assoc_tries = 0; + ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + ifsta->auth_tries = ifsta->assoc_tries = 0; netif_carrier_off(dev); } @@ -2061,8 +2082,10 @@ void ieee80211_sta_req_auth(struct net_device *dev, if (sdata->type != IEEE80211_IF_TYPE_STA) return; - if ((ifsta->bssid_set || ifsta->auto_bssid_sel) && - (ifsta->ssid_set || ifsta->auto_ssid_sel)) { + if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | + IEEE80211_STA_AUTO_BSSID_SEL)) && + (ifsta->flags & (IEEE80211_STA_SSID_SET | + IEEE80211_STA_AUTO_SSID_SEL))) { set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); queue_work(local->hw.workqueue, &ifsta->work); } @@ -2073,10 +2096,11 @@ static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta, { int tmp, hidden_ssid; - if (!memcmp(ifsta->ssid, ssid, ssid_len)) + if (ssid_len == ifsta->ssid_len && + !memcmp(ifsta->ssid, ssid, ssid_len)) return 1; - if (ifsta->auto_bssid_sel) + if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) return 0; hidden_ssid = 1; @@ -2105,8 +2129,8 @@ static int ieee80211_sta_config_auth(struct net_device *dev, struct ieee80211_sta_bss *bss, *selected = NULL; int top_rssi = 0, freq; - if (!ifsta->auto_channel_sel && !ifsta->auto_bssid_sel && - !ifsta->auto_ssid_sel) { + if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) { ifsta->state = IEEE80211_AUTHENTICATE; ieee80211_sta_reset_auth(dev, ifsta); return 0; @@ -2122,14 +2146,15 @@ static int ieee80211_sta_config_auth(struct net_device *dev, !!sdata->default_key) continue; - if (!ifsta->auto_channel_sel && bss->freq != freq) + if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && + bss->freq != freq) continue; - if (!ifsta->auto_bssid_sel && + if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) && memcmp(bss->bssid, ifsta->bssid, ETH_ALEN)) continue; - if (!ifsta->auto_ssid_sel && + if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) && !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) continue; @@ -2144,7 +2169,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev, if (selected) { ieee80211_set_channel(local, -1, selected->freq); - if (!ifsta->ssid_set) + if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(dev, selected->ssid, selected->ssid_len); ieee80211_sta_set_bssid(dev, selected->bssid); @@ -2154,7 +2179,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev, return 0; } else { if (ifsta->state != IEEE80211_AUTHENTICATE) { - if (ifsta->auto_ssid_sel) + if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) ieee80211_sta_start_scan(dev, NULL, 0); else ieee80211_sta_start_scan(dev, ifsta->ssid, @@ -2271,8 +2296,9 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, "for IBSS beacon\n", dev->name); break; } - control.tx_rate = (local->short_preamble && - (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? + control.tx_rate = + ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? rate->val2 : rate->val; control.antenna_sel_tx = local->hw.conf.antenna_sel_tx; control.power_level = local->hw.conf.power_level; @@ -2303,8 +2329,6 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, mode = local->oper_hw_mode; for (i = 0; i < bss->supp_rates_len; i++) { int bitrate = (bss->supp_rates[i] & 0x7f) * 5; - if (mode->mode == MODE_ATHEROS_TURBO) - bitrate *= 2; for (j = 0; j < mode->num_rates; j++) if (mode->rates[j].rate == bitrate) rates |= BIT(j); @@ -2332,10 +2356,11 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_hw_mode *mode; u8 bssid[ETH_ALEN], *pos; int i; + DECLARE_MAC_BUF(mac); #if 0 /* Easier testing, use fixed BSSID. */ @@ -2351,21 +2376,20 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, bssid[0] |= 0x02; #endif - printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID " MAC_FMT "\n", - dev->name, MAC_ARG(bssid)); + printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n", + dev->name, print_mac(mac, bssid)); - bss = ieee80211_rx_bss_add(dev, bssid); + bss = ieee80211_rx_bss_add(dev, bssid, local->hw.conf.channel, + sdata->u.sta.ssid, sdata->u.sta.ssid_len); if (!bss) return -ENOMEM; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); mode = local->oper_hw_mode; if (local->hw.conf.beacon_int == 0) local->hw.conf.beacon_int = 100; bss->beacon_int = local->hw.conf.beacon_int; bss->hw_mode = local->hw.conf.phymode; - bss->channel = local->hw.conf.channel; bss->freq = local->hw.conf.freq; bss->last_update = jiffies; bss->capability = WLAN_CAPABILITY_IBSS; @@ -2377,8 +2401,6 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, pos = bss->supp_rates; for (i = 0; i < mode->num_rates; i++) { int rate = mode->rates[i].rate; - if (mode->mode == MODE_ATHEROS_TURBO) - rate /= 2; *pos++ = (u8) (rate / 5); } @@ -2394,6 +2416,8 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, int found = 0; u8 bssid[ETH_ALEN]; int active_ibss; + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); if (ifsta->ssid_len == 0) return -EINVAL; @@ -2410,8 +2434,8 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, || !(bss->capability & WLAN_CAPABILITY_IBSS)) continue; #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " bssid=" MAC_FMT " found\n", - MAC_ARG(bss->bssid)); + printk(KERN_DEBUG " bssid=%s found\n", + print_mac(mac, bss->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ memcpy(bssid, bss->bssid, ETH_ALEN); found = 1; @@ -2421,14 +2445,15 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, spin_unlock_bh(&local->sta_bss_lock); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " sta_find_ibss: selected " MAC_FMT " current " - MAC_FMT "\n", MAC_ARG(bssid), MAC_ARG(ifsta->bssid)); + printk(KERN_DEBUG " sta_find_ibss: selected %s current " + "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && - (bss = ieee80211_rx_bss_get(dev, bssid))) { - printk(KERN_DEBUG "%s: Selected IBSS BSSID " MAC_FMT + (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len))) { + printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" " based on configured SSID\n", - dev->name, MAC_ARG(bssid)); + dev->name, print_mac(mac, bssid)); return ieee80211_sta_join_ibss(dev, ifsta, bss); } #ifdef CONFIG_MAC80211_IBSS_DEBUG @@ -2451,10 +2476,10 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, if (time_after(jiffies, ifsta->ibss_join_req + IEEE80211_IBSS_JOIN_TIMEOUT)) { - if (ifsta->create_ibss && + if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) && local->oper_channel->flag & IEEE80211_CHAN_W_IBSS) return ieee80211_sta_create_ibss(dev, ifsta); - if (ifsta->create_ibss) { + if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) { printk(KERN_DEBUG "%s: IBSS not allowed on the" " configured channel %d (%d MHz)\n", dev->name, local->hw.conf.channel, @@ -2515,13 +2540,17 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len) ifsta = &sdata->u.sta; if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) - ifsta->prev_bssid_set = 0; + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->ssid, ssid, len); memset(ifsta->ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); ifsta->ssid_len = len; - ifsta->ssid_set = len ? 1 : 0; - if (sdata->type == IEEE80211_IF_TYPE_IBSS && !ifsta->bssid_set) { + if (len) + ifsta->flags |= IEEE80211_STA_SSID_SET; + else + ifsta->flags &= ~IEEE80211_STA_SSID_SET; + if (sdata->type == IEEE80211_IF_TYPE_IBSS && + !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { ifsta->ibss_join_req = jiffies; ifsta->state = IEEE80211_IBSS_SEARCH; return ieee80211_sta_find_ibss(dev, ifsta); @@ -2559,10 +2588,11 @@ int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid) } } - if (!is_valid_ether_addr(bssid)) - ifsta->bssid_set = 0; + if (is_valid_ether_addr(bssid)) + ifsta->flags |= IEEE80211_STA_BSSID_SET; else - ifsta->bssid_set = 1; + ifsta->flags &= ~IEEE80211_STA_BSSID_SET; + return 0; } @@ -2613,35 +2643,41 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) printk(KERN_DEBUG "%s: failed to restore operational" "channel after scan\n", dev->name); - if (!(local->hw.flags & IEEE80211_HW_NO_PROBE_FILTERING) && - ieee80211_if_config(dev)) - printk(KERN_DEBUG "%s: failed to restore operational" - "BSSID after scan\n", dev->name); + + netif_tx_lock_bh(local->mdev); + local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; + local->ops->configure_filter(local_to_hw(local), + FIF_BCN_PRBRESP_PROMISC, + &local->filter_flags, + local->mdev->mc_count, + local->mdev->mc_list); + + netif_tx_unlock_bh(local->mdev); memset(&wrqu, 0, sizeof(wrqu)); wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL); - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { /* No need to wake the master device. */ if (sdata->dev == local->mdev) continue; if (sdata->type == IEEE80211_IF_TYPE_STA) { - if (sdata->u.sta.associated) + if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) ieee80211_send_nullfunc(local, sdata, 0); ieee80211_sta_timer((unsigned long)sdata); } netif_wake_queue(sdata->dev); } - read_unlock(&local->sub_if_lock); + rcu_read_unlock(); sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_IBSS) { struct ieee80211_if_sta *ifsta = &sdata->u.sta; - if (!ifsta->bssid_set || + if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || (!ifsta->state == IEEE80211_IBSS_JOINED && !ieee80211_sta_active_ibss(dev))) ieee80211_sta_find_ibss(dev, ifsta); @@ -2773,8 +2809,8 @@ static int ieee80211_sta_start_scan(struct net_device *dev, local->sta_scanning = 1; - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { /* Don't stop the master interface, otherwise we can't transmit * probes! */ @@ -2783,10 +2819,10 @@ static int ieee80211_sta_start_scan(struct net_device *dev, netif_stop_queue(sdata->dev); if (sdata->type == IEEE80211_IF_TYPE_STA && - sdata->u.sta.associated) + (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) ieee80211_send_nullfunc(local, sdata, 1); } - read_unlock(&local->sub_if_lock); + rcu_read_unlock(); if (ssid) { local->scan_ssid_len = ssid_len; @@ -2800,10 +2836,14 @@ static int ieee80211_sta_start_scan(struct net_device *dev, local->scan_channel_idx = 0; local->scan_dev = dev; - if (!(local->hw.flags & IEEE80211_HW_NO_PROBE_FILTERING) && - ieee80211_if_config(dev)) - printk(KERN_DEBUG "%s: failed to set BSSID for scan\n", - dev->name); + netif_tx_lock_bh(local->mdev); + local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; + local->ops->configure_filter(local_to_hw(local), + FIF_BCN_PRBRESP_PROMISC, + &local->filter_flags, + local->mdev->mc_count, + local->mdev->mc_list); + netif_tx_unlock_bh(local->mdev); /* TODO: start scan as soon as all nullfunc frames are ACKed */ queue_delayed_work(local->hw.workqueue, &local->scan_work, @@ -3041,19 +3081,20 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + DECLARE_MAC_BUF(mac); /* TODO: Could consider removing the least recently used entry and * allow new one to be added. */ if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: No room for a new IBSS STA " - "entry " MAC_FMT "\n", dev->name, MAC_ARG(addr)); + "entry %s\n", dev->name, print_mac(mac, addr)); } return NULL; } - printk(KERN_DEBUG "%s: Adding new IBSS station " MAC_FMT " (dev=%s)\n", - local->mdev->name, MAC_ARG(addr), dev->name); + printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n", + wiphy_name(local->hw.wiphy), print_mac(mac, addr), dev->name); sta = sta_info_add(local, dev, addr, GFP_ATOMIC); if (!sta) @@ -3096,7 +3137,7 @@ int ieee80211_sta_disassociate(struct net_device *dev, u16 reason) if (sdata->type != IEEE80211_IF_TYPE_STA) return -EINVAL; - if (!ifsta->associated) + if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) return -1; ieee80211_send_disassoc(dev, ifsta, reason); diff --git a/net/mac80211/key.c b/net/mac80211/key.c new file mode 100644 index 00000000000..0b2328f7d67 --- /dev/null +++ b/net/mac80211/key.c @@ -0,0 +1,279 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/if_ether.h> +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/rcupdate.h> +#include <net/mac80211.h> +#include "ieee80211_i.h" +#include "debugfs_key.h" +#include "aes_ccm.h" + + +/* + * Key handling basics + * + * Key handling in mac80211 is done based on per-interface (sub_if_data) + * keys and per-station keys. Since each station belongs to an interface, + * each station key also belongs to that interface. + * + * Hardware acceleration is done on a best-effort basis, for each key + * that is eligible the hardware is asked to enable that key but if + * it cannot do that they key is simply kept for software encryption. + * There is currently no way of knowing this except by looking into + * debugfs. + * + * All operations here are called under RTNL so no extra locking is + * required. + */ + +static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +static const u8 zero_addr[ETH_ALEN]; + +static const u8 *get_mac_for_key(struct ieee80211_key *key) +{ + const u8 *addr = bcast_addr; + + /* + * If we're an AP we won't ever receive frames with a non-WEP + * group key so we tell the driver that by using the zero MAC + * address to indicate a transmit-only key. + */ + if (key->conf.alg != ALG_WEP && + (key->sdata->type == IEEE80211_IF_TYPE_AP || + key->sdata->type == IEEE80211_IF_TYPE_VLAN)) + addr = zero_addr; + + if (key->sta) + addr = key->sta->addr; + + return addr; +} + +static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) +{ + const u8 *addr; + int ret; + DECLARE_MAC_BUF(mac); + + if (!key->local->ops->set_key) + return; + + addr = get_mac_for_key(key); + + ret = key->local->ops->set_key(local_to_hw(key->local), SET_KEY, + key->sdata->dev->dev_addr, addr, + &key->conf); + + if (!ret) + key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + + if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) + printk(KERN_ERR "mac80211-%s: failed to set key " + "(%d, %s) to hardware (%d)\n", + wiphy_name(key->local->hw.wiphy), + key->conf.keyidx, print_mac(mac, addr), ret); +} + +static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) +{ + const u8 *addr; + int ret; + DECLARE_MAC_BUF(mac); + + if (!key->local->ops->set_key) + return; + + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + return; + + addr = get_mac_for_key(key); + + ret = key->local->ops->set_key(local_to_hw(key->local), DISABLE_KEY, + key->sdata->dev->dev_addr, addr, + &key->conf); + + if (ret) + printk(KERN_ERR "mac80211-%s: failed to remove key " + "(%d, %s) from hardware (%d)\n", + wiphy_name(key->local->hw.wiphy), + key->conf.keyidx, print_mac(mac, addr), ret); + + key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; +} + +struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + enum ieee80211_key_alg alg, + int idx, + size_t key_len, + const u8 *key_data) +{ + struct ieee80211_key *key; + + BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS); + + key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); + if (!key) + return NULL; + + /* + * Default to software encryption; we'll later upload the + * key to the hardware if possible. + */ + key->conf.flags = 0; + key->flags = 0; + + key->conf.alg = alg; + key->conf.keyidx = idx; + key->conf.keylen = key_len; + memcpy(key->conf.key, key_data, key_len); + + key->local = sdata->local; + key->sdata = sdata; + key->sta = sta; + + if (alg == ALG_CCMP) { + /* + * Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); + if (!key->u.ccmp.tfm) { + ieee80211_key_free(key); + return NULL; + } + } + + ieee80211_debugfs_key_add(key->local, key); + + /* remove key first */ + if (sta) + ieee80211_key_free(sta->key); + else + ieee80211_key_free(sdata->keys[idx]); + + if (sta) { + ieee80211_debugfs_key_sta_link(key, sta); + + /* + * some hardware cannot handle TKIP with QoS, so + * we indicate whether QoS could be in use. + */ + if (sta->flags & WLAN_STA_WME) + key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; + } else { + if (sdata->type == IEEE80211_IF_TYPE_STA) { + struct sta_info *ap; + + /* same here, the AP could be using QoS */ + ap = sta_info_get(key->local, key->sdata->u.sta.bssid); + if (ap) { + if (ap->flags & WLAN_STA_WME) + key->conf.flags |= + IEEE80211_KEY_FLAG_WMM_STA; + sta_info_put(ap); + } + } + } + + /* enable hwaccel if appropriate */ + if (netif_running(key->sdata->dev)) + ieee80211_key_enable_hw_accel(key); + + if (sta) + rcu_assign_pointer(sta->key, key); + else + rcu_assign_pointer(sdata->keys[idx], key); + + list_add(&key->list, &sdata->key_list); + + return key; +} + +void ieee80211_key_free(struct ieee80211_key *key) +{ + if (!key) + return; + + if (key->sta) { + rcu_assign_pointer(key->sta->key, NULL); + } else { + if (key->sdata->default_key == key) + ieee80211_set_default_key(key->sdata, -1); + if (key->conf.keyidx >= 0 && + key->conf.keyidx < NUM_DEFAULT_KEYS) + rcu_assign_pointer(key->sdata->keys[key->conf.keyidx], + NULL); + else + WARN_ON(1); + } + + /* wait for all key users to complete */ + synchronize_rcu(); + + /* remove from hwaccel if appropriate */ + ieee80211_key_disable_hw_accel(key); + + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + ieee80211_debugfs_key_remove(key); + + list_del(&key->list); + + kfree(key); +} + +void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) +{ + struct ieee80211_key *key = NULL; + + if (idx >= 0 && idx < NUM_DEFAULT_KEYS) + key = sdata->keys[idx]; + + if (sdata->default_key != key) { + ieee80211_debugfs_key_remove_default(sdata); + + rcu_assign_pointer(sdata->default_key, key); + + if (sdata->default_key) + ieee80211_debugfs_key_add_default(sdata); + } +} + +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key, *tmp; + + list_for_each_entry_safe(key, tmp, &sdata->key_list, list) + ieee80211_key_free(key); +} + +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key; + + WARN_ON(!netif_running(sdata->dev)); + if (!netif_running(sdata->dev)) + return; + + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_enable_hw_accel(key); +} + +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key; + + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_disable_hw_accel(key); +} diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c index 17b9f46bbf2..314b8de8886 100644 --- a/net/mac80211/rc80211_simple.c +++ b/net/mac80211/rc80211_simple.c @@ -147,14 +147,6 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev, srctrl = sta->rate_ctrl_priv; srctrl->tx_num_xmit++; if (status->excessive_retries) { - sta->antenna_sel_tx = sta->antenna_sel_tx == 1 ? 2 : 1; - sta->antenna_sel_rx = sta->antenna_sel_rx == 1 ? 2 : 1; - if (local->sta_antenna_sel == STA_ANTENNA_SEL_SW_CTRL_DEBUG) { - printk(KERN_DEBUG "%s: " MAC_FMT " TX antenna --> %d " - "RX antenna --> %d (@%lu)\n", - dev->name, MAC_ARG(hdr->addr1), - sta->antenna_sel_tx, sta->antenna_sel_rx, jiffies); - } srctrl->tx_num_failures++; sta->tx_retry_failed++; sta->tx_num_consecutive_failures++; @@ -209,9 +201,10 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev, srctrl->avg_rate_update = jiffies; if (srctrl->tx_avg_rate_num > 0) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " Average rate: " + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s: STA %s Average rate: " "%d (%d/%d)\n", - dev->name, MAC_ARG(sta->addr), + dev->name, print_mac(mac, sta->addr), srctrl->tx_avg_rate_sum / srctrl->tx_avg_rate_num, srctrl->tx_avg_rate_sum, diff --git a/net/mac80211/regdomain.c b/net/mac80211/regdomain.c index b697a2afbb4..f42678fa62d 100644 --- a/net/mac80211/regdomain.c +++ b/net/mac80211/regdomain.c @@ -82,12 +82,6 @@ static void ieee80211_unmask_channel(int mode, struct ieee80211_channel *chan) chan->flag = 0; - if (ieee80211_regdom == 64 && - (mode == MODE_ATHEROS_TURBO || mode == MODE_ATHEROS_TURBOG)) { - /* Do not allow Turbo modes in Japan. */ - return; - } - for (i = 0; channel_range[i].start_freq; i++) { const struct ieee80211_channel_range *r = &channel_range[i]; if (r->start_freq <= chan->freq && r->end_freq >= chan->freq) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c new file mode 100644 index 00000000000..ece77766ea2 --- /dev/null +++ b/net/mac80211/rx.c @@ -0,0 +1,1588 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rcupdate.h> +#include <net/mac80211.h> +#include <net/ieee80211_radiotap.h> + +#include "ieee80211_i.h" +#include "ieee80211_led.h" +#include "wep.h" +#include "wpa.h" +#include "tkip.h" +#include "wme.h" + +/* + * monitor mode reception + * + * This function cleans up the SKB, i.e. it removes all the stuff + * only useful for monitoring. + */ +static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, + struct sk_buff *skb, + int rtap_len) +{ + skb_pull(skb, rtap_len); + + if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) { + if (likely(skb->len > FCS_LEN)) + skb_trim(skb, skb->len - FCS_LEN); + else { + /* driver bug */ + WARN_ON(1); + dev_kfree_skb(skb); + skb = NULL; + } + } + + return skb; +} + +static inline int should_drop_frame(struct ieee80211_rx_status *status, + struct sk_buff *skb, + int present_fcs_len, + int radiotap_len) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + + if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + return 1; + if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len)) + return 1; + if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == + cpu_to_le16(IEEE80211_FTYPE_CTL)) + return 1; + return 0; +} + +/* + * This function copies a received frame to all monitor interfaces and + * returns a cleaned-up SKB that no longer includes the FCS nor the + * radiotap header the driver might have added. + */ +static struct sk_buff * +ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_rate *rate; + int needed_headroom = 0; + struct ieee80211_rtap_hdr { + struct ieee80211_radiotap_header hdr; + u8 flags; + u8 rate; + __le16 chan_freq; + __le16 chan_flags; + u8 antsignal; + u8 padding_for_rxflags; + __le16 rx_flags; + } __attribute__ ((packed)) *rthdr; + struct sk_buff *skb, *skb2; + struct net_device *prev_dev = NULL; + int present_fcs_len = 0; + int rtap_len = 0; + + /* + * First, we may need to make a copy of the skb because + * (1) we need to modify it for radiotap (if not present), and + * (2) the other RX handlers will modify the skb we got. + * + * We don't need to, of course, if we aren't going to return + * the SKB because it has a bad FCS/PLCP checksum. + */ + if (status->flag & RX_FLAG_RADIOTAP) + rtap_len = ieee80211_get_radiotap_len(origskb->data); + else + needed_headroom = sizeof(*rthdr); + + if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) + present_fcs_len = FCS_LEN; + + if (!local->monitors) { + if (should_drop_frame(status, origskb, present_fcs_len, + rtap_len)) { + dev_kfree_skb(origskb); + return NULL; + } + + return remove_monitor_info(local, origskb, rtap_len); + } + + if (should_drop_frame(status, origskb, present_fcs_len, rtap_len)) { + /* only need to expand headroom if necessary */ + skb = origskb; + origskb = NULL; + + /* + * This shouldn't trigger often because most devices have an + * RX header they pull before we get here, and that should + * be big enough for our radiotap information. We should + * probably export the length to drivers so that we can have + * them allocate enough headroom to start with. + */ + if (skb_headroom(skb) < needed_headroom && + pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + return NULL; + } + } else { + /* + * Need to make a copy and possibly remove radiotap header + * and FCS from the original. + */ + skb = skb_copy_expand(origskb, needed_headroom, 0, GFP_ATOMIC); + + origskb = remove_monitor_info(local, origskb, rtap_len); + + if (!skb) + return origskb; + } + + /* if necessary, prepend radiotap information */ + if (!(status->flag & RX_FLAG_RADIOTAP)) { + rthdr = (void *) skb_push(skb, sizeof(*rthdr)); + memset(rthdr, 0, sizeof(*rthdr)); + rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); + rthdr->hdr.it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | + (1 << IEEE80211_RADIOTAP_RATE) | + (1 << IEEE80211_RADIOTAP_CHANNEL) | + (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) | + (1 << IEEE80211_RADIOTAP_RX_FLAGS)); + rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ? + IEEE80211_RADIOTAP_F_FCS : 0; + + /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ + rthdr->rx_flags = 0; + if (status->flag & + (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + rthdr->rx_flags |= + cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); + + rate = ieee80211_get_rate(local, status->phymode, + status->rate); + if (rate) + rthdr->rate = rate->rate / 5; + + rthdr->chan_freq = cpu_to_le16(status->freq); + + if (status->phymode == MODE_IEEE80211A) + rthdr->chan_flags = + cpu_to_le16(IEEE80211_CHAN_OFDM | + IEEE80211_CHAN_5GHZ); + else + rthdr->chan_flags = + cpu_to_le16(IEEE80211_CHAN_DYN | + IEEE80211_CHAN_2GHZ); + + rthdr->antsignal = status->ssi; + } + + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + + if (sdata->type != IEEE80211_IF_TYPE_MNTR) + continue; + + if (prev_dev) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + skb2->dev = prev_dev; + netif_rx(skb2); + } + } + + prev_dev = sdata->dev; + sdata->dev->stats.rx_packets++; + sdata->dev->stats.rx_bytes += skb->len; + } + + if (prev_dev) { + skb->dev = prev_dev; + netif_rx(skb); + } else + dev_kfree_skb(skb); + + return origskb; +} + + +/* pre-rx handlers + * + * these don't have dev/sdata fields in the rx data + * The sta value should also not be used because it may + * be NULL even though a STA (in IBSS mode) will be added. + */ + +static ieee80211_txrx_result +ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx) +{ + u8 *data = rx->skb->data; + int tid; + + /* does the frame have a qos control field? */ + if (WLAN_FC_IS_QOS_DATA(rx->fc)) { + u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; + /* frame has qos control */ + tid = qc[0] & QOS_CONTROL_TID_MASK; + } else { + if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { + /* Separate TID for management frames */ + tid = NUM_RX_DATA_QUEUES - 1; + } else { + /* no qos control present */ + tid = 0; /* 802.1d - Best Effort */ + } + } + + I802_DEBUG_INC(rx->local->wme_rx_queue[tid]); + /* only a debug counter, sta might not be assigned properly yet */ + if (rx->sta) + I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]); + + rx->u.rx.queue = tid; + /* Set skb->priority to 1d tag if highest order bit of TID is not set. + * For now, set skb->priority to 0 for other cases. */ + rx->skb->priority = (tid > 7) ? 0 : tid; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct sk_buff *skb = rx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u32 load = 0, hdrtime; + struct ieee80211_rate *rate; + struct ieee80211_hw_mode *mode = local->hw.conf.mode; + int i; + + /* Estimate total channel use caused by this frame */ + + if (unlikely(mode->num_rates < 0)) + return TXRX_CONTINUE; + + rate = &mode->rates[0]; + for (i = 0; i < mode->num_rates; i++) { + if (mode->rates[i].val == rx->u.rx.status->rate) { + rate = &mode->rates[i]; + break; + } + } + + /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, + * 1 usec = 1/8 * (1080 / 10) = 13.5 */ + + if (mode->mode == MODE_IEEE80211A || + (mode->mode == MODE_IEEE80211G && + rate->flags & IEEE80211_RATE_ERP)) + hdrtime = CHAN_UTIL_HDR_SHORT; + else + hdrtime = CHAN_UTIL_HDR_LONG; + + load = hdrtime; + if (!is_multicast_ether_addr(hdr->addr1)) + load += hdrtime; + + load += skb->len * rate->rate_inv; + + /* Divide channel_use by 8 to avoid wrapping around the counter */ + load >>= CHAN_UTIL_SHIFT; + local->channel_use_raw += load; + rx->u.rx.load = load; + + return TXRX_CONTINUE; +} + +ieee80211_rx_handler ieee80211_rx_pre_handlers[] = +{ + ieee80211_rx_h_parse_qos, + ieee80211_rx_h_load_stats, + NULL +}; + +/* rx handlers */ + +static ieee80211_txrx_result +ieee80211_rx_h_if_stats(struct ieee80211_txrx_data *rx) +{ + if (rx->sta) + rx->sta->channel_use_raw += rx->u.rx.load; + rx->sdata->channel_use_raw += rx->u.rx.load; + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct sk_buff *skb = rx->skb; + + if (unlikely(local->sta_scanning != 0)) { + ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status); + return TXRX_QUEUED; + } + + if (unlikely(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) { + /* scanning finished during invoking of handlers */ + I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_check(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr; + hdr = (struct ieee80211_hdr *) rx->skb->data; + + /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ + if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + if (unlikely(rx->fc & IEEE80211_FCTL_RETRY && + rx->sta->last_seq_ctrl[rx->u.rx.queue] == + hdr->seq_ctrl)) { + if (rx->flags & IEEE80211_TXRXD_RXRA_MATCH) { + rx->local->dot11FrameDuplicateCount++; + rx->sta->num_duplicates++; + } + return TXRX_DROP; + } else + rx->sta->last_seq_ctrl[rx->u.rx.queue] = hdr->seq_ctrl; + } + + if (unlikely(rx->skb->len < 16)) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_short); + return TXRX_DROP; + } + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + rx->skb->pkt_type = PACKET_OTHERHOST; + else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0) + rx->skb->pkt_type = PACKET_HOST; + else if (is_multicast_ether_addr(hdr->addr1)) { + if (is_broadcast_ether_addr(hdr->addr1)) + rx->skb->pkt_type = PACKET_BROADCAST; + else + rx->skb->pkt_type = PACKET_MULTICAST; + } else + rx->skb->pkt_type = PACKET_OTHERHOST; + + /* Drop disallowed frame classes based on STA auth/assoc state; + * IEEE 802.11, Chap 5.5. + * + * 80211.o does filtering only based on association state, i.e., it + * drops Class 3 frames from not associated stations. hostapd sends + * deauth/disassoc frames when needed. In addition, hostapd is + * responsible for filtering on both auth and assoc states. + */ + if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || + ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && + (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && + rx->sdata->type != IEEE80211_IF_TYPE_IBSS && + (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { + if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && + !(rx->fc & IEEE80211_FCTL_TODS) && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) + || !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { + /* Drop IBSS frames and frames for other hosts + * silently. */ + return TXRX_DROP; + } + + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result +ieee80211_rx_h_decrypt(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + int keyidx; + int hdrlen; + ieee80211_txrx_result result = TXRX_DROP; + struct ieee80211_key *stakey = NULL; + + /* + * Key selection 101 + * + * There are three types of keys: + * - GTK (group keys) + * - PTK (pairwise keys) + * - STK (station-to-station pairwise keys) + * + * When selecting a key, we have to distinguish between multicast + * (including broadcast) and unicast frames, the latter can only + * use PTKs and STKs while the former always use GTKs. Unless, of + * course, actual WEP keys ("pre-RSNA") are used, then unicast + * frames can also use key indizes like GTKs. Hence, if we don't + * have a PTK/STK we check the key index for a WEP key. + * + * Note that in a regular BSS, multicast frames are sent by the + * AP only, associated stations unicast the frame to the AP first + * which then multicasts it on their behalf. + * + * There is also a slight problem in IBSS mode: GTKs are negotiated + * with each station, that is something we don't currently handle. + * The spec seems to expect that one negotiates the same key with + * every station but there's no such requirement; VLANs could be + * possible. + */ + + if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) + return TXRX_CONTINUE; + + /* + * No point in finding a key and decrypting if the frame is neither + * addressed to us nor a multicast frame. + */ + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + if (rx->sta) + stakey = rcu_dereference(rx->sta->key); + + if (!is_multicast_ether_addr(hdr->addr1) && stakey) { + rx->key = stakey; + } else { + /* + * The device doesn't give us the IV so we won't be + * able to look up the key. That's ok though, we + * don't need to decrypt the frame, we just won't + * be able to keep statistics accurate. + * Except for key threshold notifications, should + * we somehow allow the driver to tell us which key + * the hardware used if this flag is set? + */ + if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && + (rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) + return TXRX_CONTINUE; + + hdrlen = ieee80211_get_hdrlen(rx->fc); + + if (rx->skb->len < 8 + hdrlen) + return TXRX_DROP; /* TODO: count this? */ + + /* + * no need to call ieee80211_wep_get_keyidx, + * it verifies a bunch of things we've done already + */ + keyidx = rx->skb->data[hdrlen + 3] >> 6; + + rx->key = rcu_dereference(rx->sdata->keys[keyidx]); + + /* + * RSNA-protected unicast frames should always be sent with + * pairwise or station-to-station keys, but for WEP we allow + * using a key index as well. + */ + if (rx->key && rx->key->conf.alg != ALG_WEP && + !is_multicast_ether_addr(hdr->addr1)) + rx->key = NULL; + } + + if (rx->key) { + rx->key->tx_rx_count++; + /* TODO: add threshold stuff again */ + } else { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX protected frame," + " but have no key\n", rx->dev->name); + return TXRX_DROP; + } + + /* Check for weak IVs if possible */ + if (rx->sta && rx->key->conf.alg == ALG_WEP && + ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && + (!(rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED) || + !(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) && + ieee80211_wep_is_weak_iv(rx->skb, rx->key)) + rx->sta->wep_weak_iv_count++; + + switch (rx->key->conf.alg) { + case ALG_WEP: + result = ieee80211_crypto_wep_decrypt(rx); + break; + case ALG_TKIP: + result = ieee80211_crypto_tkip_decrypt(rx); + break; + case ALG_CCMP: + result = ieee80211_crypto_ccmp_decrypt(rx); + break; + } + + /* either the frame has been decrypted or will be dropped */ + rx->u.rx.status->flag |= RX_FLAG_DECRYPTED; + + return result; +} + +static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata; + DECLARE_MAC_BUF(mac); + + sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); + + if (sdata->bss) + atomic_inc(&sdata->bss->num_sta_ps); + sta->flags |= WLAN_STA_PS; + sta->pspoll = 0; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n", + dev->name, print_mac(mac, sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ +} + +static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sk_buff *skb; + int sent = 0; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_packet_data *pkt_data; + DECLARE_MAC_BUF(mac); + + sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); + if (sdata->bss) + atomic_dec(&sdata->bss->num_sta_ps); + sta->flags &= ~(WLAN_STA_PS | WLAN_STA_TIM); + sta->pspoll = 0; + if (!skb_queue_empty(&sta->ps_tx_buf)) { + if (local->ops->set_tim) + local->ops->set_tim(local_to_hw(local), sta->aid, 0); + if (sdata->bss) + bss_tim_clear(local, sdata->bss, sta->aid); + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n", + dev->name, print_mac(mac, sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + /* Send all buffered frames to the station */ + while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + sent++; + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + dev_queue_xmit(skb); + } + while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + local->total_ps_buffered--; + sent++; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA %s aid %d send PS frame " + "since STA not sleeping anymore\n", dev->name, + print_mac(mac, sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + dev_queue_xmit(skb); + } + + return sent; +} + +static ieee80211_txrx_result +ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx) +{ + struct sta_info *sta = rx->sta; + struct net_device *dev = rx->dev; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + + if (!sta) + return TXRX_CONTINUE; + + /* Update last_rx only for IBSS packets which are for the current + * BSSID to avoid keeping the current IBSS network alive in cases where + * other STAs are using different BSSID. */ + if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) { + u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len); + if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) + sta->last_rx = jiffies; + } else + if (!is_multicast_ether_addr(hdr->addr1) || + rx->sdata->type == IEEE80211_IF_TYPE_STA) { + /* Update last_rx only for unicast frames in order to prevent + * the Probe Request frames (the only broadcast frames from a + * STA in infrastructure mode) from keeping a connection alive. + */ + sta->last_rx = jiffies; + } + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + sta->rx_fragments++; + sta->rx_bytes += rx->skb->len; + sta->last_rssi = rx->u.rx.status->ssi; + sta->last_signal = rx->u.rx.status->signal; + sta->last_noise = rx->u.rx.status->noise; + + if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) { + /* Change STA power saving mode only in the end of a frame + * exchange sequence */ + if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM)) + rx->u.rx.sent_ps_buffered += ap_sta_ps_end(dev, sta); + else if (!(sta->flags & WLAN_STA_PS) && + (rx->fc & IEEE80211_FCTL_PM)) + ap_sta_ps_start(dev, sta); + } + + /* Drop data::nullfunc frames silently, since they are used only to + * control station power saving mode. */ + if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); + /* Update counter and free packet here to avoid counting this + * as a dropped packed. */ + sta->rx_packets++; + dev_kfree_skb(rx->skb); + return TXRX_QUEUED; + } + + return TXRX_CONTINUE; +} /* ieee80211_rx_h_sta_process */ + +static inline struct ieee80211_fragment_entry * +ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, + unsigned int frag, unsigned int seq, int rx_queue, + struct sk_buff **skb) +{ + struct ieee80211_fragment_entry *entry; + int idx; + + idx = sdata->fragment_next; + entry = &sdata->fragments[sdata->fragment_next++]; + if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) + sdata->fragment_next = 0; + + if (!skb_queue_empty(&entry->skb_list)) { +#ifdef CONFIG_MAC80211_DEBUG + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) entry->skb_list.next->data; + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); + printk(KERN_DEBUG "%s: RX reassembly removed oldest " + "fragment entry (idx=%d age=%lu seq=%d last_frag=%d " + "addr1=%s addr2=%s\n", + sdata->dev->name, idx, + jiffies - entry->first_frag_time, entry->seq, + entry->last_frag, print_mac(mac, hdr->addr1), + print_mac(mac2, hdr->addr2)); +#endif /* CONFIG_MAC80211_DEBUG */ + __skb_queue_purge(&entry->skb_list); + } + + __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ + *skb = NULL; + entry->first_frag_time = jiffies; + entry->seq = seq; + entry->rx_queue = rx_queue; + entry->last_frag = frag; + entry->ccmp = 0; + entry->extra_len = 0; + + return entry; +} + +static inline struct ieee80211_fragment_entry * +ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, + u16 fc, unsigned int frag, unsigned int seq, + int rx_queue, struct ieee80211_hdr *hdr) +{ + struct ieee80211_fragment_entry *entry; + int i, idx; + + idx = sdata->fragment_next; + for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { + struct ieee80211_hdr *f_hdr; + u16 f_fc; + + idx--; + if (idx < 0) + idx = IEEE80211_FRAGMENT_MAX - 1; + + entry = &sdata->fragments[idx]; + if (skb_queue_empty(&entry->skb_list) || entry->seq != seq || + entry->rx_queue != rx_queue || + entry->last_frag + 1 != frag) + continue; + + f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; + f_fc = le16_to_cpu(f_hdr->frame_control); + + if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) || + compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 || + compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0) + continue; + + if (entry->first_frag_time + 2 * HZ < jiffies) { + __skb_queue_purge(&entry->skb_list); + continue; + } + return entry; + } + + return NULL; +} + +static ieee80211_txrx_result +ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr; + u16 sc; + unsigned int frag, seq; + struct ieee80211_fragment_entry *entry; + struct sk_buff *skb; + DECLARE_MAC_BUF(mac); + + hdr = (struct ieee80211_hdr *) rx->skb->data; + sc = le16_to_cpu(hdr->seq_ctrl); + frag = sc & IEEE80211_SCTL_FRAG; + + if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) || + (rx->skb)->len < 24 || + is_multicast_ether_addr(hdr->addr1))) { + /* not fragmented */ + goto out; + } + I802_DEBUG_INC(rx->local->rx_handlers_fragments); + + seq = (sc & IEEE80211_SCTL_SEQ) >> 4; + + if (frag == 0) { + /* This is the first fragment of a new frame. */ + entry = ieee80211_reassemble_add(rx->sdata, frag, seq, + rx->u.rx.queue, &(rx->skb)); + if (rx->key && rx->key->conf.alg == ALG_CCMP && + (rx->fc & IEEE80211_FCTL_PROTECTED)) { + /* Store CCMP PN so that we can verify that the next + * fragment has a sequential PN value. */ + entry->ccmp = 1; + memcpy(entry->last_pn, + rx->key->u.ccmp.rx_pn[rx->u.rx.queue], + CCMP_PN_LEN); + } + return TXRX_QUEUED; + } + + /* This is a fragment for a frame that should already be pending in + * fragment cache. Add this fragment to the end of the pending entry. + */ + entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq, + rx->u.rx.queue, hdr); + if (!entry) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); + return TXRX_DROP; + } + + /* Verify that MPDUs within one MSDU have sequential PN values. + * (IEEE 802.11i, 8.3.3.4.5) */ + if (entry->ccmp) { + int i; + u8 pn[CCMP_PN_LEN], *rpn; + if (!rx->key || rx->key->conf.alg != ALG_CCMP) + return TXRX_DROP; + memcpy(pn, entry->last_pn, CCMP_PN_LEN); + for (i = CCMP_PN_LEN - 1; i >= 0; i--) { + pn[i]++; + if (pn[i]) + break; + } + rpn = rx->key->u.ccmp.rx_pn[rx->u.rx.queue]; + if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: defrag: CCMP PN not " + "sequential A2=%s" + " PN=%02x%02x%02x%02x%02x%02x " + "(expected %02x%02x%02x%02x%02x%02x)\n", + rx->dev->name, print_mac(mac, hdr->addr2), + rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], + rpn[5], pn[0], pn[1], pn[2], pn[3], + pn[4], pn[5]); + return TXRX_DROP; + } + memcpy(entry->last_pn, pn, CCMP_PN_LEN); + } + + skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc)); + __skb_queue_tail(&entry->skb_list, rx->skb); + entry->last_frag = frag; + entry->extra_len += rx->skb->len; + if (rx->fc & IEEE80211_FCTL_MOREFRAGS) { + rx->skb = NULL; + return TXRX_QUEUED; + } + + rx->skb = __skb_dequeue(&entry->skb_list); + if (skb_tailroom(rx->skb) < entry->extra_len) { + I802_DEBUG_INC(rx->local->rx_expand_skb_head2); + if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len, + GFP_ATOMIC))) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); + __skb_queue_purge(&entry->skb_list); + return TXRX_DROP; + } + } + while ((skb = __skb_dequeue(&entry->skb_list))) { + memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len); + dev_kfree_skb(skb); + } + + /* Complete frame has been reassembled - process it now */ + rx->flags |= IEEE80211_TXRXD_FRAGMENTED; + + out: + if (rx->sta) + rx->sta->rx_packets++; + if (is_multicast_ether_addr(hdr->addr1)) + rx->local->dot11MulticastReceivedFrameCount++; + else + ieee80211_led_rx(rx->local); + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx) +{ + struct sk_buff *skb; + int no_pending_pkts; + DECLARE_MAC_BUF(mac); + + if (likely(!rx->sta || + (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL || + !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH))) + return TXRX_CONTINUE; + + skb = skb_dequeue(&rx->sta->tx_filtered); + if (!skb) { + skb = skb_dequeue(&rx->sta->ps_tx_buf); + if (skb) + rx->local->total_ps_buffered--; + } + no_pending_pkts = skb_queue_empty(&rx->sta->tx_filtered) && + skb_queue_empty(&rx->sta->ps_tx_buf); + + if (skb) { + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) skb->data; + + /* tell TX path to send one frame even though the STA may + * still remain is PS mode after this frame exchange */ + rx->sta->pspoll = 1; + +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n", + print_mac(mac, rx->sta->addr), rx->sta->aid, + skb_queue_len(&rx->sta->ps_tx_buf)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + + /* Use MoreData flag to indicate whether there are more + * buffered frames for this STA */ + if (no_pending_pkts) { + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + rx->sta->flags &= ~WLAN_STA_TIM; + } else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); + + dev_queue_xmit(skb); + + if (no_pending_pkts) { + if (rx->local->ops->set_tim) + rx->local->ops->set_tim(local_to_hw(rx->local), + rx->sta->aid, 0); + if (rx->sdata->bss) + bss_tim_clear(rx->local, rx->sdata->bss, rx->sta->aid); + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + } else if (!rx->u.rx.sent_ps_buffered) { + printk(KERN_DEBUG "%s: STA %s sent PS Poll even " + "though there is no buffered frames for it\n", + rx->dev->name, print_mac(mac, rx->sta->addr)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + + } + + /* Free PS Poll skb here instead of returning TXRX_DROP that would + * count as an dropped frame. */ + dev_kfree_skb(rx->skb); + + return TXRX_QUEUED; +} + +static ieee80211_txrx_result +ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx) +{ + u16 fc = rx->fc; + u8 *data = rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data; + + if (!WLAN_FC_IS_QOS_DATA(fc)) + return TXRX_CONTINUE; + + /* remove the qos control field, update frame type and meta-data */ + memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2); + hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2); + /* change frame type to non QOS */ + rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA; + hdr->frame_control = cpu_to_le16(fc); + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx) +{ + if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) && + rx->sdata->type != IEEE80211_IF_TYPE_STA && + (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + if (unlikely(rx->sdata->ieee802_1x && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && + (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) && + !ieee80211_is_eapol(rx->skb))) { +#ifdef CONFIG_MAC80211_DEBUG + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) rx->skb->data; + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s: dropped frame from %s" + " (unauthorized port)\n", rx->dev->name, + print_mac(mac, hdr->addr2)); +#endif /* CONFIG_MAC80211_DEBUG */ + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) +{ + /* + * Pass through unencrypted frames if the hardware has + * decrypted them already. + */ + if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED) + return TXRX_CONTINUE; + + /* Drop unencrypted frames if key is set. */ + if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && + rx->sdata->drop_unencrypted && + (rx->sdata->eapol == 0 || !ieee80211_is_eapol(rx->skb)))) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " + "encryption\n", rx->dev->name); + return TXRX_DROP; + } + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_data(struct ieee80211_txrx_data *rx) +{ + struct net_device *dev = rx->dev; + struct ieee80211_local *local = rx->local; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + u16 fc, hdrlen, ethertype; + u8 *payload; + u8 dst[ETH_ALEN]; + u8 src[ETH_ALEN]; + struct sk_buff *skb = rx->skb, *skb2; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); + DECLARE_MAC_BUF(mac3); + DECLARE_MAC_BUF(mac4); + + fc = rx->fc; + if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) + return TXRX_CONTINUE; + + if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + return TXRX_DROP; + + hdrlen = ieee80211_get_hdrlen(fc); + + /* convert IEEE 802.11 header + possible LLC headers into Ethernet + * header + * IEEE 802.11 address fields: + * ToDS FromDS Addr1 Addr2 Addr3 Addr4 + * 0 0 DA SA BSSID n/a + * 0 1 DA BSSID SA n/a + * 1 0 BSSID SA DA n/a + * 1 1 RA TA DA SA + */ + + switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case IEEE80211_FCTL_TODS: + /* BSSID SA DA */ + memcpy(dst, hdr->addr3, ETH_ALEN); + memcpy(src, hdr->addr2, ETH_ALEN); + + if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP && + sdata->type != IEEE80211_IF_TYPE_VLAN)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: dropped ToDS frame " + "(BSSID=%s SA=%s DA=%s)\n", + dev->name, + print_mac(mac, hdr->addr1), + print_mac(mac2, hdr->addr2), + print_mac(mac3, hdr->addr3)); + return TXRX_DROP; + } + break; + case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + /* RA TA DA SA */ + memcpy(dst, hdr->addr3, ETH_ALEN); + memcpy(src, hdr->addr4, ETH_ALEN); + + if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: dropped FromDS&ToDS " + "frame (RA=%s TA=%s DA=%s SA=%s)\n", + rx->dev->name, + print_mac(mac, hdr->addr1), + print_mac(mac2, hdr->addr2), + print_mac(mac3, hdr->addr3), + print_mac(mac4, hdr->addr4)); + return TXRX_DROP; + } + break; + case IEEE80211_FCTL_FROMDS: + /* DA BSSID SA */ + memcpy(dst, hdr->addr1, ETH_ALEN); + memcpy(src, hdr->addr3, ETH_ALEN); + + if (sdata->type != IEEE80211_IF_TYPE_STA || + (is_multicast_ether_addr(dst) && + !compare_ether_addr(src, dev->dev_addr))) + return TXRX_DROP; + break; + case 0: + /* DA SA BSSID */ + memcpy(dst, hdr->addr1, ETH_ALEN); + memcpy(src, hdr->addr2, ETH_ALEN); + + if (sdata->type != IEEE80211_IF_TYPE_IBSS) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: dropped IBSS frame " + "(DA=%s SA=%s BSSID=%s)\n", + dev->name, + print_mac(mac, hdr->addr1), + print_mac(mac2, hdr->addr2), + print_mac(mac3, hdr->addr3)); + } + return TXRX_DROP; + } + break; + } + + payload = skb->data + hdrlen; + + if (unlikely(skb->len - hdrlen < 8)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: RX too short data frame " + "payload\n", dev->name); + } + return TXRX_DROP; + } + + ethertype = (payload[6] << 8) | payload[7]; + + if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && + ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || + compare_ether_addr(payload, bridge_tunnel_header) == 0)) { + /* remove RFC1042 or Bridge-Tunnel encapsulation and + * replace EtherType */ + skb_pull(skb, hdrlen + 6); + memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); + memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); + } else { + struct ethhdr *ehdr; + __be16 len; + skb_pull(skb, hdrlen); + len = htons(skb->len); + ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); + memcpy(ehdr->h_dest, dst, ETH_ALEN); + memcpy(ehdr->h_source, src, ETH_ALEN); + ehdr->h_proto = len; + } + skb->dev = dev; + + skb2 = NULL; + + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + + if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP + || sdata->type == IEEE80211_IF_TYPE_VLAN) && + (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { + if (is_multicast_ether_addr(skb->data)) { + /* send multicast frames both to higher layers in + * local net stack and back to the wireless media */ + skb2 = skb_copy(skb, GFP_ATOMIC); + if (!skb2 && net_ratelimit()) + printk(KERN_DEBUG "%s: failed to clone " + "multicast frame\n", dev->name); + } else { + struct sta_info *dsta; + dsta = sta_info_get(local, skb->data); + if (dsta && !dsta->dev) { + if (net_ratelimit()) + printk(KERN_DEBUG "Station with null " + "dev structure!\n"); + } else if (dsta && dsta->dev == dev) { + /* Destination station is associated to this + * AP, so send the frame directly to it and + * do not pass the frame to local net stack. + */ + skb2 = skb; + skb = NULL; + } + if (dsta) + sta_info_put(dsta); + } + } + + if (skb) { + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); + } + + if (skb2) { + /* send to wireless media */ + skb2->protocol = __constant_htons(ETH_P_802_3); + skb_set_network_header(skb2, 0); + skb_set_mac_header(skb2, 0); + dev_queue_xmit(skb2); + } + + return TXRX_QUEUED; +} + +static ieee80211_txrx_result +ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_sub_if_data *sdata; + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_DROP; + + sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + if ((sdata->type == IEEE80211_IF_TYPE_STA || + sdata->type == IEEE80211_IF_TYPE_IBSS) && + !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) + ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status); + else + return TXRX_DROP; + + return TXRX_QUEUED; +} + +static inline ieee80211_txrx_result __ieee80211_invoke_rx_handlers( + struct ieee80211_local *local, + ieee80211_rx_handler *handlers, + struct ieee80211_txrx_data *rx, + struct sta_info *sta) +{ + ieee80211_rx_handler *handler; + ieee80211_txrx_result res = TXRX_DROP; + + for (handler = handlers; *handler != NULL; handler++) { + res = (*handler)(rx); + + switch (res) { + case TXRX_CONTINUE: + continue; + case TXRX_DROP: + I802_DEBUG_INC(local->rx_handlers_drop); + if (sta) + sta->rx_dropped++; + break; + case TXRX_QUEUED: + I802_DEBUG_INC(local->rx_handlers_queued); + break; + } + break; + } + + if (res == TXRX_DROP) + dev_kfree_skb(rx->skb); + return res; +} + +static inline void ieee80211_invoke_rx_handlers(struct ieee80211_local *local, + ieee80211_rx_handler *handlers, + struct ieee80211_txrx_data *rx, + struct sta_info *sta) +{ + if (__ieee80211_invoke_rx_handlers(local, handlers, rx, sta) == + TXRX_CONTINUE) + dev_kfree_skb(rx->skb); +} + +static void ieee80211_rx_michael_mic_report(struct net_device *dev, + struct ieee80211_hdr *hdr, + struct sta_info *sta, + struct ieee80211_txrx_data *rx) +{ + int keyidx, hdrlen; + DECLARE_MAC_BUF(mac); + DECLARE_MAC_BUF(mac2); + + hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb); + if (rx->skb->len >= hdrlen + 4) + keyidx = rx->skb->data[hdrlen + 3] >> 6; + else + keyidx = -1; + + if (net_ratelimit()) + printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC " + "failure from %s to %s keyidx=%d\n", + dev->name, print_mac(mac, hdr->addr2), + print_mac(mac2, hdr->addr1), keyidx); + + if (!sta) { + /* + * Some hardware seem to generate incorrect Michael MIC + * reports; ignore them to avoid triggering countermeasures. + */ + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for unknown address %s\n", + dev->name, print_mac(mac, hdr->addr2)); + goto ignore; + } + + if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for a frame with no PROTECTED flag (src " + "%s)\n", dev->name, print_mac(mac, hdr->addr2)); + goto ignore; + } + + if (rx->sdata->type == IEEE80211_IF_TYPE_AP && keyidx) { + /* + * APs with pairwise keys should never receive Michael MIC + * errors for non-zero keyidx because these are reserved for + * group keys and only the AP is sending real multicast + * frames in the BSS. + */ + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored Michael MIC error for " + "a frame with non-zero keyidx (%d)" + " (src %s)\n", dev->name, keyidx, + print_mac(mac, hdr->addr2)); + goto ignore; + } + + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for a frame that cannot be encrypted " + "(fc=0x%04x) (src %s)\n", + dev->name, rx->fc, print_mac(mac, hdr->addr2)); + goto ignore; + } + + mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr); + ignore: + dev_kfree_skb(rx->skb); + rx->skb = NULL; +} + +ieee80211_rx_handler ieee80211_rx_handlers[] = +{ + ieee80211_rx_h_if_stats, + ieee80211_rx_h_passive_scan, + ieee80211_rx_h_check, + ieee80211_rx_h_decrypt, + ieee80211_rx_h_sta_process, + ieee80211_rx_h_defragment, + ieee80211_rx_h_ps_poll, + ieee80211_rx_h_michael_mic_verify, + /* this must be after decryption - so header is counted in MPDU mic + * must be before pae and data, so QOS_DATA format frames + * are not passed to user space by these functions + */ + ieee80211_rx_h_remove_qos_control, + ieee80211_rx_h_802_1x_pae, + ieee80211_rx_h_drop_unencrypted, + ieee80211_rx_h_data, + ieee80211_rx_h_mgmt, + NULL +}; + +/* main receive path */ + +static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, + u8 *bssid, struct ieee80211_txrx_data *rx, + struct ieee80211_hdr *hdr) +{ + int multicast = is_multicast_ether_addr(hdr->addr1); + + switch (sdata->type) { + case IEEE80211_IF_TYPE_STA: + if (!bssid) + return 0; + if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!multicast && + compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1) != 0) { + if (!(sdata->dev->flags & IFF_PROMISC)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } + break; + case IEEE80211_IF_TYPE_IBSS: + if (!bssid) + return 0; + if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!multicast && + compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1) != 0) { + if (!(sdata->dev->flags & IFF_PROMISC)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!rx->sta) + rx->sta = ieee80211_ibss_add_sta(sdata->dev, rx->skb, + bssid, hdr->addr2); + break; + case IEEE80211_IF_TYPE_VLAN: + case IEEE80211_IF_TYPE_AP: + if (!bssid) { + if (compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1)) + return 0; + } else if (!ieee80211_bssid_match(bssid, + sdata->dev->dev_addr)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } + if (sdata->dev == sdata->local->mdev && + !(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + /* do not receive anything via + * master device when not scanning */ + return 0; + break; + case IEEE80211_IF_TYPE_WDS: + if (bssid || + (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + return 0; + if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) + return 0; + break; + case IEEE80211_IF_TYPE_MNTR: + /* take everything */ + break; + case IEEE80211_IF_TYPE_INVALID: + /* should never get here */ + WARN_ON(1); + break; + } + + return 1; +} + +/* + * This is the receive path handler. It is called by a low level driver when an + * 802.11 MPDU is received from the hardware. + */ +void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + struct ieee80211_hdr *hdr; + struct ieee80211_txrx_data rx; + u16 type; + int prepres; + struct ieee80211_sub_if_data *prev = NULL; + struct sk_buff *skb_new; + u8 *bssid; + + /* + * key references and virtual interfaces are protected using RCU + * and this requires that we are in a read-side RCU section during + * receive processing + */ + rcu_read_lock(); + + /* + * Frames with failed FCS/PLCP checksum are not returned, + * all other frames are returned without radiotap header + * if it was previously present. + * Also, frames with less than 16 bytes are dropped. + */ + skb = ieee80211_rx_monitor(local, skb, status); + if (!skb) { + rcu_read_unlock(); + return; + } + + hdr = (struct ieee80211_hdr *) skb->data; + memset(&rx, 0, sizeof(rx)); + rx.skb = skb; + rx.local = local; + + rx.u.rx.status = status; + rx.fc = le16_to_cpu(hdr->frame_control); + type = rx.fc & IEEE80211_FCTL_FTYPE; + + if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) + local->dot11ReceivedFragmentCount++; + + sta = rx.sta = sta_info_get(local, hdr->addr2); + if (sta) { + rx.dev = rx.sta->dev; + rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev); + } + + if ((status->flag & RX_FLAG_MMIC_ERROR)) { + ieee80211_rx_michael_mic_report(local->mdev, hdr, sta, &rx); + goto end; + } + + if (unlikely(local->sta_scanning)) + rx.flags |= IEEE80211_TXRXD_RXIN_SCAN; + + if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx, + sta) != TXRX_CONTINUE) + goto end; + skb = rx.skb; + + if (sta && !(sta->flags & (WLAN_STA_WDS | WLAN_STA_ASSOC_AP)) && + !atomic_read(&local->iff_promiscs) && + !is_multicast_ether_addr(hdr->addr1)) { + rx.flags |= IEEE80211_TXRXD_RXRA_MATCH; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx, + rx.sta); + sta_info_put(sta); + rcu_read_unlock(); + return; + } + + bssid = ieee80211_get_bssid(hdr, skb->len); + + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + + if (sdata->type == IEEE80211_IF_TYPE_MNTR) + continue; + + rx.flags |= IEEE80211_TXRXD_RXRA_MATCH; + prepres = prepare_for_handlers(sdata, bssid, &rx, hdr); + /* prepare_for_handlers can change sta */ + sta = rx.sta; + + if (!prepres) + continue; + + /* + * frame is destined for this interface, but if it's not + * also for the previous one we handle that after the + * loop to avoid copying the SKB once too much + */ + + if (!prev) { + prev = sdata; + continue; + } + + /* + * frame was destined for the previous interface + * so invoke RX handlers for it + */ + + skb_new = skb_copy(skb, GFP_ATOMIC); + if (!skb_new) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: failed to copy " + "multicast frame for %s", + wiphy_name(local->hw.wiphy), + prev->dev->name); + continue; + } + rx.skb = skb_new; + rx.dev = prev->dev; + rx.sdata = prev; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, + &rx, sta); + prev = sdata; + } + if (prev) { + rx.skb = skb; + rx.dev = prev->dev; + rx.sdata = prev; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, + &rx, sta); + } else + dev_kfree_skb(skb); + + end: + rcu_read_unlock(); + + if (sta) + sta_info_put(sta); +} +EXPORT_SYMBOL(__ieee80211_rx); + +/* This is a version of the rx handler that can be called from hard irq + * context. Post the skb on the queue and schedule the tasklet */ +void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_local *local = hw_to_local(hw); + + BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb)); + + skb->dev = local->mdev; + /* copy status into skb->cb for use by tasklet */ + memcpy(skb->cb, status, sizeof(*status)); + skb->pkt_type = IEEE80211_RX_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_rx_irqsafe); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ab7b1f067c6..e8491554a5d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -19,7 +19,6 @@ #include "ieee80211_i.h" #include "ieee80211_rate.h" #include "sta_info.h" -#include "debugfs_key.h" #include "debugfs_sta.h" /* Caller must hold local->sta_lock */ @@ -32,38 +31,34 @@ static void sta_info_hash_add(struct ieee80211_local *local, /* Caller must hold local->sta_lock */ -static void sta_info_hash_del(struct ieee80211_local *local, - struct sta_info *sta) +static int sta_info_hash_del(struct ieee80211_local *local, + struct sta_info *sta) { struct sta_info *s; s = local->sta_hash[STA_HASH(sta->addr)]; if (!s) - return; - if (memcmp(s->addr, sta->addr, ETH_ALEN) == 0) { + return -ENOENT; + if (s == sta) { local->sta_hash[STA_HASH(sta->addr)] = s->hnext; - return; + return 0; } - while (s->hnext && memcmp(s->hnext->addr, sta->addr, ETH_ALEN) != 0) + while (s->hnext && s->hnext != sta) s = s->hnext; - if (s->hnext) - s->hnext = s->hnext->hnext; - else - printk(KERN_ERR "%s: could not remove STA " MAC_FMT " from " - "hash table\n", local->mdev->name, MAC_ARG(sta->addr)); -} + if (s->hnext) { + s->hnext = sta->hnext; + return 0; + } -static inline void __sta_info_get(struct sta_info *sta) -{ - kref_get(&sta->kref); + return -ENOENT; } struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) { struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); sta = local->sta_hash[STA_HASH(addr)]; while (sta) { if (memcmp(sta->addr, addr, ETH_ALEN) == 0) { @@ -72,7 +67,7 @@ struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) } sta = sta->hnext; } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); return sta; } @@ -85,7 +80,7 @@ int sta_info_min_txrate_get(struct ieee80211_local *local) int min_txrate = 9999999; int i; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); mode = local->oper_hw_mode; for (i = 0; i < STA_HASH_SIZE; i++) { sta = local->sta_hash[i]; @@ -95,7 +90,7 @@ int sta_info_min_txrate_get(struct ieee80211_local *local) sta = sta->hnext; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); if (min_txrate == 9999999) min_txrate = 0; @@ -122,8 +117,6 @@ static void sta_info_release(struct kref *kref) } rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); rate_control_put(sta->rate_ctrl); - if (sta->key) - ieee80211_debugfs_key_sta_del(sta->key, sta); kfree(sta); } @@ -139,6 +132,7 @@ struct sta_info * sta_info_add(struct ieee80211_local *local, struct net_device *dev, u8 *addr, gfp_t gfp) { struct sta_info *sta; + DECLARE_MAC_BUF(mac); sta = kzalloc(sizeof(*sta), gfp); if (!sta) @@ -150,7 +144,6 @@ struct sta_info * sta_info_add(struct ieee80211_local *local, sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, gfp); if (!sta->rate_ctrl_priv) { rate_control_put(sta->rate_ctrl); - kref_put(&sta->kref, sta_info_release); kfree(sta); return NULL; } @@ -162,63 +155,40 @@ struct sta_info * sta_info_add(struct ieee80211_local *local, skb_queue_head_init(&sta->tx_filtered); __sta_info_get(sta); /* sta used by caller, decremented by * sta_info_put() */ - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_add(&sta->list, &local->sta_list); local->num_sta++; sta_info_hash_add(local, sta); - spin_unlock_bh(&local->sta_lock); - if (local->ops->sta_table_notification) - local->ops->sta_table_notification(local_to_hw(local), - local->num_sta); - sta->key_idx_compression = HW_KEY_IDX_INVALID; + if (local->ops->sta_notify) + local->ops->sta_notify(local_to_hw(local), dev->ifindex, + STA_NOTIFY_ADD, addr); + write_unlock_bh(&local->sta_lock); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Added STA " MAC_FMT "\n", - local->mdev->name, MAC_ARG(addr)); + printk(KERN_DEBUG "%s: Added STA %s\n", + wiphy_name(local->hw.wiphy), print_mac(mac, addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_DEBUGFS - if (!in_interrupt()) { - sta->debugfs_registered = 1; - ieee80211_sta_debugfs_add(sta); - rate_control_add_sta_debugfs(sta); - } else { - /* debugfs entry adding might sleep, so schedule process - * context task for adding entry for STAs that do not yet - * have one. */ - queue_work(local->hw.workqueue, &local->sta_debugfs_add); - } + /* debugfs entry adding might sleep, so schedule process + * context task for adding entry for STAs that do not yet + * have one. */ + queue_work(local->hw.workqueue, &local->sta_debugfs_add); #endif return sta; } -static void finish_sta_info_free(struct ieee80211_local *local, - struct sta_info *sta) -{ -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Removed STA " MAC_FMT "\n", - local->mdev->name, MAC_ARG(sta->addr)); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - - if (sta->key) { - ieee80211_debugfs_key_remove(sta->key); - ieee80211_key_free(sta->key); - sta->key = NULL; - } - - rate_control_remove_sta_debugfs(sta); - ieee80211_sta_debugfs_remove(sta); - - sta_info_put(sta); -} - -static void sta_info_remove(struct sta_info *sta) +/* Caller must hold local->sta_lock */ +void sta_info_remove(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata; - sta_info_hash_del(local, sta); + /* don't do anything if we've been removed already */ + if (sta_info_hash_del(local, sta)) + return; + list_del(&sta->list); sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); if (sta->flags & WLAN_STA_PS) { @@ -228,61 +198,45 @@ static void sta_info_remove(struct sta_info *sta) } local->num_sta--; sta_info_remove_aid_ptr(sta); + } -void sta_info_free(struct sta_info *sta, int locked) +void sta_info_free(struct sta_info *sta) { struct sk_buff *skb; struct ieee80211_local *local = sta->local; + DECLARE_MAC_BUF(mac); - if (!locked) { - spin_lock_bh(&local->sta_lock); - sta_info_remove(sta); - spin_unlock_bh(&local->sta_lock); - } else { - sta_info_remove(sta); - } - if (local->ops->sta_table_notification) - local->ops->sta_table_notification(local_to_hw(local), - local->num_sta); + might_sleep(); + + write_lock_bh(&local->sta_lock); + sta_info_remove(sta); + write_unlock_bh(&local->sta_lock); while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { local->total_ps_buffered--; - dev_kfree_skb_any(skb); + dev_kfree_skb(skb); } while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - dev_kfree_skb_any(skb); + dev_kfree_skb(skb); } - if (sta->key) { - if (local->ops->set_key) { - struct ieee80211_key_conf *key; - key = ieee80211_key_data2conf(local, sta->key); - if (key) { - local->ops->set_key(local_to_hw(local), - DISABLE_KEY, - sta->addr, key, sta->aid); - kfree(key); - } - } - } else if (sta->key_idx_compression != HW_KEY_IDX_INVALID) { - struct ieee80211_key_conf conf; - memset(&conf, 0, sizeof(conf)); - conf.hw_key_idx = sta->key_idx_compression; - conf.alg = ALG_NULL; - conf.flags |= IEEE80211_KEY_FORCE_SW_ENCRYPT; - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta->addr, &conf, sta->aid); - sta->key_idx_compression = HW_KEY_IDX_INVALID; - } +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Removed STA %s\n", + wiphy_name(local->hw.wiphy), print_mac(mac, sta->addr)); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ -#ifdef CONFIG_MAC80211_DEBUGFS - if (in_atomic()) { - list_add(&sta->list, &local->deleted_sta_list); - queue_work(local->hw.workqueue, &local->sta_debugfs_add); - } else -#endif - finish_sta_info_free(local, sta); + ieee80211_key_free(sta->key); + sta->key = NULL; + + if (local->ops->sta_notify) + local->ops->sta_notify(local_to_hw(local), sta->dev->ifindex, + STA_NOTIFY_REMOVE, sta->addr); + + rate_control_remove_sta_debugfs(sta); + ieee80211_sta_debugfs_remove(sta); + + sta_info_put(sta); } @@ -312,6 +266,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, { unsigned long flags; struct sk_buff *skb; + DECLARE_MAC_BUF(mac); if (skb_queue_empty(&sta->ps_tx_buf)) return; @@ -330,7 +285,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, if (skb) { local->total_ps_buffered--; printk(KERN_DEBUG "Buffered frame expired (STA " - MAC_FMT ")\n", MAC_ARG(sta->addr)); + "%s)\n", print_mac(mac, sta->addr)); dev_kfree_skb(skb); } else break; @@ -343,13 +298,13 @@ static void sta_info_cleanup(unsigned long data) struct ieee80211_local *local = (struct ieee80211_local *) data; struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); list_for_each_entry(sta, &local->sta_list, list) { __sta_info_get(sta); sta_info_cleanup_expire_buffered(local, sta); sta_info_put(sta); } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL; add_timer(&local->sta_cleanup); @@ -363,35 +318,20 @@ static void sta_info_debugfs_add_task(struct work_struct *work) struct sta_info *sta, *tmp; while (1) { - spin_lock_bh(&local->sta_lock); - if (!list_empty(&local->deleted_sta_list)) { - sta = list_entry(local->deleted_sta_list.next, - struct sta_info, list); - list_del(local->deleted_sta_list.next); - } else - sta = NULL; - spin_unlock_bh(&local->sta_lock); - if (!sta) - break; - finish_sta_info_free(local, sta); - } - - while (1) { sta = NULL; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); list_for_each_entry(tmp, &local->sta_list, list) { - if (!tmp->debugfs_registered) { + if (!tmp->debugfs.dir) { sta = tmp; __sta_info_get(sta); break; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); if (!sta) break; - sta->debugfs_registered = 1; ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); sta_info_put(sta); @@ -401,9 +341,8 @@ static void sta_info_debugfs_add_task(struct work_struct *work) void sta_info_init(struct ieee80211_local *local) { - spin_lock_init(&local->sta_lock); + rwlock_init(&local->sta_lock); INIT_LIST_HEAD(&local->sta_list); - INIT_LIST_HEAD(&local->deleted_sta_list); init_timer(&local->sta_cleanup); local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL; @@ -423,17 +362,8 @@ int sta_info_start(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { - struct sta_info *sta, *tmp; - del_timer(&local->sta_cleanup); - - list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - /* sta_info_free must be called with 0 as the last - * parameter to ensure all debugfs sta entries are - * unregistered. We don't need locking at this - * point. */ - sta_info_free(sta, 0); - } + sta_info_flush(local, NULL); } void sta_info_remove_aid_ptr(struct sta_info *sta) @@ -461,10 +391,19 @@ void sta_info_remove_aid_ptr(struct sta_info *sta) void sta_info_flush(struct ieee80211_local *local, struct net_device *dev) { struct sta_info *sta, *tmp; + LIST_HEAD(tmp_list); - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) - if (!dev || dev == sta->dev) - sta_info_free(sta, 1); - spin_unlock_bh(&local->sta_lock); + if (!dev || dev == sta->dev) { + __sta_info_get(sta); + sta_info_remove(sta); + list_add_tail(&sta->list, &tmp_list); + } + write_unlock_bh(&local->sta_lock); + + list_for_each_entry_safe(sta, tmp, &tmp_list, list) { + sta_info_free(sta); + sta_info_put(sta); + } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index b5591d2f60a..8f7ebe41c02 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -26,6 +26,8 @@ * send and receive non-IEEE 802.1X frames */ #define WLAN_STA_SHORT_PREAMBLE BIT(7) +/* whether this is an AP that we are associated with as a client */ +#define WLAN_STA_ASSOC_AP BIT(8) #define WLAN_STA_WME BIT(9) #define WLAN_STA_WDS BIT(27) @@ -90,27 +92,11 @@ struct sta_info { int channel_use; int channel_use_raw; - u8 antenna_sel_tx; - u8 antenna_sel_rx; - - - int key_idx_compression; /* key table index for compression and TX - * filtering; used only if sta->key is not - * set */ - -#ifdef CONFIG_MAC80211_DEBUGFS - int debugfs_registered; -#endif - int assoc_ap; /* whether this is an AP that we are - * associated with as a client */ - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; #endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ - int vlan_id; - u16 listen_interval; #ifdef CONFIG_MAC80211_DEBUGFS @@ -149,12 +135,18 @@ struct sta_info { */ #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) +static inline void __sta_info_get(struct sta_info *sta) +{ + kref_get(&sta->kref); +} + struct sta_info * sta_info_get(struct ieee80211_local *local, u8 *addr); int sta_info_min_txrate_get(struct ieee80211_local *local); void sta_info_put(struct sta_info *sta); struct sta_info * sta_info_add(struct ieee80211_local *local, struct net_device *dev, u8 *addr, gfp_t gfp); -void sta_info_free(struct sta_info *sta, int locked); +void sta_info_remove(struct sta_info *sta); +void sta_info_free(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); int sta_info_start(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 41621720e56..3abe194e4d5 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -182,7 +182,7 @@ u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, *pos++ = iv0; *pos++ = iv1; *pos++ = iv2; - *pos++ = (key->keyidx << 6) | (1 << 5) /* Ext IV */; + *pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */; *pos++ = key->u.tkip.iv32 & 0xff; *pos++ = (key->u.tkip.iv32 >> 8) & 0xff; *pos++ = (key->u.tkip.iv32 >> 16) & 0xff; @@ -194,7 +194,7 @@ u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta, u16 *phase1key) { - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv32, phase1key); } @@ -204,12 +204,13 @@ void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta, /* Calculate per-packet key */ if (key->u.tkip.iv16 == 0 || !key->u.tkip.tx_initialized) { /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv32, key->u.tkip.p1k); key->u.tkip.tx_initialized = 1; } - tkip_mixing_phase2(key->u.tkip.p1k, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase2(key->u.tkip.p1k, + &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv16, rc4key); } @@ -237,7 +238,8 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *payload, size_t payload_len, u8 *ta, - int only_iv, int queue) + int only_iv, int queue, + u32 *out_iv32, u16 *out_iv16) { u32 iv32; u32 iv16; @@ -266,7 +268,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if (!(keyid & (1 << 5))) return TKIP_DECRYPT_NO_EXT_IV; - if ((keyid >> 6) != key->keyidx) + if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; if (key->u.tkip.rx_initialized[queue] && @@ -274,9 +276,10 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, (iv32 == key->u.tkip.iv32_rx[queue] && iv16 <= key->u.tkip.iv16_rx[queue]))) { #ifdef CONFIG_TKIP_DEBUG + DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "TKIP replay detected for RX frame from " - MAC_FMT " (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n", - MAC_ARG(ta), + "%s (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n", + print_mac(mac, ta), iv32, iv16, key->u.tkip.iv32_rx[queue], key->u.tkip.iv16_rx[queue]); #endif /* CONFIG_TKIP_DEBUG */ @@ -293,16 +296,18 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, key->u.tkip.iv32_rx[queue] != iv32) { key->u.tkip.rx_initialized[queue] = 1; /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], iv32, key->u.tkip.p1k_rx[queue]); #ifdef CONFIG_TKIP_DEBUG { int i; - printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=" MAC_FMT - " TK=", MAC_ARG(ta)); + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=%s" + " TK=", print_mac(mac, ta)); for (i = 0; i < 16; i++) printk("%02x ", - key->key[ALG_TKIP_TEMP_ENCR_KEY + i]); + key->conf.key[ + ALG_TKIP_TEMP_ENCR_KEY + i]); printk("\n"); printk(KERN_DEBUG "TKIP decrypt: P1K="); for (i = 0; i < 5; i++) @@ -313,7 +318,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, } tkip_mixing_phase2(key->u.tkip.p1k_rx[queue], - &key->key[ALG_TKIP_TEMP_ENCR_KEY], + &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], iv16, rc4key); #ifdef CONFIG_TKIP_DEBUG { @@ -328,11 +333,14 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12); done: if (res == TKIP_DECRYPT_OK) { - /* FIX: these should be updated only after Michael MIC has been - * verified */ - /* Record previously received IV */ - key->u.tkip.iv32_rx[queue] = iv32; - key->u.tkip.iv16_rx[queue] = iv16; + /* + * Record previously received IV, will be copied into the + * key information after MIC verification. It is possible + * that we don't catch replays of fragments but that's ok + * because the Michael MIC verication will then fail. + */ + *out_iv32 = iv32; + *out_iv16 = iv16; } return res; diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h index a0d181a1804..73d8ef2a93b 100644 --- a/net/mac80211/tkip.h +++ b/net/mac80211/tkip.h @@ -31,6 +31,7 @@ enum { int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *payload, size_t payload_len, u8 *ta, - int only_iv, int queue); + int only_iv, int queue, + u32 *out_iv32, u16 *out_iv16); #endif /* TKIP_H */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c new file mode 100644 index 00000000000..1a531543bcc --- /dev/null +++ b/net/mac80211/tx.c @@ -0,0 +1,1903 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * Transmit and frame generation functions. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/bitmap.h> +#include <linux/rcupdate.h> +#include <net/net_namespace.h> +#include <net/ieee80211_radiotap.h> +#include <net/cfg80211.h> +#include <net/mac80211.h> +#include <asm/unaligned.h> + +#include "ieee80211_i.h" +#include "ieee80211_led.h" +#include "wep.h" +#include "wpa.h" +#include "wme.h" +#include "ieee80211_rate.h" + +#define IEEE80211_TX_OK 0 +#define IEEE80211_TX_AGAIN 1 +#define IEEE80211_TX_FRAG_AGAIN 2 + +/* misc utils */ + +static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr) +{ + /* Set the sequence number for this frame. */ + hdr->seq_ctrl = cpu_to_le16(sdata->sequence); + + /* Increase the sequence number. */ + sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ; +} + +#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP +static void ieee80211_dump_frame(const char *ifname, const char *title, + const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u16 fc; + int hdrlen; + DECLARE_MAC_BUF(mac); + + printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); + if (skb->len < 4) { + printk("\n"); + return; + } + + fc = le16_to_cpu(hdr->frame_control); + hdrlen = ieee80211_get_hdrlen(fc); + if (hdrlen > skb->len) + hdrlen = skb->len; + if (hdrlen >= 4) + printk(" FC=0x%04x DUR=0x%04x", + fc, le16_to_cpu(hdr->duration_id)); + if (hdrlen >= 10) + printk(" A1=%s", print_mac(mac, hdr->addr1)); + if (hdrlen >= 16) + printk(" A2=%s", print_mac(mac, hdr->addr2)); + if (hdrlen >= 24) + printk(" A3=%s", print_mac(mac, hdr->addr3)); + if (hdrlen >= 30) + printk(" A4=%s", print_mac(mac, hdr->addr4)); + printk("\n"); +} +#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ +static inline void ieee80211_dump_frame(const char *ifname, const char *title, + struct sk_buff *skb) +{ +} +#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ + +static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr, + int next_frag_len) +{ + int rate, mrate, erp, dur, i; + struct ieee80211_rate *txrate = tx->u.tx.rate; + struct ieee80211_local *local = tx->local; + struct ieee80211_hw_mode *mode = tx->u.tx.mode; + + erp = txrate->flags & IEEE80211_RATE_ERP; + + /* + * data and mgmt (except PS Poll): + * - during CFP: 32768 + * - during contention period: + * if addr1 is group address: 0 + * if more fragments = 0 and addr1 is individual address: time to + * transmit one ACK plus SIFS + * if more fragments = 1 and addr1 is individual address: time to + * transmit next fragment plus 2 x ACK plus 3 x SIFS + * + * IEEE 802.11, 9.6: + * - control response frame (CTS or ACK) shall be transmitted using the + * same rate as the immediately previous frame in the frame exchange + * sequence, if this rate belongs to the PHY mandatory rates, or else + * at the highest possible rate belonging to the PHY rates in the + * BSSBasicRateSet + */ + + if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) { + /* TODO: These control frames are not currently sent by + * 80211.o, but should they be implemented, this function + * needs to be updated to support duration field calculation. + * + * RTS: time needed to transmit pending data/mgmt frame plus + * one CTS frame plus one ACK frame plus 3 x SIFS + * CTS: duration of immediately previous RTS minus time + * required to transmit CTS and its SIFS + * ACK: 0 if immediately previous directed data/mgmt had + * more=0, with more=1 duration in ACK frame is duration + * from previous frame minus time needed to transmit ACK + * and its SIFS + * PS Poll: BIT(15) | BIT(14) | aid + */ + return 0; + } + + /* data/mgmt */ + if (0 /* FIX: data/mgmt during CFP */) + return 32768; + + if (group_addr) /* Group address as the destination - no ACK */ + return 0; + + /* Individual destination address: + * IEEE 802.11, Ch. 9.6 (after IEEE 802.11g changes) + * CTS and ACK frames shall be transmitted using the highest rate in + * basic rate set that is less than or equal to the rate of the + * immediately previous frame and that is using the same modulation + * (CCK or OFDM). If no basic rate set matches with these requirements, + * the highest mandatory rate of the PHY that is less than or equal to + * the rate of the previous frame is used. + * Mandatory rates for IEEE 802.11g PHY: 1, 2, 5.5, 11, 6, 12, 24 Mbps + */ + rate = -1; + mrate = 10; /* use 1 Mbps if everything fails */ + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + if (r->rate > txrate->rate) + break; + + if (IEEE80211_RATE_MODULATION(txrate->flags) != + IEEE80211_RATE_MODULATION(r->flags)) + continue; + + if (r->flags & IEEE80211_RATE_BASIC) + rate = r->rate; + else if (r->flags & IEEE80211_RATE_MANDATORY) + mrate = r->rate; + } + if (rate == -1) { + /* No matching basic rate found; use highest suitable mandatory + * PHY rate */ + rate = mrate; + } + + /* Time needed to transmit ACK + * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up + * to closest integer */ + + dur = ieee80211_frame_duration(local, 10, rate, erp, + tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); + + if (next_frag_len) { + /* Frame is fragmented: duration increases with time needed to + * transmit next fragment plus ACK and 2 x SIFS. */ + dur *= 2; /* ACK + SIFS */ + /* next fragment */ + dur += ieee80211_frame_duration(local, next_frag_len, + txrate->rate, erp, + tx->sdata->flags & + IEEE80211_SDATA_SHORT_PREAMBLE); + } + + return dur; +} + +static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local, + int queue) +{ + return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); +} + +static inline int __ieee80211_queue_pending(const struct ieee80211_local *local, + int queue) +{ + return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]); +} + +static int inline is_ieee80211_device(struct net_device *dev, + struct net_device *master) +{ + return (wdev_priv(dev->ieee80211_ptr) == + wdev_priv(master->ieee80211_ptr)); +} + +/* tx handlers */ + +static ieee80211_txrx_result +ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx) +{ +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + struct sk_buff *skb = tx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + u32 sta_flags; + + if (unlikely(tx->flags & IEEE80211_TXRXD_TX_INJECTED)) + return TXRX_CONTINUE; + + if (unlikely(tx->local->sta_scanning != 0) && + ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) + return TXRX_DROP; + + if (tx->flags & IEEE80211_TXRXD_TXPS_BUFFERED) + return TXRX_CONTINUE; + + sta_flags = tx->sta ? tx->sta->flags : 0; + + if (likely(tx->flags & IEEE80211_TXRXD_TXUNICAST)) { + if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && + tx->sdata->type != IEEE80211_IF_TYPE_IBSS && + (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s: dropped data frame to not " + "associated station %s\n", + tx->dev->name, print_mac(mac, hdr->addr1)); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); + return TXRX_DROP; + } + } else { + if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + tx->local->num_sta == 0 && + tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) { + /* + * No associated STAs - no need to send multicast + * frames. + */ + return TXRX_DROP; + } + return TXRX_CONTINUE; + } + + if (unlikely(/* !injected && */ tx->sdata->ieee802_1x && + !(sta_flags & WLAN_STA_AUTHORIZED))) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s: dropped frame to %s" + " (unauthorized port)\n", tx->dev->name, + print_mac(mac, hdr->addr1)); +#endif + I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port); + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_sequence(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + + if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24) + ieee80211_include_sequence(tx->sdata, hdr); + + return TXRX_CONTINUE; +} + +/* This function is called whenever the AP is about to exceed the maximum limit + * of buffered frames for power saving STAs. This situation should not really + * happen often during normal operation, so dropping the oldest buffered packet + * from each queue should be OK to make some room for new frames. */ +static void purge_old_ps_buffers(struct ieee80211_local *local) +{ + int total = 0, purged = 0; + struct sk_buff *skb; + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + + /* + * virtual interfaces are protected by RCU + */ + rcu_read_lock(); + + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + struct ieee80211_if_ap *ap; + if (sdata->dev == local->mdev || + sdata->type != IEEE80211_IF_TYPE_AP) + continue; + ap = &sdata->u.ap; + skb = skb_dequeue(&ap->ps_bc_buf); + if (skb) { + purged++; + dev_kfree_skb(skb); + } + total += skb_queue_len(&ap->ps_bc_buf); + } + rcu_read_unlock(); + + read_lock_bh(&local->sta_lock); + list_for_each_entry(sta, &local->sta_list, list) { + skb = skb_dequeue(&sta->ps_tx_buf); + if (skb) { + purged++; + dev_kfree_skb(skb); + } + total += skb_queue_len(&sta->ps_tx_buf); + } + read_unlock_bh(&local->sta_lock); + + local->total_ps_buffered = total; + printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", + wiphy_name(local->hw.wiphy), purged); +} + +static inline ieee80211_txrx_result +ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) +{ + /* broadcast/multicast frame */ + /* If any of the associated stations is in power save mode, + * the frame is buffered to be sent after DTIM beacon frame */ + if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) && + tx->sdata->type != IEEE80211_IF_TYPE_WDS && + tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) && + !(tx->fc & IEEE80211_FCTL_ORDER)) { + if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) + purge_old_ps_buffers(tx->local); + if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= + AP_MAX_BC_BUFFER) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: BC TX buffer full - " + "dropping the oldest frame\n", + tx->dev->name); + } + dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); + } else + tx->local->total_ps_buffered++; + skb_queue_tail(&tx->sdata->bss->ps_bc_buf, tx->skb); + return TXRX_QUEUED; + } + + return TXRX_CONTINUE; +} + +static inline ieee80211_txrx_result +ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx) +{ + struct sta_info *sta = tx->sta; + DECLARE_MAC_BUF(mac); + + if (unlikely(!sta || + ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && + (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) + return TXRX_CONTINUE; + + if (unlikely((sta->flags & WLAN_STA_PS) && !sta->pspoll)) { + struct ieee80211_tx_packet_data *pkt_data; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries " + "before %d)\n", + print_mac(mac, sta->addr), sta->aid, + skb_queue_len(&sta->ps_tx_buf)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + sta->flags |= WLAN_STA_TIM; + if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) + purge_old_ps_buffers(tx->local); + if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { + struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: STA %s TX " + "buffer full - dropping oldest frame\n", + tx->dev->name, print_mac(mac, sta->addr)); + } + dev_kfree_skb(old); + } else + tx->local->total_ps_buffered++; + /* Queue frame to be sent after STA sends an PS Poll frame */ + if (skb_queue_empty(&sta->ps_tx_buf)) { + if (tx->local->ops->set_tim) + tx->local->ops->set_tim(local_to_hw(tx->local), + sta->aid, 1); + if (tx->sdata->bss) + bss_tim_set(tx->local, tx->sdata->bss, sta->aid); + } + pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb; + pkt_data->jiffies = jiffies; + skb_queue_tail(&sta->ps_tx_buf, tx->skb); + return TXRX_QUEUED; + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + else if (unlikely(sta->flags & WLAN_STA_PS)) { + printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll " + "set -> send frame\n", tx->dev->name, + print_mac(mac, sta->addr)); + } +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + sta->pspoll = 0; + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result +ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) +{ + if (unlikely(tx->flags & IEEE80211_TXRXD_TXPS_BUFFERED)) + return TXRX_CONTINUE; + + if (tx->flags & IEEE80211_TXRXD_TXUNICAST) + return ieee80211_tx_h_unicast_ps_buf(tx); + else + return ieee80211_tx_h_multicast_ps_buf(tx); +} + + + + +static ieee80211_txrx_result +ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_key *key; + + if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) + tx->key = NULL; + else if (tx->sta && (key = rcu_dereference(tx->sta->key))) + tx->key = key; + else if ((key = rcu_dereference(tx->sdata->default_key))) + tx->key = key; + else if (tx->sdata->drop_unencrypted && + !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) { + I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); + return TXRX_DROP; + } else { + tx->key = NULL; + tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + } + + if (tx->key) { + tx->key->tx_rx_count++; + /* TODO: add threshold stuff again */ + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + size_t hdrlen, per_fragm, num_fragm, payload_len, left; + struct sk_buff **frags, *first, *frag; + int i; + u16 seq; + u8 *pos; + int frag_threshold = tx->local->fragmentation_threshold; + + if (!(tx->flags & IEEE80211_TXRXD_FRAGMENTED)) + return TXRX_CONTINUE; + + first = tx->skb; + + hdrlen = ieee80211_get_hdrlen(tx->fc); + payload_len = first->len - hdrlen; + per_fragm = frag_threshold - hdrlen - FCS_LEN; + num_fragm = DIV_ROUND_UP(payload_len, per_fragm); + + frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC); + if (!frags) + goto fail; + + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); + seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ; + pos = first->data + hdrlen + per_fragm; + left = payload_len - per_fragm; + for (i = 0; i < num_fragm - 1; i++) { + struct ieee80211_hdr *fhdr; + size_t copylen; + + if (left <= 0) + goto fail; + + /* reserve enough extra head and tail room for possible + * encryption */ + frag = frags[i] = + dev_alloc_skb(tx->local->tx_headroom + + frag_threshold + + IEEE80211_ENCRYPT_HEADROOM + + IEEE80211_ENCRYPT_TAILROOM); + if (!frag) + goto fail; + /* Make sure that all fragments use the same priority so + * that they end up using the same TX queue */ + frag->priority = first->priority; + skb_reserve(frag, tx->local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM); + fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); + memcpy(fhdr, first->data, hdrlen); + if (i == num_fragm - 2) + fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); + fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); + copylen = left > per_fragm ? per_fragm : left; + memcpy(skb_put(frag, copylen), pos, copylen); + + pos += copylen; + left -= copylen; + } + skb_trim(first, hdrlen + per_fragm); + + tx->u.tx.num_extra_frag = num_fragm - 1; + tx->u.tx.extra_frag = frags; + + return TXRX_CONTINUE; + + fail: + printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name); + if (frags) { + for (i = 0; i < num_fragm - 1; i++) + if (frags[i]) + dev_kfree_skb(frags[i]); + kfree(frags); + } + I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment); + return TXRX_DROP; +} + +static ieee80211_txrx_result +ieee80211_tx_h_encrypt(struct ieee80211_txrx_data *tx) +{ + if (!tx->key) + return TXRX_CONTINUE; + + switch (tx->key->conf.alg) { + case ALG_WEP: + return ieee80211_crypto_wep_encrypt(tx); + case ALG_TKIP: + return ieee80211_crypto_tkip_encrypt(tx); + case ALG_CCMP: + return ieee80211_crypto_ccmp_encrypt(tx); + } + + /* not reached */ + WARN_ON(1); + return TXRX_DROP; +} + +static ieee80211_txrx_result +ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) +{ + struct rate_control_extra extra; + + if (likely(!tx->u.tx.rate)) { + memset(&extra, 0, sizeof(extra)); + extra.mode = tx->u.tx.mode; + extra.ethertype = tx->ethertype; + + tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev, + tx->skb, &extra); + if (unlikely(extra.probe != NULL)) { + tx->u.tx.control->flags |= + IEEE80211_TXCTL_RATE_CTRL_PROBE; + tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val; + tx->u.tx.rate = extra.probe; + } else + tx->u.tx.control->alt_retry_rate = -1; + + if (!tx->u.tx.rate) + return TXRX_DROP; + } else + tx->u.tx.control->alt_retry_rate = -1; + + if (tx->u.tx.mode->mode == MODE_IEEE80211G && + (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && + (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && extra.nonerp) { + tx->u.tx.last_frag_rate = tx->u.tx.rate; + if (extra.probe) + tx->flags &= ~IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + else + tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + tx->u.tx.rate = extra.nonerp; + tx->u.tx.control->rate = extra.nonerp; + tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; + } else { + tx->u.tx.last_frag_rate = tx->u.tx.rate; + tx->u.tx.control->rate = tx->u.tx.rate; + } + tx->u.tx.control->tx_rate = tx->u.tx.rate->val; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + u16 fc = le16_to_cpu(hdr->frame_control); + u16 dur; + struct ieee80211_tx_control *control = tx->u.tx.control; + struct ieee80211_hw_mode *mode = tx->u.tx.mode; + + if (!control->retry_limit) { + if (!is_multicast_ether_addr(hdr->addr1)) { + if (tx->skb->len + FCS_LEN > tx->local->rts_threshold + && tx->local->rts_threshold < + IEEE80211_MAX_RTS_THRESHOLD) { + control->flags |= + IEEE80211_TXCTL_USE_RTS_CTS; + control->flags |= + IEEE80211_TXCTL_LONG_RETRY_LIMIT; + control->retry_limit = + tx->local->long_retry_limit; + } else { + control->retry_limit = + tx->local->short_retry_limit; + } + } else { + control->retry_limit = 1; + } + } + + if (tx->flags & IEEE80211_TXRXD_FRAGMENTED) { + /* Do not use multiple retry rates when sending fragmented + * frames. + * TODO: The last fragment could still use multiple retry + * rates. */ + control->alt_retry_rate = -1; + } + + /* Use CTS protection for unicast frames sent using extended rates if + * there are associated non-ERP stations and RTS/CTS is not configured + * for the frame. */ + if (mode->mode == MODE_IEEE80211G && + (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && + (tx->flags & IEEE80211_TXRXD_TXUNICAST) && + (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && + !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) + control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; + + /* Transmit data frames using short preambles if the driver supports + * short preambles at the selected rate and short preambles are + * available on the network at the current point in time. */ + if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && + (tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) && + (tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { + tx->u.tx.control->tx_rate = tx->u.tx.rate->val2; + } + + /* Setup duration field for the first fragment of the frame. Duration + * for remaining fragments will be updated when they are being sent + * to low-level driver in ieee80211_tx(). */ + dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1), + (tx->flags & IEEE80211_TXRXD_FRAGMENTED) ? + tx->u.tx.extra_frag[0]->len : 0); + hdr->duration_id = cpu_to_le16(dur); + + if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) || + (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) { + struct ieee80211_rate *rate; + + /* Do not use multiple retry rates when using RTS/CTS */ + control->alt_retry_rate = -1; + + /* Use min(data rate, max base rate) as CTS/RTS rate */ + rate = tx->u.tx.rate; + while (rate > mode->rates && + !(rate->flags & IEEE80211_RATE_BASIC)) + rate--; + + control->rts_cts_rate = rate->val; + control->rts_rate = rate; + } + + if (tx->sta) { + tx->sta->tx_packets++; + tx->sta->tx_fragments++; + tx->sta->tx_bytes += tx->skb->len; + if (tx->u.tx.extra_frag) { + int i; + tx->sta->tx_fragments += tx->u.tx.num_extra_frag; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + tx->sta->tx_bytes += + tx->u.tx.extra_frag[i]->len; + } + } + } + + /* + * Tell hardware to not encrypt when we had sw crypto. + * Because we use the same flag to internally indicate that + * no (software) encryption should be done, we have to set it + * after all crypto handlers. + */ + if (tx->key && !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_load_stats(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_local *local = tx->local; + struct ieee80211_hw_mode *mode = tx->u.tx.mode; + struct sk_buff *skb = tx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u32 load = 0, hdrtime; + + /* TODO: this could be part of tx_status handling, so that the number + * of retries would be known; TX rate should in that case be stored + * somewhere with the packet */ + + /* Estimate total channel use caused by this frame */ + + /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, + * 1 usec = 1/8 * (1080 / 10) = 13.5 */ + + if (mode->mode == MODE_IEEE80211A || + (mode->mode == MODE_IEEE80211G && + tx->u.tx.rate->flags & IEEE80211_RATE_ERP)) + hdrtime = CHAN_UTIL_HDR_SHORT; + else + hdrtime = CHAN_UTIL_HDR_LONG; + + load = hdrtime; + if (!is_multicast_ether_addr(hdr->addr1)) + load += hdrtime; + + if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_RTS_CTS) + load += 2 * hdrtime; + else if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) + load += hdrtime; + + load += skb->len * tx->u.tx.rate->rate_inv; + + if (tx->u.tx.extra_frag) { + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + load += 2 * hdrtime; + load += tx->u.tx.extra_frag[i]->len * + tx->u.tx.rate->rate; + } + } + + /* Divide channel_use by 8 to avoid wrapping around the counter */ + load >>= CHAN_UTIL_SHIFT; + local->channel_use_raw += load; + if (tx->sta) + tx->sta->channel_use_raw += load; + tx->sdata->channel_use_raw += load; + + return TXRX_CONTINUE; +} + +/* TODO: implement register/unregister functions for adding TX/RX handlers + * into ordered list */ + +ieee80211_tx_handler ieee80211_tx_handlers[] = +{ + ieee80211_tx_h_check_assoc, + ieee80211_tx_h_sequence, + ieee80211_tx_h_ps_buf, + ieee80211_tx_h_select_key, + ieee80211_tx_h_michael_mic_add, + ieee80211_tx_h_fragment, + ieee80211_tx_h_encrypt, + ieee80211_tx_h_rate_ctrl, + ieee80211_tx_h_misc, + ieee80211_tx_h_load_stats, + NULL +}; + +/* actual transmit path */ + +/* + * deal with packet injection down monitor interface + * with Radiotap Header -- only called for monitor mode interface + */ +static ieee80211_txrx_result +__ieee80211_parse_tx_radiotap(struct ieee80211_txrx_data *tx, + struct sk_buff *skb) +{ + /* + * this is the moment to interpret and discard the radiotap header that + * must be at the start of the packet injected in Monitor mode + * + * Need to take some care with endian-ness since radiotap + * args are little-endian + */ + + struct ieee80211_radiotap_iterator iterator; + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); + struct ieee80211_tx_control *control = tx->u.tx.control; + + control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + tx->flags |= IEEE80211_TXRXD_TX_INJECTED; + tx->flags &= ~IEEE80211_TXRXD_FRAGMENTED; + + /* + * for every radiotap entry that is present + * (ieee80211_radiotap_iterator_next returns -ENOENT when no more + * entries present, or -EINVAL on error) + */ + + while (!ret) { + int i, target_rate; + + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_RATE: + /* + * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps + * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps + */ + target_rate = (*iterator.this_arg) * 5; + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + + if (r->rate == target_rate) { + tx->u.tx.rate = r; + break; + } + } + break; + + case IEEE80211_RADIOTAP_ANTENNA: + /* + * radiotap uses 0 for 1st ant, mac80211 is 1 for + * 1st ant + */ + control->antenna_sel_tx = (*iterator.this_arg) + 1; + break; + + case IEEE80211_RADIOTAP_DBM_TX_POWER: + control->power_level = *iterator.this_arg; + break; + + case IEEE80211_RADIOTAP_FLAGS: + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { + /* + * this indicates that the skb we have been + * handed has the 32-bit FCS CRC at the end... + * we should react to that by snipping it off + * because it will be recomputed and added + * on transmission + */ + if (skb->len < (iterator.max_length + FCS_LEN)) + return TXRX_DROP; + + skb_trim(skb, skb->len - FCS_LEN); + } + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) + control->flags &= + ~IEEE80211_TXCTL_DO_NOT_ENCRYPT; + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) + tx->flags |= IEEE80211_TXRXD_FRAGMENTED; + break; + + /* + * Please update the file + * Documentation/networking/mac80211-injection.txt + * when parsing new fields here. + */ + + default: + break; + } + } + + if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ + return TXRX_DROP; + + /* + * remove the radiotap header + * iterator->max_length was sanity-checked against + * skb->len by iterator init + */ + skb_pull(skb, iterator.max_length); + + return TXRX_CONTINUE; +} + +/* + * initialises @tx + */ +static ieee80211_txrx_result +__ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, + struct sk_buff *skb, + struct net_device *dev, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_hdr *hdr; + struct ieee80211_sub_if_data *sdata; + ieee80211_txrx_result res = TXRX_CONTINUE; + + int hdrlen; + + memset(tx, 0, sizeof(*tx)); + tx->skb = skb; + tx->dev = dev; /* use original interface */ + tx->local = local; + tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); + tx->u.tx.control = control; + /* + * Set this flag (used below to indicate "automatic fragmentation"), + * it will be cleared/left by radiotap as desired. + */ + tx->flags |= IEEE80211_TXRXD_FRAGMENTED; + + /* process and remove the injection radiotap header */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { + if (__ieee80211_parse_tx_radiotap(tx, skb) == TXRX_DROP) + return TXRX_DROP; + + /* + * __ieee80211_parse_tx_radiotap has now removed + * the radiotap header that was present and pre-filled + * 'tx' with tx control information. + */ + } + + hdr = (struct ieee80211_hdr *) skb->data; + + tx->sta = sta_info_get(local, hdr->addr1); + tx->fc = le16_to_cpu(hdr->frame_control); + + if (is_multicast_ether_addr(hdr->addr1)) { + tx->flags &= ~IEEE80211_TXRXD_TXUNICAST; + control->flags |= IEEE80211_TXCTL_NO_ACK; + } else { + tx->flags |= IEEE80211_TXRXD_TXUNICAST; + control->flags &= ~IEEE80211_TXCTL_NO_ACK; + } + + if (tx->flags & IEEE80211_TXRXD_FRAGMENTED) { + if ((tx->flags & IEEE80211_TXRXD_TXUNICAST) && + skb->len + FCS_LEN > local->fragmentation_threshold && + !local->ops->set_frag_threshold) + tx->flags |= IEEE80211_TXRXD_FRAGMENTED; + else + tx->flags &= ~IEEE80211_TXRXD_FRAGMENTED; + } + + if (!tx->sta) + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + else if (tx->sta->clear_dst_mask) { + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + tx->sta->clear_dst_mask = 0; + } + + hdrlen = ieee80211_get_hdrlen(tx->fc); + if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { + u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; + tx->ethertype = (pos[0] << 8) | pos[1]; + } + control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; + + return res; +} + +/* Device in tx->dev has a reference added; use dev_put(tx->dev) when + * finished with it. + * + * NB: @tx is uninitialised when passed in here + */ +static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, + struct sk_buff *skb, + struct net_device *mdev, + struct ieee80211_tx_control *control) +{ + struct ieee80211_tx_packet_data *pkt_data; + struct net_device *dev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + dev = dev_get_by_index(&init_net, pkt_data->ifindex); + if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { + dev_put(dev); + dev = NULL; + } + if (unlikely(!dev)) + return -ENODEV; + /* initialises tx with control */ + __ieee80211_tx_prepare(tx, skb, dev, control); + return 0; +} + +static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, + struct ieee80211_txrx_data *tx) +{ + struct ieee80211_tx_control *control = tx->u.tx.control; + int ret, i; + + if (!ieee80211_qdisc_installed(local->mdev) && + __ieee80211_queue_stopped(local, 0)) { + netif_stop_queue(local->mdev); + return IEEE80211_TX_AGAIN; + } + if (skb) { + ieee80211_dump_frame(wiphy_name(local->hw.wiphy), + "TX to low-level driver", skb); + ret = local->ops->tx(local_to_hw(local), skb, control); + if (ret) + return IEEE80211_TX_AGAIN; + local->mdev->trans_start = jiffies; + ieee80211_led_tx(local, 1); + } + if (tx->u.tx.extra_frag) { + control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | + IEEE80211_TXCTL_USE_CTS_PROTECT | + IEEE80211_TXCTL_CLEAR_DST_MASK | + IEEE80211_TXCTL_FIRST_FRAGMENT); + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + if (!tx->u.tx.extra_frag[i]) + continue; + if (__ieee80211_queue_stopped(local, control->queue)) + return IEEE80211_TX_FRAG_AGAIN; + if (i == tx->u.tx.num_extra_frag) { + control->tx_rate = tx->u.tx.last_frag_hwrate; + control->rate = tx->u.tx.last_frag_rate; + if (tx->flags & IEEE80211_TXRXD_TXPROBE_LAST_FRAG) + control->flags |= + IEEE80211_TXCTL_RATE_CTRL_PROBE; + else + control->flags &= + ~IEEE80211_TXCTL_RATE_CTRL_PROBE; + } + + ieee80211_dump_frame(wiphy_name(local->hw.wiphy), + "TX to low-level driver", + tx->u.tx.extra_frag[i]); + ret = local->ops->tx(local_to_hw(local), + tx->u.tx.extra_frag[i], + control); + if (ret) + return IEEE80211_TX_FRAG_AGAIN; + local->mdev->trans_start = jiffies; + ieee80211_led_tx(local, 1); + tx->u.tx.extra_frag[i] = NULL; + } + kfree(tx->u.tx.extra_frag); + tx->u.tx.extra_frag = NULL; + } + return IEEE80211_TX_OK; +} + +static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sta_info *sta; + ieee80211_tx_handler *handler; + struct ieee80211_txrx_data tx; + ieee80211_txrx_result res = TXRX_DROP, res_prepare; + int ret, i; + + WARN_ON(__ieee80211_queue_pending(local, control->queue)); + + if (unlikely(skb->len < 10)) { + dev_kfree_skb(skb); + return 0; + } + + /* initialises tx */ + res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); + + if (res_prepare == TXRX_DROP) { + dev_kfree_skb(skb); + return 0; + } + + /* + * key references are protected using RCU and this requires that + * we are in a read-site RCU section during receive processing + */ + rcu_read_lock(); + + sta = tx.sta; + tx.u.tx.mode = local->hw.conf.mode; + + for (handler = local->tx_handlers; *handler != NULL; + handler++) { + res = (*handler)(&tx); + if (res != TXRX_CONTINUE) + break; + } + + skb = tx.skb; /* handlers are allowed to change skb */ + + if (sta) + sta_info_put(sta); + + if (unlikely(res == TXRX_DROP)) { + I802_DEBUG_INC(local->tx_handlers_drop); + goto drop; + } + + if (unlikely(res == TXRX_QUEUED)) { + I802_DEBUG_INC(local->tx_handlers_queued); + rcu_read_unlock(); + return 0; + } + + if (tx.u.tx.extra_frag) { + for (i = 0; i < tx.u.tx.num_extra_frag; i++) { + int next_len, dur; + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) + tx.u.tx.extra_frag[i]->data; + + if (i + 1 < tx.u.tx.num_extra_frag) { + next_len = tx.u.tx.extra_frag[i + 1]->len; + } else { + next_len = 0; + tx.u.tx.rate = tx.u.tx.last_frag_rate; + tx.u.tx.last_frag_hwrate = tx.u.tx.rate->val; + } + dur = ieee80211_duration(&tx, 0, next_len); + hdr->duration_id = cpu_to_le16(dur); + } + } + +retry: + ret = __ieee80211_tx(local, skb, &tx); + if (ret) { + struct ieee80211_tx_stored_packet *store = + &local->pending_packet[control->queue]; + + if (ret == IEEE80211_TX_FRAG_AGAIN) + skb = NULL; + set_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[control->queue]); + smp_mb(); + /* When the driver gets out of buffers during sending of + * fragments and calls ieee80211_stop_queue, there is + * a small window between IEEE80211_LINK_STATE_XOFF and + * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer + * gets available in that window (i.e. driver calls + * ieee80211_wake_queue), we would end up with ieee80211_tx + * called with IEEE80211_LINK_STATE_PENDING. Prevent this by + * continuing transmitting here when that situation is + * possible to have happened. */ + if (!__ieee80211_queue_stopped(local, control->queue)) { + clear_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[control->queue]); + goto retry; + } + memcpy(&store->control, control, + sizeof(struct ieee80211_tx_control)); + store->skb = skb; + store->extra_frag = tx.u.tx.extra_frag; + store->num_extra_frag = tx.u.tx.num_extra_frag; + store->last_frag_hwrate = tx.u.tx.last_frag_hwrate; + store->last_frag_rate = tx.u.tx.last_frag_rate; + store->last_frag_rate_ctrl_probe = + !!(tx.flags & IEEE80211_TXRXD_TXPROBE_LAST_FRAG); + } + rcu_read_unlock(); + return 0; + + drop: + if (skb) + dev_kfree_skb(skb); + for (i = 0; i < tx.u.tx.num_extra_frag; i++) + if (tx.u.tx.extra_frag[i]) + dev_kfree_skb(tx.u.tx.extra_frag[i]); + kfree(tx.u.tx.extra_frag); + rcu_read_unlock(); + return 0; +} + +/* device xmit handlers */ + +int ieee80211_master_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_tx_control control; + struct ieee80211_tx_packet_data *pkt_data; + struct net_device *odev = NULL; + struct ieee80211_sub_if_data *osdata; + int headroom; + int ret; + + /* + * copy control out of the skb so other people can use skb->cb + */ + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(&control, 0, sizeof(struct ieee80211_tx_control)); + + if (pkt_data->ifindex) + odev = dev_get_by_index(&init_net, pkt_data->ifindex); + if (unlikely(odev && !is_ieee80211_device(odev, dev))) { + dev_put(odev); + odev = NULL; + } + if (unlikely(!odev)) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Discarded packet with nonexistent " + "originating device\n", dev->name); +#endif + dev_kfree_skb(skb); + return 0; + } + osdata = IEEE80211_DEV_TO_SUB_IF(odev); + + headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; + if (skb_headroom(skb) < headroom) { + if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + dev_put(odev); + return 0; + } + } + + control.ifindex = odev->ifindex; + control.type = osdata->type; + if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS) + control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; + if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT) + control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) + control.flags |= IEEE80211_TXCTL_REQUEUE; + control.queue = pkt_data->queue; + + ret = ieee80211_tx(odev, skb, &control); + dev_put(odev); + + return ret; +} + +int ieee80211_monitor_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_radiotap_header *prthdr = + (struct ieee80211_radiotap_header *)skb->data; + u16 len_rthdr; + + /* check for not even having the fixed radiotap header part */ + if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) + goto fail; /* too short to be possibly valid */ + + /* is it a header version we can trust to find length from? */ + if (unlikely(prthdr->it_version)) + goto fail; /* only version 0 is supported */ + + /* then there must be a radiotap header with a length we can use */ + len_rthdr = ieee80211_get_radiotap_len(skb->data); + + /* does the skb contain enough to deliver on the alleged length? */ + if (unlikely(skb->len < len_rthdr)) + goto fail; /* skb too short for claimed rt header extent */ + + skb->dev = local->mdev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(*pkt_data)); + /* needed because we set skb device to master */ + pkt_data->ifindex = dev->ifindex; + + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + + /* + * fix up the pointers accounting for the radiotap + * header still being in there. We are being given + * a precooked IEEE80211 header so no need for + * normal processing + */ + skb_set_mac_header(skb, len_rthdr); + /* + * these are just fixed to the end of the rt area since we + * don't have any better information and at this point, nobody cares + */ + skb_set_network_header(skb, len_rthdr); + skb_set_transport_header(skb, len_rthdr); + + /* pass the radiotap header up to the next stage intact */ + dev_queue_xmit(skb); + return NETDEV_TX_OK; + +fail: + dev_kfree_skb(skb); + return NETDEV_TX_OK; /* meaning, we dealt with the skb */ +} + +/** + * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type + * subinterfaces (wlan#, WDS, and VLAN interfaces) + * @skb: packet to be sent + * @dev: incoming interface + * + * Returns: 0 on success (and frees skb in this case) or 1 on failure (skb will + * not be freed, and caller is responsible for either retrying later or freeing + * skb). + * + * This function takes in an Ethernet header and encapsulates it with suitable + * IEEE 802.11 header based on which interface the packet is coming in. The + * encapsulated packet will then be passed to master interface, wlan#.11, for + * transmission (through low-level driver). + */ +int ieee80211_subif_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_sub_if_data *sdata; + int ret = 1, head_need; + u16 ethertype, hdrlen, fc; + struct ieee80211_hdr hdr; + const u8 *encaps_data; + int encaps_len, skip_header_bytes; + int nh_pos, h_pos; + struct sta_info *sta; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(skb->len < ETH_HLEN)) { + printk(KERN_DEBUG "%s: short skb (len=%d)\n", + dev->name, skb->len); + ret = 0; + goto fail; + } + + nh_pos = skb_network_header(skb) - skb->data; + h_pos = skb_transport_header(skb) - skb->data; + + /* convert Ethernet header to proper 802.11 header (based on + * operation mode) */ + ethertype = (skb->data[12] << 8) | skb->data[13]; + /* TODO: handling for 802.1x authorized/unauthorized port */ + fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; + + switch (sdata->type) { + case IEEE80211_IF_TYPE_AP: + case IEEE80211_IF_TYPE_VLAN: + fc |= IEEE80211_FCTL_FROMDS; + /* DA BSSID SA */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 24; + break; + case IEEE80211_IF_TYPE_WDS: + fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; + /* RA TA DA SA */ + memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 30; + break; + case IEEE80211_IF_TYPE_STA: + fc |= IEEE80211_FCTL_TODS; + /* BSSID SA DA */ + memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + hdrlen = 24; + break; + case IEEE80211_IF_TYPE_IBSS: + /* DA SA BSSID */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); + hdrlen = 24; + break; + default: + ret = 0; + goto fail; + } + + /* receiver is QoS enabled, use a QoS type frame */ + sta = sta_info_get(local, hdr.addr1); + if (sta) { + if (sta->flags & WLAN_STA_WME) { + fc |= IEEE80211_STYPE_QOS_DATA; + hdrlen += 2; + } + sta_info_put(sta); + } + + hdr.frame_control = cpu_to_le16(fc); + hdr.duration_id = 0; + hdr.seq_ctrl = 0; + + skip_header_bytes = ETH_HLEN; + if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { + encaps_data = bridge_tunnel_header; + encaps_len = sizeof(bridge_tunnel_header); + skip_header_bytes -= 2; + } else if (ethertype >= 0x600) { + encaps_data = rfc1042_header; + encaps_len = sizeof(rfc1042_header); + skip_header_bytes -= 2; + } else { + encaps_data = NULL; + encaps_len = 0; + } + + skb_pull(skb, skip_header_bytes); + nh_pos -= skip_header_bytes; + h_pos -= skip_header_bytes; + + /* TODO: implement support for fragments so that there is no need to + * reallocate and copy payload; it might be enough to support one + * extra fragment that would be copied in the beginning of the frame + * data.. anyway, it would be nice to include this into skb structure + * somehow + * + * There are few options for this: + * use skb->cb as an extra space for 802.11 header + * allocate new buffer if not enough headroom + * make sure that there is enough headroom in every skb by increasing + * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and + * alloc_skb() (net/core/skbuff.c) + */ + head_need = hdrlen + encaps_len + local->tx_headroom; + head_need -= skb_headroom(skb); + + /* We are going to modify skb data, so make a copy of it if happens to + * be cloned. This could happen, e.g., with Linux bridge code passing + * us broadcast frames. */ + + if (head_need > 0 || skb_cloned(skb)) { +#if 0 + printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " + "of headroom\n", dev->name, head_need); +#endif + + if (skb_cloned(skb)) + I802_DEBUG_INC(local->tx_expand_skb_head_cloned); + else + I802_DEBUG_INC(local->tx_expand_skb_head); + /* Since we have to reallocate the buffer, make sure that there + * is enough room for possible WEP IV/ICV and TKIP (8 bytes + * before payload and 12 after). */ + if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8), + 12, GFP_ATOMIC)) { + printk(KERN_DEBUG "%s: failed to reallocate TX buffer" + "\n", dev->name); + goto fail; + } + } + + if (encaps_data) { + memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); + nh_pos += encaps_len; + h_pos += encaps_len; + } + + if (fc & IEEE80211_STYPE_QOS_DATA) { + __le16 *qos_control; + + qos_control = (__le16*) skb_push(skb, 2); + memcpy(skb_push(skb, hdrlen - 2), &hdr, hdrlen - 2); + /* + * Maybe we could actually set some fields here, for now just + * initialise to zero to indicate no special operation. + */ + *qos_control = 0; + } else + memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); + + nh_pos += hdrlen; + h_pos += hdrlen; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); + pkt_data->ifindex = dev->ifindex; + + skb->dev = local->mdev; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* Update skb pointers to various headers since this modified frame + * is going to go through Linux networking code that may potentially + * need things like pointer to IP header. */ + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, nh_pos); + skb_set_transport_header(skb, h_pos); + + dev->trans_start = jiffies; + dev_queue_xmit(skb); + + return 0; + + fail: + if (!ret) + dev_kfree_skb(skb); + + return ret; +} + +/* + * This is the transmit routine for the 802.11 type interfaces + * called by upper layers of the linux networking + * stack when it has a frame to transmit + */ +int ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_hdr *hdr; + u16 fc; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (skb->len < 10) { + dev_kfree_skb(skb); + return 0; + } + + if (skb_headroom(skb) < sdata->local->tx_headroom) { + if (pskb_expand_head(skb, sdata->local->tx_headroom, + 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + return 0; + } + } + + hdr = (struct ieee80211_hdr *) skb->data; + fc = le16_to_cpu(hdr->frame_control); + + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); + pkt_data->ifindex = sdata->dev->ifindex; + + skb->priority = 20; /* use hardcoded priority for mgmt TX queue */ + skb->dev = sdata->local->mdev; + + /* + * We're using the protocol field of the the frame control header + * to request TX callback for hostapd. BIT(1) is checked. + */ + if ((fc & BIT(1)) == BIT(1)) { + pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; + fc &= ~BIT(1); + hdr->frame_control = cpu_to_le16(fc); + } + + if (!(fc & IEEE80211_FCTL_PROTECTED)) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + dev_queue_xmit(skb); + + return 0; +} + +/* helper functions for pending packets for when queues are stopped */ + +void ieee80211_clear_tx_pending(struct ieee80211_local *local) +{ + int i, j; + struct ieee80211_tx_stored_packet *store; + + for (i = 0; i < local->hw.queues; i++) { + if (!__ieee80211_queue_pending(local, i)) + continue; + store = &local->pending_packet[i]; + kfree_skb(store->skb); + for (j = 0; j < store->num_extra_frag; j++) + kfree_skb(store->extra_frag[j]); + kfree(store->extra_frag); + clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]); + } +} + +void ieee80211_tx_pending(unsigned long data) +{ + struct ieee80211_local *local = (struct ieee80211_local *)data; + struct net_device *dev = local->mdev; + struct ieee80211_tx_stored_packet *store; + struct ieee80211_txrx_data tx; + int i, ret, reschedule = 0; + + netif_tx_lock_bh(dev); + for (i = 0; i < local->hw.queues; i++) { + if (__ieee80211_queue_stopped(local, i)) + continue; + if (!__ieee80211_queue_pending(local, i)) { + reschedule = 1; + continue; + } + store = &local->pending_packet[i]; + tx.u.tx.control = &store->control; + tx.u.tx.extra_frag = store->extra_frag; + tx.u.tx.num_extra_frag = store->num_extra_frag; + tx.u.tx.last_frag_hwrate = store->last_frag_hwrate; + tx.u.tx.last_frag_rate = store->last_frag_rate; + tx.flags = 0; + if (store->last_frag_rate_ctrl_probe) + tx.flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + ret = __ieee80211_tx(local, store->skb, &tx); + if (ret) { + if (ret == IEEE80211_TX_FRAG_AGAIN) + store->skb = NULL; + } else { + clear_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[i]); + reschedule = 1; + } + } + netif_tx_unlock_bh(dev); + if (reschedule) { + if (!ieee80211_qdisc_installed(dev)) { + if (!__ieee80211_queue_stopped(local, 0)) + netif_wake_queue(dev); + } else + netif_schedule(dev); + } +} + +/* functions for drivers to get certain frames */ + +static void ieee80211_beacon_add_tim(struct ieee80211_local *local, + struct ieee80211_if_ap *bss, + struct sk_buff *skb) +{ + u8 *pos, *tim; + int aid0 = 0; + int i, have_bits = 0, n1, n2; + + /* Generate bitmap for TIM only if there are any STAs in power save + * mode. */ + read_lock_bh(&local->sta_lock); + if (atomic_read(&bss->num_sta_ps) > 0) + /* in the hope that this is faster than + * checking byte-for-byte */ + have_bits = !bitmap_empty((unsigned long*)bss->tim, + IEEE80211_MAX_AID+1); + + if (bss->dtim_count == 0) + bss->dtim_count = bss->dtim_period - 1; + else + bss->dtim_count--; + + tim = pos = (u8 *) skb_put(skb, 6); + *pos++ = WLAN_EID_TIM; + *pos++ = 4; + *pos++ = bss->dtim_count; + *pos++ = bss->dtim_period; + + if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) + aid0 = 1; + + if (have_bits) { + /* Find largest even number N1 so that bits numbered 1 through + * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits + * (N2 + 1) x 8 through 2007 are 0. */ + n1 = 0; + for (i = 0; i < IEEE80211_MAX_TIM_LEN; i++) { + if (bss->tim[i]) { + n1 = i & 0xfe; + break; + } + } + n2 = n1; + for (i = IEEE80211_MAX_TIM_LEN - 1; i >= n1; i--) { + if (bss->tim[i]) { + n2 = i; + break; + } + } + + /* Bitmap control */ + *pos++ = n1 | aid0; + /* Part Virt Bitmap */ + memcpy(pos, bss->tim + n1, n2 - n1 + 1); + + tim[1] = n2 - n1 + 4; + skb_put(skb, n2 - n1); + } else { + *pos++ = aid0; /* Bitmap control */ + *pos++ = 0; /* Part Virt Bitmap */ + } + read_unlock_bh(&local->sta_lock); +} + +struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sk_buff *skb; + struct net_device *bdev; + struct ieee80211_sub_if_data *sdata = NULL; + struct ieee80211_if_ap *ap = NULL; + struct ieee80211_rate *rate; + struct rate_control_extra extra; + u8 *b_head, *b_tail; + int bh_len, bt_len; + + bdev = dev_get_by_index(&init_net, if_id); + if (bdev) { + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + ap = &sdata->u.ap; + dev_put(bdev); + } + + if (!ap || sdata->type != IEEE80211_IF_TYPE_AP || + !ap->beacon_head) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "no beacon data avail for idx=%d " + "(%s)\n", if_id, bdev ? bdev->name : "N/A"); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + return NULL; + } + + /* Assume we are generating the normal beacon locally */ + b_head = ap->beacon_head; + b_tail = ap->beacon_tail; + bh_len = ap->beacon_head_len; + bt_len = ap->beacon_tail_len; + + skb = dev_alloc_skb(local->tx_headroom + + bh_len + bt_len + 256 /* maximum TIM len */); + if (!skb) + return NULL; + + skb_reserve(skb, local->tx_headroom); + memcpy(skb_put(skb, bh_len), b_head, bh_len); + + ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); + + ieee80211_beacon_add_tim(local, ap, skb); + + if (b_tail) { + memcpy(skb_put(skb, bt_len), b_tail, bt_len); + } + + if (control) { + memset(&extra, 0, sizeof(extra)); + extra.mode = local->oper_hw_mode; + + rate = rate_control_get_rate(local, local->mdev, skb, &extra); + if (!rate) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate " + "found\n", wiphy_name(local->hw.wiphy)); + } + dev_kfree_skb(skb); + return NULL; + } + + control->tx_rate = + ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? + rate->val2 : rate->val; + control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + control->power_level = local->hw.conf.power_level; + control->flags |= IEEE80211_TXCTL_NO_ACK; + control->retry_limit = 1; + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + } + + ap->num_beacons++; + return skb; +} +EXPORT_SYMBOL(ieee80211_beacon_get); + +void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id, + const void *frame, size_t frame_len, + const struct ieee80211_tx_control *frame_txctl, + struct ieee80211_rts *rts) +{ + const struct ieee80211_hdr *hdr = frame; + u16 fctl; + + fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; + rts->frame_control = cpu_to_le16(fctl); + rts->duration = ieee80211_rts_duration(hw, if_id, frame_len, frame_txctl); + memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); + memcpy(rts->ta, hdr->addr2, sizeof(rts->ta)); +} +EXPORT_SYMBOL(ieee80211_rts_get); + +void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id, + const void *frame, size_t frame_len, + const struct ieee80211_tx_control *frame_txctl, + struct ieee80211_cts *cts) +{ + const struct ieee80211_hdr *hdr = frame; + u16 fctl; + + fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; + cts->frame_control = cpu_to_le16(fctl); + cts->duration = ieee80211_ctstoself_duration(hw, if_id, frame_len, frame_txctl); + memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); +} +EXPORT_SYMBOL(ieee80211_ctstoself_get); + +struct sk_buff * +ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sk_buff *skb; + struct sta_info *sta; + ieee80211_tx_handler *handler; + struct ieee80211_txrx_data tx; + ieee80211_txrx_result res = TXRX_DROP; + struct net_device *bdev; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_ap *bss = NULL; + + bdev = dev_get_by_index(&init_net, if_id); + if (bdev) { + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + bss = &sdata->u.ap; + dev_put(bdev); + } + if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head) + return NULL; + + if (bss->dtim_count != 0) + return NULL; /* send buffered bc/mc only after DTIM beacon */ + memset(control, 0, sizeof(*control)); + while (1) { + skb = skb_dequeue(&bss->ps_bc_buf); + if (!skb) + return NULL; + local->total_ps_buffered--; + + if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) { + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) skb->data; + /* more buffered multicast/broadcast frames ==> set + * MoreData flag in IEEE 802.11 header to inform PS + * STAs */ + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + } + + if (!ieee80211_tx_prepare(&tx, skb, local->mdev, control)) + break; + dev_kfree_skb_any(skb); + } + sta = tx.sta; + tx.flags |= IEEE80211_TXRXD_TXPS_BUFFERED; + tx.u.tx.mode = local->hw.conf.mode; + + for (handler = local->tx_handlers; *handler != NULL; handler++) { + res = (*handler)(&tx); + if (res == TXRX_DROP || res == TXRX_QUEUED) + break; + } + dev_put(tx.dev); + skb = tx.skb; /* handlers are allowed to change skb */ + + if (res == TXRX_DROP) { + I802_DEBUG_INC(local->tx_handlers_drop); + dev_kfree_skb(skb); + skb = NULL; + } else if (res == TXRX_QUEUED) { + I802_DEBUG_INC(local->tx_handlers_queued); + skb = NULL; + } + + if (sta) + sta_info_put(sta); + + return skb; +} +EXPORT_SYMBOL(ieee80211_get_buffered_bc); diff --git a/net/mac80211/util.c b/net/mac80211/util.c new file mode 100644 index 00000000000..5a0564e1dbd --- /dev/null +++ b/net/mac80211/util.c @@ -0,0 +1,486 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * utilities for mac80211 + */ + +#include <net/mac80211.h> +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include <linux/wireless.h> +#include <linux/bitmap.h> +#include <net/net_namespace.h> +#include <net/cfg80211.h> + +#include "ieee80211_i.h" +#include "ieee80211_rate.h" +#include "wme.h" + +/* privid for wiphys to determine whether they belong to us or not */ +void *mac80211_wiphy_privid = &mac80211_wiphy_privid; + +/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ +/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ +const unsigned char rfc1042_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; + +/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ +const unsigned char bridge_tunnel_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; + +/* No encapsulation header if EtherType < 0x600 (=length) */ +static const unsigned char eapol_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; + + +static int rate_list_match(const int *rate_list, int rate) +{ + int i; + + if (!rate_list) + return 0; + + for (i = 0; rate_list[i] >= 0; i++) + if (rate_list[i] == rate) + return 1; + + return 0; +} + +void ieee80211_prepare_rates(struct ieee80211_local *local, + struct ieee80211_hw_mode *mode) +{ + int i; + + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *rate = &mode->rates[i]; + + rate->flags &= ~(IEEE80211_RATE_SUPPORTED | + IEEE80211_RATE_BASIC); + + if (local->supp_rates[mode->mode]) { + if (!rate_list_match(local->supp_rates[mode->mode], + rate->rate)) + continue; + } + + rate->flags |= IEEE80211_RATE_SUPPORTED; + + /* Use configured basic rate set if it is available. If not, + * use defaults that are sane for most cases. */ + if (local->basic_rates[mode->mode]) { + if (rate_list_match(local->basic_rates[mode->mode], + rate->rate)) + rate->flags |= IEEE80211_RATE_BASIC; + } else switch (mode->mode) { + case MODE_IEEE80211A: + if (rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case MODE_IEEE80211B: + if (rate->rate == 10 || rate->rate == 20) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case MODE_IEEE80211G: + if (rate->rate == 10 || rate->rate == 20 || + rate->rate == 55 || rate->rate == 110) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case NUM_IEEE80211_MODES: + /* not useful */ + break; + } + + /* Set ERP and MANDATORY flags based on phymode */ + switch (mode->mode) { + case MODE_IEEE80211A: + if (rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + case MODE_IEEE80211B: + if (rate->rate == 10) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + case MODE_IEEE80211G: + if (rate->rate == 10 || rate->rate == 20 || + rate->rate == 55 || rate->rate == 110 || + rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + case NUM_IEEE80211_MODES: + /* not useful */ + break; + } + if (ieee80211_is_erp_rate(mode->mode, rate->rate)) + rate->flags |= IEEE80211_RATE_ERP; + } +} + +u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len) +{ + u16 fc; + + if (len < 24) + return NULL; + + fc = le16_to_cpu(hdr->frame_control); + + switch (fc & IEEE80211_FCTL_FTYPE) { + case IEEE80211_FTYPE_DATA: + switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case IEEE80211_FCTL_TODS: + return hdr->addr1; + case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + return NULL; + case IEEE80211_FCTL_FROMDS: + return hdr->addr2; + case 0: + return hdr->addr3; + } + break; + case IEEE80211_FTYPE_MGMT: + return hdr->addr3; + case IEEE80211_FTYPE_CTL: + if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) + return hdr->addr1; + else + return NULL; + } + + return NULL; +} + +int ieee80211_get_hdrlen(u16 fc) +{ + int hdrlen = 24; + + switch (fc & IEEE80211_FCTL_FTYPE) { + case IEEE80211_FTYPE_DATA: + if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) + hdrlen = 30; /* Addr4 */ + /* + * The QoS Control field is two bytes and its presence is + * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to + * hdrlen if that bit is set. + * This works by masking out the bit and shifting it to + * bit position 1 so the result has the value 0 or 2. + */ + hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) + >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); + break; + case IEEE80211_FTYPE_CTL: + /* + * ACK and CTS are 10 bytes, all others 16. To see how + * to get this condition consider + * subtype mask: 0b0000000011110000 (0x00F0) + * ACK subtype: 0b0000000011010000 (0x00D0) + * CTS subtype: 0b0000000011000000 (0x00C0) + * bits that matter: ^^^ (0x00E0) + * value of those: 0b0000000011000000 (0x00C0) + */ + if ((fc & 0xE0) == 0xC0) + hdrlen = 10; + else + hdrlen = 16; + break; + } + + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_get_hdrlen); + +int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data; + int hdrlen; + + if (unlikely(skb->len < 10)) + return 0; + hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); + if (unlikely(hdrlen > skb->len)) + return 0; + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); + +int ieee80211_is_eapol(const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr; + u16 fc; + int hdrlen; + + if (unlikely(skb->len < 10)) + return 0; + + hdr = (const struct ieee80211_hdr *) skb->data; + fc = le16_to_cpu(hdr->frame_control); + + if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + return 0; + + hdrlen = ieee80211_get_hdrlen(fc); + + if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) && + memcmp(skb->data + hdrlen, eapol_header, + sizeof(eapol_header)) == 0)) + return 1; + + return 0; +} + +void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + if (tx->u.tx.extra_frag) { + struct ieee80211_hdr *fhdr; + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + fhdr = (struct ieee80211_hdr *) + tx->u.tx.extra_frag[i]->data; + fhdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + } + } +} + +int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, + int rate, int erp, int short_preamble) +{ + int dur; + + /* calculate duration (in microseconds, rounded up to next higher + * integer if it includes a fractional microsecond) to send frame of + * len bytes (does not include FCS) at the given rate. Duration will + * also include SIFS. + * + * rate is in 100 kbps, so divident is multiplied by 10 in the + * DIV_ROUND_UP() operations. + */ + + if (local->hw.conf.phymode == MODE_IEEE80211A || erp) { + /* + * OFDM: + * + * N_DBPS = DATARATE x 4 + * N_SYM = Ceiling((16+8xLENGTH+6) / N_DBPS) + * (16 = SIGNAL time, 6 = tail bits) + * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext + * + * T_SYM = 4 usec + * 802.11a - 17.5.2: aSIFSTime = 16 usec + * 802.11g - 19.8.4: aSIFSTime = 10 usec + + * signal ext = 6 usec + */ + dur = 16; /* SIFS + signal ext */ + dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */ + dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */ + dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10, + 4 * rate); /* T_SYM x N_SYM */ + } else { + /* + * 802.11b or 802.11g with 802.11b compatibility: + * 18.3.4: TXTIME = PreambleLength + PLCPHeaderTime + + * Ceiling(((LENGTH+PBCC)x8)/DATARATE). PBCC=0. + * + * 802.11 (DS): 15.3.3, 802.11b: 18.3.4 + * aSIFSTime = 10 usec + * aPreambleLength = 144 usec or 72 usec with short preamble + * aPLCPHeaderLength = 48 usec or 24 usec with short preamble + */ + dur = 10; /* aSIFSTime = 10 usec */ + dur += short_preamble ? (72 + 24) : (144 + 48); + + dur += DIV_ROUND_UP(8 * (len + 4) * 10, rate); + } + + return dur; +} + +/* Exported duration function for driver use */ +__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, int if_id, + size_t frame_len, int rate) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct net_device *bdev = dev_get_by_index(&init_net, if_id); + struct ieee80211_sub_if_data *sdata; + u16 dur; + int erp; + + if (unlikely(!bdev)) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + erp = ieee80211_is_erp_rate(hw->conf.phymode, rate); + dur = ieee80211_frame_duration(local, frame_len, rate, + erp, sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); + + dev_put(bdev); + return cpu_to_le16(dur); +} +EXPORT_SYMBOL(ieee80211_generic_frame_duration); + +__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id, + size_t frame_len, + const struct ieee80211_tx_control *frame_txctl) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_rate *rate; + struct net_device *bdev = dev_get_by_index(&init_net, if_id); + struct ieee80211_sub_if_data *sdata; + int short_preamble; + int erp; + u16 dur; + + if (unlikely(!bdev)) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE; + + rate = frame_txctl->rts_rate; + erp = !!(rate->flags & IEEE80211_RATE_ERP); + + /* CTS duration */ + dur = ieee80211_frame_duration(local, 10, rate->rate, + erp, short_preamble); + /* Data frame duration */ + dur += ieee80211_frame_duration(local, frame_len, rate->rate, + erp, short_preamble); + /* ACK duration */ + dur += ieee80211_frame_duration(local, 10, rate->rate, + erp, short_preamble); + + dev_put(bdev); + return cpu_to_le16(dur); +} +EXPORT_SYMBOL(ieee80211_rts_duration); + +__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id, + size_t frame_len, + const struct ieee80211_tx_control *frame_txctl) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_rate *rate; + struct net_device *bdev = dev_get_by_index(&init_net, if_id); + struct ieee80211_sub_if_data *sdata; + int short_preamble; + int erp; + u16 dur; + + if (unlikely(!bdev)) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE; + + rate = frame_txctl->rts_rate; + erp = !!(rate->flags & IEEE80211_RATE_ERP); + + /* Data frame duration */ + dur = ieee80211_frame_duration(local, frame_len, rate->rate, + erp, short_preamble); + if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) { + /* ACK duration */ + dur += ieee80211_frame_duration(local, 10, rate->rate, + erp, short_preamble); + } + + dev_put(bdev); + return cpu_to_le16(dur); +} +EXPORT_SYMBOL(ieee80211_ctstoself_duration); + +struct ieee80211_rate * +ieee80211_get_rate(struct ieee80211_local *local, int phymode, int hw_rate) +{ + struct ieee80211_hw_mode *mode; + int r; + + list_for_each_entry(mode, &local->modes_list, list) { + if (mode->mode != phymode) + continue; + for (r = 0; r < mode->num_rates; r++) { + struct ieee80211_rate *rate = &mode->rates[r]; + if (rate->val == hw_rate || + (rate->flags & IEEE80211_RATE_PREAMBLE2 && + rate->val2 == hw_rate)) + return rate; + } + } + + return NULL; +} + +void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF, + &local->state[queue])) { + if (test_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[queue])) + tasklet_schedule(&local->tx_pending_tasklet); + else + if (!ieee80211_qdisc_installed(local->mdev)) { + if (queue == 0) + netif_wake_queue(local->mdev); + } else + __netif_schedule(local->mdev); + } +} +EXPORT_SYMBOL(ieee80211_wake_queue); + +void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (!ieee80211_qdisc_installed(local->mdev) && queue == 0) + netif_stop_queue(local->mdev); + set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); +} +EXPORT_SYMBOL(ieee80211_stop_queue); + +void ieee80211_start_queues(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + int i; + + for (i = 0; i < local->hw.queues; i++) + clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]); + if (!ieee80211_qdisc_installed(local->mdev)) + netif_start_queue(local->mdev); +} +EXPORT_SYMBOL(ieee80211_start_queues); + +void ieee80211_stop_queues(struct ieee80211_hw *hw) +{ + int i; + + for (i = 0; i < hw->queues; i++) + ieee80211_stop_queue(hw, i); +} +EXPORT_SYMBOL(ieee80211_stop_queues); + +void ieee80211_wake_queues(struct ieee80211_hw *hw) +{ + int i; + + for (i = 0; i < hw->queues; i++) + ieee80211_wake_queue(hw, i); +} +EXPORT_SYMBOL(ieee80211_wake_queues); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 1ad3d75281c..a84a23310ff 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -16,7 +16,7 @@ #include <linux/crypto.h> #include <linux/err.h> #include <linux/mm.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <net/mac80211.h> #include "ieee80211_i.h" @@ -63,11 +63,11 @@ static inline int ieee80211_wep_weak_iv(u32 iv, int keylen) } -void ieee80211_wep_get_iv(struct ieee80211_local *local, - struct ieee80211_key *key, u8 *iv) +static void ieee80211_wep_get_iv(struct ieee80211_local *local, + struct ieee80211_key *key, u8 *iv) { local->wep_iv++; - if (ieee80211_wep_weak_iv(local->wep_iv, key->keylen)) + if (ieee80211_wep_weak_iv(local->wep_iv, key->conf.keylen)) local->wep_iv += 0x0100; if (!iv) @@ -76,13 +76,13 @@ void ieee80211_wep_get_iv(struct ieee80211_local *local, *iv++ = (local->wep_iv >> 16) & 0xff; *iv++ = (local->wep_iv >> 8) & 0xff; *iv++ = local->wep_iv & 0xff; - *iv++ = key->keyidx << 6; + *iv++ = key->conf.keyidx << 6; } -u8 * ieee80211_wep_add_iv(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_key *key) +static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_key *key) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 fc; @@ -109,9 +109,9 @@ u8 * ieee80211_wep_add_iv(struct ieee80211_local *local, } -void ieee80211_wep_remove_iv(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_key *key) +static void ieee80211_wep_remove_iv(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_key *key) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 fc; @@ -138,9 +138,7 @@ void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, *icv = cpu_to_le32(~crc32_le(~0, data, data_len)); crypto_blkcipher_setkey(tfm, rc4key, klen); - sg.page = virt_to_page(data); - sg.offset = offset_in_page(data); - sg.length = data_len + WEP_ICV_LEN; + sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length); } @@ -159,10 +157,10 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, u8 *rc4key, *iv; size_t len; - if (!key || key->alg != ALG_WEP) + if (!key || key->conf.alg != ALG_WEP) return -1; - klen = 3 + key->keylen; + klen = 3 + key->conf.keylen; rc4key = kmalloc(klen, GFP_ATOMIC); if (!rc4key) return -1; @@ -179,7 +177,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, memcpy(rc4key, iv, 3); /* Copy rest of the WEP key (the secret part) */ - memcpy(rc4key + 3, key->key, key->keylen); + memcpy(rc4key + 3, key->conf.key, key->conf.keylen); /* Add room for ICV */ skb_put(skb, WEP_ICV_LEN); @@ -204,9 +202,7 @@ int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, __le32 crc; crypto_blkcipher_setkey(tfm, rc4key, klen); - sg.page = virt_to_page(data); - sg.offset = offset_in_page(data); - sg.length = data_len + WEP_ICV_LEN; + sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length); crc = cpu_to_le32(~crc32_le(~0, data, data_len)); @@ -251,10 +247,10 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, keyidx = skb->data[hdrlen + 3] >> 6; - if (!key || keyidx != key->keyidx || key->alg != ALG_WEP) + if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) return -1; - klen = 3 + key->keylen; + klen = 3 + key->conf.keylen; rc4key = kmalloc(klen, GFP_ATOMIC); if (!rc4key) @@ -264,7 +260,7 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, memcpy(rc4key, skb->data + hdrlen, 3); /* Copy rest of the WEP key (the secret part) */ - memcpy(rc4key + 3, key->key, key->keylen); + memcpy(rc4key + 3, key->conf.key, key->conf.keylen); if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, skb->data + hdrlen + WEP_IV_LEN, @@ -286,43 +282,99 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, } -int ieee80211_wep_get_keyidx(struct sk_buff *skb) +u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 fc; int hdrlen; + u8 *ivpos; + u32 iv; fc = le16_to_cpu(hdr->frame_control); if (!(fc & IEEE80211_FCTL_PROTECTED)) - return -1; + return NULL; hdrlen = ieee80211_get_hdrlen(fc); + ivpos = skb->data + hdrlen; + iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; - if (skb->len < 8 + hdrlen) - return -1; + if (ieee80211_wep_weak_iv(iv, key->conf.keylen)) + return ivpos; - return skb->data[hdrlen + 3] >> 6; + return NULL; } +ieee80211_txrx_result +ieee80211_crypto_wep_decrypt(struct ieee80211_txrx_data *rx) +{ + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) + return TXRX_CONTINUE; + + if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) { + if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX WEP frame, decrypt " + "failed\n", rx->dev->name); + return TXRX_DROP; + } + } else if (!(rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) { + ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); + /* remove ICV */ + skb_trim(rx->skb, rx->skb->len - 4); + } -u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) + return TXRX_CONTINUE; +} + +static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { + if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) + return -1; + } else { + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; + if (tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) { + if (!ieee80211_wep_add_iv(tx->local, skb, tx->key)) + return -1; + } + } + return 0; +} + +ieee80211_txrx_result +ieee80211_crypto_wep_encrypt(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; u16 fc; - int hdrlen; - u8 *ivpos; - u32 iv; fc = le16_to_cpu(hdr->frame_control); - if (!(fc & IEEE80211_FCTL_PROTECTED)) - return NULL; - hdrlen = ieee80211_get_hdrlen(fc); - ivpos = skb->data + hdrlen; - iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; + if (((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) + return TXRX_CONTINUE; - if (ieee80211_wep_weak_iv(iv, key->keylen)) - return ivpos; + tx->u.tx.control->iv_len = WEP_IV_LEN; + tx->u.tx.control->icv_len = WEP_ICV_LEN; + ieee80211_tx_set_iswep(tx); - return NULL; + if (wep_encrypt_skb(tx, tx->skb) < 0) { + I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); + return TXRX_DROP; + } + + if (tx->u.tx.extra_frag) { + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + if (wep_encrypt_skb(tx, tx->u.tx.extra_frag[i]) < 0) { + I802_DEBUG_INC(tx->local-> + tx_handlers_drop_wep); + return TXRX_DROP; + } + } + } + + return TXRX_CONTINUE; } diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index bfe29e8e10a..785fbb4e0dd 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -18,14 +18,6 @@ int ieee80211_wep_init(struct ieee80211_local *local); void ieee80211_wep_free(struct ieee80211_local *local); -void ieee80211_wep_get_iv(struct ieee80211_local *local, - struct ieee80211_key *key, u8 *iv); -u8 * ieee80211_wep_add_iv(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_key *key); -void ieee80211_wep_remove_iv(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_key *key); void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, size_t klen, u8 *data, size_t data_len); int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, @@ -34,7 +26,11 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); -int ieee80211_wep_get_keyidx(struct sk_buff *skb); u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); +ieee80211_txrx_result +ieee80211_crypto_wep_decrypt(struct ieee80211_txrx_data *rx); +ieee80211_txrx_result +ieee80211_crypto_wep_encrypt(struct ieee80211_txrx_data *tx); + #endif /* WEP_H */ diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 7ab82b376e1..5b8a157975a 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -18,70 +18,6 @@ #include "ieee80211_i.h" #include "wme.h" -static inline int WLAN_FC_IS_QOS_DATA(u16 fc) -{ - return (fc & 0x8C) == 0x88; -} - - -ieee80211_txrx_result -ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx) -{ - u8 *data = rx->skb->data; - int tid; - - /* does the frame have a qos control field? */ - if (WLAN_FC_IS_QOS_DATA(rx->fc)) { - u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; - /* frame has qos control */ - tid = qc[0] & QOS_CONTROL_TID_MASK; - } else { - if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { - /* Separate TID for management frames */ - tid = NUM_RX_DATA_QUEUES - 1; - } else { - /* no qos control present */ - tid = 0; /* 802.1d - Best Effort */ - } - } -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - I802_DEBUG_INC(rx->local->wme_rx_queue[tid]); - if (rx->sta) { - I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]); - } -#endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ - - rx->u.rx.queue = tid; - /* Set skb->priority to 1d tag if highest order bit of TID is not set. - * For now, set skb->priority to 0 for other cases. */ - rx->skb->priority = (tid > 7) ? 0 : tid; - - return TXRX_CONTINUE; -} - - -ieee80211_txrx_result -ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx) -{ - u16 fc = rx->fc; - u8 *data = rx->skb->data; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data; - - if (!WLAN_FC_IS_QOS_DATA(fc)) - return TXRX_CONTINUE; - - /* remove the qos control field, update frame type and meta-data */ - memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2); - hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2); - /* change frame type to non QOS */ - rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA; - hdr->frame_control = cpu_to_le16(fc); - - return TXRX_CONTINUE; -} - - -#ifdef CONFIG_NET_SCHED /* maximum number of hardware queues we support. */ #define TC_80211_MAX_QUEUES 8 @@ -158,8 +94,6 @@ static inline int wme_downgrade_ac(struct sk_buff *skb) static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) { struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data = - (struct ieee80211_tx_packet_data *) skb->cb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; unsigned short fc = le16_to_cpu(hdr->frame_control); int qos; @@ -172,12 +106,8 @@ static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) return IEEE80211_TX_QUEUE_DATA0; } - if (unlikely(pkt_data->mgmt_iface)) { - /* Data frames from hostapd (mainly, EAPOL) use AC_VO - * and they will include QoS control fields if - * the target STA is using WME. */ - skb->priority = 7; - return ieee802_1d_to_ac[skb->priority]; + if (0 /* injected */) { + /* use AC from radiotap */ } /* is this a QoS frame? */ @@ -189,14 +119,13 @@ static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) } /* use the data classifier to determine what 802.1d tag the - * data frame has */ + * data frame has */ skb->priority = classify_1d(skb, qd); - /* incase we are a client verify acm is not set for this ac */ + /* in case we are a client verify acm is not set for this ac */ while (unlikely(local->wmm_acm & BIT(skb->priority))) { if (wme_downgrade_ac(skb)) { - /* No AC with lower priority has acm=0, - * drop packet. */ + /* No AC with lower priority has acm=0, drop packet. */ return -1; } } @@ -217,7 +146,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd) struct Qdisc *qdisc; int err, queue; - if (pkt_data->requeue) { + if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) { skb_queue_tail(&q->requeued[pkt_data->queue], skb); qd->q.qlen++; return 0; @@ -675,4 +604,3 @@ void ieee80211_wme_unregister(void) { unregister_qdisc(&wme_qdisc_ops); } -#endif /* CONFIG_NET_SCHED */ diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index f0bff10f0e0..76c713a6450 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -24,11 +24,10 @@ #define QOS_CONTROL_TAG1D_MASK 0x07 -ieee80211_txrx_result -ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx); - -ieee80211_txrx_result -ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx); +static inline int WLAN_FC_IS_QOS_DATA(u16 fc) +{ + return (fc & 0x8C) == 0x88; +} #ifdef CONFIG_NET_SCHED void ieee80211_install_qdisc(struct net_device *dev); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 783af32c691..6695efba57e 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -11,10 +11,8 @@ #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/compiler.h> -#include <net/iw_handler.h> - #include <net/mac80211.h> -#include "ieee80211_common.h" + #include "ieee80211_i.h" #include "michael.h" #include "tkip.h" @@ -84,16 +82,16 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_txrx_data *tx) fc = tx->fc; - if (!tx->key || tx->key->alg != ALG_TKIP || skb->len < 24 || + if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || !WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len)) return TXRX_DROP; - if (!tx->key->force_sw_encrypt && - !tx->fragmented && - !(tx->local->hw.flags & IEEE80211_HW_TKIP_INCLUDE_MMIC) && + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->flags & IEEE80211_TXRXD_FRAGMENTED) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) && !wpa_test) { /* hwaccel - with no need for preallocated room for Michael MIC */ @@ -116,8 +114,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_txrx_data *tx) #else authenticator = 1; #endif - key = &tx->key->key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_RX_MIC_KEY]; + key = &tx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY : + ALG_TKIP_TEMP_AUTH_RX_MIC_KEY]; mic = skb_put(skb, MICHAEL_MIC_LEN); michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); @@ -134,31 +132,20 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_txrx_data *rx) u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; int authenticator = 1, wpa_test = 0; + DECLARE_MAC_BUF(mac); fc = rx->fc; - /* If device handles decryption totally, skip this check */ - if ((rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) || - (rx->local->hw.flags & IEEE80211_HW_DEVICE_STRIPS_MIC)) + /* + * No way to verify the MIC if the hardware stripped it + */ + if (rx->u.rx.status->flag & RX_FLAG_MMIC_STRIPPED) return TXRX_CONTINUE; - if (!rx->key || rx->key->alg != ALG_TKIP || + if (!rx->key || rx->key->conf.alg != ALG_TKIP || !(rx->fc & IEEE80211_FCTL_PROTECTED) || !WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; - if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !rx->key->force_sw_encrypt) { - if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { - if (skb->len < MICHAEL_MIC_LEN) - return TXRX_DROP; - } - /* Need to verify Michael MIC sometimes in software even when - * hwaccel is used. Atheros ar5212: fragmented frames and QoS - * frames. */ - if (!rx->fragmented && !wpa_test) - goto remove_mic; - } - if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len) || data_len < MICHAEL_MIC_LEN) return TXRX_DROP; @@ -170,49 +157,28 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_txrx_data *rx) #else authenticator = 1; #endif - key = &rx->key->key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_TX_MIC_KEY]; + key = &rx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY : + ALG_TKIP_TEMP_AUTH_TX_MIC_KEY]; michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { - if (!rx->u.rx.ra_match) + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) return TXRX_DROP; printk(KERN_DEBUG "%s: invalid Michael MIC in data frame from " - MAC_FMT "\n", rx->dev->name, MAC_ARG(sa)); - - do { - struct ieee80211_hdr *hdr; - union iwreq_data wrqu; - char *buf = kmalloc(128, GFP_ATOMIC); - if (!buf) - break; - - /* TODO: needed parameters: count, key type, TSC */ - hdr = (struct ieee80211_hdr *) skb->data; - sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" - "keyid=%d %scast addr=" MAC_FMT ")", - rx->key->keyidx, - hdr->addr1[0] & 0x01 ? "broad" : "uni", - MAC_ARG(hdr->addr2)); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = strlen(buf); - wireless_send_event(rx->dev, IWEVCUSTOM, &wrqu, buf); - kfree(buf); - } while (0); - - if (!rx->local->apdev) - return TXRX_DROP; - - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_michael_mic_failure); + "%s\n", rx->dev->name, print_mac(mac, sa)); - return TXRX_QUEUED; + mac80211_ev_michael_mic_failure(rx->dev, rx->key->conf.keyidx, + (void *) skb->data); + return TXRX_DROP; } - remove_mic: /* remove Michael MIC from payload */ skb_trim(skb, skb->len - MICHAEL_MIC_LEN); + /* update IV in key information to be able to detect replays */ + rx->key->u.tkip.iv32_rx[rx->u.rx.queue] = rx->u.rx.tkip_iv32; + rx->key->u.tkip.iv16_rx[rx->u.rx.queue] = rx->u.rx.tkip_iv16; + return TXRX_CONTINUE; } @@ -230,7 +196,11 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx, hdrlen = ieee80211_get_hdrlen(fc); len = skb->len - hdrlen; - tailneed = !tx->key->force_sw_encrypt ? 0 : TKIP_ICV_LEN; + if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + tailneed = 0; + else + tailneed = TKIP_ICV_LEN; + if ((skb_headroom(skb) < TKIP_IV_LEN || skb_tailroom(skb) < tailneed)) { I802_DEBUG_INC(tx->local->tx_expand_skb_head); @@ -248,8 +218,7 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx, if (key->u.tkip.iv16 == 0) key->u.tkip.iv32++; - if (!tx->key->force_sw_encrypt) { - u32 flags = tx->local->hw.flags; + if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { hdr = (struct ieee80211_hdr *)skb->data; /* hwaccel - with preallocated room for IV */ @@ -259,23 +228,7 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx, 0x7f), (u8) key->u.tkip.iv16); - if (flags & IEEE80211_HW_TKIP_REQ_PHASE2_KEY) - ieee80211_tkip_gen_rc4key(key, hdr->addr2, - tx->u.tx.control->tkip_key); - else if (flags & IEEE80211_HW_TKIP_REQ_PHASE1_KEY) { - if (key->u.tkip.iv16 == 0 || - !key->u.tkip.tx_initialized) { - ieee80211_tkip_gen_phase1key(key, hdr->addr2, - (u16 *)tx->u.tx.control->tkip_key); - key->u.tkip.tx_initialized = 1; - tx->u.tx.control->flags |= - IEEE80211_TXCTL_TKIP_NEW_PHASE1_KEY; - } else - tx->u.tx.control->flags &= - ~IEEE80211_TXCTL_TKIP_NEW_PHASE1_KEY; - } - - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return 0; } @@ -290,28 +243,27 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx, ieee80211_txrx_result -ieee80211_tx_h_tkip_encrypt(struct ieee80211_txrx_data *tx) +ieee80211_crypto_tkip_encrypt(struct ieee80211_txrx_data *tx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; u16 fc; - struct ieee80211_key *key = tx->key; struct sk_buff *skb = tx->skb; int wpa_test = 0, test = 0; fc = le16_to_cpu(hdr->frame_control); - if (!key || key->alg != ALG_TKIP || !WLAN_FC_DATA_PRESENT(fc)) + if (!WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; tx->u.tx.control->icv_len = TKIP_ICV_LEN; tx->u.tx.control->iv_len = TKIP_IV_LEN; ieee80211_tx_set_iswep(tx); - if (!tx->key->force_sw_encrypt && - !(tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) && !wpa_test) { /* hwaccel - with no need for preallocated room for IV/ICV */ - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return TXRX_CONTINUE; } @@ -332,30 +284,31 @@ ieee80211_tx_h_tkip_encrypt(struct ieee80211_txrx_data *tx) ieee80211_txrx_result -ieee80211_rx_h_tkip_decrypt(struct ieee80211_txrx_data *rx) +ieee80211_crypto_tkip_decrypt(struct ieee80211_txrx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; u16 fc; int hdrlen, res, hwaccel = 0, wpa_test = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; + DECLARE_MAC_BUF(mac); fc = le16_to_cpu(hdr->frame_control); hdrlen = ieee80211_get_hdrlen(fc); - if (!rx->key || rx->key->alg != ALG_TKIP || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) return TXRX_CONTINUE; if (!rx->sta || skb->len - hdrlen < 12) return TXRX_DROP; - if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !rx->key->force_sw_encrypt) { - if (!(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) { - /* Hardware takes care of all processing, including - * replay protection, so no need to continue here. */ + if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED) { + if (rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED) { + /* + * Hardware took care of all processing, including + * replay protection, and stripped the ICV/IV so + * we cannot do any checks here. + */ return TXRX_CONTINUE; } @@ -366,11 +319,13 @@ ieee80211_rx_h_tkip_decrypt(struct ieee80211_txrx_data *rx) res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, key, skb->data + hdrlen, skb->len - hdrlen, rx->sta->addr, - hwaccel, rx->u.rx.queue); + hwaccel, rx->u.rx.queue, + &rx->u.rx.tkip_iv32, + &rx->u.rx.tkip_iv16); if (res != TKIP_DECRYPT_OK || wpa_test) { printk(KERN_DEBUG "%s: TKIP decrypt failed for RX frame from " - MAC_FMT " (res=%d)\n", - rx->dev->name, MAC_ARG(rx->sta->addr), res); + "%s (res=%d)\n", + rx->dev->name, print_mac(mac, rx->sta->addr), res); return TXRX_DROP; } @@ -496,7 +451,10 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx, hdrlen = ieee80211_get_hdrlen(fc); len = skb->len - hdrlen; - tailneed = !key->force_sw_encrypt ? 0 : CCMP_MIC_LEN; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + tailneed = 0; + else + tailneed = CCMP_MIC_LEN; if ((skb_headroom(skb) < CCMP_HDR_LEN || skb_tailroom(skb) < tailneed)) { @@ -520,11 +478,11 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx, break; } - ccmp_pn2hdr(pos, pn, key->keyidx); + ccmp_pn2hdr(pos, pn, key->conf.keyidx); - if (!key->force_sw_encrypt) { + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { /* hwaccel - with preallocated room for CCMP header */ - tx->u.tx.control->key_idx = key->hw_key_idx; + tx->u.tx.control->key_idx = key->conf.hw_key_idx; return 0; } @@ -538,28 +496,27 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx, ieee80211_txrx_result -ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx) +ieee80211_crypto_ccmp_encrypt(struct ieee80211_txrx_data *tx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - struct ieee80211_key *key = tx->key; u16 fc; struct sk_buff *skb = tx->skb; int test = 0; fc = le16_to_cpu(hdr->frame_control); - if (!key || key->alg != ALG_CCMP || !WLAN_FC_DATA_PRESENT(fc)) + if (!WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; tx->u.tx.control->icv_len = CCMP_MIC_LEN; tx->u.tx.control->iv_len = CCMP_HDR_LEN; ieee80211_tx_set_iswep(tx); - if (!tx->key->force_sw_encrypt && - !(tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) { + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { /* hwaccel - with no need for preallocated room for CCMP " * header or MIC fields */ - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return TXRX_CONTINUE; } @@ -568,7 +525,6 @@ ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx) if (tx->u.tx.extra_frag) { int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { if (ccmp_encrypt_skb(tx, tx->u.tx.extra_frag[i], test) < 0) @@ -581,7 +537,7 @@ ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx) ieee80211_txrx_result -ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) +ieee80211_crypto_ccmp_decrypt(struct ieee80211_txrx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; u16 fc; @@ -590,13 +546,12 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) struct sk_buff *skb = rx->skb; u8 pn[CCMP_PN_LEN]; int data_len; + DECLARE_MAC_BUF(mac); fc = le16_to_cpu(hdr->frame_control); hdrlen = ieee80211_get_hdrlen(fc); - if (!key || key->alg != ALG_CCMP || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) return TXRX_CONTINUE; data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; @@ -604,8 +559,7 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) return TXRX_DROP; if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !key->force_sw_encrypt && - !(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) + (rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) return TXRX_CONTINUE; (void) ccmp_hdr2pn(pn, skb->data + hdrlen); @@ -613,10 +567,11 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) if (memcmp(pn, key->u.ccmp.rx_pn[rx->u.rx.queue], CCMP_PN_LEN) <= 0) { #ifdef CONFIG_MAC80211_DEBUG u8 *ppn = key->u.ccmp.rx_pn[rx->u.rx.queue]; + printk(KERN_DEBUG "%s: CCMP replay detected for RX frame from " - MAC_FMT " (RX PN %02x%02x%02x%02x%02x%02x <= prev. PN " + "%s (RX PN %02x%02x%02x%02x%02x%02x <= prev. PN " "%02x%02x%02x%02x%02x%02x)\n", rx->dev->name, - MAC_ARG(rx->sta->addr), + print_mac(mac, rx->sta->addr), pn[0], pn[1], pn[2], pn[3], pn[4], pn[5], ppn[0], ppn[1], ppn[2], ppn[3], ppn[4], ppn[5]); #endif /* CONFIG_MAC80211_DEBUG */ @@ -624,10 +579,8 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) return TXRX_DROP; } - if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !key->force_sw_encrypt) { - /* hwaccel has already decrypted frame and verified MIC */ - } else { + if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ u8 *scratch, *b_0, *aad; scratch = key->u.ccmp.rx_crypto_buf; @@ -642,8 +595,8 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) skb->data + skb->len - CCMP_MIC_LEN, skb->data + hdrlen + CCMP_HDR_LEN)) { printk(KERN_DEBUG "%s: CCMP decrypt failed for RX " - "frame from " MAC_FMT "\n", rx->dev->name, - MAC_ARG(rx->sta->addr)); + "frame from %s\n", rx->dev->name, + print_mac(mac, rx->sta->addr)); return TXRX_DROP; } } @@ -657,4 +610,3 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx) return TXRX_CONTINUE; } - diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index da3b9594f9c..49d80cf0cd7 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -19,13 +19,13 @@ ieee80211_txrx_result ieee80211_rx_h_michael_mic_verify(struct ieee80211_txrx_data *rx); ieee80211_txrx_result -ieee80211_tx_h_tkip_encrypt(struct ieee80211_txrx_data *tx); +ieee80211_crypto_tkip_encrypt(struct ieee80211_txrx_data *tx); ieee80211_txrx_result -ieee80211_rx_h_tkip_decrypt(struct ieee80211_txrx_data *rx); +ieee80211_crypto_tkip_decrypt(struct ieee80211_txrx_data *rx); ieee80211_txrx_result -ieee80211_tx_h_ccmp_encrypt(struct ieee80211_txrx_data *tx); +ieee80211_crypto_ccmp_encrypt(struct ieee80211_txrx_data *tx); ieee80211_txrx_result -ieee80211_rx_h_ccmp_decrypt(struct ieee80211_txrx_data *rx); +ieee80211_crypto_ccmp_decrypt(struct ieee80211_txrx_data *rx); #endif /* WPA_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3599770a247..d7a600a5720 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -665,6 +665,20 @@ config NETFILTER_XT_MATCH_TCPMSS To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_TIME + tristate '"time" match support' + depends on NETFILTER_XTABLES + ---help--- + This option adds a "time" match, which allows you to match based on + the packet arrival time (at the machine which netfilter is running) + on) or departure time/date (for locally generated packets). + + If you say Y here, try `iptables -m time --help` for + more information. + + If you want to compile it as a module, say M here. + If unsure, say N. + config NETFILTER_XT_MATCH_U32 tristate '"u32" match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0c054bf2797..93c58f97383 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -73,6 +73,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o +obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 381a77cf0c9..bed9ba01e8e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -19,6 +19,7 @@ #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/sock.h> #include "nf_internals.h" @@ -116,7 +117,7 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, - struct sk_buff **skb, + struct sk_buff *skb, int hook, const struct net_device *indev, const struct net_device *outdev, @@ -159,7 +160,7 @@ unsigned int nf_iterate(struct list_head *head, /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -174,17 +175,17 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, elem = &nf_hooks[pf][hook]; next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, + verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; goto unlock; } else if (verdict == NF_DROP) { - kfree_skb(*pskb); + kfree_skb(skb); ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn, + if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } @@ -195,34 +196,24 @@ unlock: EXPORT_SYMBOL(nf_hook_slow); -int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) +int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) { - struct sk_buff *nskb; - - if (writable_len > (*pskb)->len) + if (writable_len > skb->len) return 0; /* Not exclusive use of packet? Must copy. */ - if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len)) - goto copy_skb; - if (skb_shared(*pskb)) - goto copy_skb; - - return pskb_may_pull(*pskb, writable_len); - -copy_skb: - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return 0; - BUG_ON(skb_is_nonlinear(nskb)); - - /* Rest of kernel will get very unhappy if we pass it a - suddenly-orphaned skbuff */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - return 1; + if (!skb_cloned(skb)) { + if (writable_len <= skb_headlen(skb)) + return 1; + } else if (skb_clone_writable(skb, writable_len)) + return 1; + + if (writable_len <= skb_headlen(skb)) + writable_len = 0; + else + writable_len -= skb_headlen(skb); + + return !!__pskb_pull_tail(skb, writable_len); } EXPORT_SYMBOL(skb_make_writable); @@ -293,7 +284,7 @@ void __init netfilter_init(void) } #ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", proc_net); + proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net); if (!proc_net_netfilter) panic("cannot create netfilter proc entry"); #endif diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index e42ab230ad8..7b8239c0cd5 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -36,7 +36,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); module_param(ts_algo, charp, 0400); MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); -unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -79,7 +79,7 @@ static struct { }, }; -static int amanda_help(struct sk_buff **pskb, +static int amanda_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -101,25 +101,25 @@ static int amanda_help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - nf_ct_refresh(ct, *pskb, master_timeout * HZ); + nf_ct_refresh(ct, skb, master_timeout * HZ); /* No data? */ dataoff = protoff + sizeof(struct udphdr); - if (dataoff >= (*pskb)->len) { + if (dataoff >= skb->len) { if (net_ratelimit()) - printk("amanda_help: skblen = %u\n", (*pskb)->len); + printk("amanda_help: skblen = %u\n", skb->len); return NF_ACCEPT; } memset(&ts, 0, sizeof(ts)); - start = skb_find_text(*pskb, dataoff, (*pskb)->len, + start = skb_find_text(skb, dataoff, skb->len, search[SEARCH_CONNECT].ts, &ts); if (start == UINT_MAX) goto out; start += dataoff + search[SEARCH_CONNECT].len; memset(&ts, 0, sizeof(ts)); - stop = skb_find_text(*pskb, start, (*pskb)->len, + stop = skb_find_text(skb, start, skb->len, search[SEARCH_NEWLINE].ts, &ts); if (stop == UINT_MAX) goto out; @@ -127,13 +127,13 @@ static int amanda_help(struct sk_buff **pskb, for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { memset(&ts, 0, sizeof(ts)); - off = skb_find_text(*pskb, start, stop, search[i].ts, &ts); + off = skb_find_text(skb, start, stop, search[i].ts, &ts); if (off == UINT_MAX) continue; off += start + search[i].len; len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off); - if (skb_copy_bits(*pskb, off, pbuf, len)) + if (skb_copy_bits(skb, off, pbuf, len)) break; pbuf[len] = '\0'; @@ -153,7 +153,7 @@ static int amanda_help(struct sk_buff **pskb, nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); if (nf_nat_amanda && ct->status & IPS_NAT_MASK) - ret = nf_nat_amanda(pskb, ctinfo, off - dataoff, + ret = nf_nat_amanda(skb, ctinfo, off - dataoff, len, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0fe11889ce1..4d6171bc082 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,7 +63,6 @@ unsigned int nf_ct_log_invalid __read_mostly; HLIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; -static unsigned int nf_conntrack_next_id; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); @@ -287,7 +286,6 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int repl_hash) { - ct->id = ++nf_conntrack_next_id; hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &nf_conntrack_hash[hash]); hlist_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, @@ -309,7 +307,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); /* Confirm a connection given skb; places it in hash table */ int -__nf_conntrack_confirm(struct sk_buff **pskb) +__nf_conntrack_confirm(struct sk_buff *skb) { unsigned int hash, repl_hash; struct nf_conntrack_tuple_hash *h; @@ -318,7 +316,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) struct hlist_node *n; enum ip_conntrack_info ctinfo; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -369,14 +367,14 @@ __nf_conntrack_confirm(struct sk_buff **pskb) write_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, *pskb); + nf_conntrack_event_cache(IPCT_HELPER, skb); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, *pskb); + nf_conntrack_event_cache(IPCT_NATINFO, skb); #endif nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + IPCT_RELATED : IPCT_NEW, skb); return NF_ACCEPT; out: @@ -634,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) +nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -646,14 +644,14 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) int ret; /* Previously seen (loopback or untracked)? Ignore. */ - if ((*pskb)->nfct) { + if (skb->nfct) { NF_CT_STAT_INC_ATOMIC(ignore); return NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ l3proto = __nf_ct_l3proto_find((u_int16_t)pf); - ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb), + ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); @@ -668,13 +666,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL && - (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { + (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { NF_CT_STAT_INC_ATOMIC(error); NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } - ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto, + ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ @@ -688,21 +686,21 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) return NF_DROP; } - NF_CT_ASSERT((*pskb)->nfct); + NF_CT_ASSERT(skb->nfct); - ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); if (ret < 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put((*pskb)->nfct); - (*pskb)->nfct = NULL; + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, *pskb); + nf_conntrack_event_cache(IPCT_STATUS, skb); return ret; } @@ -827,44 +825,41 @@ EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/mutex.h> - /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be * in ip_conntrack_core, since we don't want the protocols to autoload * or depend on ctnetlink */ -int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, +int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + NLA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), &tuple->src.u.tcp.port); - NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + NLA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), &tuple->dst.u.tcp.port); return 0; -nfattr_failure: +nla_put_failure: return -1; } -EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nfattr); +EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), - [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t) +const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, + [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, }; +EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); -int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[], +int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t) { - if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) - return -EINVAL; - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) return -EINVAL; - t->src.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); - t->dst.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + t->src.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_SRC_PORT]); + t->dst.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_DST_PORT]); return 0; } -EXPORT_SYMBOL_GPL(nf_ct_port_nfattr_to_tuple); +EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); #endif /* Used by ipt_REJECT and ip6t_REJECT. */ diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3ac64e25f10..175c8d1a199 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -20,6 +20,7 @@ #include <linux/percpu.h> #include <linux/kernel.h> #include <linux/jhash.h> +#include <net/net_namespace.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -40,7 +41,6 @@ static int nf_ct_expect_hash_rnd_initted __read_mostly; static int nf_ct_expect_vmalloc; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; -static unsigned int nf_ct_expect_next_id; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) @@ -301,7 +301,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; add_timer(&exp->timeout); - exp->id = ++nf_ct_expect_next_id; atomic_inc(&exp->use); NF_CT_STAT_INC(expect_create); } @@ -473,22 +472,8 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct ct_expect_iter_state *st; - int ret; - - st = kzalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); - if (!st) - return -ENOMEM; - ret = seq_open(file, &exp_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - return ret; -out_free: - kfree(st); - return ret; + return seq_open_private(file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); } static const struct file_operations exp_file_ops = { @@ -505,7 +490,7 @@ static int __init exp_proc_init(void) #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc; - proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops); + proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops); if (!proc) return -ENOMEM; #endif /* CONFIG_PROC_FS */ @@ -515,7 +500,7 @@ static int __init exp_proc_init(void) static void exp_proc_remove(void) { #ifdef CONFIG_PROC_FS - proc_net_remove("nf_conntrack_expect"); + proc_net_remove(&init_net, "nf_conntrack_expect"); #endif /* CONFIG_PROC_FS */ } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index c763ee74ea0..6df259067f7 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -43,7 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400); static int loose; module_param(loose, bool, 0600); -unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, @@ -344,7 +344,7 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, } } -static int help(struct sk_buff **pskb, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -371,21 +371,21 @@ static int help(struct sk_buff **pskb, return NF_ACCEPT; } - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; dataoff = protoff + th->doff * 4; /* No data? */ - if (dataoff >= (*pskb)->len) { + if (dataoff >= skb->len) { pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, - (*pskb)->len); + skb->len); return NF_ACCEPT; } - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; spin_lock_bh(&nf_ftp_lock); - fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer); + fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer); BUG_ON(fb_ptr == NULL); ends_in_nl = (fb_ptr[datalen - 1] == '\n'); @@ -491,7 +491,7 @@ static int help(struct sk_buff **pskb, * (possibly changed) expectation itself. */ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); if (nf_nat_ftp && ct->status & IPS_NAT_MASK) - ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype, + ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ @@ -508,7 +508,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. */ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info, dir, *pskb); + update_nl_seq(seq, ct_ftp_info, dir, skb); out: spin_unlock_bh(&nf_ftp_lock); return ret; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a8a9dfbe7a6..f23fd9598e1 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -47,27 +47,27 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " "(determined by routing information)"); /* Hooks for NAT */ -int (*set_h245_addr_hook) (struct sk_buff **pskb, +int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) __read_mostly; -int (*set_h225_addr_hook) (struct sk_buff **pskb, +int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) __read_mostly; -int (*set_sig_addr_hook) (struct sk_buff **pskb, +int (*set_sig_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; -int (*set_ras_addr_hook) (struct sk_buff **pskb, +int (*set_ras_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; -int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, +int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -75,25 +75,25 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, __be16 port, __be16 rtp_port, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp) __read_mostly; -int (*nat_t120_hook) (struct sk_buff **pskb, +int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_h245_hook) (struct sk_buff **pskb, +int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_callforwarding_hook) (struct sk_buff **pskb, +int (*nat_callforwarding_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_q931_hook) (struct sk_buff **pskb, +int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, @@ -108,7 +108,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[]; static struct nf_conntrack_helper nf_conntrack_helper_ras[]; /****************************************************************************/ -static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, +static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int *datalen, int *dataoff) { @@ -122,7 +122,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, int tpktoff; /* Get TCP header */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return 0; @@ -130,13 +130,13 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, tcpdataoff = protoff + th->doff * 4; /* Get TCP data length */ - tcpdatalen = (*pskb)->len - tcpdataoff; + tcpdatalen = skb->len - tcpdataoff; if (tcpdatalen <= 0) /* No TCP data */ goto clear_out; if (*data == NULL) { /* first TPKT */ /* Get first TPKT pointer */ - tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen, + tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, h323_buffer); BUG_ON(tpkt == NULL); @@ -248,7 +248,7 @@ static int get_h245_addr(struct nf_conn *ct, unsigned char *data, } /****************************************************************************/ -static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, +static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr) @@ -297,7 +297,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ if (nf_ct_expect_related(rtp_exp) == 0) { @@ -321,7 +321,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int expect_t120(struct sk_buff **pskb, +static int expect_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -355,7 +355,7 @@ static int expect_t120(struct sk_buff **pskb, (nat_t120 = rcu_dereference(nat_t120_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr, + ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -371,7 +371,7 @@ static int expect_t120(struct sk_buff **pskb, } /****************************************************************************/ -static int process_h245_channel(struct sk_buff **pskb, +static int process_h245_channel(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -381,7 +381,7 @@ static int process_h245_channel(struct sk_buff **pskb, if (channel->options & eH2250LogicalChannelParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &channel->mediaChannel); if (ret < 0) return -1; @@ -390,7 +390,7 @@ static int process_h245_channel(struct sk_buff **pskb, if (channel-> options & eH2250LogicalChannelParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &channel->mediaControlChannel); if (ret < 0) return -1; @@ -400,7 +400,7 @@ static int process_h245_channel(struct sk_buff **pskb, } /****************************************************************************/ -static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, +static int process_olc(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, OpenLogicalChannel *olc) @@ -412,7 +412,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) { - ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, &olc-> forwardLogicalChannelParameters. multiplexParameters. @@ -430,7 +430,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { ret = - process_h245_channel(pskb, ct, ctinfo, data, dataoff, + process_h245_channel(skb, ct, ctinfo, data, dataoff, &olc-> reverseLogicalChannelParameters. multiplexParameters. @@ -448,7 +448,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, t120.choice == eDataProtocolCapability_separateLANStack && olc->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(pskb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, data, dataoff, &olc->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -459,7 +459,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, +static int process_olca(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, OpenLogicalChannelAck *olca) @@ -477,7 +477,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, choice == eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { - ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, &olca-> reverseLogicalChannelParameters. multiplexParameters. @@ -496,7 +496,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &ack->mediaChannel); if (ret < 0) return -1; @@ -505,7 +505,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &ack->mediaControlChannel); if (ret < 0) return -1; @@ -515,7 +515,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if ((olca->options & eOpenLogicalChannelAck_separateStack) && olca->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(pskb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, data, dataoff, &olca->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -526,7 +526,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int process_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, MultimediaSystemControlMessage *mscm) @@ -535,7 +535,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, case eMultimediaSystemControlMessage_request: if (mscm->request.choice == eRequestMessage_openLogicalChannel) { - return process_olc(pskb, ct, ctinfo, data, dataoff, + return process_olc(skb, ct, ctinfo, data, dataoff, &mscm->request.openLogicalChannel); } pr_debug("nf_ct_h323: H.245 Request %d\n", @@ -544,7 +544,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, case eMultimediaSystemControlMessage_response: if (mscm->response.choice == eResponseMessage_openLogicalChannelAck) { - return process_olca(pskb, ct, ctinfo, data, dataoff, + return process_olca(skb, ct, ctinfo, data, dataoff, &mscm->response. openLogicalChannelAck); } @@ -560,7 +560,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int h245_help(struct sk_buff **pskb, unsigned int protoff, +static int h245_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static MultimediaSystemControlMessage mscm; @@ -574,12 +574,12 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ - while (get_tpkt_data(pskb, protoff, ct, ctinfo, + while (get_tpkt_data(skb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { pr_debug("nf_ct_h245: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); @@ -596,7 +596,7 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, } /* Process H.245 signal */ - if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0) + if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0) goto drop; } @@ -654,7 +654,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, } /****************************************************************************/ -static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr) @@ -687,7 +687,7 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, (nat_h245 = rcu_dereference(nat_h245_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr, + ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -758,7 +758,7 @@ static int callforward_do_filter(union nf_conntrack_address *src, } /****************************************************************************/ -static int expect_callforwarding(struct sk_buff **pskb, +static int expect_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -798,7 +798,7 @@ static int expect_callforwarding(struct sk_buff **pskb, (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff, + ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -814,7 +814,7 @@ static int expect_callforwarding(struct sk_buff **pskb, } /****************************************************************************/ -static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, +static int process_setup(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Setup_UUIE *setup) @@ -829,7 +829,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Setup\n"); if (setup->options & eSetup_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &setup->h245Address); if (ret < 0) return -1; @@ -846,7 +846,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, NIP6(*(struct in6_addr *)&addr), ntohs(port), NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); - ret = set_h225_addr(pskb, data, dataoff, + ret = set_h225_addr(skb, data, dataoff, &setup->destCallSignalAddress, &ct->tuplehash[!dir].tuple.src.u3, ct->tuplehash[!dir].tuple.src.u.tcp.port); @@ -864,7 +864,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, NIP6(*(struct in6_addr *)&addr), ntohs(port), NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); - ret = set_h225_addr(pskb, data, dataoff, + ret = set_h225_addr(skb, data, dataoff, &setup->sourceCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple.dst.u.tcp.port); @@ -874,7 +874,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, if (setup->options & eSetup_UUIE_fastStart) { for (i = 0; i < setup->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &setup->fastStart.item[i]); if (ret < 0) return -1; @@ -885,7 +885,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_callproceeding(struct sk_buff **pskb, +static int process_callproceeding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -897,7 +897,7 @@ static int process_callproceeding(struct sk_buff **pskb, pr_debug("nf_ct_q931: CallProceeding\n"); if (callproc->options & eCallProceeding_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &callproc->h245Address); if (ret < 0) return -1; @@ -905,7 +905,7 @@ static int process_callproceeding(struct sk_buff **pskb, if (callproc->options & eCallProceeding_UUIE_fastStart) { for (i = 0; i < callproc->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &callproc->fastStart.item[i]); if (ret < 0) return -1; @@ -916,7 +916,7 @@ static int process_callproceeding(struct sk_buff **pskb, } /****************************************************************************/ -static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, +static int process_connect(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Connect_UUIE *connect) @@ -927,7 +927,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Connect\n"); if (connect->options & eConnect_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &connect->h245Address); if (ret < 0) return -1; @@ -935,7 +935,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, if (connect->options & eConnect_UUIE_fastStart) { for (i = 0; i < connect->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &connect->fastStart.item[i]); if (ret < 0) return -1; @@ -946,7 +946,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, +static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Alerting_UUIE *alert) @@ -957,7 +957,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Alerting\n"); if (alert->options & eAlerting_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &alert->h245Address); if (ret < 0) return -1; @@ -965,7 +965,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, if (alert->options & eAlerting_UUIE_fastStart) { for (i = 0; i < alert->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &alert->fastStart.item[i]); if (ret < 0) return -1; @@ -976,7 +976,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, +static int process_facility(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Facility_UUIE *facility) @@ -988,7 +988,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, if (facility->reason.choice == eFacilityReason_callForwarded) { if (facility->options & eFacility_UUIE_alternativeAddress) - return expect_callforwarding(pskb, ct, ctinfo, data, + return expect_callforwarding(skb, ct, ctinfo, data, dataoff, &facility-> alternativeAddress); @@ -996,7 +996,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, } if (facility->options & eFacility_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &facility->h245Address); if (ret < 0) return -1; @@ -1004,7 +1004,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, if (facility->options & eFacility_UUIE_fastStart) { for (i = 0; i < facility->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &facility->fastStart.item[i]); if (ret < 0) return -1; @@ -1015,7 +1015,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, +static int process_progress(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Progress_UUIE *progress) @@ -1026,7 +1026,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Progress\n"); if (progress->options & eProgress_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &progress->h245Address); if (ret < 0) return -1; @@ -1034,7 +1034,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, if (progress->options & eProgress_UUIE_fastStart) { for (i = 0; i < progress->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &progress->fastStart.item[i]); if (ret < 0) return -1; @@ -1045,7 +1045,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int process_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Q931 *q931) { @@ -1055,28 +1055,28 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, switch (pdu->h323_message_body.choice) { case eH323_UU_PDU_h323_message_body_setup: - ret = process_setup(pskb, ct, ctinfo, data, dataoff, + ret = process_setup(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.setup); break; case eH323_UU_PDU_h323_message_body_callProceeding: - ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff, + ret = process_callproceeding(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body. callProceeding); break; case eH323_UU_PDU_h323_message_body_connect: - ret = process_connect(pskb, ct, ctinfo, data, dataoff, + ret = process_connect(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.connect); break; case eH323_UU_PDU_h323_message_body_alerting: - ret = process_alerting(pskb, ct, ctinfo, data, dataoff, + ret = process_alerting(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.alerting); break; case eH323_UU_PDU_h323_message_body_facility: - ret = process_facility(pskb, ct, ctinfo, data, dataoff, + ret = process_facility(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.facility); break; case eH323_UU_PDU_h323_message_body_progress: - ret = process_progress(pskb, ct, ctinfo, data, dataoff, + ret = process_progress(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.progress); break; default: @@ -1090,7 +1090,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, if (pdu->options & eH323_UU_PDU_h245Control) { for (i = 0; i < pdu->h245Control.count; i++) { - ret = process_h245(pskb, ct, ctinfo, data, dataoff, + ret = process_h245(skb, ct, ctinfo, data, dataoff, &pdu->h245Control.item[i]); if (ret < 0) return -1; @@ -1101,7 +1101,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int q931_help(struct sk_buff **pskb, unsigned int protoff, +static int q931_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static Q931 q931; @@ -1115,12 +1115,12 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ - while (get_tpkt_data(pskb, protoff, ct, ctinfo, + while (get_tpkt_data(skb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { pr_debug("nf_ct_q931: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); @@ -1136,7 +1136,7 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, } /* Process Q.931 signal */ - if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0) + if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0) goto drop; } @@ -1177,20 +1177,20 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { }; /****************************************************************************/ -static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff, +static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, int *datalen) { struct udphdr _uh, *uh; int dataoff; - uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh); + uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh); if (uh == NULL) return NULL; dataoff = protoff + sizeof(_uh); - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NULL; - *datalen = (*pskb)->len - dataoff; - return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer); + *datalen = skb->len - dataoff; + return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } /****************************************************************************/ @@ -1227,7 +1227,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp, } /****************************************************************************/ -static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -1265,7 +1265,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, nat_q931 = rcu_dereference(nat_q931_hook); if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp); + ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1283,7 +1283,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_grq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperRequest *grq) { @@ -1293,13 +1293,13 @@ static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ - return set_ras_addr(pskb, ct, ctinfo, data, + return set_ras_addr(skb, ct, ctinfo, data, &grq->rasAddress, 1); return 0; } /****************************************************************************/ -static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperConfirm *gcf) { @@ -1343,7 +1343,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationRequest *rrq) { @@ -1353,7 +1353,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_ras: RRQ\n"); - ret = expect_q931(pskb, ct, ctinfo, data, + ret = expect_q931(skb, ct, ctinfo, data, rrq->callSignalAddress.item, rrq->callSignalAddress.count); if (ret < 0) @@ -1361,7 +1361,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(pskb, ct, ctinfo, data, + ret = set_ras_addr(skb, ct, ctinfo, data, rrq->rasAddress.item, rrq->rasAddress.count); if (ret < 0) @@ -1378,7 +1378,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationConfirm *rcf) { @@ -1392,7 +1392,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, rcf->callSignalAddress.item, rcf->callSignalAddress.count); if (ret < 0) @@ -1407,7 +1407,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, if (info->timeout > 0) { pr_debug("nf_ct_ras: set RAS connection timeout to " "%u seconds\n", info->timeout); - nf_ct_refresh(ct, *pskb, info->timeout * HZ); + nf_ct_refresh(ct, skb, info->timeout * HZ); /* Set expect timeout */ read_lock_bh(&nf_conntrack_lock); @@ -1427,7 +1427,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, UnregistrationRequest *urq) { @@ -1440,7 +1440,7 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, urq->callSignalAddress.item, urq->callSignalAddress.count); if (ret < 0) @@ -1453,13 +1453,13 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, info->sig_port[!dir] = 0; /* Give it 30 seconds for UCF or URJ */ - nf_ct_refresh(ct, *pskb, 30 * HZ); + nf_ct_refresh(ct, skb, 30 * HZ); return 0; } /****************************************************************************/ -static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionRequest *arq) { @@ -1479,7 +1479,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, port == info->sig_port[dir] && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Answering ARQ */ - return set_h225_addr(pskb, data, 0, + return set_h225_addr(skb, data, 0, &arq->destCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); @@ -1491,7 +1491,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Calling ARQ */ - return set_h225_addr(pskb, data, 0, + return set_h225_addr(skb, data, 0, &arq->srcCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, port); @@ -1501,7 +1501,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_acf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionConfirm *acf) { @@ -1522,7 +1522,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, /* Answering ACF */ set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) - return set_sig_addr(pskb, ct, ctinfo, data, + return set_sig_addr(skb, ct, ctinfo, data, &acf->destCallSignalAddress, 1); return 0; } @@ -1548,7 +1548,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationRequest *lrq) { @@ -1558,13 +1558,13 @@ static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) - return set_ras_addr(pskb, ct, ctinfo, data, + return set_ras_addr(skb, ct, ctinfo, data, &lrq->replyAddress, 1); return 0; } /****************************************************************************/ -static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationConfirm *lcf) { @@ -1603,7 +1603,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, +static int process_irr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, InfoRequestResponse *irr) { @@ -1615,7 +1615,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(pskb, ct, ctinfo, data, + ret = set_ras_addr(skb, ct, ctinfo, data, &irr->rasAddress, 1); if (ret < 0) return -1; @@ -1623,7 +1623,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, irr->callSignalAddress.item, irr->callSignalAddress.count); if (ret < 0) @@ -1634,40 +1634,40 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, +static int process_ras(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RasMessage *ras) { switch (ras->choice) { case eRasMessage_gatekeeperRequest: - return process_grq(pskb, ct, ctinfo, data, + return process_grq(skb, ct, ctinfo, data, &ras->gatekeeperRequest); case eRasMessage_gatekeeperConfirm: - return process_gcf(pskb, ct, ctinfo, data, + return process_gcf(skb, ct, ctinfo, data, &ras->gatekeeperConfirm); case eRasMessage_registrationRequest: - return process_rrq(pskb, ct, ctinfo, data, + return process_rrq(skb, ct, ctinfo, data, &ras->registrationRequest); case eRasMessage_registrationConfirm: - return process_rcf(pskb, ct, ctinfo, data, + return process_rcf(skb, ct, ctinfo, data, &ras->registrationConfirm); case eRasMessage_unregistrationRequest: - return process_urq(pskb, ct, ctinfo, data, + return process_urq(skb, ct, ctinfo, data, &ras->unregistrationRequest); case eRasMessage_admissionRequest: - return process_arq(pskb, ct, ctinfo, data, + return process_arq(skb, ct, ctinfo, data, &ras->admissionRequest); case eRasMessage_admissionConfirm: - return process_acf(pskb, ct, ctinfo, data, + return process_acf(skb, ct, ctinfo, data, &ras->admissionConfirm); case eRasMessage_locationRequest: - return process_lrq(pskb, ct, ctinfo, data, + return process_lrq(skb, ct, ctinfo, data, &ras->locationRequest); case eRasMessage_locationConfirm: - return process_lcf(pskb, ct, ctinfo, data, + return process_lcf(skb, ct, ctinfo, data, &ras->locationConfirm); case eRasMessage_infoRequestResponse: - return process_irr(pskb, ct, ctinfo, data, + return process_irr(skb, ct, ctinfo, data, &ras->infoRequestResponse); default: pr_debug("nf_ct_ras: RAS message %d\n", ras->choice); @@ -1678,7 +1678,7 @@ static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int ras_help(struct sk_buff **pskb, unsigned int protoff, +static int ras_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static RasMessage ras; @@ -1686,12 +1686,12 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, int datalen = 0; int ret; - pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_ras: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Get UDP data */ - data = get_udp_data(pskb, protoff, &datalen); + data = get_udp_data(skb, protoff, &datalen); if (data == NULL) goto accept; pr_debug("nf_ct_ras: RAS message len=%d ", datalen); @@ -1707,7 +1707,7 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, } /* Process RAS message */ - if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0) + if (process_ras(skb, ct, ctinfo, &data, &ras) < 0) goto drop; accept: diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 1562ca97a34..dfaed4ba83c 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -30,7 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300; static char *irc_buffer; static DEFINE_SPINLOCK(irc_buffer_lock); -unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -89,7 +89,7 @@ static int parse_dcc(char *data, char *data_end, u_int32_t *ip, return 0; } -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff; @@ -116,22 +116,22 @@ static int help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; /* Not a full tcp header? */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? */ dataoff = protoff + th->doff*4; - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; spin_lock_bh(&irc_buffer_lock); - ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff, + ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff, irc_buffer); BUG_ON(ib_ptr == NULL); data = ib_ptr; - data_limit = ib_ptr + (*pskb)->len - dataoff; + data_limit = ib_ptr + skb->len - dataoff; /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ @@ -143,7 +143,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, data += 5; /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest)); @@ -193,7 +193,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, nf_nat_irc = rcu_dereference(nf_nat_irc_hook); if (nf_nat_irc && ct->status & IPS_NAT_MASK) - ret = nf_nat_irc(pskb, ctinfo, + ret = nf_nat_irc(skb, ctinfo, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 1d59fabeb5f..9810d81e2a0 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -42,17 +42,17 @@ static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { struct nf_conntrack_expect *exp; - struct iphdr *iph = ip_hdr(*pskb); - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = (struct rtable *)skb->dst; struct in_device *in_dev; __be32 mask = 0; /* we're only interested in locally generated packets */ - if ((*pskb)->sk == NULL) + if (skb->sk == NULL) goto out; if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) goto out; @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, nf_ct_expect_related(exp); nf_ct_expect_put(exp); - nf_ct_refresh(ct, *pskb, timeout * HZ); + nf_ct_refresh(ct, skb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2863e72b409..9be1826e6cd 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@ * (C) 2001 by Jay Schulist <jschlst@samba.org> * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org> * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net> + * (C) 2005-2007 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -54,18 +54,21 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb, struct nf_conntrack_l4proto *l4proto) { int ret = 0; - struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); + struct nlattr *nest_parms; - NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); + nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + NLA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); - if (likely(l4proto->tuple_to_nfattr)) - ret = l4proto->tuple_to_nfattr(skb, tuple); + if (likely(l4proto->tuple_to_nlattr)) + ret = l4proto->tuple_to_nlattr(skb, tuple); - NFA_NEST_END(skb, nest_parms); + nla_nest_end(skb, nest_parms); return ret; -nfattr_failure: +nla_put_failure: return -1; } @@ -75,16 +78,20 @@ ctnetlink_dump_tuples_ip(struct sk_buff *skb, struct nf_conntrack_l3proto *l3proto) { int ret = 0; - struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); + struct nlattr *nest_parms; - if (likely(l3proto->tuple_to_nfattr)) - ret = l3proto->tuple_to_nfattr(skb, tuple); + nest_parms = nla_nest_start(skb, CTA_TUPLE_IP | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; - NFA_NEST_END(skb, nest_parms); + if (likely(l3proto->tuple_to_nlattr)) + ret = l3proto->tuple_to_nlattr(skb, tuple); + + nla_nest_end(skb, nest_parms); return ret; -nfattr_failure: +nla_put_failure: return -1; } @@ -114,10 +121,10 @@ static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { __be32 status = htonl((u_int32_t) ct->status); - NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); + NLA_PUT(skb, CTA_STATUS, sizeof(status), &status); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -132,10 +139,10 @@ ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) else timeout = htonl(timeout_l / HZ); - NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); + NLA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -143,25 +150,27 @@ static inline int ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) { struct nf_conntrack_l4proto *l4proto = nf_ct_l4proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); - struct nfattr *nest_proto; + struct nlattr *nest_proto; int ret; - if (!l4proto->to_nfattr) { + if (!l4proto->to_nlattr) { nf_ct_l4proto_put(l4proto); return 0; } - nest_proto = NFA_NEST(skb, CTA_PROTOINFO); + nest_proto = nla_nest_start(skb, CTA_PROTOINFO | NLA_F_NESTED); + if (!nest_proto) + goto nla_put_failure; - ret = l4proto->to_nfattr(skb, nest_proto, ct); + ret = l4proto->to_nlattr(skb, nest_proto, ct); nf_ct_l4proto_put(l4proto); - NFA_NEST_END(skb, nest_proto); + nla_nest_end(skb, nest_proto); return ret; -nfattr_failure: +nla_put_failure: nf_ct_l4proto_put(l4proto); return -1; } @@ -169,7 +178,7 @@ nfattr_failure: static inline int ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) { - struct nfattr *nest_helper; + struct nlattr *nest_helper; const struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; @@ -181,18 +190,20 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) if (!helper) goto out; - nest_helper = NFA_NEST(skb, CTA_HELP); - NFA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name); + nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED); + if (!nest_helper) + goto nla_put_failure; + NLA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name); - if (helper->to_nfattr) - helper->to_nfattr(skb, ct); + if (helper->to_nlattr) + helper->to_nlattr(skb, ct); - NFA_NEST_END(skb, nest_helper); + nla_nest_end(skb, nest_helper); out: rcu_read_unlock(); return 0; -nfattr_failure: +nla_put_failure: rcu_read_unlock(); return -1; } @@ -203,20 +214,24 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; - struct nfattr *nest_count = NFA_NEST(skb, type); + struct nlattr *nest_count; __be32 tmp; + nest_count = nla_nest_start(skb, type | NLA_F_NESTED); + if (!nest_count) + goto nla_put_failure; + tmp = htonl(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); + NLA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); tmp = htonl(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); + NLA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); - NFA_NEST_END(skb, nest_count); + nla_nest_end(skb, nest_count); return 0; -nfattr_failure: +nla_put_failure: return -1; } #else @@ -229,10 +244,10 @@ ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { __be32 mark = htonl(ct->mark); - NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + NLA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); return 0; -nfattr_failure: +nla_put_failure: return -1; } #else @@ -242,11 +257,11 @@ nfattr_failure: static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - __be32 id = htonl(ct->id); - NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + __be32 id = htonl((unsigned long)ct); + NLA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -255,10 +270,10 @@ ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { __be32 use = htonl(atomic_read(&ct->ct_general.use)); - NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + NLA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -271,7 +286,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nfattr *nest_parms; + struct nlattr *nest_parms; unsigned char *b = skb_tail_pointer(skb); event |= NFNL_SUBSYS_CTNETLINK << 8; @@ -284,15 +299,19 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); - nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || @@ -303,13 +322,13 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: -nfattr_failure: +nla_put_failure: nlmsg_trim(skb, b); return -1; } @@ -320,7 +339,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nfattr *nest_parms; + struct nlattr *nest_parms; struct nf_conn *ct = (struct nf_conn *)ptr; struct sk_buff *skb; unsigned int type; @@ -362,45 +381,49 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); - nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); if (events & IPCT_DESTROY) { if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) - goto nfattr_failure; + goto nla_put_failure; } else { if (ctnetlink_dump_status(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; if (ctnetlink_dump_timeout(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; if (events & IPCT_PROTOINFO && ctnetlink_dump_protoinfo(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; if ((events & IPCT_HELPER || nfct_help(ct)) && ctnetlink_dump_helpinfo(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK if ((events & IPCT_MARK || ct->mark) && ctnetlink_dump_mark(skb, ct) < 0) - goto nfattr_failure; + goto nla_put_failure; #endif if (events & IPCT_COUNTER_FILLING && (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) - goto nfattr_failure; + goto nla_put_failure; } nlh->nlmsg_len = skb->tail - b; @@ -408,7 +431,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, return NOTIFY_DONE; nlmsg_failure: -nfattr_failure: +nla_put_failure: kfree_skb(skb); return NOTIFY_DONE; } @@ -479,49 +502,56 @@ out: } static inline int -ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) +ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) { - struct nfattr *tb[CTA_IP_MAX]; + struct nlattr *tb[CTA_IP_MAX+1]; struct nf_conntrack_l3proto *l3proto; int ret = 0; - nfattr_parse_nested(tb, CTA_IP_MAX, attr); + nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); - if (likely(l3proto->nfattr_to_tuple)) - ret = l3proto->nfattr_to_tuple(tb, tuple); + if (likely(l3proto->nlattr_to_tuple)) { + ret = nla_validate_nested(attr, CTA_IP_MAX, + l3proto->nla_policy); + if (ret == 0) + ret = l3proto->nlattr_to_tuple(tb, tuple); + } nf_ct_l3proto_put(l3proto); return ret; } -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_NUM-1] = sizeof(u_int8_t), +static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_NUM] = { .type = NLA_U8 }, }; static inline int -ctnetlink_parse_tuple_proto(struct nfattr *attr, +ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nf_conntrack_tuple *tuple) { - struct nfattr *tb[CTA_PROTO_MAX]; + struct nlattr *tb[CTA_PROTO_MAX+1]; struct nf_conntrack_l4proto *l4proto; int ret = 0; - nfattr_parse_nested(tb, CTA_PROTO_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) - return -EINVAL; + ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy); + if (ret < 0) + return ret; - if (!tb[CTA_PROTO_NUM-1]) + if (!tb[CTA_PROTO_NUM]) return -EINVAL; - tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); + tuple->dst.protonum = *(u_int8_t *)nla_data(tb[CTA_PROTO_NUM]); l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); - if (likely(l4proto->nfattr_to_tuple)) - ret = l4proto->nfattr_to_tuple(tb, tuple); + if (likely(l4proto->nlattr_to_tuple)) { + ret = nla_validate_nested(attr, CTA_PROTO_MAX, + l4proto->nla_policy); + if (ret == 0) + ret = l4proto->nlattr_to_tuple(tb, tuple); + } nf_ct_l4proto_put(l4proto); @@ -529,29 +559,29 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, } static inline int -ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple, +ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, enum ctattr_tuple type, u_int8_t l3num) { - struct nfattr *tb[CTA_TUPLE_MAX]; + struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; memset(tuple, 0, sizeof(*tuple)); - nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]); + nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], NULL); - if (!tb[CTA_TUPLE_IP-1]) + if (!tb[CTA_TUPLE_IP]) return -EINVAL; tuple->src.l3num = l3num; - err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); + err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple); if (err < 0) return err; - if (!tb[CTA_TUPLE_PROTO-1]) + if (!tb[CTA_TUPLE_PROTO]) return -EINVAL; - err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); + err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple); if (err < 0) return err; @@ -565,32 +595,32 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple, } #ifdef CONFIG_NF_NAT_NEEDED -static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = { - [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), - [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), +static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { + [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, + [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, }; -static int nfnetlink_parse_nat_proto(struct nfattr *attr, +static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_conn *ct, struct nf_nat_range *range) { - struct nfattr *tb[CTA_PROTONAT_MAX]; + struct nlattr *tb[CTA_PROTONAT_MAX+1]; struct nf_nat_protocol *npt; + int err; - nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) - return -EINVAL; + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + if (err < 0) + return err; npt = nf_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); - if (!npt->nfattr_to_range) { + if (!npt->nlattr_to_range) { nf_nat_proto_put(npt); return 0; } - /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */ - if (npt->nfattr_to_range(tb, range) > 0) + /* nlattr_to_range returns 1 if it parsed, 0 if not, neg. on error */ + if (npt->nlattr_to_range(tb, range) > 0) range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; nf_nat_proto_put(npt); @@ -598,40 +628,39 @@ static int nfnetlink_parse_nat_proto(struct nfattr *attr, return 0; } -static const size_t cta_min_nat[CTA_NAT_MAX] = { - [CTA_NAT_MINIP-1] = sizeof(u_int32_t), - [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), +static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { + [CTA_NAT_MINIP] = { .type = NLA_U32 }, + [CTA_NAT_MAXIP] = { .type = NLA_U32 }, }; static inline int -nfnetlink_parse_nat(struct nfattr *nat, +nfnetlink_parse_nat(struct nlattr *nat, const struct nf_conn *ct, struct nf_nat_range *range) { - struct nfattr *tb[CTA_NAT_MAX]; + struct nlattr *tb[CTA_NAT_MAX+1]; int err; memset(range, 0, sizeof(*range)); - nfattr_parse_nested(tb, CTA_NAT_MAX, nat); - - if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat)) - return -EINVAL; + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + if (err < 0) + return err; - if (tb[CTA_NAT_MINIP-1]) - range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + if (tb[CTA_NAT_MINIP]) + range->min_ip = *(__be32 *)nla_data(tb[CTA_NAT_MINIP]); - if (!tb[CTA_NAT_MAXIP-1]) + if (!tb[CTA_NAT_MAXIP]) range->max_ip = range->min_ip; else - range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + range->max_ip = *(__be32 *)nla_data(tb[CTA_NAT_MAXIP]); if (range->min_ip) range->flags |= IP_NAT_RANGE_MAP_IPS; - if (!tb[CTA_NAT_PROTO-1]) + if (!tb[CTA_NAT_PROTO]) return 0; - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); + err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); if (err < 0) return err; @@ -640,31 +669,31 @@ nfnetlink_parse_nat(struct nfattr *nat, #endif static inline int -ctnetlink_parse_help(struct nfattr *attr, char **helper_name) +ctnetlink_parse_help(struct nlattr *attr, char **helper_name) { - struct nfattr *tb[CTA_HELP_MAX]; + struct nlattr *tb[CTA_HELP_MAX+1]; - nfattr_parse_nested(tb, CTA_HELP_MAX, attr); + nla_parse_nested(tb, CTA_HELP_MAX, attr, NULL); - if (!tb[CTA_HELP_NAME-1]) + if (!tb[CTA_HELP_NAME]) return -EINVAL; - *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); + *helper_name = nla_data(tb[CTA_HELP_NAME]); return 0; } -static const size_t cta_min[CTA_MAX] = { - [CTA_STATUS-1] = sizeof(u_int32_t), - [CTA_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_MARK-1] = sizeof(u_int32_t), - [CTA_USE-1] = sizeof(u_int32_t), - [CTA_ID-1] = sizeof(u_int32_t) +static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { + [CTA_STATUS] = { .type = NLA_U32 }, + [CTA_TIMEOUT] = { .type = NLA_U32 }, + [CTA_MARK] = { .type = NLA_U32 }, + [CTA_USE] = { .type = NLA_U32 }, + [CTA_ID] = { .type = NLA_U32 }, }; static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -673,12 +702,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) + if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); - else if (cda[CTA_TUPLE_REPLY-1]) + else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ @@ -695,9 +721,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = nf_ct_tuplehash_to_ctrack(h); - if (cda[CTA_ID-1]) { - u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1])); - if (ct->id != id) { + if (cda[CTA_ID]) { + u_int32_t id = ntohl(*(__be32 *)nla_data(cda[CTA_ID])); + if (id != (u32)(unsigned long)ct) { nf_ct_put(ct); return -ENOENT; } @@ -712,7 +738,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, static int ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -731,12 +757,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, ctnetlink_done); } - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) + if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); - else if (cda[CTA_TUPLE_REPLY-1]) + else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else return -EINVAL; @@ -776,10 +799,10 @@ out: } static inline int -ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) { unsigned long d; - unsigned int status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1])); + unsigned int status = ntohl(*(__be32 *)nla_data(cda[CTA_STATUS])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) @@ -795,14 +818,14 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) /* ASSURED bit can only be set */ return -EINVAL; - if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { #ifndef CONFIG_NF_NAT_NEEDED return -EINVAL; #else struct nf_nat_range range; - if (cda[CTA_NAT_DST-1]) { - if (nfnetlink_parse_nat(cda[CTA_NAT_DST-1], ct, + if (cda[CTA_NAT_DST]) { + if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct, &range) < 0) return -EINVAL; if (nf_nat_initialized(ct, @@ -810,8 +833,8 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) return -EEXIST; nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } - if (cda[CTA_NAT_SRC-1]) { - if (nfnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct, + if (cda[CTA_NAT_SRC]) { + if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct, &range) < 0) return -EINVAL; if (nf_nat_initialized(ct, @@ -831,7 +854,7 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) static inline int -ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); @@ -842,7 +865,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) if (ct->master) return -EINVAL; - err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) return err; @@ -879,9 +902,9 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) } static inline int -ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) { - u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); + u_int32_t timeout = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT])); if (!del_timer(&ct->timeout)) return -ETIME; @@ -893,66 +916,67 @@ ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[]) } static inline int -ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) { - struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; + struct nlattr *tb[CTA_PROTOINFO_MAX+1], *attr = cda[CTA_PROTOINFO]; struct nf_conntrack_l4proto *l4proto; u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; u_int16_t l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; int err = 0; - nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr); + nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL); l4proto = nf_ct_l4proto_find_get(l3num, npt); - if (l4proto->from_nfattr) - err = l4proto->from_nfattr(tb, ct); + if (l4proto->from_nlattr) + err = l4proto->from_nlattr(tb, ct); nf_ct_l4proto_put(l4proto); return err; } static int -ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[]) +ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) { int err; - if (cda[CTA_HELP-1]) { + if (cda[CTA_HELP]) { err = ctnetlink_change_helper(ct, cda); if (err < 0) return err; } - if (cda[CTA_TIMEOUT-1]) { + if (cda[CTA_TIMEOUT]) { err = ctnetlink_change_timeout(ct, cda); if (err < 0) return err; } - if (cda[CTA_STATUS-1]) { + if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); if (err < 0) return err; } - if (cda[CTA_PROTOINFO-1]) { + if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) return err; } #if defined(CONFIG_NF_CONNTRACK_MARK) - if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); + if (cda[CTA_MARK]) + ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK])); #endif return 0; } static int -ctnetlink_create_conntrack(struct nfattr *cda[], +ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, - struct nf_conntrack_tuple *rtuple) + struct nf_conntrack_tuple *rtuple, + struct nf_conn *master_ct) { struct nf_conn *ct; int err = -EINVAL; @@ -963,28 +987,28 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (ct == NULL || IS_ERR(ct)) return -ENOMEM; - if (!cda[CTA_TIMEOUT-1]) + if (!cda[CTA_TIMEOUT]) goto err; - ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); + ct->timeout.expires = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; ct->status |= IPS_CONFIRMED; - if (cda[CTA_STATUS-1]) { + if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); if (err < 0) goto err; } - if (cda[CTA_PROTOINFO-1]) { + if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) goto err; } #if defined(CONFIG_NF_CONNTRACK_MARK) - if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); + if (cda[CTA_MARK]) + ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK])); #endif helper = nf_ct_helper_find_get(rtuple); @@ -999,6 +1023,10 @@ ctnetlink_create_conntrack(struct nfattr *cda[], rcu_assign_pointer(help->helper, helper); } + /* setup master conntrack: this is a confirmed expectation */ + if (master_ct) + ct->master = master_ct; + add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); @@ -1014,7 +1042,7 @@ err: static int ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1022,32 +1050,56 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) { + if (cda[CTA_TUPLE_ORIG]) { err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); if (err < 0) return err; } - if (cda[CTA_TUPLE_REPLY-1]) { + if (cda[CTA_TUPLE_REPLY]) { err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3); if (err < 0) return err; } write_lock_bh(&nf_conntrack_lock); - if (cda[CTA_TUPLE_ORIG-1]) + if (cda[CTA_TUPLE_ORIG]) h = __nf_conntrack_find(&otuple, NULL); - else if (cda[CTA_TUPLE_REPLY-1]) + else if (cda[CTA_TUPLE_REPLY]) h = __nf_conntrack_find(&rtuple, NULL); if (h == NULL) { + struct nf_conntrack_tuple master; + struct nf_conntrack_tuple_hash *master_h = NULL; + struct nf_conn *master_ct = NULL; + + if (cda[CTA_TUPLE_MASTER]) { + err = ctnetlink_parse_tuple(cda, + &master, + CTA_TUPLE_MASTER, + u3); + if (err < 0) + return err; + + master_h = __nf_conntrack_find(&master, NULL); + if (master_h == NULL) { + err = -ENOENT; + goto out_unlock; + } + master_ct = nf_ct_tuplehash_to_ctrack(master_h); + atomic_inc(&master_ct->ct_general.use); + } + write_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) - err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); + err = ctnetlink_create_conntrack(cda, + &otuple, + &rtuple, + master_ct); + if (err < 0 && master_ct) + nf_ct_put(master_ct); + return err; } /* implicit 'else' */ @@ -1057,7 +1109,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -EEXIST; if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { + err = -EINVAL; + goto out_unlock; + } + /* can't link an existing conntrack to a master */ + if (cda[CTA_TUPLE_MASTER]) { err = -EINVAL; goto out_unlock; } @@ -1079,16 +1136,18 @@ ctnetlink_exp_dump_tuple(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum ctattr_expect type) { - struct nfattr *nest_parms = NFA_NEST(skb, type); + struct nlattr *nest_parms; + nest_parms = nla_nest_start(skb, type | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple) < 0) - goto nfattr_failure; - - NFA_NEST_END(skb, nest_parms); + goto nla_put_failure; + nla_nest_end(skb, nest_parms); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -1101,32 +1160,34 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb, struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple m; - struct nfattr *nest_parms; + struct nlattr *nest_parms; memset(&m, 0xFF, sizeof(m)); m.src.u.all = mask->src.u.all; memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); - nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK); + nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto); nf_ct_l3proto_put(l3proto); if (unlikely(ret < 0)) - goto nfattr_failure; + goto nla_put_failure; l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); nf_ct_l4proto_put(l4proto); if (unlikely(ret < 0)) - goto nfattr_failure; + goto nla_put_failure; - NFA_NEST_END(skb, nest_parms); + nla_nest_end(skb, nest_parms); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -1136,23 +1197,23 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, { struct nf_conn *master = exp->master; __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ); - __be32 id = htonl(exp->id); + __be32 id = htonl((unsigned long)exp); if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) - goto nfattr_failure; + goto nla_put_failure; if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0) - goto nfattr_failure; + goto nla_put_failure; if (ctnetlink_exp_dump_tuple(skb, &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple, CTA_EXPECT_MASTER) < 0) - goto nfattr_failure; + goto nla_put_failure; - NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); - NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + NLA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); + NLA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); return 0; -nfattr_failure: +nla_put_failure: return -1; } @@ -1176,13 +1237,13 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, nfmsg->res_id = 0; if (ctnetlink_exp_dump_expect(skb, exp) < 0) - goto nfattr_failure; + goto nla_put_failure; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: -nfattr_failure: +nla_put_failure: nlmsg_trim(skb, b); return -1; } @@ -1224,14 +1285,14 @@ static int ctnetlink_expect_event(struct notifier_block *this, nfmsg->res_id = 0; if (ctnetlink_exp_dump_expect(skb, exp) < 0) - goto nfattr_failure; + goto nla_put_failure; nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); return NOTIFY_DONE; nlmsg_failure: -nfattr_failure: +nla_put_failure: kfree_skb(skb); return NOTIFY_DONE; } @@ -1286,14 +1347,14 @@ out: return skb->len; } -static const size_t cta_min_exp[CTA_EXPECT_MAX] = { - [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_EXPECT_ID-1] = sizeof(u_int32_t) +static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { + [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, + [CTA_EXPECT_ID] = { .type = NLA_U32 }, }; static int ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -1302,16 +1363,13 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, ctnetlink_exp_done); } - if (cda[CTA_EXPECT_MASTER-1]) + if (cda[CTA_EXPECT_MASTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); else return -EINVAL; @@ -1323,9 +1381,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (!exp) return -ENOENT; - if (cda[CTA_EXPECT_ID-1]) { - __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); - if (exp->id != ntohl(id)) { + if (cda[CTA_EXPECT_ID]) { + __be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]); + if (ntohl(id) != (u32)(unsigned long)exp) { nf_ct_expect_put(exp); return -ENOENT; } @@ -1355,7 +1413,7 @@ out: static int ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -1366,10 +1424,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, unsigned int i; int err; - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - - if (cda[CTA_EXPECT_TUPLE-1]) { + if (cda[CTA_EXPECT_TUPLE]) { /* delete a single expect by tuple */ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); if (err < 0) @@ -1380,9 +1435,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (!exp) return -ENOENT; - if (cda[CTA_EXPECT_ID-1]) { - __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); - if (exp->id != ntohl(id)) { + if (cda[CTA_EXPECT_ID]) { + __be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]); + if (ntohl(id) != (u32)(unsigned long)exp) { nf_ct_expect_put(exp); return -ENOENT; } @@ -1393,8 +1448,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* have to put what we 'get' above. * after this line usage count == 0 */ nf_ct_expect_put(exp); - } else if (cda[CTA_EXPECT_HELP_NAME-1]) { - char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); + } else if (cda[CTA_EXPECT_HELP_NAME]) { + char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]); struct nf_conn_help *m_help; /* delete all expectations for this helper */ @@ -1436,13 +1491,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return 0; } static int -ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nfattr *cda[]) +ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[]) { return -EOPNOTSUPP; } static int -ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) +ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1499,7 +1554,7 @@ out: static int ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[]) + struct nlmsghdr *nlh, struct nlattr *cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -1507,12 +1562,9 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - - if (!cda[CTA_EXPECT_TUPLE-1] - || !cda[CTA_EXPECT_MASK-1] - || !cda[CTA_EXPECT_MASTER-1]) + if (!cda[CTA_EXPECT_TUPLE] + || !cda[CTA_EXPECT_MASK] + || !cda[CTA_EXPECT_MASTER]) return -EINVAL; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); @@ -1548,34 +1600,41 @@ static struct notifier_block ctnl_notifier_exp = { }; #endif -static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { +static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .policy = ct_nla_policy }, [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .policy = ct_nla_policy }, }; -static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { +static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy }, [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy }, [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy }, }; -static struct nfnetlink_subsystem ctnl_subsys = { +static const struct nfnetlink_subsystem ctnl_subsys = { .name = "conntrack", .subsys_id = NFNL_SUBSYS_CTNETLINK, .cb_count = IPCTNL_MSG_MAX, .cb = ctnl_cb, }; -static struct nfnetlink_subsystem ctnl_exp_subsys = { +static const struct nfnetlink_subsystem ctnl_exp_subsys = { .name = "conntrack_expect", .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, .cb_count = IPCTNL_MSG_EXP_MAX, diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index b0804199ab5..099b6df3e2b 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -41,14 +41,14 @@ MODULE_ALIAS("ip_conntrack_pptp"); static DEFINE_SPINLOCK(nf_pptp_lock); int -(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, +(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound); int -(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb, +(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; @@ -254,7 +254,7 @@ out_unexpect_orig: } static inline int -pptp_inbound_pkt(struct sk_buff **pskb, +pptp_inbound_pkt(struct sk_buff *skb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -367,7 +367,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound); if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -380,7 +380,7 @@ invalid: } static inline int -pptp_outbound_pkt(struct sk_buff **pskb, +pptp_outbound_pkt(struct sk_buff *skb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -462,7 +462,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound); if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -492,7 +492,7 @@ static const unsigned int pptp_msg_size[] = { /* track caller id inside control connection, call expect_related */ static int -conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, +conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -502,7 +502,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, struct pptp_pkt_hdr _pptph, *pptph; struct PptpControlHeader _ctlh, *ctlh; union pptp_ctrl_union _pptpReq, *pptpReq; - unsigned int tcplen = (*pskb)->len - protoff; + unsigned int tcplen = skb->len - protoff; unsigned int datalen, reqlen, nexthdr_off; int oldsstate, oldcstate; int ret; @@ -514,12 +514,12 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; nexthdr_off = protoff; - tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; - pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { pr_debug("no full PPTP header, can't track\n"); return NF_ACCEPT; @@ -534,7 +534,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; } - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) return NF_ACCEPT; nexthdr_off += sizeof(_ctlh); @@ -547,7 +547,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, if (reqlen > sizeof(*pptpReq)) reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq); if (!pptpReq) return NF_ACCEPT; @@ -560,11 +560,11 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct, ctinfo); pr_debug("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d8b501878d9..13f81917964 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -70,7 +70,6 @@ static int new(struct nf_conn *conntrack, const struct sk_buff *skb, static struct ctl_table_header *generic_sysctl_header; static struct ctl_table generic_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT, .procname = "nf_conntrack_generic_timeout", .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), @@ -84,7 +83,6 @@ static struct ctl_table generic_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table generic_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT, .procname = "ip_conntrack_generic_timeout", .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index bdbead8a7a8..4a185f6aa65 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -274,8 +274,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .destroy = gre_destroy, .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 04192acc7c4..cb046751059 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -476,7 +476,6 @@ static unsigned int sctp_sysctl_table_users; static struct ctl_table_header *sctp_sysctl_header; static struct ctl_table sctp_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, .procname = "nf_conntrack_sctp_timeout_closed", .data = &nf_ct_sctp_timeout_closed, .maxlen = sizeof(unsigned int), @@ -484,7 +483,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, .procname = "nf_conntrack_sctp_timeout_cookie_wait", .data = &nf_ct_sctp_timeout_cookie_wait, .maxlen = sizeof(unsigned int), @@ -492,7 +490,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, .procname = "nf_conntrack_sctp_timeout_cookie_echoed", .data = &nf_ct_sctp_timeout_cookie_echoed, .maxlen = sizeof(unsigned int), @@ -500,7 +497,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, .procname = "nf_conntrack_sctp_timeout_established", .data = &nf_ct_sctp_timeout_established, .maxlen = sizeof(unsigned int), @@ -508,7 +504,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, .procname = "nf_conntrack_sctp_timeout_shutdown_sent", .data = &nf_ct_sctp_timeout_shutdown_sent, .maxlen = sizeof(unsigned int), @@ -516,7 +511,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, .procname = "nf_conntrack_sctp_timeout_shutdown_recd", .data = &nf_ct_sctp_timeout_shutdown_recd, .maxlen = sizeof(unsigned int), @@ -524,7 +518,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", .data = &nf_ct_sctp_timeout_shutdown_ack_sent, .maxlen = sizeof(unsigned int), @@ -539,7 +532,6 @@ static struct ctl_table sctp_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table sctp_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, .procname = "ip_conntrack_sctp_timeout_closed", .data = &nf_ct_sctp_timeout_closed, .maxlen = sizeof(unsigned int), @@ -547,7 +539,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, .procname = "ip_conntrack_sctp_timeout_cookie_wait", .data = &nf_ct_sctp_timeout_cookie_wait, .maxlen = sizeof(unsigned int), @@ -555,7 +546,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, .procname = "ip_conntrack_sctp_timeout_cookie_echoed", .data = &nf_ct_sctp_timeout_cookie_echoed, .maxlen = sizeof(unsigned int), @@ -563,7 +553,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, .procname = "ip_conntrack_sctp_timeout_established", .data = &nf_ct_sctp_timeout_established, .maxlen = sizeof(unsigned int), @@ -571,7 +560,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, .procname = "ip_conntrack_sctp_timeout_shutdown_sent", .data = &nf_ct_sctp_timeout_shutdown_sent, .maxlen = sizeof(unsigned int), @@ -579,7 +567,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, .procname = "ip_conntrack_sctp_timeout_shutdown_recd", .data = &nf_ct_sctp_timeout_shutdown_recd, .maxlen = sizeof(unsigned int), @@ -587,7 +574,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", .data = &nf_ct_sctp_timeout_shutdown_ack_sent, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index eb3fe740146..7a3f64c1aca 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -831,6 +831,22 @@ static int tcp_packet(struct nf_conn *conntrack, tuple = &conntrack->tuplehash[dir].tuple; switch (new_state) { + case TCP_CONNTRACK_SYN_SENT: + if (old_state < TCP_CONNTRACK_TIME_WAIT) + break; + if ((conntrack->proto.tcp.seen[!dir].flags & + IP_CT_TCP_FLAG_CLOSE_INIT) + || (conntrack->proto.tcp.last_dir == dir + && conntrack->proto.tcp.last_index == TCP_RST_SET)) { + /* Attempt to reopen a closed/aborted connection. + * Delete this connection and look up again. */ + write_unlock_bh(&tcp_lock); + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long) + conntrack); + return -NF_REPEAT; + } + /* Fall through */ case TCP_CONNTRACK_IGNORE: /* Ignored packets: * @@ -879,27 +895,6 @@ static int tcp_packet(struct nf_conn *conntrack, nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; - case TCP_CONNTRACK_SYN_SENT: - if (old_state < TCP_CONNTRACK_TIME_WAIT) - break; - if ((conntrack->proto.tcp.seen[dir].flags & - IP_CT_TCP_FLAG_CLOSE_INIT) - || after(ntohl(th->seq), - conntrack->proto.tcp.seen[dir].td_end)) { - /* Attempt to reopen a closed connection. - * Delete this connection and look up again. */ - write_unlock_bh(&tcp_lock); - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long) - conntrack); - return -NF_REPEAT; - } else { - write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, - NULL, "nf_ct_tcp: invalid SYN"); - return -NF_ACCEPT; - } case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) @@ -932,6 +927,7 @@ static int tcp_packet(struct nf_conn *conntrack, in_window: /* From now on we have got in-window packets */ conntrack->proto.tcp.last_index = index; + conntrack->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); NF_CT_DUMP_TUPLE(tuple); @@ -1067,93 +1063,96 @@ static int tcp_new(struct nf_conn *conntrack, #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, +static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, const struct nf_conn *ct) { - struct nfattr *nest_parms; + struct nlattr *nest_parms; struct nf_ct_tcp_flags tmp = {}; read_lock_bh(&tcp_lock); - nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); - NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + NLA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), &ct->proto.tcp.state); - NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t), &ct->proto.tcp.seen[0].td_scale); - NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t), + NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t), &ct->proto.tcp.seen[1].td_scale); tmp.flags = ct->proto.tcp.seen[0].flags; - NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, + NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, sizeof(struct nf_ct_tcp_flags), &tmp); tmp.flags = ct->proto.tcp.seen[1].flags; - NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, + NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, sizeof(struct nf_ct_tcp_flags), &tmp); read_unlock_bh(&tcp_lock); - NFA_NEST_END(skb, nest_parms); + nla_nest_end(skb, nest_parms); return 0; -nfattr_failure: +nla_put_failure: read_unlock_bh(&tcp_lock); return -1; } -static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { - [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), - [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t), - [CTA_PROTOINFO_TCP_WSCALE_REPLY-1] = sizeof(u_int8_t), - [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1] = sizeof(struct nf_ct_tcp_flags), - [CTA_PROTOINFO_TCP_FLAGS_REPLY-1] = sizeof(struct nf_ct_tcp_flags) +static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { + [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, + [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; -static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct) +static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) { - struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; - struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + struct nlattr *attr = cda[CTA_PROTOINFO_TCP]; + struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1]; + int err; /* updates could not contain anything about the private * protocol info, in that case skip the parsing */ if (!attr) return 0; - nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) - return -EINVAL; + err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr, tcp_nla_policy); + if (err < 0) + return err; - if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + if (!tb[CTA_PROTOINFO_TCP_STATE]) return -EINVAL; write_lock_bh(&tcp_lock); ct->proto.tcp.state = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + *(u_int8_t *)nla_data(tb[CTA_PROTOINFO_TCP_STATE]); - if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) { + if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) { struct nf_ct_tcp_flags *attr = - NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]); + nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]); ct->proto.tcp.seen[0].flags &= ~attr->mask; ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask; } - if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) { + if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) { struct nf_ct_tcp_flags *attr = - NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]); + nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]); ct->proto.tcp.seen[1].flags &= ~attr->mask; ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask; } - if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] && - tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] && + if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] && + tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] && ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE && ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { ct->proto.tcp.seen[0].td_scale = *(u_int8_t *) - NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]); + nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); ct->proto.tcp.seen[1].td_scale = *(u_int8_t *) - NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]); + nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); } write_unlock_bh(&tcp_lock); @@ -1166,7 +1165,6 @@ static unsigned int tcp_sysctl_table_users; static struct ctl_table_header *tcp_sysctl_header; static struct ctl_table tcp_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, .procname = "nf_conntrack_tcp_timeout_syn_sent", .data = &nf_ct_tcp_timeout_syn_sent, .maxlen = sizeof(unsigned int), @@ -1174,7 +1172,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, .procname = "nf_conntrack_tcp_timeout_syn_recv", .data = &nf_ct_tcp_timeout_syn_recv, .maxlen = sizeof(unsigned int), @@ -1182,7 +1179,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, .procname = "nf_conntrack_tcp_timeout_established", .data = &nf_ct_tcp_timeout_established, .maxlen = sizeof(unsigned int), @@ -1190,7 +1186,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, .procname = "nf_conntrack_tcp_timeout_fin_wait", .data = &nf_ct_tcp_timeout_fin_wait, .maxlen = sizeof(unsigned int), @@ -1198,7 +1193,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, .procname = "nf_conntrack_tcp_timeout_close_wait", .data = &nf_ct_tcp_timeout_close_wait, .maxlen = sizeof(unsigned int), @@ -1206,7 +1200,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, .procname = "nf_conntrack_tcp_timeout_last_ack", .data = &nf_ct_tcp_timeout_last_ack, .maxlen = sizeof(unsigned int), @@ -1214,7 +1207,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, .procname = "nf_conntrack_tcp_timeout_time_wait", .data = &nf_ct_tcp_timeout_time_wait, .maxlen = sizeof(unsigned int), @@ -1222,7 +1214,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, .procname = "nf_conntrack_tcp_timeout_close", .data = &nf_ct_tcp_timeout_close, .maxlen = sizeof(unsigned int), @@ -1230,7 +1221,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, .procname = "nf_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), @@ -1269,7 +1259,6 @@ static struct ctl_table tcp_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table tcp_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, .procname = "ip_conntrack_tcp_timeout_syn_sent", .data = &nf_ct_tcp_timeout_syn_sent, .maxlen = sizeof(unsigned int), @@ -1277,7 +1266,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &nf_ct_tcp_timeout_syn_recv, .maxlen = sizeof(unsigned int), @@ -1285,7 +1273,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, .procname = "ip_conntrack_tcp_timeout_established", .data = &nf_ct_tcp_timeout_established, .maxlen = sizeof(unsigned int), @@ -1293,7 +1280,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, .procname = "ip_conntrack_tcp_timeout_fin_wait", .data = &nf_ct_tcp_timeout_fin_wait, .maxlen = sizeof(unsigned int), @@ -1301,7 +1287,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, .procname = "ip_conntrack_tcp_timeout_close_wait", .data = &nf_ct_tcp_timeout_close_wait, .maxlen = sizeof(unsigned int), @@ -1309,7 +1294,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, .procname = "ip_conntrack_tcp_timeout_last_ack", .data = &nf_ct_tcp_timeout_last_ack, .maxlen = sizeof(unsigned int), @@ -1317,7 +1301,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, .procname = "ip_conntrack_tcp_timeout_time_wait", .data = &nf_ct_tcp_timeout_time_wait, .maxlen = sizeof(unsigned int), @@ -1325,7 +1308,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, .procname = "ip_conntrack_tcp_timeout_close", .data = &nf_ct_tcp_timeout_close, .maxlen = sizeof(unsigned int), @@ -1333,7 +1315,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, .procname = "ip_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), @@ -1384,10 +1365,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .new = tcp_new, .error = tcp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .to_nfattr = tcp_to_nfattr, - .from_nfattr = nfattr_to_tcp, - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .to_nlattr = tcp_to_nlattr, + .from_nlattr = nlattr_to_tcp, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &tcp_sysctl_table_users, @@ -1413,10 +1395,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .new = tcp_new, .error = tcp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .to_nfattr = tcp_to_nfattr, - .from_nfattr = nfattr_to_tcp, - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .to_nlattr = tcp_to_nlattr, + .from_nlattr = nlattr_to_tcp, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &tcp_sysctl_table_users, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 2a2fd1a764e..b3e7ecb080e 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -146,7 +146,6 @@ static unsigned int udp_sysctl_table_users; static struct ctl_table_header *udp_sysctl_header; static struct ctl_table udp_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT, .procname = "nf_conntrack_udp_timeout", .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), @@ -154,7 +153,6 @@ static struct ctl_table udp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, .procname = "nf_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), @@ -168,7 +166,6 @@ static struct ctl_table udp_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table udp_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT, .procname = "ip_conntrack_udp_timeout", .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), @@ -176,7 +173,6 @@ static struct ctl_table udp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM, .procname = "ip_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), @@ -203,8 +199,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .new = udp_new, .error = udp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &udp_sysctl_table_users, @@ -230,8 +227,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .new = udp_new, .error = udp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &udp_sysctl_table_users, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index b906b413997..b8981dd922b 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -203,8 +203,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .new = udplite_new, .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &udplite_sysctl_table_users, @@ -226,8 +227,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .new = udplite_new, .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL .ctl_table_users = &udplite_sysctl_table_users, diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 355d371bac9..b5a16c6e21c 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -56,7 +56,7 @@ struct sane_reply_net_start { /* other fields aren't interesting for conntrack */ }; -static int help(struct sk_buff **pskb, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -80,19 +80,19 @@ static int help(struct sk_buff **pskb, return NF_ACCEPT; /* Not a full tcp header? */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? */ dataoff = protoff + th->doff * 4; - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; spin_lock_bh(&nf_sane_lock); - sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer); + sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer); BUG_ON(sb_ptr == NULL); if (dir == IP_CT_DIR_ORIGINAL) { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d449fa47491..8f8b5a48df3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -36,13 +36,13 @@ static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT; module_param(sip_timeout, uint, 0600); MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); -unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr) __read_mostly; @@ -363,7 +363,7 @@ int ct_sip_get_info(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(ct_sip_get_info); -static int set_expected_rtp(struct sk_buff **pskb, +static int set_expected_rtp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, union nf_conntrack_address *addr, @@ -385,7 +385,7 @@ static int set_expected_rtp(struct sk_buff **pskb, nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp(pskb, ctinfo, exp, dptr); + ret = nf_nat_sdp(skb, ctinfo, exp, dptr); else { if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; @@ -397,7 +397,7 @@ static int set_expected_rtp(struct sk_buff **pskb, return ret; } -static int sip_help(struct sk_buff **pskb, +static int sip_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -414,13 +414,13 @@ static int sip_help(struct sk_buff **pskb, /* No Data ? */ dataoff = protoff + sizeof(struct udphdr); - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; - nf_ct_refresh(ct, *pskb, sip_timeout * HZ); + nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (!skb_is_nonlinear(*pskb)) - dptr = (*pskb)->data + dataoff; + if (!skb_is_nonlinear(skb)) + dptr = skb->data + dataoff; else { pr_debug("Copy of skbuff not supported yet.\n"); goto out; @@ -428,13 +428,13 @@ static int sip_help(struct sk_buff **pskb, nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && ct->status & IPS_NAT_MASK) { - if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) { + if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) { ret = NF_DROP; goto out; } } - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; if (datalen < sizeof("SIP/2.0 200") - 1) goto out; @@ -464,7 +464,7 @@ static int sip_help(struct sk_buff **pskb, ret = NF_DROP; goto out; } - ret = set_expected_rtp(pskb, ct, ctinfo, &addr, + ret = set_expected_rtp(skb, ct, ctinfo, &addr, htons(port), dptr); } } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index a4ce5e88799..9efdd37fc19 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -14,6 +14,7 @@ #include <linux/seq_file.h> #include <linux/percpu.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -194,22 +195,8 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct ct_iter_state *st; - int ret; - - st = kzalloc(sizeof(struct ct_iter_state), GFP_KERNEL); - if (st == NULL) - return -ENOMEM; - ret = seq_open(file, &ct_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - return ret; -out_free: - kfree(st); - return ret; + return seq_open_private(file, &ct_seq_ops, + sizeof(struct ct_iter_state)); } static const struct file_operations ct_file_ops = { @@ -420,10 +407,10 @@ static int __init nf_conntrack_standalone_init(void) return ret; #ifdef CONFIG_PROC_FS - proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat); if (!proc_stat) goto cleanup_proc; @@ -444,9 +431,9 @@ static int __init nf_conntrack_standalone_init(void) cleanup_proc_stat: #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); + remove_proc_entry("nf_conntrack", init_net. proc_net_stat); cleanup_proc: - proc_net_remove("nf_conntrack"); + proc_net_remove(&init_net, "nf_conntrack"); cleanup_init: #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); @@ -459,8 +446,8 @@ static void __exit nf_conntrack_standalone_fini(void) unregister_sysctl_table(nf_ct_sysctl_header); #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); - proc_net_remove("nf_conntrack"); + remove_proc_entry("nf_conntrack", init_net.proc_net_stat); + proc_net_remove(&init_net, "nf_conntrack"); #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index cc19506cf2f..e894aa1ff3a 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -29,12 +29,12 @@ static int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); -static int tftp_help(struct sk_buff **pskb, +static int tftp_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -46,7 +46,7 @@ static int tftp_help(struct sk_buff **pskb, int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; typeof(nf_nat_tftp_hook) nf_nat_tftp; - tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr), + tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr), sizeof(_tftph), &_tftph); if (tfh == NULL) return NF_ACCEPT; @@ -70,7 +70,7 @@ static int tftp_help(struct sk_buff **pskb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) - ret = nf_nat_tftp(pskb, ctinfo, exp); + ret = nf_nat_tftp(skb, ctinfo, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 0df7fff196a..196269c1e58 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -14,7 +14,7 @@ /* core.c */ extern unsigned int nf_iterate(struct list_head *head, - struct sk_buff **skb, + struct sk_buff *skb, int hook, const struct net_device *indev, const struct net_device *outdev, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a481a349f7b..0cef1433d66 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -256,14 +256,14 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(info->pf); - if (!afinfo || afinfo->reroute(&skb, info) < 0) + if (!afinfo || afinfo->reroute(skb, info) < 0) verdict = NF_DROP; } if (verdict == NF_ACCEPT) { next_hook: verdict = nf_iterate(&nf_hooks[info->pf][info->hook], - &skb, info->hook, + skb, info->hook, info->indev, info->outdev, &elem, info->okfn, INT_MIN); } diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index e32761ce260..aa2831587b8 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -69,6 +69,9 @@ static int nf_sockopt(struct sock *sk, int pf, int val, struct nf_sockopt_ops *ops; int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; @@ -125,6 +128,10 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, struct nf_sockopt_ops *ops; int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + + if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8797e6953ef..2128542995f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -41,32 +41,20 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; static struct sock *nfnl = NULL; -static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; +static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); -static void nfnl_lock(void) +static inline void nfnl_lock(void) { mutex_lock(&nfnl_mutex); } -static int nfnl_trylock(void) -{ - return !mutex_trylock(&nfnl_mutex); -} - -static void __nfnl_unlock(void) -{ - mutex_unlock(&nfnl_mutex); -} - -static void nfnl_unlock(void) +static inline void nfnl_unlock(void) { mutex_unlock(&nfnl_mutex); - if (nfnl->sk_receive_queue.qlen) - nfnl->sk_data_ready(nfnl, 0); } -int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) +int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { nfnl_lock(); if (subsys_table[n->subsys_id]) { @@ -80,7 +68,7 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) } EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); -int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) +int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) { nfnl_lock(); subsys_table[n->subsys_id] = NULL; @@ -90,7 +78,7 @@ int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) } EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); -static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) +static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) { u_int8_t subsys_id = NFNL_SUBSYS_ID(type); @@ -100,8 +88,8 @@ static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) return subsys_table[subsys_id]; } -static inline struct nfnl_callback * -nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) +static inline const struct nfnl_callback * +nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) { u_int8_t cb_id = NFNL_MSG_TYPE(type); @@ -111,62 +99,6 @@ nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) return &ss->cb[cb_id]; } -void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen, - const void *data) -{ - struct nfattr *nfa; - int size = NFA_LENGTH(attrlen); - - nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); - nfa->nfa_type = attrtype; - nfa->nfa_len = size; - memcpy(NFA_DATA(nfa), data, attrlen); - memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); -} -EXPORT_SYMBOL_GPL(__nfa_fill); - -void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) -{ - memset(tb, 0, sizeof(struct nfattr *) * maxattr); - - while (NFA_OK(nfa, len)) { - unsigned flavor = NFA_TYPE(nfa); - if (flavor && flavor <= maxattr) - tb[flavor-1] = nfa; - nfa = NFA_NEXT(nfa, len); - } -} -EXPORT_SYMBOL_GPL(nfattr_parse); - -/** - * nfnetlink_check_attributes - check and parse nfnetlink attributes - * - * subsys: nfnl subsystem for which this message is to be parsed - * nlmsghdr: netlink message to be checked/parsed - * cda: array of pointers, needs to be at least subsys->attr_count big - * - */ -static int -nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, - struct nlmsghdr *nlh, struct nfattr *cda[]) -{ - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); - u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - u_int16_t attr_count = subsys->cb[cb_id].attr_count; - - /* check attribute lengths. */ - if (likely(nlh->nlmsg_len > min_len)) { - struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - nfattr_parse(cda, attr_count, attr, attrlen); - } - - /* implicit: if nlmsg_len == min_len, we return 0, and an empty - * (zeroed) cda[] array. The message is valid, but empty. */ - - return 0; -} - int nfnetlink_has_listeners(unsigned int group) { return netlink_has_listeners(nfnl, group); @@ -175,16 +107,7 @@ EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { - int err = 0; - - NETLINK_CB(skb).dst_group = group; - if (echo) - atomic_inc(&skb->users); - netlink_broadcast(nfnl, skb, pid, group, gfp_any()); - if (echo) - err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT); - - return err; + return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any()); } EXPORT_SYMBOL_GPL(nfnetlink_send); @@ -197,8 +120,8 @@ EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct nfnl_callback *nc; - struct nfnetlink_subsystem *ss; + const struct nfnl_callback *nc; + const struct nfnetlink_subsystem *ss; int type, err; if (security_netlink_recv(skb, CAP_NET_ADMIN)) @@ -212,9 +135,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) ss = nfnetlink_get_subsys(type); if (!ss) { #ifdef CONFIG_KMOD - /* don't call nfnl_unlock, since it would reenter - * with further packet processing */ - __nfnl_unlock(); + nfnl_unlock(); request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); nfnl_lock(); ss = nfnetlink_get_subsys(type); @@ -228,29 +149,31 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; { - u_int16_t attr_count = - ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count; - struct nfattr *cda[attr_count]; - - memset(cda, 0, sizeof(struct nfattr *) * attr_count); + int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + u_int16_t attr_count = ss->cb[cb_id].attr_count; + struct nlattr *cda[attr_count+1]; + + if (likely(nlh->nlmsg_len >= min_len)) { + struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + + err = nla_parse(cda, attr_count, attr, attrlen, + ss->cb[cb_id].policy); + if (err < 0) + return err; + } else + return -EINVAL; - err = nfnetlink_check_attributes(ss, nlh, cda); - if (err < 0) - return err; return nc->call(nfnl, skb, nlh, cda); } } -static void nfnetlink_rcv(struct sock *sk, int len) +static void nfnetlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - if (nfnl_trylock()) - return; - netlink_run_queue(sk, &qlen, nfnetlink_rcv_msg); - __nfnl_unlock(); - } while (qlen); + nfnl_lock(); + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + nfnl_unlock(); } static void __exit nfnetlink_exit(void) @@ -264,7 +187,7 @@ static int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 2351533a850..2c7bd2eb029 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -37,8 +37,9 @@ #endif #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE -#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ +#define NFULNL_TIMEOUT_DEFAULT HZ /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ +#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ #define PRINTR(x, args...) do { if (net_ratelimit()) \ printk(x, ## args); } while (0); @@ -152,6 +153,11 @@ instance_create(u_int16_t group_num, int pid) if (!inst) goto out_unlock; + if (!try_module_get(THIS_MODULE)) { + kfree(inst); + goto out_unlock; + } + INIT_HLIST_NODE(&inst->hlist); spin_lock_init(&inst->lock); /* needs to be two, since we _put() after creation */ @@ -166,10 +172,7 @@ instance_create(u_int16_t group_num, int pid) inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT; inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT; inst->copy_mode = NFULNL_COPY_PACKET; - inst->copy_range = 0xffff; - - if (!try_module_get(THIS_MODULE)) - goto out_free; + inst->copy_range = NFULNL_COPY_RANGE_MAX; hlist_add_head(&inst->hlist, &instance_table[instance_hashfn(group_num)]); @@ -181,14 +184,12 @@ instance_create(u_int16_t group_num, int pid) return inst; -out_free: - instance_put(inst); out_unlock: write_unlock_bh(&instances_lock); return NULL; } -static int __nfulnl_send(struct nfulnl_instance *inst); +static void __nfulnl_flush(struct nfulnl_instance *inst); static void __instance_destroy(struct nfulnl_instance *inst) @@ -202,17 +203,8 @@ __instance_destroy(struct nfulnl_instance *inst) /* then flush all pending packets from skb */ spin_lock_bh(&inst->lock); - if (inst->skb) { - /* timer "holds" one reference (we have one more) */ - if (del_timer(&inst->timer)) - instance_put(inst); - if (inst->qlen) - __nfulnl_send(inst); - if (inst->skb) { - kfree_skb(inst->skb); - inst->skb = NULL; - } - } + if (inst->skb) + __nfulnl_flush(inst); spin_unlock_bh(&inst->lock); /* and finally put the refcount */ @@ -244,11 +236,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, case NFULNL_COPY_PACKET: inst->copy_mode = mode; - /* we're using struct nfattr which has 16bit nfa_len */ - if (range > 0xffff) - inst->copy_range = 0xffff; - else - inst->copy_range = range; + inst->copy_range = min_t(unsigned int, + range, NFULNL_COPY_RANGE_MAX); break; default: @@ -310,8 +299,8 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) return 0; } -static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size, - unsigned int pkt_size) +static struct sk_buff * +nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -364,7 +353,18 @@ nlmsg_failure: return status; } -static void nfulnl_timer(unsigned long data) +static void +__nfulnl_flush(struct nfulnl_instance *inst) +{ + /* timer holds a reference */ + if (del_timer(&inst->timer)) + instance_put(inst); + if (inst->skb) + __nfulnl_send(inst); +} + +static void +nfulnl_timer(unsigned long data) { struct nfulnl_instance *inst = (struct nfulnl_instance *)data; @@ -409,36 +409,36 @@ __build_packet_message(struct nfulnl_instance *inst, pmsg.hw_protocol = skb->protocol; pmsg.hook = hooknum; - NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg); + NLA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg); if (prefix) - NFA_PUT(inst->skb, NFULA_PREFIX, plen, prefix); + NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix); if (indev) { tmp_uint = htonl(indev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), + NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); #else if (pf == PF_BRIDGE) { /* Case 1: outdev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ tmp_uint = htonl(indev->br_port->br->dev->ifindex); - NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ - NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); if (skb->nf_bridge && skb->nf_bridge->physindev) { tmp_uint = htonl(skb->nf_bridge->physindev->ifindex); - NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); } } @@ -448,28 +448,28 @@ __build_packet_message(struct nfulnl_instance *inst, if (outdev) { tmp_uint = htonl(outdev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), + NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); #else if (pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ tmp_uint = htonl(outdev->br_port->br->dev->ifindex); - NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); } else { /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ - NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); if (skb->nf_bridge && skb->nf_bridge->physoutdev) { tmp_uint = htonl(skb->nf_bridge->physoutdev->ifindex); - NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); } } @@ -478,15 +478,16 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->mark) { tmp_uint = htonl(skb->mark); - NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint); } - if (indev && skb->dev && skb->dev->hard_header_parse) { + if (indev && skb->dev) { struct nfulnl_msg_packet_hw phw; - int len = skb->dev->hard_header_parse((struct sk_buff *)skb, - phw.hw_addr); - phw.hw_addrlen = htons(len); - NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); + int len = dev_parse_header(skb, phw.hw_addr); + if (len > 0) { + phw.hw_addrlen = htons(len); + NLA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); + } } if (skb->tstamp.tv64) { @@ -495,7 +496,7 @@ __build_packet_message(struct nfulnl_instance *inst, ts.sec = cpu_to_be64(tv.tv_sec); ts.usec = cpu_to_be64(tv.tv_usec); - NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); + NLA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } /* UID */ @@ -503,9 +504,9 @@ __build_packet_message(struct nfulnl_instance *inst, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) { __be32 uid = htonl(skb->sk->sk_socket->file->f_uid); - /* need to unlock here since NFA_PUT may goto */ + /* need to unlock here since NLA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); - NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); + NLA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); } else read_unlock_bh(&skb->sk->sk_callback_lock); } @@ -513,28 +514,28 @@ __build_packet_message(struct nfulnl_instance *inst, /* local sequence number */ if (inst->flags & NFULNL_CFG_F_SEQ) { tmp_uint = htonl(inst->seq++); - NFA_PUT(inst->skb, NFULA_SEQ, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(inst->skb, NFULA_SEQ, sizeof(tmp_uint), &tmp_uint); } /* global sequence number */ if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) { tmp_uint = htonl(atomic_inc_return(&global_seq)); - NFA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint); } if (data_len) { - struct nfattr *nfa; - int size = NFA_LENGTH(data_len); + struct nlattr *nla; + int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < (int)NFA_SPACE(data_len)) { + if (skb_tailroom(inst->skb) < nla_total_size(data_len)) { printk(KERN_WARNING "nfnetlink_log: no tailroom!\n"); goto nlmsg_failure; } - nfa = (struct nfattr *)skb_put(inst->skb, NFA_ALIGN(size)); - nfa->nfa_type = NFULA_PAYLOAD; - nfa->nfa_len = size; + nla = (struct nlattr *)skb_put(inst->skb, nla_total_size(data_len)); + nla->nla_type = NFULA_PAYLOAD; + nla->nla_len = size; - if (skb_copy_bits(skb, 0, NFA_DATA(nfa), data_len)) + if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) BUG(); } @@ -543,7 +544,7 @@ __build_packet_message(struct nfulnl_instance *inst, nlmsg_failure: UDEBUG("nlmsg_failure\n"); -nfattr_failure: +nla_put_failure: PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n"); return -1; } @@ -590,32 +591,31 @@ nfulnl_log_packet(unsigned int pf, if (prefix) plen = strlen(prefix) + 1; - /* all macros expand to constant values at compile time */ /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... */ - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) - + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr)) - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + size = NLMSG_ALIGN(sizeof(struct nfgenmsg)) + + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #ifdef CONFIG_BRIDGE_NETFILTER - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif - + NFA_SPACE(sizeof(u_int32_t)) /* mark */ - + NFA_SPACE(sizeof(u_int32_t)) /* uid */ - + NFA_SPACE(plen) /* prefix */ - + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw)) - + NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp)); + + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(u_int32_t)) /* uid */ + + nla_total_size(plen) /* prefix */ + + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); UDEBUG("initial size=%u\n", size); spin_lock_bh(&inst->lock); if (inst->flags & NFULNL_CFG_F_SEQ) - size += NFA_SPACE(sizeof(u_int32_t)); + size += nla_total_size(sizeof(u_int32_t)); if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) - size += NFA_SPACE(sizeof(u_int32_t)); + size += nla_total_size(sizeof(u_int32_t)); qthreshold = inst->qthreshold; /* per-rule qthreshold overrides per-instance */ @@ -635,7 +635,7 @@ nfulnl_log_packet(unsigned int pf, else data_len = inst->copy_range; - size += NFA_SPACE(data_len); + size += nla_total_size(data_len); UDEBUG("copy_packet, therefore size now %u\n", size); break; @@ -643,17 +643,13 @@ nfulnl_log_packet(unsigned int pf, goto unlock_and_release; } - if (inst->qlen >= qthreshold || - (inst->skb && size > - skb_tailroom(inst->skb) - sizeof(struct nfgenmsg))) { + if (inst->skb && + size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ UDEBUG("flushing old skb\n"); - /* timer "holds" one reference (we have another one) */ - if (del_timer(&inst->timer)) - instance_put(inst); - __nfulnl_send(inst); + __nfulnl_flush(inst); } if (!inst->skb) { @@ -668,9 +664,11 @@ nfulnl_log_packet(unsigned int pf, __build_packet_message(inst, skb, data_len, pf, hooknum, in, out, li, prefix, plen); + if (inst->qlen >= qthreshold) + __nfulnl_flush(inst); /* timer_pending always called within inst->lock, so there * is no chance of a race here */ - if (!timer_pending(&inst->timer)) { + else if (!timer_pending(&inst->timer)) { instance_get(inst); inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100); add_timer(&inst->timer); @@ -706,7 +704,8 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { UDEBUG("node = %p\n", inst); - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } @@ -721,7 +720,7 @@ static struct notifier_block nfulnl_rtnl_notifier = { static int nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[]) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { return -ENOTSUPP; } @@ -732,34 +731,18 @@ static struct nf_logger nfulnl_logger = { .me = THIS_MODULE, }; -static const int nfula_min[NFULA_MAX] = { - [NFULA_PACKET_HDR-1] = sizeof(struct nfulnl_msg_packet_hdr), - [NFULA_MARK-1] = sizeof(u_int32_t), - [NFULA_TIMESTAMP-1] = sizeof(struct nfulnl_msg_packet_timestamp), - [NFULA_IFINDEX_INDEV-1] = sizeof(u_int32_t), - [NFULA_IFINDEX_OUTDEV-1]= sizeof(u_int32_t), - [NFULA_IFINDEX_PHYSINDEV-1] = sizeof(u_int32_t), - [NFULA_IFINDEX_PHYSOUTDEV-1] = sizeof(u_int32_t), - [NFULA_HWADDR-1] = sizeof(struct nfulnl_msg_packet_hw), - [NFULA_PAYLOAD-1] = 0, - [NFULA_PREFIX-1] = 0, - [NFULA_UID-1] = sizeof(u_int32_t), - [NFULA_SEQ-1] = sizeof(u_int32_t), - [NFULA_SEQ_GLOBAL-1] = sizeof(u_int32_t), -}; - -static const int nfula_cfg_min[NFULA_CFG_MAX] = { - [NFULA_CFG_CMD-1] = sizeof(struct nfulnl_msg_config_cmd), - [NFULA_CFG_MODE-1] = sizeof(struct nfulnl_msg_config_mode), - [NFULA_CFG_TIMEOUT-1] = sizeof(u_int32_t), - [NFULA_CFG_QTHRESH-1] = sizeof(u_int32_t), - [NFULA_CFG_NLBUFSIZ-1] = sizeof(u_int32_t), - [NFULA_CFG_FLAGS-1] = sizeof(u_int16_t), +static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { + [NFULA_CFG_CMD] = { .len = sizeof(struct nfulnl_msg_config_cmd) }, + [NFULA_CFG_MODE] = { .len = sizeof(struct nfulnl_msg_config_mode) }, + [NFULA_CFG_TIMEOUT] = { .type = NLA_U32 }, + [NFULA_CFG_QTHRESH] = { .type = NLA_U32 }, + [NFULA_CFG_NLBUFSIZ] = { .type = NLA_U32 }, + [NFULA_CFG_FLAGS] = { .type = NLA_U16 }, }; static int nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfula[]) + struct nlmsghdr *nlh, struct nlattr *nfula[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); @@ -768,16 +751,11 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); - if (nfattr_bad_size(nfula, NFULA_CFG_MAX, nfula_cfg_min)) { - UDEBUG("bad attribute size\n"); - return -EINVAL; - } - inst = instance_lookup_get(group_num); - if (nfula[NFULA_CFG_CMD-1]) { + if (nfula[NFULA_CFG_CMD]) { u_int8_t pf = nfmsg->nfgen_family; struct nfulnl_msg_config_cmd *cmd; - cmd = NFA_DATA(nfula[NFULA_CFG_CMD-1]); + cmd = nla_data(nfula[NFULA_CFG_CMD]); UDEBUG("found CFG_CMD for\n"); switch (cmd->command) { @@ -840,38 +818,38 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } } - if (nfula[NFULA_CFG_MODE-1]) { + if (nfula[NFULA_CFG_MODE]) { struct nfulnl_msg_config_mode *params; - params = NFA_DATA(nfula[NFULA_CFG_MODE-1]); + params = nla_data(nfula[NFULA_CFG_MODE]); nfulnl_set_mode(inst, params->copy_mode, ntohl(params->copy_range)); } - if (nfula[NFULA_CFG_TIMEOUT-1]) { + if (nfula[NFULA_CFG_TIMEOUT]) { __be32 timeout = - *(__be32 *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]); + *(__be32 *)nla_data(nfula[NFULA_CFG_TIMEOUT]); nfulnl_set_timeout(inst, ntohl(timeout)); } - if (nfula[NFULA_CFG_NLBUFSIZ-1]) { + if (nfula[NFULA_CFG_NLBUFSIZ]) { __be32 nlbufsiz = - *(__be32 *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]); + *(__be32 *)nla_data(nfula[NFULA_CFG_NLBUFSIZ]); nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); } - if (nfula[NFULA_CFG_QTHRESH-1]) { + if (nfula[NFULA_CFG_QTHRESH]) { __be32 qthresh = - *(__be32 *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]); + *(__be32 *)nla_data(nfula[NFULA_CFG_QTHRESH]); nfulnl_set_qthresh(inst, ntohl(qthresh)); } - if (nfula[NFULA_CFG_FLAGS-1]) { + if (nfula[NFULA_CFG_FLAGS]) { __be16 flags = - *(__be16 *)NFA_DATA(nfula[NFULA_CFG_FLAGS-1]); + *(__be16 *)nla_data(nfula[NFULA_CFG_FLAGS]); nfulnl_set_flags(inst, ntohs(flags)); } @@ -881,14 +859,15 @@ out: return ret; } -static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { +static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, .attr_count = NFULA_MAX, }, [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, - .attr_count = NFULA_CFG_MAX, }, + .attr_count = NFULA_CFG_MAX, + .policy = nfula_cfg_policy }, }; -static struct nfnetlink_subsystem nfulnl_subsys = { +static const struct nfnetlink_subsystem nfulnl_subsys = { .name = "log", .subsys_id = NFNL_SUBSYS_ULOG, .cb_count = NFULNL_MSG_MAX, @@ -972,22 +951,8 @@ static const struct seq_operations nful_seq_ops = { static int nful_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct iter_state *is; - int ret; - - is = kzalloc(sizeof(*is), GFP_KERNEL); - if (!is) - return -ENOMEM; - ret = seq_open(file, &nful_seq_ops); - if (ret < 0) - goto out_free; - seq = file->private_data; - seq->private = is; - return ret; -out_free: - kfree(is); - return ret; + return seq_open_private(file, &nful_seq_ops, + sizeof(struct iter_state)); } static const struct file_operations nful_file_ops = { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index bb65a38c816..3ceeffcf6f9 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -299,7 +299,7 @@ __nfqnl_set_mode(struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: queue->copy_mode = mode; - /* we're using struct nfattr which has 16bit nfa_len */ + /* we're using struct nlattr which has 16bit nla_len */ if (range > 0xffff) queue->copy_range = 0xffff; else @@ -353,18 +353,17 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, QDEBUG("entered\n"); - /* all macros expand to constant values at compile time */ - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + size = NLMSG_ALIGN(sizeof(struct nfgenmsg)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #ifdef CONFIG_BRIDGE_NETFILTER - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif - + NFA_SPACE(sizeof(u_int32_t)) /* mark */ - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_hw)) - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); outdev = entinf->outdev; @@ -389,7 +388,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, else data_len = queue->copy_range; - size += NFA_SPACE(data_len); + size += nla_total_size(data_len); break; default: @@ -417,33 +416,33 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, pmsg.hw_protocol = entskb->protocol; pmsg.hook = entinf->hook; - NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); indev = entinf->indev; if (indev) { tmp_uint = htonl(indev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); #else if (entinf->pf == PF_BRIDGE) { /* Case 1: indev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ tmp_uint = htonl(indev->br_port->br->dev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); if (entskb->nf_bridge && entskb->nf_bridge->physindev) { tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, + NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); } } @@ -453,27 +452,27 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (outdev) { tmp_uint = htonl(outdev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); + NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); #else if (entinf->pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ tmp_uint = htonl(outdev->br_port->br->dev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); } else { /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), + NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); if (entskb->nf_bridge && entskb->nf_bridge->physoutdev) { tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, + NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); } } @@ -482,17 +481,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->mark) { tmp_uint = htonl(entskb->mark); - NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); + NLA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); } - if (indev && entskb->dev - && entskb->dev->hard_header_parse) { + if (indev && entskb->dev) { struct nfqnl_msg_packet_hw phw; - - int len = entskb->dev->hard_header_parse(entskb, - phw.hw_addr); - phw.hw_addrlen = htons(len); - NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + int len = dev_parse_header(entskb, phw.hw_addr); + if (len) { + phw.hw_addrlen = htons(len); + NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + } } if (entskb->tstamp.tv64) { @@ -501,23 +499,23 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, ts.sec = cpu_to_be64(tv.tv_sec); ts.usec = cpu_to_be64(tv.tv_usec); - NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } if (data_len) { - struct nfattr *nfa; - int size = NFA_LENGTH(data_len); + struct nlattr *nla; + int size = nla_attr_size(data_len); - if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) { + if (skb_tailroom(skb) < nla_total_size(data_len)) { printk(KERN_WARNING "nf_queue: no tailroom!\n"); goto nlmsg_failure; } - nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); - nfa->nfa_type = NFQA_PAYLOAD; - nfa->nfa_len = size; + nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla->nla_type = NFQA_PAYLOAD; + nla->nla_len = size; - if (skb_copy_bits(entskb, 0, NFA_DATA(nfa), data_len)) + if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) BUG(); } @@ -525,7 +523,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, return skb; nlmsg_failure: -nfattr_failure: +nla_put_failure: if (skb) kfree_skb(skb); *errp = -EINVAL; @@ -619,6 +617,7 @@ static int nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) { int diff; + int err; diff = data_len - e->skb->len; if (diff < 0) { @@ -628,25 +627,18 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) if (data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); - return -ENOMEM; + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, data_len)) + if (!skb_make_writable(e->skb, data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, data, data_len); e->skb->ip_summed = CHECKSUM_NONE; @@ -734,6 +726,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) nfqnl_dev_drop(dev->ifindex); @@ -762,7 +757,8 @@ nfqnl_rcv_nl_event(struct notifier_block *this, struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } @@ -775,15 +771,15 @@ static struct notifier_block nfqnl_rtnl_notifier = { .notifier_call = nfqnl_rcv_nl_event, }; -static const int nfqa_verdict_min[NFQA_MAX] = { - [NFQA_VERDICT_HDR-1] = sizeof(struct nfqnl_msg_verdict_hdr), - [NFQA_MARK-1] = sizeof(u_int32_t), - [NFQA_PAYLOAD-1] = 0, +static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, + [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, }; static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[]) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -794,11 +790,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_queue_entry *entry; int err; - if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) { - QDEBUG("bad attribute size\n"); - return -EINVAL; - } - queue = instance_lookup_get(queue_num); if (!queue) return -ENODEV; @@ -808,12 +799,12 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, goto err_out_put; } - if (!nfqa[NFQA_VERDICT_HDR-1]) { + if (!nfqa[NFQA_VERDICT_HDR]) { err = -EINVAL; goto err_out_put; } - vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); + vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); verdict = ntohl(vhdr->verdict); if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { @@ -827,15 +818,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, goto err_out_put; } - if (nfqa[NFQA_PAYLOAD-1]) { - if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), - NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) + if (nfqa[NFQA_PAYLOAD]) { + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), + nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) verdict = NF_DROP; } - if (nfqa[NFQA_MARK-1]) + if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(*(__be32 *) - NFA_DATA(nfqa[NFQA_MARK-1])); + nla_data(nfqa[NFQA_MARK])); issue_verdict(entry, verdict); instance_put(queue); @@ -848,14 +839,14 @@ err_out_put: static int nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[]) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { return -ENOTSUPP; } -static const int nfqa_cfg_min[NFQA_CFG_MAX] = { - [NFQA_CFG_CMD-1] = sizeof(struct nfqnl_msg_config_cmd), - [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), +static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { + [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, + [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, }; static struct nf_queue_handler nfqh = { @@ -865,7 +856,7 @@ static struct nf_queue_handler nfqh = { static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[]) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -874,15 +865,10 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); - if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) { - QDEBUG("bad attribute size\n"); - return -EINVAL; - } - queue = instance_lookup_get(queue_num); - if (nfqa[NFQA_CFG_CMD-1]) { + if (nfqa[NFQA_CFG_CMD]) { struct nfqnl_msg_config_cmd *cmd; - cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); + cmd = nla_data(nfqa[NFQA_CFG_CMD]); QDEBUG("found CFG_CMD\n"); switch (cmd->command) { @@ -933,21 +919,21 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } } - if (nfqa[NFQA_CFG_PARAMS-1]) { + if (nfqa[NFQA_CFG_PARAMS]) { struct nfqnl_msg_config_params *params; if (!queue) { ret = -ENOENT; goto out_put; } - params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]); + params = nla_data(nfqa[NFQA_CFG_PARAMS]); nfqnl_set_mode(queue, params->copy_mode, ntohl(params->copy_range)); } - if (nfqa[NFQA_CFG_QUEUE_MAXLEN-1]) { + if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { __be32 *queue_maxlen; - queue_maxlen = NFA_DATA(nfqa[NFQA_CFG_QUEUE_MAXLEN-1]); + queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); spin_lock_bh(&queue->lock); queue->queue_maxlen = ntohl(*queue_maxlen); spin_unlock_bh(&queue->lock); @@ -958,16 +944,18 @@ out_put: return ret; } -static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { +static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, .attr_count = NFQA_MAX, }, [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, - .attr_count = NFQA_MAX, }, + .attr_count = NFQA_MAX, + .policy = nfqa_verdict_policy }, [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, - .attr_count = NFQA_CFG_MAX, }, + .attr_count = NFQA_CFG_MAX, + .policy = nfqa_cfg_policy }, }; -static struct nfnetlink_subsystem nfqnl_subsys = { +static const struct nfnetlink_subsystem nfqnl_subsys = { .name = "nf_queue", .subsys_id = NFNL_SUBSYS_QUEUE, .cb_count = NFQNL_MSG_MAX, @@ -1057,22 +1045,8 @@ static const struct seq_operations nfqnl_seq_ops = { static int nfqnl_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct iter_state *is; - int ret; - - is = kzalloc(sizeof(*is), GFP_KERNEL); - if (!is) - return -ENOMEM; - ret = seq_open(file, &nfqnl_seq_ops); - if (ret < 0) - goto out_free; - seq = file->private_data; - seq->private = is; - return ret; -out_free: - kfree(is); - return ret; + return seq_open_private(file, &nfqnl_seq_ops, + sizeof(struct iter_state)); } static const struct file_operations nfqnl_file_ops = { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index cc2baa6d5a7..d9a3bded0d0 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <linux/mutex.h> #include <linux/mm.h> +#include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp.h> @@ -795,7 +796,7 @@ int xt_proto_init(int af) #ifdef CONFIG_PROC_FS strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out; proc->data = (void *) ((unsigned long) af | (TABLE << 16)); @@ -803,14 +804,14 @@ int xt_proto_init(int af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out_remove_tables; proc->data = (void *) ((unsigned long) af | (MATCH << 16)); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out_remove_matches; proc->data = (void *) ((unsigned long) af | (TARGET << 16)); @@ -822,12 +823,12 @@ int xt_proto_init(int af) out_remove_matches: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); out_remove_tables: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); out: return -1; #endif @@ -841,15 +842,15 @@ void xt_proto_fini(int af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 51942856682..77eeae658d4 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -24,9 +24,10 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("iptables qdisc classification target module"); MODULE_ALIAS("ipt_CLASSIFY"); +MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -35,7 +36,7 @@ target(struct sk_buff **pskb, { const struct xt_classify_target_info *clinfo = targinfo; - (*pskb)->priority = clinfo->priority; + skb->priority = clinfo->priority; return XT_CONTINUE; } diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 5a00c544433..856793e8db7 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -23,17 +23,18 @@ #include <linux/ip.h> #include <net/checksum.h> -MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); +MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); MODULE_DESCRIPTION("IP tables CONNMARK matching module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_CONNMARK"); +MODULE_ALIAS("ip6t_CONNMARK"); #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_CONNMARK.h> #include <net/netfilter/nf_conntrack_ecache.h> static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -47,28 +48,28 @@ target(struct sk_buff **pskb, u_int32_t mark; u_int32_t newmark; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct) { switch(markinfo->mode) { case XT_CONNMARK_SET: newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; if (newmark != ct->mark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, *pskb); + nf_conntrack_event_cache(IPCT_MARK, skb); } break; case XT_CONNMARK_SAVE: newmark = (ct->mark & ~markinfo->mask) | - ((*pskb)->mark & markinfo->mask); + (skb->mark & markinfo->mask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, *pskb); + nf_conntrack_event_cache(IPCT_MARK, skb); } break; case XT_CONNMARK_RESTORE: - mark = (*pskb)->mark; + mark = skb->mark; diff = (ct->mark ^ mark) & markinfo->mask; - (*pskb)->mark = mark ^ diff; + skb->mark = mark ^ diff; break; } } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 63d73138c1b..021b5c8d20e 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -61,12 +61,11 @@ static void secmark_restore(struct sk_buff *skb) } } -static unsigned int target(struct sk_buff **pskb, const struct net_device *in, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct sk_buff *skb = *pskb; const struct xt_connsecmark_target_info *info = targinfo; switch (info->mode) { diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 798ab731009..6322a933ab7 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -25,7 +25,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_DSCP"); MODULE_ALIAS("ip6t_DSCP"); -static unsigned int target(struct sk_buff **pskb, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -33,20 +33,20 @@ static unsigned int target(struct sk_buff **pskb, const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT; + u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return NF_DROP; - ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK), + ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; } -static unsigned int target6(struct sk_buff **pskb, +static unsigned int target6(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -54,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb, const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT; + u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) return NF_DROP; - ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK), + ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index f30fe0baf7d..bc6503d77d7 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); static unsigned int -target_v0(struct sk_buff **pskb, +target_v0(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -31,12 +31,12 @@ target_v0(struct sk_buff **pskb, { const struct xt_mark_target_info *markinfo = targinfo; - (*pskb)->mark = markinfo->mark; + skb->mark = markinfo->mark; return XT_CONTINUE; } static unsigned int -target_v1(struct sk_buff **pskb, +target_v1(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -52,15 +52,15 @@ target_v1(struct sk_buff **pskb, break; case XT_MARK_AND: - mark = (*pskb)->mark & markinfo->mark; + mark = skb->mark & markinfo->mark; break; case XT_MARK_OR: - mark = (*pskb)->mark | markinfo->mark; + mark = skb->mark | markinfo->mark; break; } - (*pskb)->mark = mark; + skb->mark = mark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index d3594c7ccb2..9fb449ffbf8 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -20,7 +20,7 @@ MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int -nflog_target(struct sk_buff **pskb, +nflog_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -33,7 +33,7 @@ nflog_target(struct sk_buff **pskb, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(target->family, hooknum, *pskb, in, out, &li, + nf_log_packet(target->family, hooknum, skb, in, out, &li, "%s", info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 13f59f3e8c3..c3984e9f766 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index b7d6312fccc..4976ce18661 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -9,9 +9,10 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_NOTRACK"); +MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -19,16 +20,16 @@ target(struct sk_buff **pskb, const void *targinfo) { /* Previously seen (loopback)? Ignore. */ - if ((*pskb)->nfct != NULL) + if (skb->nfct != NULL) return XT_CONTINUE; /* Attach fake conntrack entry. If there is a real ct entry correspondig to this packet, it'll hang aroun till timing out. We don't deal with it for performance reasons. JK */ - (*pskb)->nfct = &nf_conntrack_untracked.ct_general; - (*pskb)->nfctinfo = IP_CT_NEW; - nf_conntrack_get((*pskb)->nfct); + skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); return XT_CONTINUE; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c83779a941a..235806eb6ec 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; -static unsigned int target(struct sk_buff **pskb, const struct net_device *in, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -47,7 +47,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, BUG(); } - (*pskb)->secmark = secmark; + skb->secmark = secmark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index d40f7e4b128..07435a602b1 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -39,7 +39,7 @@ optlen(const u_int8_t *opt, unsigned int offset) } static int -tcpmss_mangle_packet(struct sk_buff **pskb, +tcpmss_mangle_packet(struct sk_buff *skb, const struct xt_tcpmss_info *info, unsigned int tcphoff, unsigned int minlen) @@ -50,11 +50,11 @@ tcpmss_mangle_packet(struct sk_buff **pskb, u16 newmss; u8 *opt; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return -1; - tcplen = (*pskb)->len - tcphoff; - tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff); + tcplen = skb->len - tcphoff; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); /* Since it passed flags test in tcp match, we know it is is not a fragment, and has data >= tcp header length. SYN @@ -64,19 +64,19 @@ tcpmss_mangle_packet(struct sk_buff **pskb, if (tcplen != tcph->doff*4) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n", - (*pskb)->len); + skb->len); return -1; } if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu((*pskb)->dst) <= minlen) { + if (dst_mtu(skb->dst) <= minlen) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: " "unknown or invalid path-MTU (%u)\n", - dst_mtu((*pskb)->dst)); + dst_mtu(skb->dst)); return -1; } - newmss = dst_mtu((*pskb)->dst) - minlen; + newmss = dst_mtu(skb->dst) - minlen; } else newmss = info->mss; @@ -95,7 +95,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb, opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = newmss & 0x00ff; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(oldmss), htons(newmss), 0); return 0; } @@ -104,57 +104,53 @@ tcpmss_mangle_packet(struct sk_buff **pskb, /* * MSS Option not found ?! add it.. */ - if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), - TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) + if (skb_tailroom(skb) < TCPOLEN_MSS) { + if (pskb_expand_head(skb, 0, + TCPOLEN_MSS - skb_tailroom(skb), + GFP_ATOMIC)) return -1; - kfree_skb(*pskb); - *pskb = newskb; - tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff); + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); } - skb_put((*pskb), TCPOLEN_MSS); + skb_put(skb, TCPOLEN_MSS); opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = newmss & 0x00ff; - nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); + nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, oldval, ((__be16 *)tcph)[6], 0); return TCPOLEN_MSS; } static unsigned int -xt_tcpmss_target4(struct sk_buff **pskb, +xt_tcpmss_target4(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); __be16 newlen; int ret; - ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4, + ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) return NF_DROP; if (ret > 0) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); newlen = htons(ntohs(iph->tot_len) + ret); nf_csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; @@ -164,30 +160,30 @@ xt_tcpmss_target4(struct sk_buff **pskb, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static unsigned int -xt_tcpmss_target6(struct sk_buff **pskb, +xt_tcpmss_target6(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct ipv6hdr *ipv6h = ipv6_hdr(*pskb); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); u8 nexthdr; int tcphoff; int ret; nexthdr = ipv6h->nexthdr; - tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr); + tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 0) { WARN_ON(1); return NF_DROP; } - ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff, + ret = tcpmss_mangle_packet(skb, targinfo, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) return NF_DROP; if (ret > 0) { - ipv6h = ipv6_hdr(*pskb); + ipv6h = ipv6_hdr(skb); ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret); } return XT_CONTINUE; diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index 4df2dedcc0b..26c5d08ab2c 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -10,14 +10,14 @@ MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - (*pskb)->nf_trace = 1; + skb->nf_trace = 1; return XT_CONTINUE; } diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index dd4d79b8fc9..9ec50139b9a 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -2,18 +2,19 @@ * GPL (C) 2002 Martin Devera (devik@cdi.cz). */ #include <linux/module.h> +#include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connbytes.h> #include <net/netfilter/nf_conntrack.h> #include <asm/div64.h> -#include <asm/bitops.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); MODULE_ALIAS("ipt_connbytes"); +MODULE_ALIAS("ip6t_connbytes"); static bool match(const struct sk_buff *skb, diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e73fa9b46cf..9f67920af41 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -25,10 +25,11 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connmark.h> -MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); +MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); MODULE_DESCRIPTION("IP tables connmark match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connmark"); +MODULE_ALIAS("ip6t_connmark"); static bool match(const struct sk_buff *skb, diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 83224ec89cc..c2b1b24ee33 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -24,6 +24,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Match for DCCP protocol packets"); MODULE_ALIAS("ipt_dccp"); +MODULE_ALIAS("ip6t_dccp"); #define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ || (!!((invflag) & (option)) ^ (cond))) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index bd45f9d3f7d..19103678bf2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -21,6 +21,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> @@ -743,13 +744,13 @@ static int __init xt_hashlimit_init(void) printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; } - hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net); + hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net); if (!hashlimit_procdir4) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); goto err3; } - hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); + hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); if (!hashlimit_procdir6) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); @@ -757,7 +758,7 @@ static int __init xt_hashlimit_init(void) } return 0; err4: - remove_proc_entry("ipt_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", init_net.proc_net); err3: kmem_cache_destroy(hashlimit_cachep); err2: @@ -769,8 +770,8 @@ err1: static void __exit xt_hashlimit_fini(void) { - remove_proc_entry("ipt_hashlimit", proc_net); - remove_proc_entry("ip6t_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", init_net.proc_net); + remove_proc_entry("ip6t_hashlimit", init_net.proc_net); kmem_cache_destroy(hashlimit_cachep); xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); } diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 4fcca797150..f263a77e57b 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -1,5 +1,5 @@ -/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> - * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> +/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> + * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index c002153b80a..3358273a47b 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -13,6 +13,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kiran Kumar Immidi"); MODULE_DESCRIPTION("Match for SCTP protocol packets"); MODULE_ALIAS("ipt_sctp"); +MODULE_ALIAS("ip6t_sctp"); #ifdef DEBUG_SCTP #define duprintf(format, args...) printk(format , ## args) @@ -41,21 +42,21 @@ match_flags(const struct xt_sctp_flag_info *flag_info, static inline bool match_packet(const struct sk_buff *skb, unsigned int offset, - const u_int32_t *chunkmap, - int chunk_match_type, - const struct xt_sctp_flag_info *flag_info, - const int flag_count, + const struct xt_sctp_info *info, bool *hotdrop) { u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)]; sctp_chunkhdr_t _sch, *sch; + int chunk_match_type = info->chunk_match_type; + const struct xt_sctp_flag_info *flag_info = info->flag_info; + int flag_count = info->flag_count; #ifdef DEBUG_SCTP int i = 0; #endif if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) - SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap); + SCTP_CHUNKMAP_COPY(chunkmapcopy, info->chunkmap); do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); @@ -72,7 +73,7 @@ match_packet(const struct sk_buff *skb, duprintf("skb->len: %d\toffset: %d\n", skb->len, offset); - if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch->type)) { + if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) { switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ANY: if (match_flags(flag_info, flag_count, @@ -103,7 +104,7 @@ match_packet(const struct sk_buff *skb, switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ALL: - return SCTP_CHUNKMAP_IS_CLEAR(chunkmap); + return SCTP_CHUNKMAP_IS_CLEAR(info->chunkmap); case SCTP_CHUNK_MATCH_ANY: return false; case SCTP_CHUNK_MATCH_ONLY: @@ -147,9 +148,7 @@ match(const struct sk_buff *skb, && ntohs(sh->dest) <= info->dpts[1], XT_SCTP_DEST_PORTS, info->flags, info->invflags) && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), - info->chunkmap, info->chunk_match_type, - info->flag_info, info->flag_count, - hotdrop), + info, hotdrop), XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index cd5f6d758c6..84d401bfafa 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -22,6 +22,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("iptables TCP MSS match module"); MODULE_ALIAS("ipt_tcpmss"); +MODULE_ALIAS("ip6t_tcpmss"); static bool match(const struct sk_buff *skb, diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c new file mode 100644 index 00000000000..ef48bbd9357 --- /dev/null +++ b/net/netfilter/xt_time.c @@ -0,0 +1,269 @@ +/* + * xt_time + * Copyright © Jan Engelhardt <jengelh@computergmbh.de>, 2007 + * + * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org> + * This is a module which is used for time matching + * It is using some modified code from dietlibc (localtime() function) + * that you can find at http://www.fefe.de/dietlibc/ + * This file is distributed under the terms of the GNU General Public + * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl. + */ +#include <linux/ktime.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_time.h> + +struct xtm { + u_int8_t month; /* (1-12) */ + u_int8_t monthday; /* (1-31) */ + u_int8_t weekday; /* (1-7) */ + u_int8_t hour; /* (0-23) */ + u_int8_t minute; /* (0-59) */ + u_int8_t second; /* (0-59) */ + unsigned int dse; +}; + +extern struct timezone sys_tz; /* ouch */ + +static const u_int16_t days_since_year[] = { + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, +}; + +static const u_int16_t days_since_leapyear[] = { + 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, +}; + +/* + * Since time progresses forward, it is best to organize this array in reverse, + * to minimize lookup time. + */ +enum { + DSE_FIRST = 2039, +}; +static const u_int16_t days_since_epoch[] = { + /* 2039 - 2030 */ + 25202, 24837, 24472, 24106, 23741, 23376, 23011, 22645, 22280, 21915, + /* 2029 - 2020 */ + 21550, 21184, 20819, 20454, 20089, 19723, 19358, 18993, 18628, 18262, + /* 2019 - 2010 */ + 17897, 17532, 17167, 16801, 16436, 16071, 15706, 15340, 14975, 14610, + /* 2009 - 2000 */ + 14245, 13879, 13514, 13149, 12784, 12418, 12053, 11688, 11323, 10957, + /* 1999 - 1990 */ + 10592, 10227, 9862, 9496, 9131, 8766, 8401, 8035, 7670, 7305, + /* 1989 - 1980 */ + 6940, 6574, 6209, 5844, 5479, 5113, 4748, 4383, 4018, 3652, + /* 1979 - 1970 */ + 3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0, +}; + +static inline bool is_leap(unsigned int y) +{ + return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0); +} + +/* + * Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp. + * Since we match against days and daytime, the SSTE value needs to be + * computed back into human-readable dates. + * + * This is done in three separate functions so that the most expensive + * calculations are done last, in case a "simple match" can be found earlier. + */ +static inline unsigned int localtime_1(struct xtm *r, time_t time) +{ + unsigned int v, w; + + /* Each day has 86400s, so finding the hour/minute is actually easy. */ + v = time % 86400; + r->second = v % 60; + w = v / 60; + r->minute = w % 60; + r->hour = w / 60; + return v; +} + +static inline void localtime_2(struct xtm *r, time_t time) +{ + /* + * Here comes the rest (weekday, monthday). First, divide the SSTE + * by seconds-per-day to get the number of _days_ since the epoch. + */ + r->dse = time / 86400; + + /* 1970-01-01 (w=0) was a Thursday (4). */ + r->weekday = (4 + r->dse) % 7; +} + +static void localtime_3(struct xtm *r, time_t time) +{ + unsigned int year, i, w = r->dse; + + /* + * In each year, a certain number of days-since-the-epoch have passed. + * Find the year that is closest to said days. + * + * Consider, for example, w=21612 (2029-03-04). Loop will abort on + * dse[i] <= w, which happens when dse[i] == 21550. This implies + * year == 2009. w will then be 62. + */ + for (i = 0, year = DSE_FIRST; days_since_epoch[i] > w; + ++i, --year) + /* just loop */; + + w -= days_since_epoch[i]; + + /* + * By now we have the current year, and the day of the year. + * r->yearday = w; + * + * On to finding the month (like above). In each month, a certain + * number of days-since-New Year have passed, and find the closest + * one. + * + * Consider w=62 (in a non-leap year). Loop will abort on + * dsy[i] < w, which happens when dsy[i] == 31+28 (i == 2). + * Concludes i == 2, i.e. 3rd month => March. + * + * (A different approach to use would be to subtract a monthlength + * from w repeatedly while counting.) + */ + if (is_leap(year)) { + for (i = ARRAY_SIZE(days_since_leapyear) - 1; + i > 0 && days_since_year[i] > w; --i) + /* just loop */; + } else { + for (i = ARRAY_SIZE(days_since_year) - 1; + i > 0 && days_since_year[i] > w; --i) + /* just loop */; + } + + r->month = i + 1; + r->monthday = w - days_since_year[i] + 1; + return; +} + +static bool xt_time_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) +{ + const struct xt_time_info *info = matchinfo; + unsigned int packet_time; + struct xtm current_time; + s64 stamp; + + /* + * We cannot use get_seconds() instead of __net_timestamp() here. + * Suppose you have two rules: + * 1. match before 13:00 + * 2. match after 13:00 + * If you match against processing time (get_seconds) it + * may happen that the same packet matches both rules if + * it arrived at the right moment before 13:00. + */ + if (skb->tstamp.tv64 == 0) + __net_timestamp((struct sk_buff *)skb); + + stamp = skb->tstamp.tv64; + do_div(stamp, NSEC_PER_SEC); + + if (info->flags & XT_TIME_LOCAL_TZ) + /* Adjust for local timezone */ + stamp -= 60 * sys_tz.tz_minuteswest; + + /* + * xt_time will match when _all_ of the following hold: + * - 'now' is in the global time range date_start..date_end + * - 'now' is in the monthday mask + * - 'now' is in the weekday mask + * - 'now' is in the daytime range time_start..time_end + * (and by default, libxt_time will set these so as to match) + */ + + if (stamp < info->date_start || stamp > info->date_stop) + return false; + + packet_time = localtime_1(¤t_time, stamp); + + if (info->daytime_start < info->daytime_stop) { + if (packet_time < info->daytime_start || + packet_time > info->daytime_stop) + return false; + } else { + if (packet_time < info->daytime_start && + packet_time > info->daytime_stop) + return false; + } + + localtime_2(¤t_time, stamp); + + if (!(info->weekdays_match & (1 << current_time.weekday))) + return false; + + /* Do not spend time computing monthday if all days match anyway */ + if (info->monthdays_match != XT_TIME_ALL_MONTHDAYS) { + localtime_3(¤t_time, stamp); + if (!(info->monthdays_match & (1 << current_time.monthday))) + return false; + } + + return true; +} + +static bool xt_time_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) +{ + struct xt_time_info *info = matchinfo; + + if (info->daytime_start > XT_TIME_MAX_DAYTIME || + info->daytime_stop > XT_TIME_MAX_DAYTIME) { + printk(KERN_WARNING "xt_time: invalid argument - start or " + "stop time greater than 23:59:59\n"); + return false; + } + + return true; +} + +static struct xt_match xt_time_reg[] __read_mostly = { + { + .name = "time", + .family = AF_INET, + .match = xt_time_match, + .matchsize = sizeof(struct xt_time_info), + .checkentry = xt_time_check, + .me = THIS_MODULE, + }, + { + .name = "time", + .family = AF_INET6, + .match = xt_time_match, + .matchsize = sizeof(struct xt_time_info), + .checkentry = xt_time_check, + .me = THIS_MODULE, + }, +}; + +static int __init xt_time_init(void) +{ + return xt_register_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg)); +} + +static void __exit xt_time_exit(void) +{ + xt_unregister_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg)); +} + +module_init(xt_time_init); +module_exit(xt_time_exit); +MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); +MODULE_DESCRIPTION("netfilter time match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_time"); +MODULE_ALIAS("ip6t_time"); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c060e3f991f..ba0ca8d3f77 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -130,7 +130,7 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, return -EINVAL; nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) - if (nla->nla_type == NLBL_CIPSOV4_A_TAG) { + if (nla_type(nla) == NLBL_CIPSOV4_A_TAG) { if (iter >= CIPSO_V4_TAG_MAXCNT) return -EINVAL; doi_def->tags[iter++] = nla_get_u8(nla); @@ -192,13 +192,13 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) - switch (nla_b->nla_type) { + switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSLVLLOC: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_LOC_LVLS) @@ -240,7 +240,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { struct nlattr *lvl_loc; struct nlattr *lvl_rem; @@ -265,13 +265,13 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) - switch (nla_b->nla_type) { + switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSCATLOC: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_LOC_CATS) @@ -315,7 +315,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { struct nlattr *cat_loc; struct nlattr *cat_rem; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index b6c844b7e1c..b3675bd7db3 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -178,11 +178,9 @@ int netlbl_domhsh_init(u32 size) for (iter = 0; iter < hsh_tbl->size; iter++) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); - rcu_read_lock(); spin_lock(&netlbl_domhsh_lock); rcu_assign_pointer(netlbl_domhsh, hsh_tbl); spin_unlock(&netlbl_domhsh_lock); - rcu_read_unlock(); return 0; } @@ -222,7 +220,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, entry->valid = 1; INIT_RCU_HEAD(&entry->rcu); - ret_val = 0; rcu_read_lock(); if (entry->domain != NULL) { bkt = netlbl_domhsh_hash(entry->domain); @@ -233,7 +230,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, else ret_val = -EEXIST; spin_unlock(&netlbl_domhsh_lock); - } else if (entry->domain == NULL) { + } else { INIT_LIST_HEAD(&entry->list); spin_lock(&netlbl_domhsh_def_lock); if (rcu_dereference(netlbl_domhsh_def) == NULL) @@ -241,9 +238,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, else ret_val = -EEXIST; spin_unlock(&netlbl_domhsh_def_lock); - } else - ret_val = -EINVAL; - + } audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); if (audit_buf != NULL) { audit_log_format(audit_buf, @@ -262,7 +257,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } - rcu_read_unlock(); if (ret_val != 0) { @@ -313,38 +307,30 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) struct audit_buffer *audit_buf; rcu_read_lock(); - if (domain != NULL) - entry = netlbl_domhsh_search(domain, 0); - else - entry = netlbl_domhsh_search(domain, 1); + entry = netlbl_domhsh_search(domain, (domain != NULL ? 0 : 1)); if (entry == NULL) goto remove_return; switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain); - if (ret_val != 0) - goto remove_return; + cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, + entry->domain); break; } - ret_val = 0; if (entry != rcu_dereference(netlbl_domhsh_def)) { spin_lock(&netlbl_domhsh_lock); if (entry->valid) { entry->valid = 0; list_del_rcu(&entry->list); - } else - ret_val = -ENOENT; + ret_val = 0; + } spin_unlock(&netlbl_domhsh_lock); } else { spin_lock(&netlbl_domhsh_def_lock); if (entry->valid) { entry->valid = 0; rcu_assign_pointer(netlbl_domhsh_def, NULL); - } else - ret_val = -ENOENT; + ret_val = 0; + } spin_unlock(&netlbl_domhsh_def_lock); } @@ -357,11 +343,10 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) audit_log_end(audit_buf); } - if (ret_val == 0) - call_rcu(&entry->rcu, netlbl_domhsh_free_entry); - remove_return: rcu_read_unlock(); + if (ret_val == 0) + call_rcu(&entry->rcu, netlbl_domhsh_free_entry); return ret_val; } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 5315dacc522..56483377997 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -85,11 +85,9 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { */ void netlbl_mgmt_protocount_inc(void) { - rcu_read_lock(); spin_lock(&netlabel_mgmt_protocount_lock); netlabel_mgmt_protocount++; spin_unlock(&netlabel_mgmt_protocount_lock); - rcu_read_unlock(); } /** @@ -103,12 +101,10 @@ void netlbl_mgmt_protocount_inc(void) */ void netlbl_mgmt_protocount_dec(void) { - rcu_read_lock(); spin_lock(&netlabel_mgmt_protocount_lock); if (netlabel_mgmt_protocount > 0) netlabel_mgmt_protocount--; spin_unlock(&netlabel_mgmt_protocount_lock); - rcu_read_unlock(); } /** diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 5c303c68af1..348292450de 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -84,12 +84,10 @@ static void netlbl_unlabel_acceptflg_set(u8 value, struct audit_buffer *audit_buf; u8 old_val; - rcu_read_lock(); - old_val = netlabel_unlabel_acceptflg; spin_lock(&netlabel_unlabel_acceptflg_lock); + old_val = netlabel_unlabel_acceptflg; netlabel_unlabel_acceptflg = value; spin_unlock(&netlabel_unlabel_acceptflg_lock); - rcu_read_unlock(); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, audit_info); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5681ce3aebc..4f994c0fb3f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -57,6 +57,7 @@ #include <linux/selinux.h> #include <linux/mutex.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/scm.h> #include <net/netlink.h> @@ -79,7 +80,7 @@ struct netlink_sock { struct netlink_callback *cb; struct mutex *cb_mutex; struct mutex cb_def_mutex; - void (*data_ready)(struct sock *sk, int bytes); + void (*netlink_rcv)(struct sk_buff *skb); struct module *module; }; @@ -88,7 +89,12 @@ struct netlink_sock { static inline struct netlink_sock *nlk_sk(struct sock *sk) { - return (struct netlink_sock *)sk; + return container_of(sk, struct netlink_sock, sk); +} + +static inline int netlink_is_kernel(struct sock *sk) +{ + return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } struct nl_pid_hash { @@ -121,7 +127,6 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); static void netlink_destroy_callback(struct netlink_callback *cb); -static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb); static DEFINE_RWLOCK(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); @@ -210,7 +215,7 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) +static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) { struct nl_pid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; @@ -220,7 +225,7 @@ static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) read_lock(&nl_table_lock); head = nl_pid_hashfn(hash, pid); sk_for_each(sk, node, head) { - if (nlk_sk(sk)->pid == pid) { + if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) { sock_hold(sk); goto found; } @@ -327,7 +332,7 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 pid) { struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; @@ -340,7 +345,7 @@ static int netlink_insert(struct sock *sk, u32 pid) head = nl_pid_hashfn(hash, pid); len = 0; sk_for_each(osk, node, head) { - if (nlk_sk(osk)->pid == pid) + if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid)) break; len++; } @@ -383,15 +388,15 @@ static struct proto netlink_proto = { .obj_size = sizeof(struct netlink_sock), }; -static int __netlink_create(struct socket *sock, struct mutex *cb_mutex, - int protocol) +static int __netlink_create(struct net *net, struct socket *sock, + struct mutex *cb_mutex, int protocol) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) return -ENOMEM; @@ -411,7 +416,7 @@ static int __netlink_create(struct socket *sock, struct mutex *cb_mutex, return 0; } -static int netlink_create(struct socket *sock, int protocol) +static int netlink_create(struct net *net, struct socket *sock, int protocol) { struct module *module = NULL; struct mutex *cb_mutex; @@ -440,7 +445,7 @@ static int netlink_create(struct socket *sock, int protocol) cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); - if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0) + if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0) goto out_module; nlk = nlk_sk(sock->sk); @@ -477,6 +482,7 @@ static int netlink_release(struct socket *sock) if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { + .net = sk->sk_net, .protocol = sk->sk_protocol, .pid = nlk->pid, }; @@ -487,7 +493,7 @@ static int netlink_release(struct socket *sock) module_put(nlk->module); netlink_table_grab(); - if (nlk->flags & NETLINK_KERNEL_SOCKET) { + if (netlink_is_kernel(sk)) { kfree(nl_table[sk->sk_protocol].listeners); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].registered = 0; @@ -505,6 +511,7 @@ static int netlink_release(struct socket *sock) static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; @@ -518,6 +525,8 @@ retry: netlink_table_grab(); head = nl_pid_hashfn(hash, pid); sk_for_each(osk, node, head) { + if ((osk->sk_net != net)) + continue; if (nlk_sk(osk)->pid == pid) { /* Bind collision, search negative pid values. */ pid = rover--; @@ -529,7 +538,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, pid); + err = netlink_insert(sk, net, pid); if (err == -EADDRINUSE) goto retry; @@ -594,6 +603,7 @@ static int netlink_realloc_groups(struct sock *sk) static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; @@ -615,7 +625,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return -EINVAL; } else { err = nladdr->nl_pid ? - netlink_insert(sk, nladdr->nl_pid) : + netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) return err; @@ -698,19 +708,17 @@ static void netlink_overrun(struct sock *sk) static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { - int protocol = ssk->sk_protocol; struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(protocol, pid); + sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); - if ((nlk->pid == 0 && !nlk->data_ready) || - (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid)) { + if (sock->sk_state == NETLINK_CONNECTED && + nlk->dst_pid != nlk_sk(ssk)->pid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -754,7 +762,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, test_bit(0, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); if (!timeo) { - if (!ssk || nlk_sk(ssk)->pid == 0) + if (!ssk || netlink_is_kernel(ssk)) netlink_overrun(sk); sock_put(sk); kfree_skb(skb); @@ -783,7 +791,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, return 0; } -int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol) +int netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; @@ -824,7 +832,34 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb, return skb; } -int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock) +static inline void netlink_rcv_wake(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (skb_queue_empty(&sk->sk_receive_queue)) + clear_bit(0, &nlk->state); + if (!test_bit(0, &nlk->state)) + wake_up_interruptible(&nlk->wait); +} + +static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) +{ + int ret; + struct netlink_sock *nlk = nlk_sk(sk); + + ret = -ECONNREFUSED; + if (nlk->netlink_rcv != NULL) { + ret = skb->len; + skb_set_owner_r(skb, sk); + nlk->netlink_rcv(skb); + } + kfree_skb(skb); + sock_put(sk); + return ret; +} + +int netlink_unicast(struct sock *ssk, struct sk_buff *skb, + u32 pid, int nonblock) { struct sock *sk; int err; @@ -839,13 +874,16 @@ retry: kfree_skb(skb); return PTR_ERR(sk); } + if (netlink_is_kernel(sk)) + return netlink_unicast_kernel(sk, skb); + err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); if (err == 1) goto retry; if (err) return err; - return netlink_sendskb(sk, skb, ssk->sk_protocol); + return netlink_sendskb(sk, skb); } int netlink_has_listeners(struct sock *sk, unsigned int group) @@ -853,7 +891,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) int res = 0; unsigned long *listeners; - BUG_ON(!(nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET)); + BUG_ON(!netlink_is_kernel(sk)); rcu_read_lock(); listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); @@ -883,6 +921,7 @@ static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff struct netlink_broadcast_data { struct sock *exclude_sk; + struct net *net; u32 pid; u32 group; int failure; @@ -905,6 +944,9 @@ static inline int do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; + if ((sk->sk_net != p->net)) + goto out; + if (p->failure) { netlink_overrun(sk); goto out; @@ -943,6 +985,7 @@ out: int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, u32 group, gfp_t allocation) { + struct net *net = ssk->sk_net; struct netlink_broadcast_data info; struct hlist_node *node; struct sock *sk; @@ -950,6 +993,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, skb = netlink_trim(skb, allocation); info.exclude_sk = ssk; + info.net = net; info.pid = pid; info.group = group; info.failure = 0; @@ -998,6 +1042,9 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; + if (sk->sk_net != p->exclude_sk->sk_net) + goto out; + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1129,16 +1176,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } -static inline void netlink_rcv_wake(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (skb_queue_empty(&sk->sk_receive_queue)) - clear_bit(0, &nlk->state); - if (!test_bit(0, &nlk->state)) - wake_up_interruptible(&nlk->wait); -} - static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { @@ -1286,11 +1323,7 @@ out: static void netlink_data_ready(struct sock *sk, int len) { - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->data_ready) - nlk->data_ready(sk, len); - netlink_rcv_wake(sk); + BUG(); } /* @@ -1300,8 +1333,8 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, unsigned int groups, - void (*input)(struct sock *sk, int len), +netlink_kernel_create(struct net *net, int unit, unsigned int groups, + void (*input)(struct sk_buff *skb), struct mutex *cb_mutex, struct module *module) { struct socket *sock; @@ -1317,7 +1350,7 @@ netlink_kernel_create(int unit, unsigned int groups, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(sock, cb_mutex, unit) < 0) + if (__netlink_create(net, sock, cb_mutex, unit) < 0) goto out_sock_release; if (groups < 32) @@ -1330,20 +1363,24 @@ netlink_kernel_create(int unit, unsigned int groups, sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) - nlk_sk(sk)->data_ready = input; + nlk_sk(sk)->netlink_rcv = input; - if (netlink_insert(sk, 0)) + if (netlink_insert(sk, net, 0)) goto out_sock_release; nlk = nlk_sk(sk); nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); - nl_table[unit].groups = groups; - nl_table[unit].listeners = listeners; - nl_table[unit].cb_mutex = cb_mutex; - nl_table[unit].module = module; - nl_table[unit].registered = 1; + if (!nl_table[unit].registered) { + nl_table[unit].groups = groups; + nl_table[unit].listeners = listeners; + nl_table[unit].cb_mutex = cb_mutex; + nl_table[unit].module = module; + nl_table[unit].registered = 1; + } else { + kfree(listeners); + } netlink_table_ungrab(); return sk; @@ -1509,7 +1546,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1530,9 +1567,8 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, sock_put(sk); /* We successfully started a dump, by returning -EINTR we - * signal the queue mangement to interrupt processing of - * any netlink messages so userspace gets a chance to read - * the results. */ + * signal not to send ACK even if it was requested. + */ return -EINTR; } @@ -1551,7 +1587,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (!skb) { struct sock *sk; - sk = netlink_lookup(in_skb->sk->sk_protocol, + sk = netlink_lookup(in_skb->sk->sk_net, + in_skb->sk->sk_protocol, NETLINK_CB(in_skb).pid); if (sk) { sk->sk_err = ENOBUFS; @@ -1569,13 +1606,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); } -static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, +int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *)) { struct nlmsghdr *nlh; int err; while (skb->len >= nlmsg_total_size(0)) { + int msglen; + nlh = nlmsg_hdr(skb); err = 0; @@ -1584,91 +1623,31 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, /* Only requests are handled by the kernel */ if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) - goto skip; + goto ack; /* Skip control messages */ if (nlh->nlmsg_type < NLMSG_MIN_TYPE) - goto skip; + goto ack; err = cb(skb, nlh); - if (err == -EINTR) { - /* Not an error, but we interrupt processing */ - netlink_queue_skip(nlh, skb); - return err; - } -skip: + if (err == -EINTR) + goto skip; + +ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) netlink_ack(skb, nlh, err); - netlink_queue_skip(nlh, skb); +skip: + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); } return 0; } /** - * nelink_run_queue - Process netlink receive queue. - * @sk: Netlink socket containing the queue - * @qlen: Place to store queue length upon entry - * @cb: Callback function invoked for each netlink message found - * - * Processes as much as there was in the queue upon entry and invokes - * a callback function for each netlink message found. The callback - * function may refuse a message by returning a negative error code - * but setting the error pointer to 0 in which case this function - * returns with a qlen != 0. - * - * qlen must be initialized to 0 before the initial entry, afterwards - * the function may be called repeatedly until qlen reaches 0. - * - * The callback function may return -EINTR to signal that processing - * of netlink messages shall be interrupted. In this case the message - * currently being processed will NOT be requeued onto the receive - * queue. - */ -void netlink_run_queue(struct sock *sk, unsigned int *qlen, - int (*cb)(struct sk_buff *, struct nlmsghdr *)) -{ - struct sk_buff *skb; - - if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue)) - *qlen = skb_queue_len(&sk->sk_receive_queue); - - for (; *qlen; (*qlen)--) { - skb = skb_dequeue(&sk->sk_receive_queue); - if (netlink_rcv_skb(skb, cb)) { - if (skb->len) - skb_queue_head(&sk->sk_receive_queue, skb); - else { - kfree_skb(skb); - (*qlen)--; - } - break; - } - - kfree_skb(skb); - } -} - -/** - * netlink_queue_skip - Skip netlink message while processing queue. - * @nlh: Netlink message to be skipped - * @skb: Socket buffer containing the netlink messages. - * - * Pulls the given netlink message off the socket buffer so the next - * call to netlink_queue_run() will not reconsider the message. - */ -static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) -{ - int msglen = NLMSG_ALIGN(nlh->nlmsg_len); - - if (msglen > skb->len) - msglen = skb->len; - - skb_pull(skb, msglen); -} - -/** * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use * @skb: notification message @@ -1702,6 +1681,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, #ifdef CONFIG_PROC_FS struct nl_seq_iter { + struct net *net; int link; int hash_idx; }; @@ -1719,6 +1699,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { + if (iter->net != s->sk_net) + continue; if (off == pos) { iter->link = i; iter->hash_idx = j; @@ -1748,11 +1730,14 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); - s = sk_next(v); + iter = seq->private; + s = v; + do { + s = sk_next(s); + } while (s && (iter->net != s->sk_net)); if (s) return s; - iter = seq->private; i = iter->link; j = iter->hash_idx + 1; @@ -1761,6 +1746,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); + while (s && (iter->net != s->sk_net)) + s = sk_next(s); if (s) { iter->link = i; iter->hash_idx = j; @@ -1815,31 +1802,35 @@ static const struct seq_operations netlink_seq_ops = { static int netlink_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; struct nl_seq_iter *iter; - int err; - iter = kzalloc(sizeof(*iter), GFP_KERNEL); + iter = __seq_open_private(file, &netlink_seq_ops, sizeof(*iter)); if (!iter) return -ENOMEM; - err = seq_open(file, &netlink_seq_ops); - if (err) { - kfree(iter); - return err; + iter->net = get_proc_net(inode); + if (!iter->net) { + seq_release_private(inode, file); + return -ENXIO; } - seq = file->private_data; - seq->private = iter; return 0; } +static int netlink_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct nl_seq_iter *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations netlink_seq_fops = { .owner = THIS_MODULE, .open = netlink_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = netlink_seq_release, }; #endif @@ -1881,11 +1872,32 @@ static struct net_proto_family netlink_family_ops = { .owner = THIS_MODULE, /* for consistency 8) */ }; +static int __net_init netlink_net_init(struct net *net) +{ +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + return -ENOMEM; +#endif + return 0; +} + +static void __net_exit netlink_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "netlink"); +#endif +} + +static struct pernet_operations netlink_net_ops = { + .init = netlink_net_init, + .exit = netlink_net_exit, +}; + static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; int i; - unsigned long max; + unsigned long limit; unsigned int order; int err = proto_register(&netlink_proto, 0); @@ -1899,13 +1911,13 @@ static int __init netlink_proto_init(void) goto panic; if (num_physpages >= (128 * 1024)) - max = num_physpages >> (21 - PAGE_SHIFT); + limit = num_physpages >> (21 - PAGE_SHIFT); else - max = num_physpages >> (23 - PAGE_SHIFT); + limit = num_physpages >> (23 - PAGE_SHIFT); - order = get_bitmask_order(max) - 1 + PAGE_SHIFT; - max = (1UL << order) / sizeof(struct hlist_head); - order = get_bitmask_order(max > UINT_MAX ? UINT_MAX : max) - 1; + order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; + limit = (1UL << order) / sizeof(struct hlist_head); + order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; for (i = 0; i < MAX_LINKS; i++) { struct nl_pid_hash *hash = &nl_table[i].hash; @@ -1926,9 +1938,7 @@ static int __init netlink_proto_init(void) } sock_register(&netlink_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create("netlink", 0, &netlink_seq_fops); -#endif + register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. */ rtnetlink_init(); out: @@ -1940,7 +1950,7 @@ panic: core_initcall(netlink_proto_init); EXPORT_SYMBOL(netlink_ack); -EXPORT_SYMBOL(netlink_run_queue); +EXPORT_SYMBOL(netlink_rcv_skb); EXPORT_SYMBOL(netlink_broadcast); EXPORT_SYMBOL(netlink_dump_start); EXPORT_SYMBOL(netlink_kernel_create); diff --git a/net/netlink/attr.c b/net/netlink/attr.c index e4d7bed99c2..ec39d12c242 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -27,12 +27,12 @@ static int validate_nla(struct nlattr *nla, int maxtype, const struct nla_policy *policy) { const struct nla_policy *pt; - int minlen = 0, attrlen = nla_len(nla); + int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); - if (nla->nla_type <= 0 || nla->nla_type > maxtype) + if (type <= 0 || type > maxtype) return 0; - pt = &policy[nla->nla_type]; + pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); @@ -149,7 +149,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); nla_for_each_attr(nla, head, len, rem) { - u16 type = nla->nla_type; + u16 type = nla_type(nla); if (type > 0 && type <= maxtype) { if (policy) { @@ -185,7 +185,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) int rem; nla_for_each_attr(nla, head, len, rem) - if (nla->nla_type == attrtype) + if (nla_type(nla) == attrtype) return nla; return NULL; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 8c11ca4a212..150579a2146 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -22,22 +22,14 @@ struct sock *genl_sock = NULL; static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ -static void genl_lock(void) +static inline void genl_lock(void) { mutex_lock(&genl_mutex); } -static int genl_trylock(void) -{ - return !mutex_trylock(&genl_mutex); -} - -static void genl_unlock(void) +static inline void genl_unlock(void) { mutex_unlock(&genl_mutex); - - if (genl_sock && genl_sock->sk_receive_queue.qlen) - genl_sock->sk_data_ready(genl_sock, 0); } #define GENL_FAM_TAB_SIZE 16 @@ -478,16 +470,11 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ops->doit(skb, &info); } -static void genl_rcv(struct sock *sk, int len) +static void genl_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - if (genl_trylock()) - return; - netlink_run_queue(sk, &qlen, genl_rcv_msg); - genl_unlock(); - } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen); + genl_lock(); + netlink_rcv_skb(skb, &genl_rcv_msg); + genl_unlock(); } /************************************************************************** @@ -782,8 +769,8 @@ static int __init genl_init(void) netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); /* we'll bump the group number right afterwards */ - genl_sock = netlink_kernel_create(NETLINK_GENERIC, 0, genl_rcv, - NULL, THIS_MODULE); + genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0, + genl_rcv, NULL, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index dc9273295a3..3a4d479ea64 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -27,6 +27,7 @@ #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -105,6 +106,9 @@ static int nr_device_event(struct notifier_block *this, unsigned long event, voi { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; @@ -408,15 +412,18 @@ static struct proto nr_proto = { .obj_size = sizeof(struct nr_sock), }; -static int nr_create(struct socket *sock, int protocol) +static int nr_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct nr_sock *nr; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) return -ENOMEM; nr = nr_sk(sk); @@ -458,7 +465,7 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; nr = nr_sk(sk); @@ -1447,9 +1454,9 @@ static int __init nr_proto_init(void) nr_loopback_init(); - proc_net_fops_create("nr", S_IRUGO, &nr_info_fops); - proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops); - proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops); + proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops); + proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops); + proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops); out: return rc; fail: @@ -1477,9 +1484,9 @@ static void __exit nr_exit(void) { int i; - proc_net_remove("nr"); - proc_net_remove("nr_neigh"); - proc_net_remove("nr_nodes"); + proc_net_remove(&init_net, "nr"); + proc_net_remove(&init_net, "nr_neigh"); + proc_net_remove(&init_net, "nr_nodes"); nr_loopback_clear(); nr_rt_free(); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index c7b5d930e73..8c68da5ef0a 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -95,8 +95,9 @@ static int nr_rebuild_header(struct sk_buff *skb) #endif -static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int nr_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { unsigned char *buff = skb_push(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); @@ -193,6 +194,12 @@ static struct net_device_stats *nr_get_stats(struct net_device *dev) return &nr->stats; } +static const struct header_ops nr_header_ops = { + .create = nr_header, + .rebuild= nr_rebuild_header, +}; + + void nr_setup(struct net_device *dev) { dev->mtu = NR_MAX_PACKET_SIZE; @@ -200,11 +207,10 @@ void nr_setup(struct net_device *dev) dev->open = nr_open; dev->stop = nr_close; - dev->hard_header = nr_header; + dev->header_ops = &nr_header_ops; dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_NETROM; - dev->rebuild_header = nr_rebuild_header; dev->set_mac_address = nr_set_mac_address; /* New-style flags. */ diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 24fe4a66d29..e943c16552a 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -580,7 +580,7 @@ static struct net_device *nr_ax25_dev_get(char *devname) { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -598,7 +598,7 @@ struct net_device *nr_dev_first(void) struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -618,7 +618,7 @@ struct net_device *nr_dev_get(ax25_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1322d62b5d9..d0936506b73 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -61,6 +61,7 @@ #include <linux/wireless.h> #include <linux/kernel.h> #include <linux/kmod.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -251,6 +252,9 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct struct sock *sk; struct sockaddr_pkt *spkt; + if (dev->nd_net != &init_net) + goto out; + /* * When we registered the protocol we saved the socket in the data * field for just this event. @@ -343,7 +347,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(saddr->spkt_device); + dev = dev_get_by_name(&init_net, saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -385,7 +389,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, skb_reset_network_header(skb); /* Try to align data part correctly */ - if (dev->hard_header) { + if (dev->header_ops) { skb->data -= dev->hard_header_len; skb->tail -= dev->hard_header_len; if (len < dev->hard_header_len) @@ -451,6 +455,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet int skb_len = skb->len; unsigned int snaplen, res; + if (dev->nd_net != &init_net) + goto drop; + if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -459,7 +466,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet skb->dev = dev; - if (dev->hard_header) { + if (dev->header_ops) { /* The device has an explicit notion of ll header, exported to higher levels. @@ -512,10 +519,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; - sll->sll_halen = 0; - if (dev->hard_header_parse) - sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + sll->sll_halen = dev_parse_header(skb, sll->sll_addr); PACKET_SKB_CB(skb)->origlen = skb->len; @@ -567,13 +572,16 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe struct sk_buff *copy_skb = NULL; struct timeval tv; + if (dev->nd_net != &init_net) + goto drop; + if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev->hard_header) { + if (dev->header_ops) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { @@ -640,18 +648,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe h->tp_snaplen = snaplen; h->tp_mac = macoff; h->tp_net = netoff; - if (skb->tstamp.tv64 == 0) { - __net_timestamp(skb); - sock_enable_timestamp(sk); - } - tv = ktime_to_timeval(skb->tstamp); + if (skb->tstamp.tv64) + tv = ktime_to_timeval(skb->tstamp); + else + do_gettimeofday(&tv); h->tp_sec = tv.tv_sec; h->tp_usec = tv.tv_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); - sll->sll_halen = 0; - if (dev->hard_header_parse) - sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; @@ -733,7 +738,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, } - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -756,16 +761,10 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, skb_reserve(skb, LL_RESERVED_SPACE(dev)); skb_reset_network_header(skb); - if (dev->hard_header) { - int res; - err = -EINVAL; - res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); - if (sock->type != SOCK_DGRAM) { - skb_reset_tail_pointer(skb); - skb->len = 0; - } else if (res < 0) - goto out_free; - } + err = -EINVAL; + if (sock->type == SOCK_DGRAM && + dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) + goto out_free; /* Returns -EFAULT on error */ err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); @@ -928,7 +927,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add return -EINVAL; strlcpy(name,uaddr->sa_data,sizeof(name)); - dev = dev_get_by_name(name); + dev = dev_get_by_name(&init_net, name); if (dev) { err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); @@ -955,7 +954,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_ifindex) { err = -ENODEV; - dev = dev_get_by_index(sll->sll_ifindex); + dev = dev_get_by_index(&init_net, sll->sll_ifindex); if (dev == NULL) goto out; } @@ -977,13 +976,16 @@ static struct proto packet_proto = { * Create a packet of type SOCK_PACKET. */ -static int packet_create(struct socket *sock, int protocol) +static int packet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct packet_sock *po; __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (!capable(CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && @@ -993,7 +995,7 @@ static int packet_create(struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1); if (sk == NULL) goto out; @@ -1149,7 +1151,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(pkt_sk(sk)->ifindex); + dev = dev_get_by_index(&init_net, pkt_sk(sk)->ifindex); if (dev) { strlcpy(uaddr->sa_data, dev->name, 15); dev_put(dev); @@ -1174,7 +1176,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(po->ifindex); + dev = dev_get_by_index(&init_net, po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -1226,7 +1228,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) rtnl_lock(); err = -ENODEV; - dev = __dev_get_by_index(mreq->mr_ifindex); + dev = __dev_get_by_index(&init_net, mreq->mr_ifindex); if (!dev) goto done; @@ -1280,7 +1282,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(ml->ifindex); + dev = dev_get_by_index(&init_net, ml->ifindex); if (dev) { packet_dev_mc(dev, ml, -1); dev_put(dev); @@ -1308,7 +1310,7 @@ static void packet_flush_mclist(struct sock *sk) struct net_device *dev; po->mclist = ml->next; - if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { + if ((dev = dev_get_by_index(&init_net, ml->ifindex)) != NULL) { packet_dev_mc(dev, ml, -1); dev_put(dev); } @@ -1465,6 +1467,9 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct hlist_node *node; struct net_device *dev = data; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + read_lock(&packet_sklist_lock); sk_for_each(sk, node, &packet_sklist) { struct packet_sock *po = pkt_sk(sk); @@ -1618,11 +1623,6 @@ static struct vm_operations_struct packet_mmap_ops = { .close =packet_mm_close, }; -static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order) -{ - return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1); -} - static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) { int i; @@ -1952,7 +1952,7 @@ static const struct file_operations packet_seq_fops = { static void __exit packet_exit(void) { - proc_net_remove("packet"); + proc_net_remove(&init_net, "packet"); unregister_netdevice_notifier(&packet_netdev_notifier); sock_unregister(PF_PACKET); proto_unregister(&packet_proto); @@ -1967,7 +1967,7 @@ static int __init packet_init(void) sock_register(&packet_family_ops); register_netdevice_notifier(&packet_netdev_notifier); - proc_net_fops_create("packet", 0, &packet_seq_fops); + proc_net_fops_create(&init_net, "packet", 0, &packet_seq_fops); out: return rc; } diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 8b31759ee8b..7f807b30cfb 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -5,7 +5,7 @@ menuconfig RFKILL tristate "RF switch subsystem support" help Say Y here if you want to have control over RF switches - found on many WiFi, Bluetooth and IRDA cards. + found on many WiFi and Bluetooth cards. To compile this driver as a module, choose M here: the module will be called rfkill. @@ -22,3 +22,10 @@ config RFKILL_INPUT To compile this driver as a module, choose M here: the module will be called rfkill-input. + +# LED trigger support +config RFKILL_LEDS + bool + depends on RFKILL && LEDS_TRIGGERS + default y + diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index 9f746be5885..d1e9d68f8ba 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -17,6 +17,8 @@ #include <linux/init.h> #include <linux/rfkill.h> +#include "rfkill-input.h" + MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>"); MODULE_DESCRIPTION("Input layer to RF switch connector"); MODULE_LICENSE("GPL"); @@ -81,6 +83,7 @@ static void rfkill_schedule_toggle(struct rfkill_task *task) static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN); static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH); +static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB); static void rfkill_event(struct input_handle *handle, unsigned int type, unsigned int code, int down) @@ -93,6 +96,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type, case KEY_BLUETOOTH: rfkill_schedule_toggle(&rfkill_bt); break; + case KEY_UWB: + rfkill_schedule_toggle(&rfkill_uwb); + break; default: break; } @@ -140,13 +146,18 @@ static void rfkill_disconnect(struct input_handle *handle) static const struct input_device_id rfkill_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT(EV_KEY) }, - .keybit = { [LONG(KEY_WLAN)] = BIT(KEY_WLAN) }, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) }, + }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) }, }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT(EV_KEY) }, - .keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) }, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) }, }, { } }; diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h new file mode 100644 index 00000000000..4dae5006fc7 --- /dev/null +++ b/net/rfkill/rfkill-input.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2007 Ivo van Doorn + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#ifndef __RFKILL_INPUT_H +#define __RFKILL_INPUT_H + +void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state); + +#endif /* __RFKILL_INPUT_H */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index db3395bfbcf..51d151c0e96 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 Ivo van Doorn + * Copyright (C) 2006 - 2007 Ivo van Doorn * Copyright (C) 2007 Dmitry Torokhov * * This program is free software; you can redistribute it and/or modify @@ -37,6 +37,22 @@ static DEFINE_MUTEX(rfkill_mutex); static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX]; + +static void rfkill_led_trigger(struct rfkill *rfkill, + enum rfkill_state state) +{ +#ifdef CONFIG_RFKILL_LEDS + struct led_trigger *led = &rfkill->led_trigger; + + if (!led->name) + return; + if (state == RFKILL_STATE_OFF) + led_trigger_event(led, LED_OFF); + else + led_trigger_event(led, LED_FULL); +#endif /* CONFIG_RFKILL_LEDS */ +} + static int rfkill_toggle_radio(struct rfkill *rfkill, enum rfkill_state state) { @@ -48,8 +64,10 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, if (state != rfkill->state) { retval = rfkill->toggle_radio(rfkill->data, state); - if (!retval) + if (!retval) { rfkill->state = state; + rfkill_led_trigger(rfkill, state); + } } mutex_unlock(&rfkill->mutex); @@ -106,8 +124,8 @@ static ssize_t rfkill_type_show(struct device *dev, case RFKILL_TYPE_BLUETOOTH: type = "bluetooth"; break; - case RFKILL_TYPE_IRDA: - type = "irda"; + case RFKILL_TYPE_UWB: + type = "ultrawideband"; break; default: BUG(); @@ -172,6 +190,10 @@ static ssize_t rfkill_claim_store(struct device *dev, if (error) return error; + if (rfkill->user_claim_unsupported) { + error = -EOPNOTSUPP; + goto out_unlock; + } if (rfkill->user_claim != claim) { if (!claim) rfkill_toggle_radio(rfkill, @@ -179,9 +201,10 @@ static ssize_t rfkill_claim_store(struct device *dev, rfkill->user_claim = claim; } +out_unlock: mutex_unlock(&rfkill_mutex); - return count; + return error ? error : count; } static struct device_attribute rfkill_dev_attrs[] = { @@ -281,7 +304,7 @@ static void rfkill_remove_switch(struct rfkill *rfkill) /** * rfkill_allocate - allocate memory for rfkill structure. * @parent: device that has rf switch on it - * @type: type of the switch (wlan, bluetooth, irda) + * @type: type of the switch (RFKILL_TYPE_*) * * This function should be called by the network driver when it needs * rfkill structure. Once the structure is allocated the driver shoud @@ -328,6 +351,26 @@ void rfkill_free(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_free); +static void rfkill_led_trigger_register(struct rfkill *rfkill) +{ +#ifdef CONFIG_RFKILL_LEDS + int error; + + rfkill->led_trigger.name = rfkill->dev.bus_id; + error = led_trigger_register(&rfkill->led_trigger); + if (error) + rfkill->led_trigger.name = NULL; +#endif /* CONFIG_RFKILL_LEDS */ +} + +static void rfkill_led_trigger_unregister(struct rfkill *rfkill) +{ +#ifdef CONFIG_RFKILL_LEDS + if (rfkill->led_trigger.name) + led_trigger_unregister(&rfkill->led_trigger); +#endif +} + /** * rfkill_register - Register a rfkill structure. * @rfkill: rfkill structure to be registered @@ -357,6 +400,7 @@ int rfkill_register(struct rfkill *rfkill) rfkill_remove_switch(rfkill); return error; } + rfkill_led_trigger_register(rfkill); return 0; } @@ -372,6 +416,7 @@ EXPORT_SYMBOL(rfkill_register); */ void rfkill_unregister(struct rfkill *rfkill) { + rfkill_led_trigger_unregister(rfkill); device_del(&rfkill->dev); rfkill_remove_switch(rfkill); put_device(&rfkill->dev); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 976c3cc86a2..509defe53ee 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -26,6 +26,7 @@ #include <linux/sockios.h> #include <linux/net.h> #include <linux/stat.h> +#include <net/net_namespace.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -196,6 +197,9 @@ static int rose_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; @@ -498,15 +502,18 @@ static struct proto rose_proto = { .obj_size = sizeof(struct rose_sock), }; -static int rose_create(struct socket *sock, int protocol) +static int rose_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct rose_sock *rose; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return -ENOMEM; rose = rose_sk(sk); @@ -544,7 +551,7 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return NULL; rose = rose_sk(sk); @@ -1576,10 +1583,10 @@ static int __init rose_proto_init(void) rose_add_loopback_neigh(); - proc_net_fops_create("rose", S_IRUGO, &rose_info_fops); - proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops); - proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops); - proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops); + proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops); + proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops); + proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops); + proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops); out: return rc; fail: @@ -1606,10 +1613,10 @@ static void __exit rose_exit(void) { int i; - proc_net_remove("rose"); - proc_net_remove("rose_neigh"); - proc_net_remove("rose_nodes"); - proc_net_remove("rose_routes"); + proc_net_remove(&init_net, "rose"); + proc_net_remove(&init_net, "rose_neigh"); + proc_net_remove(&init_net, "rose_nodes"); + proc_net_remove(&init_net, "rose_routes"); rose_loopback_clear(); rose_rt_free(); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 8d88795dc66..1b6741f1d74 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -35,8 +35,9 @@ #include <net/ax25.h> #include <net/rose.h> -static int rose_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int rose_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned len) { unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2); @@ -148,6 +149,11 @@ static struct net_device_stats *rose_get_stats(struct net_device *dev) return netdev_priv(dev); } +static const struct header_ops rose_header_ops = { + .create = rose_header, + .rebuild= rose_rebuild_header, +}; + void rose_setup(struct net_device *dev) { dev->mtu = ROSE_MAX_PACKET_SIZE - 2; @@ -155,11 +161,10 @@ void rose_setup(struct net_device *dev) dev->open = rose_open; dev->stop = rose_close; - dev->hard_header = rose_header; + dev->header_ops = &rose_header_ops; dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; dev->addr_len = ROSE_ADDR_LEN; dev->type = ARPHRD_ROSE; - dev->rebuild_header = rose_rebuild_header; dev->set_mac_address = rose_set_mac_address; /* New-style flags. */ diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 96f61a71b25..540c0f26ffe 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -583,7 +583,7 @@ static struct net_device *rose_ax25_dev_get(char *devname) { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -601,7 +601,7 @@ struct net_device *rose_dev_first(void) struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -619,7 +619,7 @@ struct net_device *rose_dev_get(rose_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; @@ -636,7 +636,7 @@ static int rose_dev_exists(rose_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c58fa0d1be2..c680017f5c8 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -14,6 +14,8 @@ #include <linux/skbuff.h> #include <linux/poll.h> #include <linux/proc_fs.h> +#include <linux/key-type.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" @@ -605,13 +607,16 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, /* * create an RxRPC socket */ -static int rxrpc_create(struct socket *sock, int protocol) +static int rxrpc_create(struct net *net, struct socket *sock, int protocol) { struct rxrpc_sock *rx; struct sock *sk; _enter("%p,%d", sock, protocol); + if (net != &init_net) + return -EAFNOSUPPORT; + /* we support transport protocol UDP only */ if (protocol != PF_INET) return -EPROTONOSUPPORT; @@ -622,7 +627,7 @@ static int rxrpc_create(struct socket *sock, int protocol) sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); if (!sk) return -ENOMEM; @@ -829,8 +834,8 @@ static int __init af_rxrpc_init(void) } #ifdef CONFIG_PROC_FS - proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops); - proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops); + proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops); + proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops); #endif return 0; @@ -868,8 +873,8 @@ static void __exit af_rxrpc_exit(void) _debug("flush scheduled work"); flush_workqueue(rxrpc_workqueue); - proc_net_remove("rxrpc_conns"); - proc_net_remove("rxrpc_calls"); + proc_net_remove(&init_net, "rxrpc_conns"); + proc_net_remove(&init_net, "rxrpc_calls"); destroy_workqueue(rxrpc_workqueue); kmem_cache_destroy(rxrpc_call_jar); _leave(""); diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 7e049ff6ae6..9a8ff684da7 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/net.h> #include <linux/skbuff.h> -#include <linux/key.h> +#include <linux/key-type.h> #include <linux/crypto.h> #include <net/sock.h> #include <net/af_rxrpc.h> @@ -40,7 +40,6 @@ struct key_type key_type_rxrpc = { .destroy = rxrpc_destroy, .describe = rxrpc_describe, }; - EXPORT_SYMBOL(key_type_rxrpc); /* @@ -330,5 +329,32 @@ error: _leave(" = -ENOMEM [ins %d]", ret); return -ENOMEM; } - EXPORT_SYMBOL(rxrpc_get_server_data_key); + +/** + * rxrpc_get_null_key - Generate a null RxRPC key + * @keyname: The name to give the key. + * + * Generate a null RxRPC key that can be used to indicate anonymous security is + * required for a particular domain. + */ +struct key *rxrpc_get_null_key(const char *keyname) +{ + struct key *key; + int ret; + + key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current, + KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); + if (IS_ERR(key)) + return key; + + ret = key_instantiate_and_link(key, NULL, 0, NULL, NULL); + if (ret < 0) { + key_revoke(key); + key_put(key); + return ERR_PTR(ret); + } + + return key; +} +EXPORT_SYMBOL(rxrpc_get_null_key); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ac3cabdca78..eebefb6ef13 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -135,9 +135,8 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn) tmpbuf.x[2] = 0; tmpbuf.x[3] = htonl(conn->security_ix); - memset(sg, 0, sizeof(sg)); - sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf)); - sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf)); + sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); + sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv)); @@ -180,9 +179,8 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, desc.info = iv.x; desc.flags = 0; - memset(sg, 0, sizeof(sg)); - sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf)); - sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf)); + sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); + sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); memcpy(sechdr, &tmpbuf, sizeof(tmpbuf)); @@ -227,9 +225,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, desc.info = iv.x; desc.flags = 0; - memset(sg, 0, sizeof(sg[0]) * 2); - sg_set_buf(&sg[0], sechdr, sizeof(rxkhdr)); - sg_set_buf(&sg[1], &rxkhdr, sizeof(rxkhdr)); + sg_init_one(&sg[0], sechdr, sizeof(rxkhdr)); + sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr)); crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr)); /* we want to encrypt the skbuff in-place */ @@ -240,7 +237,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, len = data_size + call->conn->size_align - 1; len &= ~(call->conn->size_align - 1); - skb_to_sgvec(skb, sg, 0, len); + sg_init_table(sg, skb_to_sgvec(skb, sg, 0, len)); crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); _leave(" = 0"); @@ -290,9 +287,8 @@ static int rxkad_secure_packet(const struct rxrpc_call *call, tmpbuf.x[0] = sp->hdr.callNumber; tmpbuf.x[1] = x; - memset(&sg, 0, sizeof(sg)); - sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf)); - sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf)); + sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); + sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); x = ntohl(tmpbuf.x[1]); @@ -332,20 +328,23 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, struct rxrpc_skb_priv *sp; struct blkcipher_desc desc; struct rxrpc_crypt iv; - struct scatterlist sg[2]; + struct scatterlist sg[16]; struct sk_buff *trailer; u32 data_size, buf; u16 check; + int nsg; _enter(""); sp = rxrpc_skb(skb); /* we want to decrypt the skbuff in-place */ - if (skb_cow_data(skb, 0, &trailer) < 0) + nsg = skb_cow_data(skb, 0, &trailer); + if (nsg < 0 || nsg > 16) goto nomem; - skb_to_sgvec(skb, sg, 0, 8); + sg_init_table(sg, nsg); + sg_mark_end(sg, skb_to_sgvec(skb, sg, 0, 8)); /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -426,7 +425,8 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, goto nomem; } - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nsg); + sg_mark_end(sg, skb_to_sgvec(skb, sg, 0, skb->len)); /* decrypt from the session key */ payload = call->conn->key->payload.data; @@ -521,9 +521,8 @@ static int rxkad_verify_packet(const struct rxrpc_call *call, tmpbuf.x[0] = call->call_id; tmpbuf.x[1] = x; - memset(&sg, 0, sizeof(sg)); - sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf)); - sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf)); + sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); + sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); x = ntohl(tmpbuf.x[1]); @@ -690,16 +689,20 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response) static void rxkad_sg_set_buf2(struct scatterlist sg[2], void *buf, size_t buflen) { + int nsg = 1; - memset(sg, 0, sizeof(sg)); + sg_init_table(sg, 2); sg_set_buf(&sg[0], buf, buflen); if (sg[0].offset + buflen > PAGE_SIZE) { /* the buffer was split over two pages */ sg[0].length = PAGE_SIZE - sg[0].offset; sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length); + nsg++; } + sg_mark_end(sg, nsg); + ASSERTCMP(sg[0].length + sg[1].length, ==, buflen); } @@ -712,7 +715,7 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn, { struct blkcipher_desc desc; struct rxrpc_crypt iv; - struct scatterlist ssg[2], dsg[2]; + struct scatterlist sg[2]; /* continue encrypting from where we left off */ memcpy(&iv, s2->session_key, sizeof(iv)); @@ -720,9 +723,8 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn, desc.info = iv.x; desc.flags = 0; - rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted)); - memcpy(dsg, ssg, sizeof(dsg)); - crypto_blkcipher_encrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted)); + rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted)); + crypto_blkcipher_encrypt_iv(&desc, sg, sg, sizeof(resp->encrypted)); } /* @@ -817,7 +819,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, { struct blkcipher_desc desc; struct rxrpc_crypt iv, key; - struct scatterlist ssg[1], dsg[1]; + struct scatterlist sg[1]; struct in_addr addr; unsigned life; time_t issue, now; @@ -850,9 +852,8 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, desc.info = iv.x; desc.flags = 0; - sg_init_one(&ssg[0], ticket, ticket_len); - memcpy(dsg, ssg, sizeof(dsg)); - crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, ticket_len); + sg_init_one(&sg[0], ticket, ticket_len); + crypto_blkcipher_decrypt_iv(&desc, sg, sg, ticket_len); p = ticket; end = p + ticket_len; @@ -961,7 +962,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, const struct rxrpc_crypt *session_key) { struct blkcipher_desc desc; - struct scatterlist ssg[2], dsg[2]; + struct scatterlist sg[2]; struct rxrpc_crypt iv; _enter(",,%08x%08x", @@ -979,9 +980,8 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, desc.info = iv.x; desc.flags = 0; - rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted)); - memcpy(dsg, ssg, sizeof(dsg)); - crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted)); + rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted)); + crypto_blkcipher_decrypt_iv(&desc, sg, sg, sizeof(resp->encrypted)); mutex_unlock(&rxkad_ci_mutex); _leave(""); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 8a74cac0be8..9c15c4888d1 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -2,9 +2,7 @@ # Traffic control configuration. # -menu "QoS and/or fair queueing" - -config NET_SCHED +menuconfig NET_SCHED bool "QoS and/or fair queueing" select NET_SCH_FIFO ---help--- @@ -41,9 +39,6 @@ config NET_SCHED The available schedulers are listed in the following questions; you can say Y to as many as you like. If unsure, say N now. -config NET_SCH_FIFO - bool - if NET_SCHED comment "Queueing/Scheduling" @@ -447,6 +442,17 @@ config NET_ACT_IPT To compile this code as a module, choose M here: the module will be called ipt. +config NET_ACT_NAT + tristate "Stateless NAT" + depends on NET_CLS_ACT + select NETFILTER + ---help--- + Say Y here to do stateless NAT on IPv4 packets. You should use + netfilter for NAT unless you know what you are doing. + + To compile this code as a module, choose M here: the + module will be called nat. + config NET_ACT_PEDIT tristate "Packet Editing" depends on NET_CLS_ACT @@ -489,4 +495,5 @@ config NET_CLS_IND endif # NET_SCHED -endmenu +config NET_SCH_FIFO + bool diff --git a/net/sched/Makefile b/net/sched/Makefile index b67c36f65cf..81ecbe8e7dc 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o +obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 6b407ece953..fa006e06ce3 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -202,11 +202,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed from earlier kernels */ - - /* iptables targets take a double skb pointer in case the skb - * needs to be replaced. We don't own the skb, so this must not - * happen. The pskb_expand_head above should make sure of this */ - ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, + ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL, ipt->tcfi_hook, ipt->tcfi_t->u.kernel.target, ipt->tcfi_t->data); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 579578944ae..c3fde9180f9 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_mirred.h> @@ -73,7 +74,7 @@ static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); if (parm->ifindex) { - dev = __dev_get_by_index(parm->ifindex); + dev = __dev_get_by_index(&init_net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { @@ -165,7 +166,7 @@ bad_mirred: return TC_ACT_SHOT; } - skb2 = skb_clone(skb, GFP_ATOMIC); + skb2 = skb_act_clone(skb, GFP_ATOMIC); if (skb2 == NULL) goto bad_mirred; if (m->tcfm_eaction != TCA_EGRESS_MIRROR && diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c new file mode 100644 index 00000000000..c96273bcaf9 --- /dev/null +++ b/net/sched/act_nat.c @@ -0,0 +1,322 @@ +/* + * Stateless NAT actions + * + * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/tc_act/tc_nat.h> +#include <net/act_api.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/netlink.h> +#include <net/tc_act/tc_nat.h> +#include <net/tcp.h> +#include <net/udp.h> + + +#define NAT_TAB_MASK 15 +static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1]; +static u32 nat_idx_gen; +static DEFINE_RWLOCK(nat_lock); + +static struct tcf_hashinfo nat_hash_info = { + .htab = tcf_nat_ht, + .hmask = NAT_TAB_MASK, + .lock = &nat_lock, +}; + +static int tcf_nat_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct rtattr *tb[TCA_NAT_MAX]; + struct tc_nat *parm; + int ret = 0; + struct tcf_nat *p; + struct tcf_common *pc; + + if (rta == NULL || rtattr_parse_nested(tb, TCA_NAT_MAX, rta) < 0) + return -EINVAL; + + if (tb[TCA_NAT_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_NAT_PARMS - 1]) < sizeof(*parm)) + return -EINVAL; + parm = RTA_DATA(tb[TCA_NAT_PARMS - 1]); + + pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, + &nat_idx_gen, &nat_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + p = to_tcf_nat(pc); + ret = ACT_P_CREATED; + } else { + p = to_tcf_nat(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &nat_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&p->tcf_lock); + p->old_addr = parm->old_addr; + p->new_addr = parm->new_addr; + p->mask = parm->mask; + p->flags = parm->flags; + + p->tcf_action = parm->action; + spin_unlock_bh(&p->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &nat_hash_info); + + return ret; +} + +static int tcf_nat_cleanup(struct tc_action *a, int bind) +{ + struct tcf_nat *p = a->priv; + + return tcf_hash_release(&p->common, bind, &nat_hash_info); +} + +static int tcf_nat(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_nat *p = a->priv; + struct iphdr *iph; + __be32 old_addr; + __be32 new_addr; + __be32 mask; + __be32 addr; + int egress; + int action; + int ihl; + + spin_lock(&p->tcf_lock); + + p->tcf_tm.lastuse = jiffies; + old_addr = p->old_addr; + new_addr = p->new_addr; + mask = p->mask; + egress = p->flags & TCA_NAT_FLAG_EGRESS; + action = p->tcf_action; + + p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.packets++; + + spin_unlock(&p->tcf_lock); + + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(*iph))) + goto drop; + + iph = ip_hdr(skb); + + if (egress) + addr = iph->saddr; + else + addr = iph->daddr; + + if (!((old_addr ^ addr) & mask)) { + if (skb_cloned(skb) && + !skb_clone_writable(skb, sizeof(*iph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + new_addr &= mask; + new_addr |= addr & ~mask; + + /* Rewrite IP header */ + iph = ip_hdr(skb); + if (egress) + iph->saddr = new_addr; + else + iph->daddr = new_addr; + + nf_csum_replace4(&iph->check, addr, new_addr); + } + + ihl = iph->ihl * 4; + + /* It would be nice to share code with stateful NAT. */ + switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { + case IPPROTO_TCP: + { + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, ihl + sizeof(*tcph)) || + (skb_cloned(skb) && + !skb_clone_writable(skb, ihl + sizeof(*tcph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + goto drop; + + tcph = (void *)(skb_network_header(skb) + ihl); + nf_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); + break; + } + case IPPROTO_UDP: + { + struct udphdr *udph; + + if (!pskb_may_pull(skb, ihl + sizeof(*udph)) || + (skb_cloned(skb) && + !skb_clone_writable(skb, ihl + sizeof(*udph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + goto drop; + + udph = (void *)(skb_network_header(skb) + ihl); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&udph->check, skb, addr, + new_addr, 1); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + break; + } + case IPPROTO_ICMP: + { + struct icmphdr *icmph; + + if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + goto drop; + + icmph = (void *)(skb_network_header(skb) + ihl); + + if ((icmph->type != ICMP_DEST_UNREACH) && + (icmph->type != ICMP_TIME_EXCEEDED) && + (icmph->type != ICMP_PARAMETERPROB)) + break; + + iph = (void *)(icmph + 1); + if (egress) + addr = iph->daddr; + else + addr = iph->saddr; + + if ((old_addr ^ addr) & mask) + break; + + if (skb_cloned(skb) && + !skb_clone_writable(skb, + ihl + sizeof(*icmph) + sizeof(*iph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + icmph = (void *)(skb_network_header(skb) + ihl); + iph = (void *)(icmph + 1); + + new_addr &= mask; + new_addr |= addr & ~mask; + + /* XXX Fix up the inner checksums. */ + if (egress) + iph->daddr = new_addr; + else + iph->saddr = new_addr; + + nf_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, + 1); + break; + } + default: + break; + } + + return action; + +drop: + spin_lock(&p->tcf_lock); + p->tcf_qstats.drops++; + spin_unlock(&p->tcf_lock); + return TC_ACT_SHOT; +} + +static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_nat *p = a->priv; + struct tc_nat *opt; + struct tcf_t t; + int s; + + s = sizeof(*opt); + + /* netlink spinlocks held above us - must use ATOMIC */ + opt = kzalloc(s, GFP_ATOMIC); + if (unlikely(!opt)) + return -ENOBUFS; + + opt->old_addr = p->old_addr; + opt->new_addr = p->new_addr; + opt->mask = p->mask; + opt->flags = p->flags; + + opt->index = p->tcf_index; + opt->action = p->tcf_action; + opt->refcnt = p->tcf_refcnt - ref; + opt->bindcnt = p->tcf_bindcnt - bind; + + RTA_PUT(skb, TCA_NAT_PARMS, s, opt); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + RTA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); + + kfree(opt); + + return skb->len; + +rtattr_failure: + nlmsg_trim(skb, b); + kfree(opt); + return -1; +} + +static struct tc_action_ops act_nat_ops = { + .kind = "nat", + .hinfo = &nat_hash_info, + .type = TCA_ACT_NAT, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_nat, + .dump = tcf_nat_dump, + .cleanup = tcf_nat_cleanup, + .lookup = tcf_hash_search, + .init = tcf_nat_init, + .walk = tcf_generic_walker +}; + +MODULE_DESCRIPTION("Stateless NAT actions"); +MODULE_LICENSE("GPL"); + +static int __init nat_init_module(void) +{ + return tcf_register_action(&act_nat_ops); +} + +static void __exit nat_cleanup_module(void) +{ + tcf_unregister_action(&act_nat_ops); +} + +module_init(nat_init_module); +module_exit(nat_cleanup_module); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 17f6f27e28a..a73e3e6d87e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -21,8 +21,8 @@ #include <net/act_api.h> #include <net/netlink.h> -#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) -#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) +#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) +#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) #define POL_TAB_MASK 15 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5f0fbca7393..03657976fd5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -154,7 +154,7 @@ replay: /* Find head of filter chain. */ /* Find link */ - if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL) return -ENODEV; /* Find qdisc */ @@ -387,7 +387,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d4d5d2f271d..9e98c6e567d 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -592,7 +592,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, } else handle = gen_new_kid(ht, htid); - if (tb[TCA_U32_SEL-1] == 0 || + if (tb[TCA_U32_SEL-1] == NULL || RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel)) return -EINVAL; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 650f09c8bd6..ceda8890ab0 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -55,7 +55,7 @@ * ppp0..9. * * NOTE: Certain meta values depend on other subsystems and are - * only available if that subsytem is enabled in the kernel. + * only available if that subsystem is enabled in the kernel. */ #include <linux/module.h> @@ -291,7 +291,7 @@ META_COLLECTOR(var_sk_bound_if) } else { struct net_device *dev; - dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if); *err = var_dev(dev, dst); if (dev) dev_put(dev); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index dee0d5fb39c..8ae137e3522 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -28,6 +28,7 @@ #include <linux/list.h> #include <linux/hrtimer.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -606,7 +607,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -673,7 +674,7 @@ replay: clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -880,7 +881,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = q_idx = cb->args[1]; read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -931,7 +932,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; /* @@ -1114,7 +1115,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; @@ -1225,10 +1226,13 @@ EXPORT_SYMBOL(tcf_destroy_chain); #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { + struct timespec ts; + + hrtimer_get_res(CLOCK_MONOTONIC, &ts); seq_printf(seq, "%08x %08x %08x %08x\n", (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1), 1000000, - (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES)); + (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts))); return 0; } @@ -1251,7 +1255,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); - proc_net_fops_create("psched", 0, &psched_fops); + proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index cbef3bbfc20..4de3744e65c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -175,7 +175,7 @@ struct cbq_sched_data }; -#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log]) +#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) static __inline__ unsigned cbq_hash(u32 h) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index c81649cf0b9..fa1a6f45dc4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -134,34 +134,19 @@ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; - unsigned lockless; int ret; /* Dequeue packet */ if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) return 0; - /* - * When the driver has LLTX set, it does its own locking in - * start_xmit. These checks are worth it because even uncongested - * locks can be quite expensive. The driver can do a trylock, as - * is being done here; in case of lock contention it should return - * NETDEV_TX_LOCKED and the packet will be requeued. - */ - lockless = (dev->features & NETIF_F_LLTX); - - if (!lockless && !netif_tx_trylock(dev)) { - /* Another CPU grabbed the driver tx lock */ - return handle_dev_cpu_collision(skb, dev, q); - } /* And release queue */ spin_unlock(&dev->queue_lock); + HARD_TX_LOCK(dev, smp_processor_id()); ret = dev_hard_start_xmit(skb, dev); - - if (!lockless) - netif_tx_unlock(dev); + HARD_TX_UNLOCK(dev); spin_lock(&dev->queue_lock); q = dev->qdisc; @@ -256,14 +241,27 @@ static void dev_watchdog_down(struct net_device *dev) netif_tx_unlock_bh(dev); } +/** + * netif_carrier_on - set carrier + * @dev: network device + * + * Device has detected that carrier. + */ void netif_carrier_on(struct net_device *dev) { - if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) + if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { linkwatch_fire_event(dev); - if (netif_running(dev)) - __netdev_watchdog_up(dev); + if (netif_running(dev)) + __netdev_watchdog_up(dev); + } } +/** + * netif_carrier_off - clear carrier + * @dev: network device + * + * Device has detected loss of carrier. + */ void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) @@ -558,6 +556,7 @@ void dev_deactivate(struct net_device *dev) { struct Qdisc *qdisc; struct sk_buff *skb; + int running; spin_lock_bh(&dev->queue_lock); qdisc = dev->qdisc; @@ -573,12 +572,31 @@ void dev_deactivate(struct net_device *dev) dev_watchdog_down(dev); - /* Wait for outstanding dev_queue_xmit calls. */ + /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ synchronize_rcu(); /* Wait for outstanding qdisc_run calls. */ - while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) - yield(); + do { + while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + yield(); + + /* + * Double-check inside queue lock to ensure that all effects + * of the queue run are visible when we return. + */ + spin_lock_bh(&dev->queue_lock); + running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); + spin_unlock_bh(&dev->queue_lock); + + /* + * The running flag should never be set at this point because + * we've already set dev->qdisc to noop_qdisc *inside* the same + * pair of spin locks. That is, if any qdisc_run starts after + * our initial test it should see the noop_qdisc and then + * clear the RUNNING bit before dropping the queue lock. So + * if it is set here then we've found a bug. + */ + } while (WARN_ON_ONCE(running)); } void dev_init_scheduler(struct net_device *dev) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 246a2f9765f..5e608a64935 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -132,10 +132,8 @@ struct htb_class { static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, int size) { - int slot = size >> rate->rate.cell_log; - if (slot > 255) - return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]); - return rate->data[slot]; + long result = qdisc_l2t(rate, size); + return result; } struct htb_sched { diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 2d32fd27496..3f8335e6ea2 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -205,20 +205,19 @@ static unsigned int ingress_drop(struct Qdisc *sch) #ifndef CONFIG_NET_CLS_ACT #ifdef CONFIG_NETFILTER static unsigned int -ing_hook(unsigned int hook, struct sk_buff **pskb, +ing_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *indev, const struct net_device *outdev, int (*okfn)(struct sk_buff *)) { struct Qdisc *q; - struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; int fwres=NF_ACCEPT; DPRINTK("ing_hook: skb %s dev=%s len=%u\n", skb->sk ? "(owned)" : "(unowned)", - skb->dev ? (*pskb)->dev->name : "(no dev)", + skb->dev ? skb->dev->name : "(no dev)", skb->len); if (dev->qdisc_ingress) { diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index abd82fc3ec6..de894096e44 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -136,7 +136,7 @@ prio_dequeue(struct Qdisc* sch) * pulling an skb. This way we avoid excessive requeues * for slower queues. */ - if (!netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { + if (!__netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { qdisc = q->queues[prio]; skb = qdisc->dequeue(qdisc); if (skb) { @@ -165,7 +165,7 @@ static struct sk_buff *rr_dequeue(struct Qdisc* sch) * for slower queues. If the queue is stopped, try the * next queue. */ - if (!netif_subqueue_stopped(sch->dev, + if (!__netif_subqueue_stopped(sch->dev, (q->mq ? q->curband : 0))) { qdisc = q->queues[q->curband]; skb = qdisc->dequeue(qdisc); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 8c2639af4c6..b0d81098b0e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -115,8 +115,8 @@ struct tbf_sched_data struct qdisc_watchdog watchdog; /* Watchdog timer */ }; -#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log]) -#define L2T_P(q,L) ((q)->P_tab->data[(L)>>(q)->P_tab->rate.cell_log]) +#define L2T(q,L) qdisc_l2t((q)->R_tab,L) +#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) { diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 0968184ea6b..421281d9dd1 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -232,9 +232,12 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } if (neigh_event_send(n, skb_res) == 0) { int err; + read_lock(&n->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + n->ha, NULL, skb->len); read_unlock(&n->lock); + if (err < 0) { neigh_release(n); return -EINVAL; @@ -246,10 +249,10 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * return (skb_res == NULL) ? -EAGAIN : 1; } -static __inline__ int -teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) +static inline int teql_resolve(struct sk_buff *skb, + struct sk_buff *skb_res, struct net_device *dev) { - if (dev->hard_header == NULL || + if (dev->header_ops == NULL || skb->dst == NULL || skb->dst->neighbour == NULL) return 0; @@ -263,7 +266,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) int busy; int nores; int len = skb->len; - int subq = skb->queue_mapping; + int subq = skb_get_queue_mapping(skb); struct sk_buff *skb_res = NULL; start = master->slaves; @@ -281,7 +284,7 @@ restart: if (slave->qdisc_sleeping != q) continue; if (netif_queue_stopped(slave) || - netif_subqueue_stopped(slave, subq) || + __netif_subqueue_stopped(slave, subq) || !netif_running(slave)) { busy = 1; continue; @@ -291,7 +294,7 @@ restart: case 0: if (netif_tx_trylock(slave)) { if (!netif_queue_stopped(slave) && - !netif_subqueue_stopped(slave, subq) && + !__netif_subqueue_stopped(slave, subq) && slave->hard_start_xmit(skb, slave) == 0) { netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); @@ -432,7 +435,6 @@ static __init void teql_master_setup(struct net_device *dev) dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; - SET_MODULE_OWNER(dev); } static LIST_HEAD(master_dev_list); diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 70c828bbe44..1da7204d9b4 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -9,7 +9,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ inqueue.o outqueue.o ulpqueue.o command.o \ tsnmap.o bind_addr.o socket.o primitive.o \ - output.o input.o debug.o ssnmap.o proc.o crc32c.o + output.o input.o debug.o ssnmap.o proc.o crc32c.o \ + auth.o sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 9bad8ba0fed..03158e3665d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -74,6 +74,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a { struct sctp_sock *sp; int i; + sctp_paramhdr_t *p; + int err; /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); @@ -298,6 +300,30 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->default_timetolive = sp->default_timetolive; asoc->default_rcv_context = sp->default_rcv_context; + /* AUTH related initializations */ + INIT_LIST_HEAD(&asoc->endpoint_shared_keys); + err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp); + if (err) + goto fail_init; + + asoc->active_key_id = ep->active_key_id; + asoc->asoc_shared_key = NULL; + + asoc->default_hmac_id = 0; + /* Save the hmacs and chunks list into this association */ + if (ep->auth_hmacs_list) + memcpy(asoc->c.auth_hmacs, ep->auth_hmacs_list, + ntohs(ep->auth_hmacs_list->param_hdr.length)); + if (ep->auth_chunk_list) + memcpy(asoc->c.auth_chunks, ep->auth_chunk_list, + ntohs(ep->auth_chunk_list->param_hdr.length)); + + /* Get the AUTH random number for this association */ + p = (sctp_paramhdr_t *)asoc->c.auth_random; + p->type = SCTP_PARAM_RANDOM; + p->length = htons(sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH); + get_random_bytes(p+1, SCTP_AUTH_RANDOM_LENGTH); + return asoc; fail_init: @@ -389,6 +415,9 @@ void sctp_association_free(struct sctp_association *asoc) /* Free peer's cached cookie. */ kfree(asoc->peer.cookie); + kfree(asoc->peer.peer_random); + kfree(asoc->peer.peer_chunks); + kfree(asoc->peer.peer_hmacs); /* Release the transport structures. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { @@ -407,6 +436,12 @@ void sctp_association_free(struct sctp_association *asoc) if (asoc->addip_last_asconf) sctp_chunk_free(asoc->addip_last_asconf); + /* AUTH - Free the endpoint shared keys */ + sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); + + /* AUTH - Free the association shared key */ + sctp_auth_key_put(asoc->asoc_shared_key); + sctp_association_put(asoc); } @@ -976,6 +1011,16 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) state = asoc->state; subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); + /* SCTP-AUTH, Section 6.3: + * The receiver has a list of chunk types which it expects + * to be received only after an AUTH-chunk. This list has + * been sent to the peer during the association setup. It + * MUST silently discard these chunks if they are not placed + * after an AUTH chunk in the packet. + */ + if (sctp_auth_recv_cid(subtype.chunk, asoc) && !chunk->auth) + continue; + /* Remember where the last DATA chunk came from so we * know where to send the SACK. */ @@ -1112,6 +1157,24 @@ void sctp_assoc_update(struct sctp_association *asoc, sctp_assoc_set_id(asoc, GFP_ATOMIC); } } + + /* SCTP-AUTH: Save the peer parameters from the new assocaitions + * and also move the association shared keys over + */ + kfree(asoc->peer.peer_random); + asoc->peer.peer_random = new->peer.peer_random; + new->peer.peer_random = NULL; + + kfree(asoc->peer.peer_chunks); + asoc->peer.peer_chunks = new->peer.peer_chunks; + new->peer.peer_chunks = NULL; + + kfree(asoc->peer.peer_hmacs); + asoc->peer.peer_hmacs = new->peer.peer_hmacs; + new->peer.peer_hmacs = NULL; + + sctp_auth_key_put(asoc->asoc_shared_key); + sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); } /* Update the retran path for sending a retransmitted packet. diff --git a/net/sctp/auth.c b/net/sctp/auth.c new file mode 100644 index 00000000000..6d5fa6bb371 --- /dev/null +++ b/net/sctp/auth.c @@ -0,0 +1,936 @@ +/* SCTP kernel reference Implementation + * (C) Copyright 2007 Hewlett-Packard Development Company, L.P. + * + * This file is part of the SCTP kernel reference Implementation + * + * The SCTP reference implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * The SCTP reference implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Please send any bug reports or fixes you make to the + * email address(es): + * lksctp developers <lksctp-developers@lists.sourceforge.net> + * + * Or submit a bug report through the following website: + * http://www.sf.net/projects/lksctp + * + * Written or modified by: + * Vlad Yasevich <vladislav.yasevich@hp.com> + * + * Any bugs reported given to us we will try to fix... any fixes shared will + * be incorporated into the next SCTP release. + */ + +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/scatterlist.h> +#include <net/sctp/sctp.h> +#include <net/sctp/auth.h> + +static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { + { + /* id 0 is reserved. as all 0 */ + .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_0, + }, + { + .hmac_id = SCTP_AUTH_HMAC_ID_SHA1, + .hmac_name="hmac(sha1)", + .hmac_len = SCTP_SHA1_SIG_SIZE, + }, + { + /* id 2 is reserved as well */ + .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, + }, + { + .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, + .hmac_name="hmac(sha256)", + .hmac_len = SCTP_SHA256_SIG_SIZE, + } +}; + + +void sctp_auth_key_put(struct sctp_auth_bytes *key) +{ + if (!key) + return; + + if (atomic_dec_and_test(&key->refcnt)) { + kfree(key); + SCTP_DBG_OBJCNT_DEC(keys); + } +} + +/* Create a new key structure of a given length */ +static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp) +{ + struct sctp_auth_bytes *key; + + /* Allocate the shared key */ + key = kmalloc(sizeof(struct sctp_auth_bytes) + key_len, gfp); + if (!key) + return NULL; + + key->len = key_len; + atomic_set(&key->refcnt, 1); + SCTP_DBG_OBJCNT_INC(keys); + + return key; +} + +/* Create a new shared key container with a give key id */ +struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp) +{ + struct sctp_shared_key *new; + + /* Allocate the shared key container */ + new = kzalloc(sizeof(struct sctp_shared_key), gfp); + if (!new) + return NULL; + + INIT_LIST_HEAD(&new->key_list); + new->key_id = key_id; + + return new; +} + +/* Free the shared key stucture */ +static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) +{ + BUG_ON(!list_empty(&sh_key->key_list)); + sctp_auth_key_put(sh_key->key); + sh_key->key = NULL; + kfree(sh_key); +} + +/* Destory the entire key list. This is done during the + * associon and endpoint free process. + */ +void sctp_auth_destroy_keys(struct list_head *keys) +{ + struct sctp_shared_key *ep_key; + struct sctp_shared_key *tmp; + + if (list_empty(keys)) + return; + + key_for_each_safe(ep_key, tmp, keys) { + list_del_init(&ep_key->key_list); + sctp_auth_shkey_free(ep_key); + } +} + +/* Compare two byte vectors as numbers. Return values + * are: + * 0 - vectors are equal + * < 0 - vector 1 is smaller then vector2 + * > 0 - vector 1 is greater then vector2 + * + * Algorithm is: + * This is performed by selecting the numerically smaller key vector... + * If the key vectors are equal as numbers but differ in length ... + * the shorter vector is considered smaller + * + * Examples (with small values): + * 000123456789 > 123456789 (first number is longer) + * 000123456789 < 234567891 (second number is larger numerically) + * 123456789 > 2345678 (first number is both larger & longer) + */ +static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, + struct sctp_auth_bytes *vector2) +{ + int diff; + int i; + const __u8 *longer; + + diff = vector1->len - vector2->len; + if (diff) { + longer = (diff > 0) ? vector1->data : vector2->data; + + /* Check to see if the longer number is + * lead-zero padded. If it is not, it + * is automatically larger numerically. + */ + for (i = 0; i < abs(diff); i++ ) { + if (longer[i] != 0) + return diff; + } + } + + /* lengths are the same, compare numbers */ + return memcmp(vector1->data, vector2->data, vector1->len); +} + +/* + * Create a key vector as described in SCTP-AUTH, Section 6.1 + * The RANDOM parameter, the CHUNKS parameter and the HMAC-ALGO + * parameter sent by each endpoint are concatenated as byte vectors. + * These parameters include the parameter type, parameter length, and + * the parameter value, but padding is omitted; all padding MUST be + * removed from this concatenation before proceeding with further + * computation of keys. Parameters which were not sent are simply + * omitted from the concatenation process. The resulting two vectors + * are called the two key vectors. + */ +static struct sctp_auth_bytes *sctp_auth_make_key_vector( + sctp_random_param_t *random, + sctp_chunks_param_t *chunks, + sctp_hmac_algo_param_t *hmacs, + gfp_t gfp) +{ + struct sctp_auth_bytes *new; + __u32 len; + __u32 offset = 0; + + len = ntohs(random->param_hdr.length) + ntohs(hmacs->param_hdr.length); + if (chunks) + len += ntohs(chunks->param_hdr.length); + + new = kmalloc(sizeof(struct sctp_auth_bytes) + len, gfp); + if (!new) + return NULL; + + new->len = len; + + memcpy(new->data, random, ntohs(random->param_hdr.length)); + offset += ntohs(random->param_hdr.length); + + if (chunks) { + memcpy(new->data + offset, chunks, + ntohs(chunks->param_hdr.length)); + offset += ntohs(chunks->param_hdr.length); + } + + memcpy(new->data + offset, hmacs, ntohs(hmacs->param_hdr.length)); + + return new; +} + + +/* Make a key vector based on our local parameters */ +static struct sctp_auth_bytes *sctp_auth_make_local_vector( + const struct sctp_association *asoc, + gfp_t gfp) +{ + return sctp_auth_make_key_vector( + (sctp_random_param_t*)asoc->c.auth_random, + (sctp_chunks_param_t*)asoc->c.auth_chunks, + (sctp_hmac_algo_param_t*)asoc->c.auth_hmacs, + gfp); +} + +/* Make a key vector based on peer's parameters */ +static struct sctp_auth_bytes *sctp_auth_make_peer_vector( + const struct sctp_association *asoc, + gfp_t gfp) +{ + return sctp_auth_make_key_vector(asoc->peer.peer_random, + asoc->peer.peer_chunks, + asoc->peer.peer_hmacs, + gfp); +} + + +/* Set the value of the association shared key base on the parameters + * given. The algorithm is: + * From the endpoint pair shared keys and the key vectors the + * association shared keys are computed. This is performed by selecting + * the numerically smaller key vector and concatenating it to the + * endpoint pair shared key, and then concatenating the numerically + * larger key vector to that. The result of the concatenation is the + * association shared key. + */ +static struct sctp_auth_bytes *sctp_auth_asoc_set_secret( + struct sctp_shared_key *ep_key, + struct sctp_auth_bytes *first_vector, + struct sctp_auth_bytes *last_vector, + gfp_t gfp) +{ + struct sctp_auth_bytes *secret; + __u32 offset = 0; + __u32 auth_len; + + auth_len = first_vector->len + last_vector->len; + if (ep_key->key) + auth_len += ep_key->key->len; + + secret = sctp_auth_create_key(auth_len, gfp); + if (!secret) + return NULL; + + if (ep_key->key) { + memcpy(secret->data, ep_key->key->data, ep_key->key->len); + offset += ep_key->key->len; + } + + memcpy(secret->data + offset, first_vector->data, first_vector->len); + offset += first_vector->len; + + memcpy(secret->data + offset, last_vector->data, last_vector->len); + + return secret; +} + +/* Create an association shared key. Follow the algorithm + * described in SCTP-AUTH, Section 6.1 + */ +static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( + const struct sctp_association *asoc, + struct sctp_shared_key *ep_key, + gfp_t gfp) +{ + struct sctp_auth_bytes *local_key_vector; + struct sctp_auth_bytes *peer_key_vector; + struct sctp_auth_bytes *first_vector, + *last_vector; + struct sctp_auth_bytes *secret = NULL; + int cmp; + + + /* Now we need to build the key vectors + * SCTP-AUTH , Section 6.1 + * The RANDOM parameter, the CHUNKS parameter and the HMAC-ALGO + * parameter sent by each endpoint are concatenated as byte vectors. + * These parameters include the parameter type, parameter length, and + * the parameter value, but padding is omitted; all padding MUST be + * removed from this concatenation before proceeding with further + * computation of keys. Parameters which were not sent are simply + * omitted from the concatenation process. The resulting two vectors + * are called the two key vectors. + */ + + local_key_vector = sctp_auth_make_local_vector(asoc, gfp); + peer_key_vector = sctp_auth_make_peer_vector(asoc, gfp); + + if (!peer_key_vector || !local_key_vector) + goto out; + + /* Figure out the order in wich the key_vectors will be + * added to the endpoint shared key. + * SCTP-AUTH, Section 6.1: + * This is performed by selecting the numerically smaller key + * vector and concatenating it to the endpoint pair shared + * key, and then concatenating the numerically larger key + * vector to that. If the key vectors are equal as numbers + * but differ in length, then the concatenation order is the + * endpoint shared key, followed by the shorter key vector, + * followed by the longer key vector. Otherwise, the key + * vectors are identical, and may be concatenated to the + * endpoint pair key in any order. + */ + cmp = sctp_auth_compare_vectors(local_key_vector, + peer_key_vector); + if (cmp < 0) { + first_vector = local_key_vector; + last_vector = peer_key_vector; + } else { + first_vector = peer_key_vector; + last_vector = local_key_vector; + } + + secret = sctp_auth_asoc_set_secret(ep_key, first_vector, last_vector, + gfp); +out: + kfree(local_key_vector); + kfree(peer_key_vector); + + return secret; +} + +/* + * Populate the association overlay list with the list + * from the endpoint. + */ +int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep, + struct sctp_association *asoc, + gfp_t gfp) +{ + struct sctp_shared_key *sh_key; + struct sctp_shared_key *new; + + BUG_ON(!list_empty(&asoc->endpoint_shared_keys)); + + key_for_each(sh_key, &ep->endpoint_shared_keys) { + new = sctp_auth_shkey_create(sh_key->key_id, gfp); + if (!new) + goto nomem; + + new->key = sh_key->key; + sctp_auth_key_hold(new->key); + list_add(&new->key_list, &asoc->endpoint_shared_keys); + } + + return 0; + +nomem: + sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); + return -ENOMEM; +} + + +/* Public interface to creat the association shared key. + * See code above for the algorithm. + */ +int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) +{ + struct sctp_auth_bytes *secret; + struct sctp_shared_key *ep_key; + + /* If we don't support AUTH, or peer is not capable + * we don't need to do anything. + */ + if (!sctp_auth_enable || !asoc->peer.auth_capable) + return 0; + + /* If the key_id is non-zero and we couldn't find an + * endpoint pair shared key, we can't compute the + * secret. + * For key_id 0, endpoint pair shared key is a NULL key. + */ + ep_key = sctp_auth_get_shkey(asoc, asoc->active_key_id); + BUG_ON(!ep_key); + + secret = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); + if (!secret) + return -ENOMEM; + + sctp_auth_key_put(asoc->asoc_shared_key); + asoc->asoc_shared_key = secret; + + return 0; +} + + +/* Find the endpoint pair shared key based on the key_id */ +struct sctp_shared_key *sctp_auth_get_shkey( + const struct sctp_association *asoc, + __u16 key_id) +{ + struct sctp_shared_key *key = NULL; + + /* First search associations set of endpoint pair shared keys */ + key_for_each(key, &asoc->endpoint_shared_keys) { + if (key->key_id == key_id) + break; + } + + return key; +} + +/* + * Initialize all the possible digest transforms that we can use. Right now + * now, the supported digests are SHA1 and SHA256. We do this here once + * because of the restrictiong that transforms may only be allocated in + * user context. This forces us to pre-allocated all possible transforms + * at the endpoint init time. + */ +int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) +{ + struct crypto_hash *tfm = NULL; + __u16 id; + + /* if the transforms are already allocted, we are done */ + if (!sctp_auth_enable) { + ep->auth_hmacs = NULL; + return 0; + } + + if (ep->auth_hmacs) + return 0; + + /* Allocated the array of pointers to transorms */ + ep->auth_hmacs = kzalloc( + sizeof(struct crypto_hash *) * SCTP_AUTH_NUM_HMACS, + gfp); + if (!ep->auth_hmacs) + return -ENOMEM; + + for (id = 0; id < SCTP_AUTH_NUM_HMACS; id++) { + + /* See is we support the id. Supported IDs have name and + * length fields set, so that we can allocated and use + * them. We can safely just check for name, for without the + * name, we can't allocate the TFM. + */ + if (!sctp_hmac_list[id].hmac_name) + continue; + + /* If this TFM has been allocated, we are all set */ + if (ep->auth_hmacs[id]) + continue; + + /* Allocate the ID */ + tfm = crypto_alloc_hash(sctp_hmac_list[id].hmac_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto out_err; + + ep->auth_hmacs[id] = tfm; + } + + return 0; + +out_err: + /* Clean up any successfull allocations */ + sctp_auth_destroy_hmacs(ep->auth_hmacs); + return -ENOMEM; +} + +/* Destroy the hmac tfm array */ +void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[]) +{ + int i; + + if (!auth_hmacs) + return; + + for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) + { + if (auth_hmacs[i]) + crypto_free_hash(auth_hmacs[i]); + } + kfree(auth_hmacs); +} + + +struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id) +{ + return &sctp_hmac_list[hmac_id]; +} + +/* Get an hmac description information that we can use to build + * the AUTH chunk + */ +struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) +{ + struct sctp_hmac_algo_param *hmacs; + __u16 n_elt; + __u16 id = 0; + int i; + + /* If we have a default entry, use it */ + if (asoc->default_hmac_id) + return &sctp_hmac_list[asoc->default_hmac_id]; + + /* Since we do not have a default entry, find the first entry + * we support and return that. Do not cache that id. + */ + hmacs = asoc->peer.peer_hmacs; + if (!hmacs) + return NULL; + + n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1; + for (i = 0; i < n_elt; i++) { + id = ntohs(hmacs->hmac_ids[i]); + + /* Check the id is in the supported range */ + if (id > SCTP_AUTH_HMAC_ID_MAX) + continue; + + /* See is we support the id. Supported IDs have name and + * length fields set, so that we can allocated and use + * them. We can safely just check for name, for without the + * name, we can't allocate the TFM. + */ + if (!sctp_hmac_list[id].hmac_name) + continue; + + break; + } + + if (id == 0) + return NULL; + + return &sctp_hmac_list[id]; +} + +static int __sctp_auth_find_hmacid(__be16 *hmacs, int n_elts, __be16 hmac_id) +{ + int found = 0; + int i; + + for (i = 0; i < n_elts; i++) { + if (hmac_id == hmacs[i]) { + found = 1; + break; + } + } + + return found; +} + +/* See if the HMAC_ID is one that we claim as supported */ +int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, + __be16 hmac_id) +{ + struct sctp_hmac_algo_param *hmacs; + __u16 n_elt; + + if (!asoc) + return 0; + + hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs; + n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1; + + return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id); +} + + +/* Cache the default HMAC id. This to follow this text from SCTP-AUTH: + * Section 6.1: + * The receiver of a HMAC-ALGO parameter SHOULD use the first listed + * algorithm it supports. + */ +void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, + struct sctp_hmac_algo_param *hmacs) +{ + struct sctp_endpoint *ep; + __u16 id; + int i; + int n_params; + + /* if the default id is already set, use it */ + if (asoc->default_hmac_id) + return; + + n_params = (ntohs(hmacs->param_hdr.length) + - sizeof(sctp_paramhdr_t)) >> 1; + ep = asoc->ep; + for (i = 0; i < n_params; i++) { + id = ntohs(hmacs->hmac_ids[i]); + + /* Check the id is in the supported range */ + if (id > SCTP_AUTH_HMAC_ID_MAX) + continue; + + /* If this TFM has been allocated, use this id */ + if (ep->auth_hmacs[id]) { + asoc->default_hmac_id = id; + break; + } + } +} + + +/* Check to see if the given chunk is supposed to be authenticated */ +static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) +{ + unsigned short len; + int found = 0; + int i; + + if (!param) + return 0; + + len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t); + + /* SCTP-AUTH, Section 3.2 + * The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH + * chunks MUST NOT be listed in the CHUNKS parameter. However, if + * a CHUNKS parameter is received then the types for INIT, INIT-ACK, + * SHUTDOWN-COMPLETE and AUTH chunks MUST be ignored. + */ + for (i = 0; !found && i < len; i++) { + switch (param->chunks[i]) { + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: + break; + + default: + if (param->chunks[i] == chunk) + found = 1; + break; + } + } + + return found; +} + +/* Check if peer requested that this chunk is authenticated */ +int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) +{ + if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable) + return 0; + + return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); +} + +/* Check if we requested that peer authenticate this chunk. */ +int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) +{ + if (!sctp_auth_enable || !asoc) + return 0; + + return __sctp_auth_cid(chunk, + (struct sctp_chunks_param *)asoc->c.auth_chunks); +} + +/* SCTP-AUTH: Section 6.2: + * The sender MUST calculate the MAC as described in RFC2104 [2] using + * the hash function H as described by the MAC Identifier and the shared + * association key K based on the endpoint pair shared key described by + * the shared key identifier. The 'data' used for the computation of + * the AUTH-chunk is given by the AUTH chunk with its HMAC field set to + * zero (as shown in Figure 6) followed by all chunks that are placed + * after the AUTH chunk in the SCTP packet. + */ +void sctp_auth_calculate_hmac(const struct sctp_association *asoc, + struct sk_buff *skb, + struct sctp_auth_chunk *auth, + gfp_t gfp) +{ + struct scatterlist sg; + struct hash_desc desc; + struct sctp_auth_bytes *asoc_key; + __u16 key_id, hmac_id; + __u8 *digest; + unsigned char *end; + int free_key = 0; + + /* Extract the info we need: + * - hmac id + * - key id + */ + key_id = ntohs(auth->auth_hdr.shkey_id); + hmac_id = ntohs(auth->auth_hdr.hmac_id); + + if (key_id == asoc->active_key_id) + asoc_key = asoc->asoc_shared_key; + else { + struct sctp_shared_key *ep_key; + + ep_key = sctp_auth_get_shkey(asoc, key_id); + if (!ep_key) + return; + + asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); + if (!asoc_key) + return; + + free_key = 1; + } + + /* set up scatter list */ + end = skb_tail_pointer(skb); + sg_init_one(&sg, auth, end - (unsigned char *)auth); + + desc.tfm = asoc->ep->auth_hmacs[hmac_id]; + desc.flags = 0; + + digest = auth->auth_hdr.hmac; + if (crypto_hash_setkey(desc.tfm, &asoc_key->data[0], asoc_key->len)) + goto free; + + crypto_hash_digest(&desc, &sg, sg.length, digest); + +free: + if (free_key) + sctp_auth_key_put(asoc_key); +} + +/* API Helpers */ + +/* Add a chunk to the endpoint authenticated chunk list */ +int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id) +{ + struct sctp_chunks_param *p = ep->auth_chunk_list; + __u16 nchunks; + __u16 param_len; + + /* If this chunk is already specified, we are done */ + if (__sctp_auth_cid(chunk_id, p)) + return 0; + + /* Check if we can add this chunk to the array */ + param_len = ntohs(p->param_hdr.length); + nchunks = param_len - sizeof(sctp_paramhdr_t); + if (nchunks == SCTP_NUM_CHUNK_TYPES) + return -EINVAL; + + p->chunks[nchunks] = chunk_id; + p->param_hdr.length = htons(param_len + 1); + return 0; +} + +/* Add hmac identifires to the endpoint list of supported hmac ids */ +int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, + struct sctp_hmacalgo *hmacs) +{ + int has_sha1 = 0; + __u16 id; + int i; + + /* Scan the list looking for unsupported id. Also make sure that + * SHA1 is specified. + */ + for (i = 0; i < hmacs->shmac_num_idents; i++) { + id = hmacs->shmac_idents[i]; + + if (SCTP_AUTH_HMAC_ID_SHA1 == id) + has_sha1 = 1; + + if (!sctp_hmac_list[id].hmac_name) + return -EOPNOTSUPP; + } + + if (!has_sha1) + return -EINVAL; + + memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0], + hmacs->shmac_num_idents * sizeof(__u16)); + ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + + hmacs->shmac_num_idents * sizeof(__u16)); + return 0; +} + +/* Set a new shared key on either endpoint or association. If the + * the key with a same ID already exists, replace the key (remove the + * old key and add a new one). + */ +int sctp_auth_set_key(struct sctp_endpoint *ep, + struct sctp_association *asoc, + struct sctp_authkey *auth_key) +{ + struct sctp_shared_key *cur_key = NULL; + struct sctp_auth_bytes *key; + struct list_head *sh_keys; + int replace = 0; + + /* Try to find the given key id to see if + * we are doing a replace, or adding a new key + */ + if (asoc) + sh_keys = &asoc->endpoint_shared_keys; + else + sh_keys = &ep->endpoint_shared_keys; + + key_for_each(cur_key, sh_keys) { + if (cur_key->key_id == auth_key->sca_keynumber) { + replace = 1; + break; + } + } + + /* If we are not replacing a key id, we need to allocate + * a shared key. + */ + if (!replace) { + cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, + GFP_KERNEL); + if (!cur_key) + return -ENOMEM; + } + + /* Create a new key data based on the info passed in */ + key = sctp_auth_create_key(auth_key->sca_keylen, GFP_KERNEL); + if (!key) + goto nomem; + + memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylen); + + /* If we are replacing, remove the old keys data from the + * key id. If we are adding new key id, add it to the + * list. + */ + if (replace) + sctp_auth_key_put(cur_key->key); + else + list_add(&cur_key->key_list, sh_keys); + + cur_key->key = key; + sctp_auth_key_hold(key); + + return 0; +nomem: + if (!replace) + sctp_auth_shkey_free(cur_key); + + return -ENOMEM; +} + +int sctp_auth_set_active_key(struct sctp_endpoint *ep, + struct sctp_association *asoc, + __u16 key_id) +{ + struct sctp_shared_key *key; + struct list_head *sh_keys; + int found = 0; + + /* The key identifier MUST correst to an existing key */ + if (asoc) + sh_keys = &asoc->endpoint_shared_keys; + else + sh_keys = &ep->endpoint_shared_keys; + + key_for_each(key, sh_keys) { + if (key->key_id == key_id) { + found = 1; + break; + } + } + + if (!found) + return -EINVAL; + + if (asoc) { + asoc->active_key_id = key_id; + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + } else + ep->active_key_id = key_id; + + return 0; +} + +int sctp_auth_del_key_id(struct sctp_endpoint *ep, + struct sctp_association *asoc, + __u16 key_id) +{ + struct sctp_shared_key *key; + struct list_head *sh_keys; + int found = 0; + + /* The key identifier MUST NOT be the current active key + * The key identifier MUST correst to an existing key + */ + if (asoc) { + if (asoc->active_key_id == key_id) + return -EINVAL; + + sh_keys = &asoc->endpoint_shared_keys; + } else { + if (ep->active_key_id == key_id) + return -EINVAL; + + sh_keys = &ep->endpoint_shared_keys; + } + + key_for_each(key, sh_keys) { + if (key->key_id == key_id) { + found = 1; + break; + } + } + + if (!found) + return -EINVAL; + + /* Delete the shared key */ + list_del_init(&key->key_list); + sctp_auth_shkey_free(key); + + return 0; +} diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 77fb7b06a9c..619d0f2dee5 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -194,6 +194,18 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, max = asoc->frag_point; + /* If the the peer requested that we authenticate DATA chunks + * we need to accound for bundling of the AUTH chunks along with + * DATA. + */ + if (sctp_auth_send_cid(SCTP_CID_DATA, asoc)) { + struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); + + if (hmac_desc) + max -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + + hmac_desc->hmac_len); + } + whole = 0; first_len = max; diff --git a/net/sctp/crc32c.c b/net/sctp/crc32c.c index 59cf7b06d21..181edabdb8c 100644 --- a/net/sctp/crc32c.c +++ b/net/sctp/crc32c.c @@ -170,6 +170,7 @@ __u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32) return crc32; } +#if 0 __u32 sctp_update_copy_cksum(__u8 *to, __u8 *from, __u16 length, __u32 crc32) { __u32 i; @@ -186,6 +187,7 @@ __u32 sctp_update_copy_cksum(__u8 *to, __u8 *from, __u16 length, __u32 crc32) return crc32; } +#endif /* 0 */ __u32 sctp_end_cksum(__u32 crc32) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8f485a0d14b..2d2d81ef4a6 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -69,12 +69,56 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, gfp_t gfp) { + struct sctp_hmac_algo_param *auth_hmacs = NULL; + struct sctp_chunks_param *auth_chunks = NULL; + struct sctp_shared_key *null_key; + int err; + memset(ep, 0, sizeof(struct sctp_endpoint)); ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; + if (sctp_auth_enable) { + /* Allocate space for HMACS and CHUNKS authentication + * variables. There are arrays that we encode directly + * into parameters to make the rest of the operations easier. + */ + auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) + + sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); + if (!auth_hmacs) + goto nomem; + + auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) + + SCTP_NUM_CHUNK_TYPES, gfp); + if (!auth_chunks) + goto nomem; + + /* Initialize the HMACS parameter. + * SCTP-AUTH: Section 3.3 + * Every endpoint supporting SCTP chunk authentication MUST + * support the HMAC based on the SHA-1 algorithm. + */ + auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; + auth_hmacs->param_hdr.length = + htons(sizeof(sctp_paramhdr_t) + 2); + auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); + + /* Initialize the CHUNKS parameter */ + auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; + + /* If the Add-IP functionality is enabled, we must + * authenticate, ASCONF and ASCONF-ACK chunks + */ + if (sctp_addip_enable) { + auth_chunks->chunks[0] = SCTP_CID_ASCONF; + auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; + auth_chunks->param_hdr.length = + htons(sizeof(sctp_paramhdr_t) + 2); + } + } + /* Initialize the base structure. */ /* What type of endpoint are we? */ ep->base.type = SCTP_EP_TYPE_SOCKET; @@ -102,6 +146,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = sctp_sndbuf_policy; + sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -113,7 +158,36 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->last_key = ep->current_key = 0; ep->key_changed_at = jiffies; + /* SCTP-AUTH extensions*/ + INIT_LIST_HEAD(&ep->endpoint_shared_keys); + null_key = sctp_auth_shkey_create(0, GFP_KERNEL); + if (!null_key) + goto nomem; + + list_add(&null_key->key_list, &ep->endpoint_shared_keys); + + /* Allocate and initialize transorms arrays for suported HMACs. */ + err = sctp_auth_init_hmacs(ep, gfp); + if (err) + goto nomem_hmacs; + + /* Add the null key to the endpoint shared keys list and + * set the hmcas and chunks pointers. + */ + ep->auth_hmacs_list = auth_hmacs; + ep->auth_chunk_list = auth_chunks; + return ep; + +nomem_hmacs: + sctp_auth_destroy_keys(&ep->endpoint_shared_keys); +nomem: + /* Free all allocations */ + kfree(auth_hmacs); + kfree(auth_chunks); + kfree(ep->digest); + return NULL; + } /* Create a sctp_endpoint with all that boring stuff initialized. @@ -186,6 +260,16 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) /* Free the digest buffer */ kfree(ep->digest); + /* SCTP-AUTH: Free up AUTH releated data such as shared keys + * chunks and hmacs arrays that were allocated + */ + sctp_auth_destroy_keys(&ep->endpoint_shared_keys); + kfree(ep->auth_hmacs_list); + kfree(ep->auth_chunk_list); + + /* AUTH - Free any allocated HMAC transform containers */ + sctp_auth_destroy_hmacs(ep->auth_hmacs); + /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); @@ -316,6 +400,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) sctp_subtype_t subtype; sctp_state_t state; int error = 0; + int first_time = 1; /* is this the first time through the looop */ if (ep->base.dead) return; @@ -327,6 +412,29 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) while (NULL != (chunk = sctp_inq_pop(inqueue))) { subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); + /* If the first chunk in the packet is AUTH, do special + * processing specified in Section 6.3 of SCTP-AUTH spec + */ + if (first_time && (subtype.chunk == SCTP_CID_AUTH)) { + struct sctp_chunkhdr *next_hdr; + + next_hdr = sctp_inq_peek(inqueue); + if (!next_hdr) + goto normal; + + /* If the next chunk is COOKIE-ECHO, skip the AUTH + * chunk while saving a pointer to it so we can do + * Authentication later (during cookie-echo + * processing). + */ + if (next_hdr->type == SCTP_CID_COOKIE_ECHO) { + chunk->auth_chunk = skb_clone(chunk->skb, + GFP_ATOMIC); + chunk->auth = 1; + continue; + } + } +normal: /* We might have grown an association since last we * looked, so try again. * @@ -342,6 +450,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) } state = asoc ? asoc->state : SCTP_STATE_CLOSED; + if (sctp_auth_recv_cid(subtype.chunk, asoc) && !chunk->auth) + continue; /* Remember where the last DATA chunk came from so we * know where to send the SACK. @@ -365,5 +475,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) */ if (!sctp_sk(sk)->ep) break; + + if (first_time) + first_time = 0; } } diff --git a/net/sctp/input.c b/net/sctp/input.c index f9a0c9276e3..86503e7fa21 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -911,15 +911,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, ch = (sctp_chunkhdr_t *) skb->data; - /* If this is INIT/INIT-ACK look inside the chunk too. */ - switch (ch->type) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: - break; - default: - return NULL; - } - /* The code below will attempt to walk the chunk and extract * parameter information. Before we do that, we need to verify * that the chunk length doesn't cause overflow. Otherwise, we'll @@ -964,6 +955,60 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, return NULL; } +/* SCTP-AUTH, Section 6.3: +* If the receiver does not find a STCB for a packet containing an AUTH +* chunk as the first chunk and not a COOKIE-ECHO chunk as the second +* chunk, it MUST use the chunks after the AUTH chunk to look up an existing +* association. +* +* This means that any chunks that can help us identify the association need +* to be looked at to find this assocation. +* +* TODO: The only chunk currently defined that can do that is ASCONF, but we +* don't support that functionality yet. +*/ +static struct sctp_association *__sctp_rcv_auth_lookup(struct sk_buff *skb, + const union sctp_addr *paddr, + const union sctp_addr *laddr, + struct sctp_transport **transportp) +{ + /* XXX - walk through the chunks looking for something that can + * help us find the association. INIT, and INIT-ACK are not permitted. + * That leaves ASCONF, but we don't support that yet. + */ + return NULL; +} + +/* + * There are circumstances when we need to look inside the SCTP packet + * for information to help us find the association. Examples + * include looking inside of INIT/INIT-ACK chunks or after the AUTH + * chunks. + */ +static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, + const union sctp_addr *paddr, + const union sctp_addr *laddr, + struct sctp_transport **transportp) +{ + sctp_chunkhdr_t *ch; + + ch = (sctp_chunkhdr_t *) skb->data; + + /* If this is INIT/INIT-ACK look inside the chunk too. */ + switch (ch->type) { + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + return __sctp_rcv_init_lookup(skb, laddr, transportp); + break; + + case SCTP_CID_AUTH: + return __sctp_rcv_auth_lookup(skb, paddr, laddr, transportp); + break; + } + + return NULL; +} + /* Lookup an association for an inbound skb. */ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, const union sctp_addr *paddr, @@ -979,7 +1024,7 @@ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, * parameters within the INIT or INIT-ACK. */ if (!asoc) - asoc = __sctp_rcv_init_lookup(skb, laddr, transportp); + asoc = __sctp_rcv_lookup_harder(skb, paddr, laddr, transportp); return asoc; } diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index e4ea7fdf36e..f10fe7fbf24 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -100,6 +100,25 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) q->immediate.func(&q->immediate); } +/* Peek at the next chunk on the inqeue. */ +struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) +{ + struct sctp_chunk *chunk; + sctp_chunkhdr_t *ch = NULL; + + chunk = queue->in_progress; + /* If there is no more chunks in this packet, say so */ + if (chunk->singleton || + chunk->end_of_packet || + chunk->pdiscard) + return NULL; + + ch = (sctp_chunkhdr_t *)chunk->chunk_end; + + return ch; +} + + /* Extract a chunk from an SCTP inqueue. * * WARNING: If you need to put the chunk on another queue, you need to diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 670fd2740b8..eb4deaf5891 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -493,7 +493,7 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, if (addr1->sa.sa_family != addr2->sa.sa_family) { if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET6 && - IPV6_ADDR_MAPPED == ipv6_addr_type(&addr2->v6.sin6_addr)) { + ipv6_addr_v4mapped(&addr2->v6.sin6_addr)) { if (addr2->v6.sin6_port == addr1->v4.sin_port && addr2->v6.sin6_addr.s6_addr32[3] == addr1->v4.sin_addr.s_addr) @@ -501,7 +501,7 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, } if (addr2->sa.sa_family == AF_INET && addr1->sa.sa_family == AF_INET6 && - IPV6_ADDR_MAPPED == ipv6_addr_type(&addr1->v6.sin6_addr)) { + ipv6_addr_v4mapped(&addr1->v6.sin6_addr)) { if (addr1->v6.sin6_port == addr2->v4.sin_port && addr1->v6.sin6_addr.s6_addr32[3] == addr2->v4.sin_addr.s_addr) @@ -631,7 +631,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1); + newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; @@ -855,7 +855,7 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); if (!dev) return 0; if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { @@ -886,7 +886,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); if (!dev) return 0; dev_put(dev); @@ -954,9 +954,9 @@ static struct inet_protosw sctpv6_stream_protosw = { .flags = SCTP_PROTOSW_FLAG, }; -static int sctp6_rcv(struct sk_buff **pskb) +static int sctp6_rcv(struct sk_buff *skb) { - return sctp_rcv(*pskb) ? -1 : 0; + return sctp_rcv(skb) ? -1 : 0; } static struct inet6_protocol sctpv6_protocol = { diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index fcfb9d806de..2cf6ad6ff8c 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -58,6 +58,7 @@ SCTP_DBG_OBJCNT(chunk); SCTP_DBG_OBJCNT(addr); SCTP_DBG_OBJCNT(ssnmap); SCTP_DBG_OBJCNT(datamsg); +SCTP_DBG_OBJCNT(keys); /* An array to make it easy to pretty print the debug information * to the proc fs. @@ -73,6 +74,7 @@ static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = { SCTP_DBG_OBJCNT_ENTRY(addr), SCTP_DBG_OBJCNT_ENTRY(ssnmap), SCTP_DBG_OBJCNT_ENTRY(datamsg), + SCTP_DBG_OBJCNT_ENTRY(keys), }; /* Callback from procfs to read out objcount information. diff --git a/net/sctp/output.c b/net/sctp/output.c index d85543def75..847639d542c 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -79,7 +79,10 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, packet->vtag = vtag; packet->has_cookie_echo = 0; packet->has_sack = 0; + packet->has_auth = 0; + packet->has_data = 0; packet->ipfragok = 0; + packet->auth = NULL; if (ecn_capable && sctp_packet_empty(packet)) { chunk = sctp_get_ecne_prepend(packet->transport->asoc); @@ -121,8 +124,11 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, packet->vtag = 0; packet->has_cookie_echo = 0; packet->has_sack = 0; + packet->has_auth = 0; + packet->has_data = 0; packet->ipfragok = 0; packet->malloced = 0; + packet->auth = NULL; return packet; } @@ -181,6 +187,39 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, return retval; } +/* Try to bundle an auth chunk into the packet. */ +static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, + struct sctp_chunk *chunk) +{ + struct sctp_association *asoc = pkt->transport->asoc; + struct sctp_chunk *auth; + sctp_xmit_t retval = SCTP_XMIT_OK; + + /* if we don't have an association, we can't do authentication */ + if (!asoc) + return retval; + + /* See if this is an auth chunk we are bundling or if + * auth is already bundled. + */ + if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->auth) + return retval; + + /* if the peer did not request this chunk to be authenticated, + * don't do it + */ + if (!chunk->auth) + return retval; + + auth = sctp_make_auth(asoc); + if (!auth) + return retval; + + retval = sctp_packet_append_chunk(pkt, auth); + + return retval; +} + /* Try to bundle a SACK with the packet. */ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, struct sctp_chunk *chunk) @@ -227,12 +266,17 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __FUNCTION__, packet, chunk); - retval = sctp_packet_bundle_sack(packet, chunk); - psize = packet->size; + /* Try to bundle AUTH chunk */ + retval = sctp_packet_bundle_auth(packet, chunk); + if (retval != SCTP_XMIT_OK) + goto finish; + /* Try to bundle SACK chunk */ + retval = sctp_packet_bundle_sack(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; + psize = packet->size; pmtu = ((packet->transport->asoc) ? (packet->transport->asoc->pathmtu) : (packet->transport->pathmtu)); @@ -241,10 +285,16 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, /* Decide if we need to fragment or resubmit later. */ if (too_big) { - /* Both control chunks and data chunks with TSNs are - * non-fragmentable. + /* It's OK to fragmet at IP level if any one of the following + * is true: + * 1. The packet is empty (meaning this chunk is greater + * the MTU) + * 2. The chunk we are adding is a control chunk + * 3. The packet doesn't have any data in it yet and data + * requires authentication. */ - if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk)) { + if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || + (!packet->has_data && chunk->auth)) { /* We no longer do re-fragmentation. * Just fragment at the IP layer, if we * actually hit this condition @@ -266,16 +316,31 @@ append: /* DATA is a special case since we must examine both rwnd and cwnd * before we send DATA. */ - if (sctp_chunk_is_data(chunk)) { + switch (chunk->chunk_hdr->type) { + case SCTP_CID_DATA: retval = sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ packet->has_sack = 1; + /* Disallow AUTH bundling after DATA */ + packet->has_auth = 1; + /* Let it be knows that packet has DATA in it */ + packet->has_data = 1; if (SCTP_XMIT_OK != retval) goto finish; - } else if (SCTP_CID_COOKIE_ECHO == chunk->chunk_hdr->type) + break; + case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; - else if (SCTP_CID_SACK == chunk->chunk_hdr->type) + break; + + case SCTP_CID_SACK: packet->has_sack = 1; + break; + + case SCTP_CID_AUTH: + packet->has_auth = 1; + packet->auth = chunk; + break; + } /* It is OK to send this chunk. */ list_add_tail(&chunk->list, &packet->chunk_list); @@ -303,6 +368,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) int padding; /* How much padding do we need? */ __u8 has_data = 0; struct dst_entry *dst = tp->dst; + unsigned char *auth = NULL; /* pointer to auth in skb data */ + __u32 cksum_buf_len = sizeof(struct sctphdr); SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet); @@ -356,16 +423,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /* 2) Calculate the Adler-32 checksum of the whole packet, - * including the SCTP common header and all the - * chunks. - * - * Note: Adler-32 is no longer applicable, as has been replaced - * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. - */ - if (!(dst->dev->features & NETIF_F_NO_CSUM)) - crc32 = sctp_start_cksum((__u8 *)sh, sizeof(struct sctphdr)); - /** * 6.10 Bundling * @@ -416,14 +473,16 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (padding) memset(skb_put(chunk->skb, padding), 0, padding); - if (dst->dev->features & NETIF_F_NO_CSUM) - memcpy(skb_put(nskb, chunk->skb->len), + /* if this is the auth chunk that we are adding, + * store pointer where it will be added and put + * the auth into the packet. + */ + if (chunk == packet->auth) + auth = skb_tail_pointer(nskb); + + cksum_buf_len += chunk->skb->len; + memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); - else - crc32 = sctp_update_copy_cksum(skb_put(nskb, - chunk->skb->len), - chunk->skb->data, - chunk->skb->len, crc32); SCTP_DEBUG_PRINTK("%s %p[%s] %s 0x%x, %s %d, %s %d, %s %d\n", "*** Chunk", chunk, @@ -445,9 +504,31 @@ int sctp_packet_transmit(struct sctp_packet *packet) sctp_chunk_free(chunk); } - /* Perform final transformation on checksum. */ - if (!(dst->dev->features & NETIF_F_NO_CSUM)) + /* SCTP-AUTH, Section 6.2 + * The sender MUST calculate the MAC as described in RFC2104 [2] + * using the hash function H as described by the MAC Identifier and + * the shared association key K based on the endpoint pair shared key + * described by the shared key identifier. The 'data' used for the + * computation of the AUTH-chunk is given by the AUTH chunk with its + * HMAC field set to zero (as shown in Figure 6) followed by all + * chunks that are placed after the AUTH chunk in the SCTP packet. + */ + if (auth) + sctp_auth_calculate_hmac(asoc, nskb, + (struct sctp_auth_chunk *)auth, + GFP_ATOMIC); + + /* 2) Calculate the Adler-32 checksum of the whole packet, + * including the SCTP common header and all the + * chunks. + * + * Note: Adler-32 is no longer applicable, as has been replaced + * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. + */ + if (!(dst->dev->features & NETIF_F_NO_CSUM)) { + crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); crc32 = sctp_end_cksum(crc32); + } /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3d036cdfae4..f5cd96f5fe7 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -51,6 +51,8 @@ #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/seq_file.h> +#include <linux/bootmem.h> +#include <net/net_namespace.h> #include <net/protocol.h> #include <net/ip.h> #include <net/ipv6.h> @@ -82,6 +84,10 @@ static struct sctp_af *sctp_af_v6_specific; struct kmem_cache *sctp_chunk_cachep __read_mostly; struct kmem_cache *sctp_bucket_cachep __read_mostly; +int sysctl_sctp_mem[3]; +int sysctl_sctp_rmem[3]; +int sysctl_sctp_wmem[3]; + /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) { @@ -93,7 +99,7 @@ static __init int sctp_proc_init(void) { if (!proc_net_sctp) { struct proc_dir_entry *ent; - ent = proc_mkdir("net/sctp", NULL); + ent = proc_mkdir("sctp", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_sctp = ent; @@ -126,7 +132,7 @@ static void sctp_proc_exit(void) if (proc_net_sctp) { proc_net_sctp = NULL; - remove_proc_entry("net/sctp", NULL); + remove_proc_entry("sctp", init_net.proc_net); } } @@ -173,7 +179,7 @@ static void sctp_get_local_addr_list(void) struct sctp_af *af; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); @@ -546,7 +552,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; - struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1); + struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; @@ -987,6 +993,8 @@ SCTP_STATIC __init int sctp_init(void) int i; int status = -EINVAL; unsigned long goal; + unsigned long limit; + int max_share; int order; /* SCTP_DEBUG sanity check. */ @@ -1077,6 +1085,31 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); + /* Set the pressure threshold to be a fraction of global memory that + * is up to 1/2 at 256 MB, decreasing toward zero with the amount of + * memory, with a floor of 128 pages. + * Note this initalizes the data in sctpv6_prot too + * Unabashedly stolen from tcp_init + */ + limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = max(limit, 128UL); + sysctl_sctp_mem[0] = limit / 4 * 3; + sysctl_sctp_mem[1] = limit; + sysctl_sctp_mem[2] = sysctl_sctp_mem[0] * 2; + + /* Set per-socket limits to no more than 1/128 the pressure threshold*/ + limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7); + max_share = min(4UL*1024*1024, limit); + + sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */ + sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); + + sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_sctp_wmem[1] = 16*1024; + sysctl_sctp_wmem[2] = max(64*1024, max_share); + /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. */ @@ -1139,9 +1172,6 @@ SCTP_STATIC __init int sctp_init(void) sctp_port_hashtable[i].chain = NULL; } - spin_lock_init(&sctp_port_alloc_lock); - sctp_port_rover = sysctl_local_port_range[0] - 1; - printk(KERN_INFO "SCTP: Hash tables configured " "(established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); @@ -1152,6 +1182,9 @@ SCTP_STATIC __init int sctp_init(void) /* Enable PR-SCTP by default. */ sctp_prsctp_enable = 1; + /* Disable AUTH by default. */ + sctp_auth_enable = 0; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); @@ -1195,7 +1228,6 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_v6_add_protocol; - __unsafe(THIS_MODULE); status = 0; out: return status; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 23ae37ec871..c377e4e8f65 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -56,7 +56,7 @@ #include <linux/ipv6.h> #include <linux/net.h> #include <linux/inet.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <linux/crypto.h> #include <net/sock.h> @@ -179,6 +179,11 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_supported_addrs_param_t sat; __be16 types[2]; sctp_adaptation_ind_param_t aiparam; + sctp_supported_ext_param_t ext_param; + int num_ext = 0; + __u8 extensions[3]; + sctp_paramhdr_t *auth_chunks = NULL, + *auth_hmacs = NULL; /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -202,11 +207,52 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); chunksize += sizeof(ecap_param); - if (sctp_prsctp_enable) + if (sctp_prsctp_enable) { chunksize += sizeof(prsctp_param); + extensions[num_ext] = SCTP_CID_FWD_TSN; + num_ext += 1; + } + /* ADDIP: Section 4.2.7: + * An implementation supporting this extension [ADDIP] MUST list + * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and + * INIT-ACK parameters. + */ + if (sctp_addip_enable) { + extensions[num_ext] = SCTP_CID_ASCONF; + extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; + num_ext += 2; + } + chunksize += sizeof(aiparam); chunksize += vparam_len; + /* Account for AUTH related parameters */ + if (sctp_auth_enable) { + /* Add random parameter length*/ + chunksize += sizeof(asoc->c.auth_random); + + /* Add HMACS parameter length if any were defined */ + auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; + if (auth_hmacs->length) + chunksize += ntohs(auth_hmacs->length); + else + auth_hmacs = NULL; + + /* Add CHUNKS parameter length */ + auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; + if (auth_chunks->length) + chunksize += ntohs(auth_chunks->length); + else + auth_hmacs = NULL; + + extensions[num_ext] = SCTP_CID_AUTH; + num_ext += 1; + } + + /* If we have any extensions to report, account for that */ + if (num_ext) + chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + /* RFC 2960 3.3.2 Initiation (INIT) (1) * * Note 3: An INIT chunk MUST NOT contain more than one Host @@ -241,12 +287,38 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_chunk(retval, num_types * sizeof(__u16), &types); sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); + + /* Add the supported extensions paramter. Be nice and add this + * fist before addiding the parameters for the extensions themselves + */ + if (num_ext) { + ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; + ext_param.param_hdr.length = + htons(sizeof(sctp_supported_ext_param_t) + num_ext); + sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), + &ext_param); + sctp_addto_chunk(retval, num_ext, extensions); + } + if (sctp_prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); + aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; aiparam.param_hdr.length = htons(sizeof(aiparam)); aiparam.adaptation_ind = htonl(sp->adaptation_ind); sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); + + /* Add SCTP-AUTH chunks to the parameter list */ + if (sctp_auth_enable) { + sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), + asoc->c.auth_random); + if (auth_hmacs) + sctp_addto_chunk(retval, ntohs(auth_hmacs->length), + auth_hmacs); + if (auth_chunks) + sctp_addto_chunk(retval, ntohs(auth_chunks->length), + auth_chunks); + } nodata: kfree(addrs.v); return retval; @@ -264,6 +336,12 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, int cookie_len; size_t chunksize; sctp_adaptation_ind_param_t aiparam; + sctp_supported_ext_param_t ext_param; + int num_ext = 0; + __u8 extensions[3]; + sctp_paramhdr_t *auth_chunks = NULL, + *auth_hmacs = NULL, + *auth_random = NULL; retval = NULL; @@ -294,11 +372,41 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, chunksize += sizeof(ecap_param); /* Tell peer that we'll do PR-SCTP only if peer advertised. */ - if (asoc->peer.prsctp_capable) + if (asoc->peer.prsctp_capable) { chunksize += sizeof(prsctp_param); + extensions[num_ext] = SCTP_CID_FWD_TSN; + num_ext += 1; + } + + if (sctp_addip_enable) { + extensions[num_ext] = SCTP_CID_ASCONF; + extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; + num_ext += 2; + } + chunksize += sizeof(ext_param) + num_ext; chunksize += sizeof(aiparam); + if (asoc->peer.auth_capable) { + auth_random = (sctp_paramhdr_t *)asoc->c.auth_random; + chunksize += ntohs(auth_random->length); + + auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; + if (auth_hmacs->length) + chunksize += ntohs(auth_hmacs->length); + else + auth_hmacs = NULL; + + auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; + if (auth_chunks->length) + chunksize += ntohs(auth_chunks->length); + else + auth_chunks = NULL; + + extensions[num_ext] = SCTP_CID_AUTH; + num_ext += 1; + } + /* Now allocate and fill out the chunk. */ retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); if (!retval) @@ -314,6 +422,14 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_addto_chunk(retval, cookie_len, cookie); if (asoc->peer.ecn_capable) sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); + if (num_ext) { + ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; + ext_param.param_hdr.length = + htons(sizeof(sctp_supported_ext_param_t) + num_ext); + sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), + &ext_param); + sctp_addto_chunk(retval, num_ext, extensions); + } if (asoc->peer.prsctp_capable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); @@ -322,6 +438,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, aiparam.adaptation_ind = htonl(sctp_sk(asoc->base.sk)->adaptation_ind); sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); + if (asoc->peer.auth_capable) { + sctp_addto_chunk(retval, ntohs(auth_random->length), + auth_random); + if (auth_hmacs) + sctp_addto_chunk(retval, ntohs(auth_hmacs->length), + auth_hmacs); + if (auth_chunks) + sctp_addto_chunk(retval, ntohs(auth_chunks->length), + auth_chunks); + } + /* We need to remove the const qualifier at this point. */ retval->asoc = (struct sctp_association *) asoc; @@ -839,6 +966,26 @@ err_chunk: return retval; } +/* Append bytes to the end of a parameter. Will panic if chunk is not big + * enough. + */ +static void *sctp_addto_param(struct sctp_chunk *chunk, int len, + const void *data) +{ + void *target; + int chunklen = ntohs(chunk->chunk_hdr->length); + + target = skb_put(chunk->skb, len); + + memcpy(target, data, len); + + /* Adjust the chunk length field. */ + chunk->chunk_hdr->length = htons(chunklen + len); + chunk->chunk_end = skb_tail_pointer(chunk->skb); + + return target; +} + /* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */ struct sctp_chunk *sctp_make_abort_violation( const struct sctp_association *asoc, @@ -964,6 +1111,41 @@ nodata: return retval; } +struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) +{ + struct sctp_chunk *retval; + struct sctp_hmac *hmac_desc; + struct sctp_authhdr auth_hdr; + __u8 *hmac; + + /* Get the first hmac that the peer told us to use */ + hmac_desc = sctp_auth_asoc_get_hmac(asoc); + if (unlikely(!hmac_desc)) + return NULL; + + retval = sctp_make_chunk(asoc, SCTP_CID_AUTH, 0, + hmac_desc->hmac_len + sizeof(sctp_authhdr_t)); + if (!retval) + return NULL; + + auth_hdr.hmac_id = htons(hmac_desc->hmac_id); + auth_hdr.shkey_id = htons(asoc->active_key_id); + + retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t), + &auth_hdr); + + hmac = skb_put(retval->skb, hmac_desc->hmac_len); + memset(hmac, 0, hmac_desc->hmac_len); + + /* Adjust the chunk header to include the empty MAC */ + retval->chunk_hdr->length = + htons(ntohs(retval->chunk_hdr->length) + hmac_desc->hmac_len); + retval->chunk_end = skb_tail_pointer(retval->skb); + + return retval; +} + + /******************************************************************** * 2nd Level Abstractions ********************************************************************/ @@ -1078,6 +1260,10 @@ struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, retval->chunk_hdr = chunk_hdr; retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr); + /* Determine if the chunk needs to be authenticated */ + if (sctp_auth_send_cid(type, asoc)) + retval->auth = 1; + /* Set the skb to the belonging sock for accounting. */ skb->sk = sk; @@ -1146,25 +1332,6 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) return target; } -/* Append bytes to the end of a parameter. Will panic if chunk is not big - * enough. - */ -void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data) -{ - void *target; - int chunklen = ntohs(chunk->chunk_hdr->length); - - target = skb_put(chunk->skb, len); - - memcpy(target, data, len); - - /* Adjust the chunk length field. */ - chunk->chunk_hdr->length = htons(chunklen + len); - chunk->chunk_end = skb_tail_pointer(chunk->skb); - - return target; -} - /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. @@ -1346,9 +1513,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, struct hash_desc desc; /* Sign the message. */ - sg.page = virt_to_page(&cookie->c); - sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE; - sg.length = bodysize; + sg_init_one(&sg, &cookie->c, bodysize); keylen = SCTP_SECRET_SIZE; key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; @@ -1418,9 +1583,7 @@ struct sctp_association *sctp_unpack_cookie( /* Check the signature. */ keylen = SCTP_SECRET_SIZE; - sg.page = virt_to_page(bear_cookie); - sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE; - sg.length = bodysize; + sg_init_one(&sg, bear_cookie, bodysize); key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; @@ -1663,6 +1826,35 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, return 0; } +static void sctp_process_ext_param(struct sctp_association *asoc, + union sctp_params param) +{ + __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + int i; + + for (i = 0; i < num_ext; i++) { + switch (param.ext->chunks[i]) { + case SCTP_CID_FWD_TSN: + if (sctp_prsctp_enable && + !asoc->peer.prsctp_capable) + asoc->peer.prsctp_capable = 1; + break; + case SCTP_CID_AUTH: + /* if the peer reports AUTH, assume that he + * supports AUTH. + */ + asoc->peer.auth_capable = 1; + break; + case SCTP_CID_ASCONF: + case SCTP_CID_ASCONF_ACK: + asoc->peer.addip_capable = 1; + break; + default: + break; + } + } +} + /* RFC 3.2.1 & the Implementers Guide 2.2. * * The Parameter Types are encoded such that the @@ -1779,15 +1971,52 @@ static int sctp_verify_param(const struct sctp_association *asoc, case SCTP_PARAM_UNRECOGNIZED_PARAMETERS: case SCTP_PARAM_ECN_CAPABLE: case SCTP_PARAM_ADAPTATION_LAYER_IND: + case SCTP_PARAM_SUPPORTED_EXT: break; case SCTP_PARAM_HOST_NAME_ADDRESS: /* Tell the peer, we won't support this param. */ return sctp_process_hn_param(asoc, param, chunk, err_chunk); + case SCTP_PARAM_FWD_TSN_SUPPORT: if (sctp_prsctp_enable) break; + goto fallthrough; + + case SCTP_PARAM_RANDOM: + if (!sctp_auth_enable) + goto fallthrough; + + /* SCTP-AUTH: Secion 6.1 + * If the random number is not 32 byte long the association + * MUST be aborted. The ABORT chunk SHOULD contain the error + * cause 'Protocol Violation'. + */ + if (SCTP_AUTH_RANDOM_LENGTH != + ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) + return sctp_process_inv_paramlength(asoc, param.p, + chunk, err_chunk); + break; + + case SCTP_PARAM_CHUNKS: + if (!sctp_auth_enable) + goto fallthrough; + + /* SCTP-AUTH: Section 3.2 + * The CHUNKS parameter MUST be included once in the INIT or + * INIT-ACK chunk if the sender wants to receive authenticated + * chunks. Its maximum length is 260 bytes. + */ + if (260 < ntohs(param.p->length)) + return sctp_process_inv_paramlength(asoc, param.p, + chunk, err_chunk); + break; + + case SCTP_PARAM_HMAC_ALGO: + if (!sctp_auth_enable) + break; /* Fall Through */ +fallthrough: default: SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n", ntohs(param.p->type), cid); @@ -1892,13 +2121,29 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, } /* Process the initialization parameters. */ - sctp_walk_params(param, peer_init, init_hdr.params) { if (!sctp_process_param(asoc, param, peer_addr, gfp)) goto clean_up; } + /* AUTH: After processing the parameters, make sure that we + * have all the required info to potentially do authentications. + */ + if (asoc->peer.auth_capable && (!asoc->peer.peer_random || + !asoc->peer.peer_hmacs)) + asoc->peer.auth_capable = 0; + + + /* If the peer claims support for ADD-IP without support + * for AUTH, disable support for ADD-IP. + */ + if (asoc->peer.addip_capable && !asoc->peer.auth_capable) { + asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | + SCTP_PARAM_DEL_IP | + SCTP_PARAM_SET_PRIMARY); + } + /* Walk list of transports, removing transports in the UNKNOWN state. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); @@ -2128,12 +2373,57 @@ static int sctp_process_param(struct sctp_association *asoc, asoc->peer.adaptation_ind = param.aind->adaptation_ind; break; + case SCTP_PARAM_SUPPORTED_EXT: + sctp_process_ext_param(asoc, param); + break; + case SCTP_PARAM_FWD_TSN_SUPPORT: if (sctp_prsctp_enable) { asoc->peer.prsctp_capable = 1; break; } /* Fall Through */ + goto fall_through; + + case SCTP_PARAM_RANDOM: + if (!sctp_auth_enable) + goto fall_through; + + /* Save peer's random parameter */ + asoc->peer.peer_random = kmemdup(param.p, + ntohs(param.p->length), gfp); + if (!asoc->peer.peer_random) { + retval = 0; + break; + } + break; + + case SCTP_PARAM_HMAC_ALGO: + if (!sctp_auth_enable) + goto fall_through; + + /* Save peer's HMAC list */ + asoc->peer.peer_hmacs = kmemdup(param.p, + ntohs(param.p->length), gfp); + if (!asoc->peer.peer_hmacs) { + retval = 0; + break; + } + + /* Set the default HMAC the peer requested*/ + sctp_auth_asoc_set_default_hmac(asoc, param.hmac_algo); + break; + + case SCTP_PARAM_CHUNKS: + if (!sctp_auth_enable) + goto fall_through; + + asoc->peer.peer_chunks = kmemdup(param.p, + ntohs(param.p->length), gfp); + if (!asoc->peer.peer_chunks) + retval = 0; + break; +fall_through: default: /* Any unrecognized parameters should have been caught * and handled by sctp_verify_param() which should be diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8d789008349..bbdc938da86 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1524,6 +1524,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_adaptation_ind(commands, asoc); break; + case SCTP_CMD_ASSOC_SHKEY: + error = sctp_auth_asoc_init_active_key(asoc, + GFP_ATOMIC); + break; + default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a583d67cab6..f01b408508f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -138,6 +138,11 @@ static sctp_disposition_t sctp_sf_violation_chunk( void *arg, sctp_cmd_seq_t *commands); +static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + struct sctp_chunk *chunk); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -495,8 +500,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. */ @@ -521,6 +524,22 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); error = SCTP_ERROR_INV_PARAM; } + + /* SCTP-AUTH, Section 6.3: + * It should be noted that if the receiver wants to tear + * down an association in an authenticated way only, the + * handling of malformed packets should not result in + * tearing down the association. + * + * This means that if we only want to abort associations + * in an authenticated way (i.e AUTH+ABORT), then we + * can't destory this association just becuase the packet + * was malformed. + */ + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc, chunk->transport); } @@ -549,6 +568,11 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_COOKIE_ECHOED)); + /* SCTP-AUTH: genereate the assocition shared keys so that + * we can potentially signe the COOKIE-ECHO. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_SHKEY, SCTP_NULL()); + /* 5.1 C) "A" shall then send the State Cookie received in the * INIT ACK chunk in a COOKIE ECHO chunk, ... */ @@ -686,6 +710,44 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, peer_init, GFP_ATOMIC)) goto nomem_init; + /* SCTP-AUTH: Now that we've populate required fields in + * sctp_process_init, set up the assocaition shared keys as + * necessary so that we can potentially authenticate the ACK + */ + error = sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC); + if (error) + goto nomem_init; + + /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo + * is supposed to be authenticated and we have to do delayed + * authentication. We've just recreated the association using + * the information in the cookie and now it's much easier to + * do the authentication. + */ + if (chunk->auth_chunk) { + struct sctp_chunk auth; + sctp_ierror_t ret; + + /* set-up our fake chunk so that we can process it */ + auth.skb = chunk->auth_chunk; + auth.asoc = chunk->asoc; + auth.sctp_hdr = chunk->sctp_hdr; + auth.chunk_hdr = (sctp_chunkhdr_t *)skb_push(chunk->auth_chunk, + sizeof(sctp_chunkhdr_t)); + skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t)); + auth.transport = chunk->transport; + + ret = sctp_sf_authenticate(ep, new_asoc, type, &auth); + + /* We can now safely free the auth_chunk clone */ + kfree_skb(chunk->auth_chunk); + + if (ret != SCTP_IERROR_NO_ERROR) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + } + } + repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem_init; @@ -1247,6 +1309,26 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc, new_asoc->c.initial_tsn = asoc->c.initial_tsn; } +static void sctp_auth_params_populate(struct sctp_association *new_asoc, + const struct sctp_association *asoc) +{ + /* Only perform this if AUTH extension is enabled */ + if (!sctp_auth_enable) + return; + + /* We need to provide the same parameter information as + * was in the original INIT. This means that we need to copy + * the HMACS, CHUNKS, and RANDOM parameter from the original + * assocaition. + */ + memcpy(new_asoc->c.auth_random, asoc->c.auth_random, + sizeof(asoc->c.auth_random)); + memcpy(new_asoc->c.auth_hmacs, asoc->c.auth_hmacs, + sizeof(asoc->c.auth_hmacs)); + memcpy(new_asoc->c.auth_chunks, asoc->c.auth_chunks, + sizeof(asoc->c.auth_chunks)); +} + /* * Compare vtag/tietag values to determine unexpected COOKIE-ECHO * handling action. @@ -1404,6 +1486,8 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( sctp_tietags_populate(new_asoc, asoc); + sctp_auth_params_populate(new_asoc, asoc); + /* B) "Z" shall respond immediately with an INIT ACK chunk. */ /* If there are errors need to be reported for unknown parameters, @@ -3618,6 +3702,169 @@ gen_shutdown: } /* + * SCTP-AUTH Section 6.3 Receving authenticated chukns + * + * The receiver MUST use the HMAC algorithm indicated in the HMAC + * Identifier field. If this algorithm was not specified by the + * receiver in the HMAC-ALGO parameter in the INIT or INIT-ACK chunk + * during association setup, the AUTH chunk and all chunks after it MUST + * be discarded and an ERROR chunk SHOULD be sent with the error cause + * defined in Section 4.1. + * + * If an endpoint with no shared key receives a Shared Key Identifier + * other than 0, it MUST silently discard all authenticated chunks. If + * the endpoint has at least one endpoint pair shared key for the peer, + * it MUST use the key specified by the Shared Key Identifier if a + * key has been configured for that Shared Key Identifier. If no + * endpoint pair shared key has been configured for that Shared Key + * Identifier, all authenticated chunks MUST be silently discarded. + * + * Verification Tag: 8.5 Verification Tag [Normal verification] + * + * The return value is the disposition of the chunk. + */ +static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + struct sctp_chunk *chunk) +{ + struct sctp_authhdr *auth_hdr; + struct sctp_hmac *hmac; + unsigned int sig_len; + __u16 key_id; + __u8 *save_digest; + __u8 *digest; + + /* Pull in the auth header, so we can do some more verification */ + auth_hdr = (struct sctp_authhdr *)chunk->skb->data; + chunk->subh.auth_hdr = auth_hdr; + skb_pull(chunk->skb, sizeof(struct sctp_authhdr)); + + /* Make sure that we suport the HMAC algorithm from the auth + * chunk. + */ + if (!sctp_auth_asoc_verify_hmac_id(asoc, auth_hdr->hmac_id)) + return SCTP_IERROR_AUTH_BAD_HMAC; + + /* Make sure that the provided shared key identifier has been + * configured + */ + key_id = ntohs(auth_hdr->shkey_id); + if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id)) + return SCTP_IERROR_AUTH_BAD_KEYID; + + + /* Make sure that the length of the signature matches what + * we expect. + */ + sig_len = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_auth_chunk_t); + hmac = sctp_auth_get_hmac(ntohs(auth_hdr->hmac_id)); + if (sig_len != hmac->hmac_len) + return SCTP_IERROR_PROTO_VIOLATION; + + /* Now that we've done validation checks, we can compute and + * verify the hmac. The steps involved are: + * 1. Save the digest from the chunk. + * 2. Zero out the digest in the chunk. + * 3. Compute the new digest + * 4. Compare saved and new digests. + */ + digest = auth_hdr->hmac; + skb_pull(chunk->skb, sig_len); + + save_digest = kmemdup(digest, sig_len, GFP_ATOMIC); + if (!save_digest) + goto nomem; + + memset(digest, 0, sig_len); + + sctp_auth_calculate_hmac(asoc, chunk->skb, + (struct sctp_auth_chunk *)chunk->chunk_hdr, + GFP_ATOMIC); + + /* Discard the packet if the digests do not match */ + if (memcmp(save_digest, digest, sig_len)) { + kfree(save_digest); + return SCTP_IERROR_BAD_SIG; + } + + kfree(save_digest); + chunk->auth = 1; + + return SCTP_IERROR_NO_ERROR; +nomem: + return SCTP_IERROR_NOMEM; +} + +sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_authhdr *auth_hdr; + struct sctp_chunk *chunk = arg; + struct sctp_chunk *err_chunk; + sctp_ierror_t error; + + if (!sctp_vtag_verify(chunk, asoc)) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, + SCTP_NULL()); + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + } + + /* Make sure that the AUTH chunk has valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_auth_chunk))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + + auth_hdr = (struct sctp_authhdr *)chunk->skb->data; + error = sctp_sf_authenticate(ep, asoc, type, chunk); + switch (error) { + case SCTP_IERROR_AUTH_BAD_HMAC: + /* Generate the ERROR chunk and discard the rest + * of the packet + */ + err_chunk = sctp_make_op_error(asoc, chunk, + SCTP_ERROR_UNSUP_HMAC, + &auth_hdr->hmac_id, + sizeof(__u16)); + if (err_chunk) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, + SCTP_CHUNK(err_chunk)); + } + /* Fall Through */ + case SCTP_IERROR_AUTH_BAD_KEYID: + case SCTP_IERROR_BAD_SIG: + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + break; + case SCTP_IERROR_PROTO_VIOLATION: + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + break; + case SCTP_IERROR_NOMEM: + return SCTP_DISPOSITION_NOMEM; + default: + break; + } + + if (asoc->active_key_id != ntohs(auth_hdr->shkey_id)) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id), + SCTP_AUTH_NEWKEY, GFP_ATOMIC); + + if (!ev) + return -ENOMEM; + + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); + } + + return SCTP_DISPOSITION_CONSUME; +} + +/* * Process an unknown chunk. * * Section: 3.2. Also, 2.1 in the implementor's guide. @@ -3822,6 +4069,20 @@ static sctp_disposition_t sctp_sf_abort_violation( if (!abort) goto nomem; + /* SCTP-AUTH, Section 6.3: + * It should be noted that if the receiver wants to tear + * down an association in an authenticated way only, the + * handling of malformed packets should not result in + * tearing down the association. + * + * This means that if we only want to abort associations + * in an authenticated way (i.e AUTH+ABORT), then we + * can't destory this association just becuase the packet + * was malformed. + */ + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + goto discard; + if (asoc) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); @@ -3859,6 +4120,7 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); } +discard: sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); @@ -5428,10 +5690,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, sctp_verb_t deliver; int tmp; __u32 tsn; - int account_value; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; - int rcvbuf_over = 0; data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); @@ -5441,48 +5701,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, /* ASSERT: Now skb->data is really the user data. */ - /* - * If we are established, and we have used up our receive buffer - * memory, think about droping the frame. - * Note that we have an opportunity to improve performance here. - * If we accept one chunk from an skbuff, we have to keep all the - * memory of that skbuff around until the chunk is read into user - * space. Therefore, once we accept 1 chunk we may as well accept all - * remaining chunks in the skbuff. The data_accepted flag helps us do - * that. - */ - if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) { - /* - * If the receive buffer policy is 1, then each - * association can allocate up to sk_rcvbuf bytes - * otherwise, all the associations in aggregate - * may allocate up to sk_rcvbuf bytes - */ - if (asoc->ep->rcvbuf_policy) - account_value = atomic_read(&asoc->rmem_alloc); - else - account_value = atomic_read(&sk->sk_rmem_alloc); - if (account_value > sk->sk_rcvbuf) { - /* - * We need to make forward progress, even when we are - * under memory pressure, so we always allow the - * next tsn after the ctsn ack point to be accepted. - * This lets us avoid deadlocks in which we have to - * drop frames that would otherwise let us drain the - * receive queue. - */ - if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn) - return SCTP_IERROR_IGNORE_TSN; - - /* - * We're going to accept the frame but we should renege - * to make space for it. This will send us down that - * path later in this function. - */ - rcvbuf_over = 1; - } - } - /* Process ECN based congestion. * * Since the chunk structure is reused for all chunks within @@ -5542,18 +5760,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * seems a bit troublesome in that frag_point varies based on * PMTU. In cases, such as loopback, this might be a rather * large spill over. - * NOTE: If we have a full receive buffer here, we only renege if - * our receiver can still make progress without the tsn being - * received. We do this because in the event that the associations - * receive queue is empty we are filling a leading gap, and since - * reneging moves the gap to the end of the tsn stream, we are likely - * to stall again very shortly. Avoiding the renege when we fill a - * leading gap is a good heuristic for avoiding such steady state - * stalls. - */ - if (!asoc->rwnd || asoc->rwnd_over || - (datalen > asoc->rwnd + asoc->frag_point) || - (rcvbuf_over && (!skb_queue_len(&sk->sk_receive_queue)))) { + */ + if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over || + (datalen > asoc->rwnd + asoc->frag_point))) { /* If this is the next TSN, consider reneging to make * room. Note: Playing nice with a confused sender. A @@ -5574,6 +5783,21 @@ static int sctp_eat_data(const struct sctp_association *asoc, } /* + * Also try to renege to limit our memory usage in the event that + * we are under memory pressure + * If we can't renege, don't worry about it, the sk_stream_rmem_schedule + * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our + * memory usage too much + */ + if (*sk->sk_prot_creator->memory_pressure) { + if (sctp_tsnmap_has_gap(map) && + (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { + SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn); + deliver = SCTP_CMD_RENEGE; + } + } + + /* * Section 3.3.10.9 No User Data (9) * * Cause of error diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index ddb0ba3974b..a93a4bc8f68 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -523,6 +523,34 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN TYPE_SCTP_FWD_TSN, }; /*state_fn_t prsctp_chunk_event_table[][] */ +#define TYPE_SCTP_AUTH { \ + /* SCTP_STATE_EMPTY */ \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ + /* SCTP_STATE_CLOSED */ \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ + /* SCTP_STATE_COOKIE_WAIT */ \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + /* SCTP_STATE_COOKIE_ECHOED */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_ESTABLISHED */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_SHUTDOWN_PENDING */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_SHUTDOWN_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_eat_auth), \ +} /* TYPE_SCTP_AUTH */ + +/* The primary index for this table is the chunk type. + * The secondary index for this table is the state. + */ +static const sctp_sm_table_entry_t auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { + TYPE_SCTP_AUTH, +}; /*state_fn_t auth_chunk_event_table[][] */ + static const sctp_sm_table_entry_t chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { /* SCTP_STATE_EMPTY */ @@ -976,5 +1004,10 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, return &addip_chunk_event_table[1][state]; } + if (sctp_auth_enable) { + if (cid == SCTP_CID_AUTH) + return &auth_chunk_event_table[0][state]; + } + return &chunk_event_table_unknown[state]; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 772fbfb4bfd..bd6f42a15a4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -107,23 +107,42 @@ static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; +extern struct kmem_cache *sctp_bucket_cachep; +extern int sysctl_sctp_mem[3]; +extern int sysctl_sctp_rmem[3]; +extern int sysctl_sctp_wmem[3]; + +static int sctp_memory_pressure; +static atomic_t sctp_memory_allocated; +static atomic_t sctp_sockets_allocated; + +static void sctp_enter_memory_pressure(void) +{ + sctp_memory_pressure = 1; +} + + /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { - struct sock *sk = asoc->base.sk; - int amt = 0; + int amt; - if (asoc->ep->sndbuf_policy) { - /* make sure that no association uses more than sk_sndbuf */ - amt = sk->sk_sndbuf - asoc->sndbuf_used; + if (asoc->ep->sndbuf_policy) + amt = asoc->sndbuf_used; + else + amt = atomic_read(&asoc->base.sk->sk_wmem_alloc); + + if (amt >= asoc->base.sk->sk_sndbuf) { + if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK) + amt = 0; + else { + amt = sk_stream_wspace(asoc->base.sk); + if (amt < 0) + amt = 0; + } } else { - /* do socket level accounting */ - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = asoc->base.sk->sk_sndbuf - amt; } - - if (amt < 0) - amt = 0; - return amt; } @@ -155,6 +174,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sizeof(struct sctp_chunk); atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + sk_charge_skb(sk, chunk->skb); } /* Verify that this is a valid address. */ @@ -2926,6 +2946,164 @@ static int sctp_setsockopt_maxburst(struct sock *sk, return 0; } +/* + * 7.1.18. Add a chunk that must be authenticated (SCTP_AUTH_CHUNK) + * + * This set option adds a chunk type that the user is requesting to be + * received only in an authenticated way. Changes to the list of chunks + * will only effect future associations on the socket. + */ +static int sctp_setsockopt_auth_chunk(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_authchunk val; + + if (optlen != sizeof(struct sctp_authchunk)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + switch (val.sauth_chunk) { + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: + return -EINVAL; + } + + /* add this chunk id to the endpoint */ + return sctp_auth_ep_add_chunkid(sctp_sk(sk)->ep, val.sauth_chunk); +} + +/* + * 7.1.19. Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT) + * + * This option gets or sets the list of HMAC algorithms that the local + * endpoint requires the peer to use. + */ +static int sctp_setsockopt_hmac_ident(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_hmacalgo *hmacs; + int err; + + if (optlen < sizeof(struct sctp_hmacalgo)) + return -EINVAL; + + hmacs = kmalloc(optlen, GFP_KERNEL); + if (!hmacs) + return -ENOMEM; + + if (copy_from_user(hmacs, optval, optlen)) { + err = -EFAULT; + goto out; + } + + if (hmacs->shmac_num_idents == 0 || + hmacs->shmac_num_idents > SCTP_AUTH_NUM_HMACS) { + err = -EINVAL; + goto out; + } + + err = sctp_auth_ep_set_hmacs(sctp_sk(sk)->ep, hmacs); +out: + kfree(hmacs); + return err; +} + +/* + * 7.1.20. Set a shared key (SCTP_AUTH_KEY) + * + * This option will set a shared secret key which is used to build an + * association shared key. + */ +static int sctp_setsockopt_auth_key(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_authkey *authkey; + struct sctp_association *asoc; + int ret; + + if (optlen <= sizeof(struct sctp_authkey)) + return -EINVAL; + + authkey = kmalloc(optlen, GFP_KERNEL); + if (!authkey) + return -ENOMEM; + + if (copy_from_user(authkey, optval, optlen)) { + ret = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, authkey->sca_assoc_id); + if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) { + ret = -EINVAL; + goto out; + } + + ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey); +out: + kfree(authkey); + return ret; +} + +/* + * 7.1.21. Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY) + * + * This option will get or set the active shared key to be used to build + * the association shared key. + */ +static int sctp_setsockopt_active_key(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (optlen != sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + return sctp_auth_set_active_key(sctp_sk(sk)->ep, asoc, + val.scact_keynumber); +} + +/* + * 7.1.22. Delete a shared key (SCTP_AUTH_DELETE_KEY) + * + * This set option will delete a shared secret key from use. + */ +static int sctp_setsockopt_del_key(struct sock *sk, + char __user *optval, + int optlen) +{ + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (optlen != sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + return sctp_auth_del_key_id(sctp_sk(sk)->ep, asoc, + val.scact_keynumber); + +} + + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3049,6 +3227,21 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_MAX_BURST: retval = sctp_setsockopt_maxburst(sk, optval, optlen); break; + case SCTP_AUTH_CHUNK: + retval = sctp_setsockopt_auth_chunk(sk, optval, optlen); + break; + case SCTP_HMAC_IDENT: + retval = sctp_setsockopt_hmac_ident(sk, optval, optlen); + break; + case SCTP_AUTH_KEY: + retval = sctp_setsockopt_auth_key(sk, optval, optlen); + break; + case SCTP_AUTH_ACTIVE_KEY: + retval = sctp_setsockopt_active_key(sk, optval, optlen); + break; + case SCTP_AUTH_DELETE_KEY: + retval = sctp_setsockopt_del_key(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -3293,6 +3486,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); + atomic_inc(&sctp_sockets_allocated); return 0; } @@ -3306,7 +3500,7 @@ SCTP_STATIC int sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - + atomic_dec(&sctp_sockets_allocated); return 0; } @@ -4819,6 +5013,120 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, return -ENOTSUPP; } +static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_hmac_algo_param *hmacs; + __u16 param_len; + + hmacs = sctp_sk(sk)->ep->auth_hmacs_list; + param_len = ntohs(hmacs->param_hdr.length); + + if (len < param_len) + return -EINVAL; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, hmacs->hmac_ids, len)) + return -EFAULT; + + return 0; +} + +static int sctp_getsockopt_active_key(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (len < sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid))) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) + val.scact_keynumber = asoc->active_key_id; + else + val.scact_keynumber = sctp_sk(sk)->ep->active_key_id; + + return 0; +} + +static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_authchunks __user *p = (void __user *)optval; + struct sctp_authchunks val; + struct sctp_association *asoc; + struct sctp_chunks_param *ch; + char __user *to; + + if (len <= sizeof(struct sctp_authchunks)) + return -EINVAL; + + if (copy_from_user(&val, p, sizeof(struct sctp_authchunks))) + return -EFAULT; + + to = p->gauth_chunks; + asoc = sctp_id2assoc(sk, val.gauth_assoc_id); + if (!asoc) + return -EINVAL; + + ch = asoc->peer.peer_chunks; + + /* See if the user provided enough room for all the data */ + if (len < ntohs(ch->param_hdr.length)) + return -EINVAL; + + len = ntohs(ch->param_hdr.length); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(to, ch->chunks, len)) + return -EFAULT; + + return 0; +} + +static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_authchunks __user *p = (void __user *)optval; + struct sctp_authchunks val; + struct sctp_association *asoc; + struct sctp_chunks_param *ch; + char __user *to; + + if (len <= sizeof(struct sctp_authchunks)) + return -EINVAL; + + if (copy_from_user(&val, p, sizeof(struct sctp_authchunks))) + return -EFAULT; + + to = p->gauth_chunks; + asoc = sctp_id2assoc(sk, val.gauth_assoc_id); + if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) + ch = (struct sctp_chunks_param*)asoc->c.auth_chunks; + else + ch = sctp_sk(sk)->ep->auth_chunk_list; + + if (len < ntohs(ch->param_hdr.length)) + return -EINVAL; + + len = ntohs(ch->param_hdr.length); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(to, ch->chunks, len)) + return -EFAULT; + + return 0; +} + SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -4942,6 +5250,25 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_MAX_BURST: retval = sctp_getsockopt_maxburst(sk, len, optval, optlen); break; + case SCTP_AUTH_KEY: + case SCTP_AUTH_CHUNK: + case SCTP_AUTH_DELETE_KEY: + retval = -EOPNOTSUPP; + break; + case SCTP_HMAC_IDENT: + retval = sctp_getsockopt_hmac_ident(sk, len, optval, optlen); + break; + case SCTP_AUTH_ACTIVE_KEY: + retval = sctp_getsockopt_active_key(sk, len, optval, optlen); + break; + case SCTP_PEER_AUTH_CHUNKS: + retval = sctp_getsockopt_peer_auth_chunks(sk, len, optval, + optlen); + break; + case SCTP_LOCAL_AUTH_CHUNKS: + retval = sctp_getsockopt_local_auth_chunks(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -4989,22 +5316,14 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) sctp_local_bh_disable(); if (snum == 0) { - /* Search for an available port. - * - * 'sctp_port_rover' was the last port assigned, so - * we start to search from 'sctp_port_rover + - * 1'. What we do is first check if port 'rover' is - * already in the hash table; if not, we use that; if - * it is, we try next. - */ - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover; - int index; - - sctp_spin_lock(&sctp_port_alloc_lock); - rover = sctp_port_rover; + /* Search for an available port. */ + int low, high, remaining, index; + unsigned int rover; + + inet_get_local_port_range(&low, &high); + remaining = (high - low) + 1; + rover = net_random() % remaining + low; + do { rover++; if ((rover < low) || (rover > high)) @@ -5019,8 +5338,6 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) next: sctp_spin_unlock(&head->lock); } while (--remaining > 0); - sctp_port_rover = rover; - sctp_spin_unlock(&sctp_port_alloc_lock); /* Exhausted local port range during search? */ ret = 1; @@ -5720,6 +6037,12 @@ static void sctp_wfree(struct sk_buff *skb) atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + /* + * This undoes what is done via sk_charge_skb + */ + sk->sk_wmem_queued -= skb->truesize; + sk->sk_forward_alloc += skb->truesize; + sock_wfree(skb); __sctp_write_space(asoc); @@ -5737,6 +6060,11 @@ void sctp_sock_rfree(struct sk_buff *skb) struct sctp_ulpevent *event = sctp_skb2event(skb); atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); + + /* + * Mimic the behavior of sk_stream_rfree + */ + sk->sk_forward_alloc += event->rmem_len; } @@ -6126,6 +6454,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_release_sock(newsk); } + /* This proto struct describes the ULP interface for SCTP. */ struct proto sctp_prot = { .name = "SCTP", @@ -6148,6 +6477,12 @@ struct proto sctp_prot = { .unhash = sctp_unhash, .get_port = sctp_get_port, .obj_size = sizeof(struct sctp_sock), + .sysctl_mem = sysctl_sctp_mem, + .sysctl_rmem = sysctl_sctp_rmem, + .sysctl_wmem = sysctl_sctp_wmem, + .memory_pressure = &sctp_memory_pressure, + .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -6172,5 +6507,11 @@ struct proto sctpv6_prot = { .unhash = sctp_unhash, .get_port = sctp_get_port, .obj_size = sizeof(struct sctp6_sock), + .sysctl_mem = sysctl_sctp_mem, + .sysctl_rmem = sysctl_sctp_rmem, + .sysctl_wmem = sysctl_sctp_wmem, + .memory_pressure = &sctp_memory_pressure, + .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, }; #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index e2c679baf91..0669778e433 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -52,6 +52,10 @@ static int int_max = INT_MAX; static long sack_timer_min = 1; static long sack_timer_max = 500; +extern int sysctl_sctp_mem[3]; +extern int sysctl_sctp_rmem[3]; +extern int sysctl_sctp_wmem[3]; + static ctl_table sctp_table[] = { { .ctl_name = NET_SCTP_RTO_INITIAL, @@ -226,6 +230,39 @@ static ctl_table sctp_table[] = { .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_mem", + .data = &sysctl_sctp_mem, + .maxlen = sizeof(sysctl_sctp_mem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_rmem", + .data = &sysctl_sctp_rmem, + .maxlen = sizeof(sysctl_sctp_rmem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_wmem", + .data = &sysctl_sctp_wmem, + .maxlen = sizeof(sysctl_sctp_wmem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "auth_enable", + .data = &sctp_auth_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec + }, { .ctl_name = 0 } }; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index bfecb353ab3..2c17c7efad4 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -685,6 +685,24 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, struct sctp_ulpevent *event = NULL; struct sk_buff *skb; size_t padding, len; + int rx_count; + + /* + * check to see if we need to make space for this + * new skb, expand the rcvbuffer if needed, or drop + * the frame + */ + if (asoc->ep->rcvbuf_policy) + rx_count = atomic_read(&asoc->rmem_alloc); + else + rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + + if (rx_count >= asoc->base.sk->sk_rcvbuf) { + + if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || + (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb))) + goto fail; + } /* Clone the original skb, sharing the data. */ skb = skb_clone(chunk->skb, gfp); @@ -795,6 +813,43 @@ fail: return NULL; } +struct sctp_ulpevent *sctp_ulpevent_make_authkey( + const struct sctp_association *asoc, __u16 key_id, + __u32 indication, gfp_t gfp) +{ + struct sctp_ulpevent *event; + struct sctp_authkey_event *ak; + struct sk_buff *skb; + + event = sctp_ulpevent_new(sizeof(struct sctp_authkey_event), + MSG_NOTIFICATION, gfp); + if (!event) + goto fail; + + skb = sctp_event2skb(event); + ak = (struct sctp_authkey_event *) + skb_put(skb, sizeof(struct sctp_authkey_event)); + + ak->auth_type = SCTP_AUTHENTICATION_EVENT; + ak->auth_flags = 0; + ak->auth_length = sizeof(struct sctp_authkey_event); + + ak->auth_keynumber = key_id; + ak->auth_altkeynumber = 0; + ak->auth_indication = indication; + + /* + * The association id field, holds the identifier for the association. + */ + sctp_ulpevent_set_owner(event, asoc); + ak->auth_assoc_id = sctp_assoc2id(asoc); + + return event; +fail: + return NULL; +} + + /* Return the notification type, assuming this is a notification * event. */ diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index fa0ba2a5564..4be92d0a2ca 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -908,8 +908,8 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) return; } -/* Renege 'needed' bytes from the ordering queue. */ -static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed) +static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, + struct sk_buff_head *list, __u16 needed) { __u16 freed = 0; __u32 tsn; @@ -919,7 +919,7 @@ static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed) tsnmap = &ulpq->asoc->peer.tsn_map; - while ((skb = __skb_dequeue_tail(&ulpq->lobby)) != NULL) { + while ((skb = __skb_dequeue_tail(list)) != NULL) { freed += skb_headlen(skb); event = sctp_skb2event(skb); tsn = event->tsn; @@ -933,30 +933,16 @@ static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed) return freed; } +/* Renege 'needed' bytes from the ordering queue. */ +static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed) +{ + return sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed); +} + /* Renege 'needed' bytes from the reassembly queue. */ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) { - __u16 freed = 0; - __u32 tsn; - struct sk_buff *skb; - struct sctp_ulpevent *event; - struct sctp_tsnmap *tsnmap; - - tsnmap = &ulpq->asoc->peer.tsn_map; - - /* Walk backwards through the list, reneges the newest tsns. */ - while ((skb = __skb_dequeue_tail(&ulpq->reasm)) != NULL) { - freed += skb_headlen(skb); - event = sctp_skb2event(skb); - tsn = event->tsn; - - sctp_ulpevent_free(event); - sctp_tsnmap_renege(tsnmap, tsn); - if (freed >= needed) - return freed; - } - - return freed; + return sctp_ulpq_renege_list(ulpq, &ulpq->reasm, needed); } /* Partial deliver the first message as there is pressure on rwnd. */ @@ -1027,6 +1013,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, sctp_ulpq_partial_delivery(ulpq, chunk, gfp); } + sk_stream_mem_reclaim(asoc->base.sk); return; } diff --git a/net/socket.c b/net/socket.c index b09eb9036a1..540013ea862 100644 --- a/net/socket.c +++ b/net/socket.c @@ -84,6 +84,7 @@ #include <linux/kmod.h> #include <linux/audit.h> #include <linux/wireless.h> +#include <linux/nsproxy.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -257,7 +258,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) +static void init_once(struct kmem_cache *cachep, void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -363,26 +364,26 @@ static int sock_alloc_fd(struct file **filep) static int sock_attach_fd(struct socket *sock, struct file *file) { + struct dentry *dentry; struct qstr name = { .name = "" }; - file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!file->f_path.dentry)) + dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + if (unlikely(!dentry)) return -ENOMEM; - file->f_path.dentry->d_op = &sockfs_dentry_operations; + dentry->d_op = &sockfs_dentry_operations; /* * We dont want to push this dentry into global dentry hash table. * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED * This permits a working /proc/$pid/fd/XXX on sockets */ - file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); - file->f_path.mnt = mntget(sock_mnt); - file->f_mapping = file->f_path.dentry->d_inode->i_mapping; + dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(dentry, SOCK_INODE(sock)); sock->file = file; - file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; - file->f_mode = FMODE_READ | FMODE_WRITE; + init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, + &socket_file_ops); + SOCK_INODE(sock)->i_fop = &socket_file_ops; file->f_flags = O_RDWR; file->f_pos = 0; file->private_data = sock; @@ -790,9 +791,9 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; -void brioctl_set(int (*hook) (unsigned int, void __user *)) +void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; @@ -802,9 +803,9 @@ void brioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook) (void __user *arg); +static int (*vlan_ioctl_hook) (struct net *, void __user *arg); -void vlan_ioctl_set(int (*hook) (void __user *)) +void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) { mutex_lock(&vlan_ioctl_mutex); vlan_ioctl_hook = hook; @@ -833,16 +834,20 @@ EXPORT_SYMBOL(dlci_ioctl_set); static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; + struct sock *sk; void __user *argp = (void __user *)arg; int pid, err; + struct net *net; sock = file->private_data; + sk = sock->sk; + net = sk->sk_net; if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #ifdef CONFIG_WIRELESS_EXT if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #endif /* CONFIG_WIRELESS_EXT */ switch (cmd) { @@ -868,7 +873,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) mutex_lock(&br_ioctl_mutex); if (br_ioctl_hook) - err = br_ioctl_hook(cmd, argp); + err = br_ioctl_hook(net, cmd, argp); mutex_unlock(&br_ioctl_mutex); break; case SIOCGIFVLAN: @@ -879,7 +884,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) mutex_lock(&vlan_ioctl_mutex); if (vlan_ioctl_hook) - err = vlan_ioctl_hook(argp); + err = vlan_ioctl_hook(net, argp); mutex_unlock(&vlan_ioctl_mutex); break; case SIOCADDDLCI: @@ -902,7 +907,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) * to the NIC driver. */ if (err == -ENOIOCTLCMD) - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); break; } return err; @@ -1071,7 +1076,7 @@ call_kill: return 0; } -static int __sock_create(int family, int type, int protocol, +static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1147,7 +1152,7 @@ static int __sock_create(int family, int type, int protocol, /* Now protected by module ref count */ rcu_read_unlock(); - err = pf->create(sock, protocol); + err = pf->create(net, sock, protocol); if (err < 0) goto out_module_put; @@ -1186,12 +1191,12 @@ out_release: int sock_create(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 0); + return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } int sock_create_kern(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 1); + return __sock_create(&init_net, family, type, protocol, res, 1); } asmlinkage long sys_socket(int family, int type, int protocol) @@ -1924,7 +1929,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, * kernel msghdr to use the kernel address space) */ - uaddr = (void __user *)msg_sys.msg_name; + uaddr = (__force void __user *)msg_sys.msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); @@ -2230,6 +2235,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) err = sock->ops->accept(sock, *newsock, flags); if (err < 0) { sock_release(*newsock); + *newsock = NULL; goto done; } diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 8ebfc4db7f5..5c69a725e53 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index bfb6a29633d..91cd8f0d1e1 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -75,7 +75,7 @@ krb5_encrypt( memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); - sg_set_buf(sg, out, length); + sg_init_one(sg, out, length); ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); out: @@ -110,7 +110,7 @@ krb5_decrypt( memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); - sg_set_buf(sg, out, length); + sg_init_one(sg, out, length); ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); out: @@ -146,7 +146,7 @@ make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body, err = crypto_hash_init(&desc); if (err) goto out; - sg_set_buf(sg, header, hdrlen); + sg_init_one(sg, header, hdrlen); err = crypto_hash_update(&desc, sg, hdrlen); if (err) goto out; @@ -188,8 +188,6 @@ encryptor(struct scatterlist *sg, void *data) /* Worst case is 4 fragments: head, end of page 1, start * of page 2, tail. Anything more is a bug. */ BUG_ON(desc->fragno > 3); - desc->infrags[desc->fragno] = *sg; - desc->outfrags[desc->fragno] = *sg; page_pos = desc->pos - outbuf->head[0].iov_len; if (page_pos >= 0 && page_pos < outbuf->page_len) { @@ -197,9 +195,12 @@ encryptor(struct scatterlist *sg, void *data) int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; in_page = desc->pages[i]; } else { - in_page = sg->page; + in_page = sg_page(sg); } - desc->infrags[desc->fragno].page = in_page; + sg_set_page(&desc->infrags[desc->fragno], in_page, sg->length, + sg->offset); + sg_set_page(&desc->outfrags[desc->fragno], sg_page(sg), sg->length, + sg->offset); desc->fragno++; desc->fraglen += sg->length; desc->pos += sg->length; @@ -210,16 +211,22 @@ encryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; + sg_mark_end(desc->infrags, desc->fragno); + sg_mark_end(desc->outfrags, desc->fragno); + ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, desc->infrags, thislen); if (ret) return ret; + + sg_init_table(desc->infrags, 4); + sg_init_table(desc->outfrags, 4); + if (fraglen) { - desc->outfrags[0].page = sg->page; - desc->outfrags[0].offset = sg->offset + sg->length - fraglen; - desc->outfrags[0].length = fraglen; + sg_set_page(&desc->outfrags[0], sg_page(sg), fraglen, + sg->offset + sg->length - fraglen); desc->infrags[0] = desc->outfrags[0]; - desc->infrags[0].page = in_page; + sg_assign_page(&desc->infrags[0], in_page); desc->fragno = 1; desc->fraglen = fraglen; } else { @@ -248,6 +255,9 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, desc.fragno = 0; desc.fraglen = 0; + sg_init_table(desc.infrags, 4); + sg_init_table(desc.outfrags, 4); + ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc); return ret; } @@ -272,7 +282,8 @@ decryptor(struct scatterlist *sg, void *data) /* Worst case is 4 fragments: head, end of page 1, start * of page 2, tail. Anything more is a bug. */ BUG_ON(desc->fragno > 3); - desc->frags[desc->fragno] = *sg; + sg_set_page(&desc->frags[desc->fragno], sg_page(sg), sg->length, + sg->offset); desc->fragno++; desc->fraglen += sg->length; @@ -282,14 +293,18 @@ decryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; + sg_mark_end(desc->frags, desc->fragno); + ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, desc->frags, thislen); if (ret) return ret; + + sg_init_table(desc->frags, 4); + if (fraglen) { - desc->frags[0].page = sg->page; - desc->frags[0].offset = sg->offset + sg->length - fraglen; - desc->frags[0].length = fraglen; + sg_set_page(&desc->frags[0], sg_page(sg), fraglen, + sg->offset + sg->length - fraglen); desc->fragno = 1; desc->fraglen = fraglen; } else { @@ -314,6 +329,9 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, desc.desc.flags = 0; desc.fragno = 0; desc.fraglen = 0; + + sg_init_table(desc.frags, 4); + return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); } diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 42b3220bed3..8bd074df27d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -42,7 +42,7 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) { u8 *ptr; u8 pad; - int len = buf->len; + size_t len = buf->len; if (len <= buf->head[0].iov_len) { pad = *(u8 *)(buf->head[0].iov_base + len - 1); @@ -53,9 +53,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) } else len -= buf->head[0].iov_len; if (len <= buf->page_len) { - int last = (buf->page_base + len - 1) + unsigned int last = (buf->page_base + len - 1) >>PAGE_CACHE_SHIFT; - int offset = (buf->page_base + len - 1) + unsigned int offset = (buf->page_base + len - 1) & (PAGE_CACHE_SIZE - 1); ptr = kmap_atomic(buf->pages[last], KM_USER0); pad = *(ptr + offset); diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index d158635de6c..abf17ce2e3b 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -173,7 +173,7 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, if (err) goto out; - sg_set_buf(sg, header, hdrlen); + sg_init_one(sg, header, hdrlen); crypto_hash_update(&desc, sg, sg->length); xdr_process_buf(body, body_offset, body->len - body_offset, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 7da7050f06c..73940df6c46 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -631,7 +631,8 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) return 0; } -/* Verify the checksum on the header and return SVC_OK on success. +/* + * Verify the checksum on the header and return SVC_OK on success. * Otherwise, return SVC_DROP (in the case of a bad sequence number) * or return SVC_DENIED and indicate error in authp. */ @@ -961,6 +962,78 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) } /* + * Having read the cred already and found we're in the context + * initiation case, read the verifier and initiate (or check the results + * of) upcalls to userspace for help with context initiation. If + * the upcall results are available, write the verifier and result. + * Otherwise, drop the request pending an answer to the upcall. + */ +static int svcauth_gss_handle_init(struct svc_rqst *rqstp, + struct rpc_gss_wire_cred *gc, __be32 *authp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + struct xdr_netobj tmpobj; + struct rsi *rsip, rsikey; + + /* Read the verifier; should be NULL: */ + *authp = rpc_autherr_badverf; + if (argv->iov_len < 2 * 4) + return SVC_DENIED; + if (svc_getnl(argv) != RPC_AUTH_NULL) + return SVC_DENIED; + if (svc_getnl(argv) != 0) + return SVC_DENIED; + + /* Martial context handle and token for upcall: */ + *authp = rpc_autherr_badcred; + if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) + return SVC_DENIED; + memset(&rsikey, 0, sizeof(rsikey)); + if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) + return SVC_DROP; + *authp = rpc_autherr_badverf; + if (svc_safe_getnetobj(argv, &tmpobj)) { + kfree(rsikey.in_handle.data); + return SVC_DENIED; + } + if (dup_netobj(&rsikey.in_token, &tmpobj)) { + kfree(rsikey.in_handle.data); + return SVC_DROP; + } + + /* Perform upcall, or find upcall result: */ + rsip = rsi_lookup(&rsikey); + rsi_free(&rsikey); + if (!rsip) + return SVC_DROP; + switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { + case -EAGAIN: + case -ETIMEDOUT: + case -ENOENT: + /* No upcall result: */ + return SVC_DROP; + case 0: + /* Got an answer to the upcall; use it: */ + if (gss_write_init_verf(rqstp, rsip)) + return SVC_DROP; + if (resv->iov_len + 4 > PAGE_SIZE) + return SVC_DROP; + svc_putnl(resv, RPC_SUCCESS); + if (svc_safe_putnetobj(resv, &rsip->out_handle)) + return SVC_DROP; + if (resv->iov_len + 3 * 4 > PAGE_SIZE) + return SVC_DROP; + svc_putnl(resv, rsip->major_status); + svc_putnl(resv, rsip->minor_status); + svc_putnl(resv, GSS_SEQ_WIN); + if (svc_safe_putnetobj(resv, &rsip->out_token)) + return SVC_DROP; + } + return SVC_COMPLETE; +} + +/* * Accept an rpcsec packet. * If context establishment, punt to user space * If data exchange, verify/decrypt @@ -974,11 +1047,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; u32 crlen; - struct xdr_netobj tmpobj; struct gss_svc_data *svcdata = rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc; struct rsc *rsci = NULL; - struct rsi *rsip, rsikey; __be32 *rpcstart; __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; @@ -1023,30 +1094,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0)) goto auth_err; - /* - * We've successfully parsed the credential. Let's check out the - * verifier. An AUTH_NULL verifier is allowed (and required) for - * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for - * PROC_DATA and PROC_DESTROY. - * - * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length). - * AUTH_RPCSEC_GSS verifier is: - * 6 (AUTH_RPCSEC_GSS), length, checksum. - * checksum is calculated over rpcheader from xid up to here. - */ *authp = rpc_autherr_badverf; switch (gc->gc_proc) { case RPC_GSS_PROC_INIT: case RPC_GSS_PROC_CONTINUE_INIT: - if (argv->iov_len < 2 * 4) - goto auth_err; - if (svc_getnl(argv) != RPC_AUTH_NULL) - goto auth_err; - if (svc_getnl(argv) != 0) - goto auth_err; - break; + return svcauth_gss_handle_init(rqstp, gc, authp); case RPC_GSS_PROC_DATA: case RPC_GSS_PROC_DESTROY: + /* Look up the context, and check the verifier: */ *authp = rpcsec_gsserr_credproblem; rsci = gss_svc_searchbyctx(&gc->gc_ctx); if (!rsci) @@ -1067,51 +1122,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) /* now act upon the command: */ switch (gc->gc_proc) { - case RPC_GSS_PROC_INIT: - case RPC_GSS_PROC_CONTINUE_INIT: - *authp = rpc_autherr_badcred; - if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) - goto auth_err; - memset(&rsikey, 0, sizeof(rsikey)); - if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) - goto drop; - *authp = rpc_autherr_badverf; - if (svc_safe_getnetobj(argv, &tmpobj)) { - kfree(rsikey.in_handle.data); - goto auth_err; - } - if (dup_netobj(&rsikey.in_token, &tmpobj)) { - kfree(rsikey.in_handle.data); - goto drop; - } - - rsip = rsi_lookup(&rsikey); - rsi_free(&rsikey); - if (!rsip) { - goto drop; - } - switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { - case -EAGAIN: - case -ETIMEDOUT: - case -ENOENT: - goto drop; - case 0: - if (gss_write_init_verf(rqstp, rsip)) - goto drop; - if (resv->iov_len + 4 > PAGE_SIZE) - goto drop; - svc_putnl(resv, RPC_SUCCESS); - if (svc_safe_putnetobj(resv, &rsip->out_handle)) - goto drop; - if (resv->iov_len + 3 * 4 > PAGE_SIZE) - goto drop; - svc_putnl(resv, rsip->major_status); - svc_putnl(resv, rsip->minor_status); - svc_putnl(resv, GSS_SEQ_WIN); - if (svc_safe_putnetobj(resv, &rsip->out_token)) - goto drop; - } - goto complete; case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; @@ -1158,7 +1168,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) goto out; } auth_err: - /* Restore write pointer to original value: */ + /* Restore write pointer to its original value: */ xdr_ressize_check(rqstp, reject_stat); ret = SVC_DENIED; goto out; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ebe344f34d1..8e05557414c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1218,23 +1218,15 @@ static const struct seq_operations cache_content_op = { static int content_open(struct inode *inode, struct file *file) { - int res; struct handle *han; struct cache_detail *cd = PDE(inode)->data; - han = kmalloc(sizeof(*han), GFP_KERNEL); + han = __seq_open_private(file, &cache_content_op, sizeof(*han)); if (han == NULL) return -ENOMEM; han->cd = cd; - - res = seq_open(file, &cache_content_op); - if (res) - kfree(han); - else - ((struct seq_file *)file->private_data)->private = han; - - return res; + return 0; } static const struct file_operations content_file_operations = { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 52429b1ffcc..76be83ee4b0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -127,7 +127,14 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; - int len; + size_t len; + + /* sanity check the name before trying to print it */ + err = -EINVAL; + len = strlen(servname); + if (len > RPC_MAXNETNAMELEN) + goto out_no_rpciod; + len++; dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); @@ -148,7 +155,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_parent = clnt; clnt->cl_server = clnt->cl_inline_name; - len = strlen(servname) + 1; if (len > sizeof(clnt->cl_inline_name)) { char *buf = kmalloc(len, GFP_KERNEL); if (buf != 0) @@ -234,8 +240,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; - struct rpc_xprtsock_create xprtargs = { - .proto = args->protocol, + struct xprt_create xprtargs = { + .ident = args->protocol, .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, @@ -253,7 +259,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) */ if (args->servername == NULL) { struct sockaddr_in *addr = - (struct sockaddr_in *) &args->address; + (struct sockaddr_in *) args->address; snprintf(servername, sizeof(servername), NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); args->servername = servername; @@ -269,9 +275,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; - dprintk("RPC: creating %s client for %s (xprt %p)\n", - args->program->name, args->servername, xprt); - clnt = rpc_new_client(xprt, args->servername, args->program, args->version, args->authflavor); if (IS_ERR(clnt)) @@ -439,7 +442,7 @@ rpc_release_client(struct rpc_clnt *clnt) */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, struct rpc_program *program, - int vers) + u32 vers) { struct rpc_clnt *clnt; struct rpc_version *version; @@ -843,8 +846,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); if (RPC_IS_ASYNC(task) || !signalled()) { - xprt_release(task); - task->tk_action = call_reserve; + task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; } @@ -871,6 +873,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; buf->page_len = 0; + buf->flags = 0; buf->len = 0; buf->buflen = len; } @@ -937,7 +940,7 @@ call_bind(struct rpc_task *task) static void call_bind_status(struct rpc_task *task) { - int status = -EACCES; + int status = -EIO; if (task->tk_status >= 0) { dprint_status(task); @@ -947,9 +950,20 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { + case -EAGAIN: + dprintk("RPC: %5u rpcbind waiting for another request " + "to finish\n", task->tk_pid); + /* avoid busy-waiting here -- could be a network outage. */ + rpc_delay(task, 5*HZ); + goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " "unavailable\n", task->tk_pid); + /* fail immediately if this is an RPC ping */ + if (task->tk_msg.rpc_proc->p_proc == 0) { + status = -EOPNOTSUPP; + break; + } rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: @@ -957,6 +971,7 @@ call_bind_status(struct rpc_task *task) task->tk_pid); goto retry_timeout; case -EPFNOSUPPORT: + /* server doesn't support any rpcbind version we know of */ dprintk("RPC: %5u remote rpcbind service unavailable\n", task->tk_pid); break; @@ -969,7 +984,6 @@ call_bind_status(struct rpc_task *task) default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); - status = -EIO; } rpc_exit(task, status); @@ -1257,7 +1271,6 @@ call_refresh(struct rpc_task *task) { dprint_status(task); - xprt_release(task); /* Must do to obtain new XID */ task->tk_action = call_refreshresult; task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; @@ -1375,6 +1388,8 @@ call_verify(struct rpc_task *task) dprintk("RPC: %5u %s: retry stale creds\n", task->tk_pid, __FUNCTION__); rpcauth_invalcred(task); + /* Ensure we obtain a new XID! */ + xprt_release(task); task->tk_action = call_refresh; goto out_retry; case RPC_AUTH_BADCRED: @@ -1523,13 +1538,18 @@ void rpc_show_tasks(void) spin_lock(&clnt->cl_lock); list_for_each_entry(t, &clnt->cl_tasks, tk_task) { const char *rpc_waitq = "none"; + int proc; + + if (t->tk_msg.rpc_proc) + proc = t->tk_msg.rpc_proc->p_proc; + else + proc = -1; if (RPC_IS_QUEUED(t)) rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), + t->tk_pid, proc, t->tk_flags, t->tk_status, t->tk_client, (t->tk_client ? t->tk_client->cl_prog : 0), diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 669e12a4ed1..18f0a8dcc09 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -14,7 +14,7 @@ #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/dnotify.h> +#include <linux/fsnotify.h> #include <linux/kernel.h> #include <asm/ioctls.h> @@ -329,6 +329,7 @@ rpc_show_info(struct seq_file *m, void *v) clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); + seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); return 0; } @@ -585,6 +586,7 @@ rpc_populate(struct dentry *parent, if (S_ISDIR(mode)) inc_nlink(dir); d_add(dentry, inode); + fsnotify_create(dir, dentry); } mutex_unlock(&dir->i_mutex); return 0; @@ -606,7 +608,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry) inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); inc_nlink(dir); - inode_dir_notify(dir, DN_CREATE); + fsnotify_mkdir(dir, dentry); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", @@ -748,7 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi rpci->flags = flags; rpci->ops = ops; rpci->nkern_readwriters = 1; - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, dentry); dget(dentry); out: mutex_unlock(&dir->i_mutex); @@ -840,7 +842,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +init_once(struct kmem_cache * cachep, void *foo) { struct rpc_inode *rpci = (struct rpc_inode *) foo; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d1740dbab99..a05493aedb6 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,11 +16,14 @@ #include <linux/types.h> #include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> +#include <linux/sunrpc/xprtsock.h> #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_BIND @@ -91,26 +94,6 @@ enum { #define RPCB_MAXADDRLEN (128u) /* - * r_netid - * - * Quoting RFC 3530, section 2.2: - * - * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP - * over IPv4 the value of r_netid is the string "udp". - * - * ... - * - * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP - * over IPv6 the value of r_netid is the string "udp6". - */ -#define RPCB_NETID_UDP "\165\144\160" /* "udp" */ -#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */ -#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */ -#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */ - -#define RPCB_MAXNETIDLEN (4u) - -/* * r_owner * * The "owner" is allowed to unset a service in the rpcbind database. @@ -120,7 +103,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); -extern struct rpc_program rpcb_program; +static struct rpc_program rpcb_program; struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -137,10 +120,13 @@ struct rpcbind_args { static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; -static struct rpcb_info { +struct rpcb_info { int rpc_vers; struct rpc_procinfo * rpc_proc; -} rpcb_next_version[]; +}; + +static struct rpcb_info rpcb_next_version[]; +static struct rpcb_info rpcb_next_version6[]; static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) { @@ -190,7 +176,17 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, RPC_CLNT_CREATE_INTR), }; - ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + switch (srvaddr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + break; + case AF_INET6: + ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); + break; + default: + return NULL; + } + if (!privileged) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); @@ -234,7 +230,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) prog, vers, prot, port); rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - IPPROTO_UDP, 2, 1); + XPRT_TRANSPORT_UDP, 2, 1); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -316,6 +312,7 @@ void rpcb_getport_async(struct rpc_task *task) struct rpc_task *child; struct sockaddr addr; int status; + struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __FUNCTION__, @@ -325,7 +322,7 @@ void rpcb_getport_async(struct rpc_task *task) BUG_ON(clnt->cl_parent != clnt); if (xprt_test_and_set_binding(xprt)) { - status = -EACCES; /* tell caller to check again */ + status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __FUNCTION__); goto bailout_nowake; @@ -343,18 +340,43 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { + rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + + /* Don't ever use rpcbind v2 for AF_INET6 requests */ + switch (addr.sa_family) { + case AF_INET: + info = rpcb_next_version; + break; + case AF_INET6: + info = rpcb_next_version6; + break; + default: + status = -EAFNOSUPPORT; + dprintk("RPC: %5u %s: bad address family\n", + task->tk_pid, __FUNCTION__); + goto bailout_nofree; + } + if (info[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; - status = -EACCES; /* tell caller to try again later */ + status = -EPFNOSUPPORT; dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } - bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; + bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __FUNCTION__, bind_version); + rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, + bind_version, 0); + if (IS_ERR(rpcb_clnt)) { + status = PTR_ERR(rpcb_clnt); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); + goto bailout_nofree; + } + map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; @@ -367,28 +389,19 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : - RPCB_NETID_UDP; - memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), - sizeof(map->r_addr)); + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + memcpy(map->r_addr, + rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), + sizeof(map->r_addr)); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ - rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); - rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); - if (IS_ERR(rpcb_clnt)) { - status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); - goto bailout; - } - child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __FUNCTION__); - goto bailout_nofree; + goto bailout; } rpc_put_task(child); @@ -403,6 +416,7 @@ bailout_nofree: bailout_nowake: task->tk_status = status; } +EXPORT_SYMBOL_GPL(rpcb_getport_async); /* * Rpcbind child task calls this callback via tk_exit. @@ -413,6 +427,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) struct rpc_xprt *xprt = map->r_xprt; int status = child->tk_status; + /* Garbage reply: retry with a lesser rpcbind version */ + if (status == -EIO) + status = -EPROTONOSUPPORT; + /* rpcbind server doesn't support this rpcbind protocol version */ if (status == -EPROTONOSUPPORT) xprt->bind_index++; @@ -490,16 +508,24 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { char *addr; - int addr_len, c, i, f, first, val; + u32 addr_len; + int c, i, f, first, val; *portp = 0; - addr_len = (unsigned int) ntohl(*p++); - if (addr_len > RPCB_MAXADDRLEN) /* sanity */ - return -EINVAL; - - dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n", - (char *) p); - + addr_len = ntohl(*p++); + + /* + * Simple sanity check. The smallest possible universal + * address is an IPv4 address string containing 11 bytes. + */ + if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN) + goto out_err; + + /* + * Start at the end and walk backwards until the first dot + * is encountered. When the second dot is found, we have + * both parts of the port number. + */ addr = (char *)p; val = 0; first = 1; @@ -521,8 +547,19 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, } } + /* + * Simple sanity check. If we never saw a dot in the reply, + * then this was probably just garbage. + */ + if (first) + goto out_err; + dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); return 0; + +out_err: + dprintk("RPC: rpcbind server returned malformed reply\n"); + return -EIO; } #define RPCB_program_sz (1u) @@ -531,7 +568,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, #define RPCB_port_sz (1u) #define RPCB_boolean_sz (1u) -#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN)) +#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) #define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) #define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) @@ -593,6 +630,14 @@ static struct rpcb_info rpcb_next_version[] = { { 0, NULL }, }; +static struct rpcb_info rpcb_next_version6[] = { +#ifdef CONFIG_SUNRPC_BIND34 + { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, + { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, +#endif + { 0, NULL }, +}; + static struct rpc_version rpcb_version2 = { .number = 2, .nrprocs = RPCB_HIGHPROC_2, @@ -621,7 +666,7 @@ static struct rpc_version *rpcb_version[] = { static struct rpc_stat rpcb_stats; -struct rpc_program rpcb_program = { +static struct rpc_program rpcb_program = { .name = "rpcbind", .number = RPCBIND_PROGRAM, .nrvers = ARRAY_SIZE(rpcb_version), diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 954d7ec86c7..c98873f39ae 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -777,6 +777,7 @@ void *rpc_malloc(struct rpc_task *task, size_t size) task->tk_pid, size, buf); return &buf->data; } +EXPORT_SYMBOL_GPL(rpc_malloc); /** * rpc_free - free buffer allocated via rpc_malloc @@ -802,6 +803,7 @@ void rpc_free(void *buffer) else kfree(buf); } +EXPORT_SYMBOL_GPL(rpc_free); /* * Creation and deletion of RPC task structures @@ -845,7 +847,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_start = jiffies; dprintk("RPC: new task initialized, procpid %u\n", - current->pid); + task_pid_nr(current)); } static struct rpc_task * diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 1d377d1ab7f..97ac45f034d 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -34,6 +34,7 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) desc->offset += len; return len; } +EXPORT_SYMBOL_GPL(xdr_skb_read_bits); /** * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer @@ -137,6 +138,7 @@ copy_tail: out: return copied; } +EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb); /** * csum_partial_copy_to_xdr - checksum and copy data @@ -179,3 +181,4 @@ no_checksum: return -1; return 0; } +EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 74ba7d443df..4d4f3738b68 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -21,6 +21,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/metrics.h> +#include <net/net_namespace.h> #define RPCDBG_FACILITY RPCDBG_MISC @@ -265,7 +266,7 @@ rpc_proc_init(void) dprintk("RPC: registering /proc/net/rpc\n"); if (!proc_net_rpc) { struct proc_dir_entry *ent; - ent = proc_mkdir("rpc", proc_net); + ent = proc_mkdir("rpc", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_rpc = ent; @@ -279,7 +280,7 @@ rpc_proc_exit(void) dprintk("RPC: unregistering /proc/net/rpc\n"); if (proc_net_rpc) { proc_net_rpc = NULL; - remove_proc_entry("net/rpc", NULL); + remove_proc_entry("rpc", init_net.proc_net); } } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 384c4ad5ab8..33d89e842c8 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -20,7 +20,7 @@ #include <linux/sunrpc/auth.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> - +#include <linux/sunrpc/xprtsock.h> /* RPC scheduler */ EXPORT_SYMBOL(rpc_execute); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 55ea6df069d..a4a6bf7deaa 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -777,6 +777,30 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) } /* + * Printk the given error with the address of the client that caused it. + */ +static int +__attribute__ ((format (printf, 2, 3))) +svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) +{ + va_list args; + int r; + char buf[RPC_MAX_ADDRBUFLEN]; + + if (!net_ratelimit()) + return 0; + + printk(KERN_WARNING "svc: %s: ", + svc_print_addr(rqstp, buf, sizeof(buf))); + + va_start(args, fmt); + r = vprintk(fmt, args); + va_end(args); + + return r; +} + +/* * Process the RPC request. */ int @@ -963,14 +987,13 @@ svc_process(struct svc_rqst *rqstp) return 0; err_short_len: - if (net_ratelimit()) - printk("svc: short len %Zd, dropping request\n", argv->iov_len); + svc_printk(rqstp, "short len %Zd, dropping request\n", + argv->iov_len); goto dropit; /* drop request */ err_bad_dir: - if (net_ratelimit()) - printk("svc: bad direction %d, dropping request\n", dir); + svc_printk(rqstp, "bad direction %d, dropping request\n", dir); serv->sv_stats->rpcbadfmt++; goto dropit; /* drop request */ @@ -1000,8 +1023,7 @@ err_bad_prog: goto sendit; err_bad_vers: - if (net_ratelimit()) - printk("svc: unknown version (%d for prog %d, %s)\n", + svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n", vers, prog, progp->pg_name); serv->sv_stats->rpcbadfmt++; @@ -1011,16 +1033,14 @@ err_bad_vers: goto sendit; err_bad_proc: - if (net_ratelimit()) - printk("svc: unknown procedure (%d)\n", proc); + svc_printk(rqstp, "unknown procedure (%d)\n", proc); serv->sv_stats->rpcbadfmt++; svc_putnl(resv, RPC_PROC_UNAVAIL); goto sendit; err_garbage: - if (net_ratelimit()) - printk("svc: failed to decode args\n"); + svc_printk(rqstp, "failed to decode args\n"); rpc_stat = rpc_garbage_args; err_bad: diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 036ab520df2..c75bffeb89e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -19,6 +19,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include <linux/kernel.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/fcntl.h> @@ -103,7 +104,7 @@ static struct lock_class_key svc_slock_key[2]; static inline void svc_reclassify_socket(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sk->sk_lock.owner != NULL); + BUG_ON(sock_owned_by_user(sk)); switch (sk->sk_family) { case AF_INET: sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", @@ -877,7 +878,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) } else { rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_respages = rqstp->rq_pages + 1 + - (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; + DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); } if (serv->sv_stats) diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 738db32a287..864b541bbf5 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -114,7 +114,6 @@ done: static ctl_table debug_table[] = { { - .ctl_name = CTL_RPCDEBUG, .procname = "rpc_debug", .data = &rpc_debug, .maxlen = sizeof(int), @@ -122,7 +121,6 @@ static ctl_table debug_table[] = { .proc_handler = &proc_dodebug }, { - .ctl_name = CTL_NFSDEBUG, .procname = "nfs_debug", .data = &nfs_debug, .maxlen = sizeof(int), @@ -130,7 +128,6 @@ static ctl_table debug_table[] = { .proc_handler = &proc_dodebug }, { - .ctl_name = CTL_NFSDDEBUG, .procname = "nfsd_debug", .data = &nfsd_debug, .maxlen = sizeof(int), @@ -138,7 +135,6 @@ static ctl_table debug_table[] = { .proc_handler = &proc_dodebug }, { - .ctl_name = CTL_NLMDEBUG, .procname = "nlm_debug", .data = &nlm_debug, .maxlen = sizeof(int), diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index 8142fdb8a93..31becbf0926 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/unistd.h> +#include <linux/module.h> #include <linux/sunrpc/clnt.h> @@ -40,6 +41,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) rt->ntimeouts[i] = 0; } } +EXPORT_SYMBOL_GPL(rpc_init_rtt); /* * NB: When computing the smoothed RTT and standard deviation, @@ -75,6 +77,7 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) if (*sdrtt < RPC_RTO_MIN) *sdrtt = RPC_RTO_MIN; } +EXPORT_SYMBOL_GPL(rpc_update_rtt); /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. @@ -103,3 +106,4 @@ rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) return res; } +EXPORT_SYMBOL_GPL(rpc_calc_rto); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6a59180e166..fdc5e6d7562 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1030,6 +1030,8 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, unsigned page_len, thislen, page_offset; struct scatterlist sg[1]; + sg_init_table(sg, 1); + if (offset >= buf->head[0].iov_len) { offset -= buf->head[0].iov_len; } else { @@ -1059,9 +1061,7 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, do { if (thislen > page_len) thislen = page_len; - sg->page = buf->pages[i]; - sg->offset = page_offset; - sg->length = thislen; + sg_set_page(sg, buf->pages[i], thislen, page_offset); ret = actor(sg, data); if (ret) goto out; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c8c2edccad7..282a9a2ec90 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,6 +62,9 @@ static inline void do_xprt_reserve(struct rpc_task *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(xprt_list); + /* * The transport code maintains an estimate on the maximum number of out- * standing RPC requests, using a smoothed version of the congestion @@ -81,6 +84,78 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) /** + * xprt_register_transport - register a transport implementation + * @transport: transport to register + * + * If a transport implementation is loaded as a kernel module, it can + * call this interface to make itself known to the RPC client. + * + * Returns: + * 0: transport successfully registered + * -EEXIST: transport already registered + * -EINVAL: transport module being unloaded + */ +int xprt_register_transport(struct xprt_class *transport) +{ + struct xprt_class *t; + int result; + + result = -EEXIST; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + /* don't register the same transport class twice */ + if (t->ident == transport->ident) + goto out; + } + + result = -EINVAL; + if (try_module_get(THIS_MODULE)) { + list_add_tail(&transport->list, &xprt_list); + printk(KERN_INFO "RPC: Registered %s transport module.\n", + transport->name); + result = 0; + } + +out: + spin_unlock(&xprt_list_lock); + return result; +} +EXPORT_SYMBOL_GPL(xprt_register_transport); + +/** + * xprt_unregister_transport - unregister a transport implementation + * transport: transport to unregister + * + * Returns: + * 0: transport successfully unregistered + * -ENOENT: transport never registered + */ +int xprt_unregister_transport(struct xprt_class *transport) +{ + struct xprt_class *t; + int result; + + result = 0; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (t == transport) { + printk(KERN_INFO + "RPC: Unregistered %s transport module.\n", + transport->name); + list_del_init(&transport->list); + module_put(THIS_MODULE); + goto out; + } + } + result = -ENOENT; + +out: + spin_unlock(&xprt_list_lock); + return result; +} +EXPORT_SYMBOL_GPL(xprt_unregister_transport); + +/** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport * @@ -118,6 +193,7 @@ out_sleep: rpc_sleep_on(&xprt->sending, task, NULL, NULL); return 0; } +EXPORT_SYMBOL_GPL(xprt_reserve_xprt); static void xprt_clear_locked(struct rpc_xprt *xprt) { @@ -167,6 +243,7 @@ out_sleep: rpc_sleep_on(&xprt->sending, task, NULL, NULL); return 0; } +EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -246,6 +323,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) __xprt_lock_write_next(xprt); } } +EXPORT_SYMBOL_GPL(xprt_release_xprt); /** * xprt_release_xprt_cong - allow other requests to use a transport @@ -262,6 +340,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) __xprt_lock_write_next_cong(xprt); } } +EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -314,6 +393,7 @@ void xprt_release_rqst_cong(struct rpc_task *task) { __xprt_put_cong(task->tk_xprt, task->tk_rqstp); } +EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); /** * xprt_adjust_cwnd - adjust transport congestion window @@ -345,6 +425,7 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result) xprt->cwnd = cwnd; __xprt_put_cong(xprt, req); } +EXPORT_SYMBOL_GPL(xprt_adjust_cwnd); /** * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue @@ -359,6 +440,7 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) else rpc_wake_up(&xprt->pending); } +EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); /** * xprt_wait_for_buffer_space - wait for transport output buffer to clear @@ -373,6 +455,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task) task->tk_timeout = req->rq_timeout; rpc_sleep_on(&xprt->pending, task, NULL, NULL); } +EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); /** * xprt_write_space - wake the task waiting for transport output buffer space @@ -393,6 +476,7 @@ void xprt_write_space(struct rpc_xprt *xprt) } spin_unlock_bh(&xprt->transport_lock); } +EXPORT_SYMBOL_GPL(xprt_write_space); /** * xprt_set_retrans_timeout_def - set a request's retransmit timeout @@ -406,6 +490,7 @@ void xprt_set_retrans_timeout_def(struct rpc_task *task) { task->tk_timeout = task->tk_rqstp->rq_timeout; } +EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); /* * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout @@ -425,6 +510,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task) if (task->tk_timeout > max_timeout || task->tk_timeout == 0) task->tk_timeout = max_timeout; } +EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); static void xprt_reset_majortimeo(struct rpc_rqst *req) { @@ -500,6 +586,7 @@ void xprt_disconnect(struct rpc_xprt *xprt) xprt_wake_pending_tasks(xprt, -ENOTCONN); spin_unlock_bh(&xprt->transport_lock); } +EXPORT_SYMBOL_GPL(xprt_disconnect); static void xprt_init_autodisconnect(unsigned long data) @@ -610,6 +697,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) xprt->stat.bad_xids++; return NULL; } +EXPORT_SYMBOL_GPL(xprt_lookup_rqst); /** * xprt_update_rtt - update an RPC client's RTT state after receiving a reply @@ -629,6 +717,7 @@ void xprt_update_rtt(struct rpc_task *task) rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); } } +EXPORT_SYMBOL_GPL(xprt_update_rtt); /** * xprt_complete_rqst - called when reply processing is complete @@ -653,6 +742,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) req->rq_received = req->rq_private_buf.len = copied; rpc_wake_up_task(task); } +EXPORT_SYMBOL_GPL(xprt_complete_rqst); static void xprt_timer(struct rpc_task *task) { @@ -889,23 +979,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i * @args: rpc transport creation arguments * */ -struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) +struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { struct rpc_xprt *xprt; struct rpc_rqst *req; + struct xprt_class *t; - switch (args->proto) { - case IPPROTO_UDP: - xprt = xs_setup_udp(args); - break; - case IPPROTO_TCP: - xprt = xs_setup_tcp(args); - break; - default: - printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", - args->proto); - return ERR_PTR(-EIO); + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (t->ident == args->ident) { + spin_unlock(&xprt_list_lock); + goto found; + } } + spin_unlock(&xprt_list_lock); + printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + return ERR_PTR(-EIO); + +found: + xprt = t->setup(args); if (IS_ERR(xprt)) { dprintk("RPC: xprt_create_transport: failed, %ld\n", -PTR_ERR(xprt)); diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile new file mode 100644 index 00000000000..264f0feeb51 --- /dev/null +++ b/net/sunrpc/xprtrdma/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o + +xprtrdma-y := transport.o rpc_rdma.o verbs.o diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c new file mode 100644 index 00000000000..f877b88091c --- /dev/null +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -0,0 +1,868 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * rpc_rdma.c + * + * This file contains the guts of the RPC RDMA protocol, and + * does marshaling/unmarshaling, etc. It is also where interfacing + * to the Linux RPC framework lives. + */ + +#include "xprt_rdma.h" + +#include <linux/highmem.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +enum rpcrdma_chunktype { + rpcrdma_noch = 0, + rpcrdma_readch, + rpcrdma_areadch, + rpcrdma_writech, + rpcrdma_replych +}; + +#ifdef RPC_DEBUG +static const char transfertypes[][12] = { + "pure inline", /* no chunks */ + " read chunk", /* some argument via rdma read */ + "*read chunk", /* entire request via rdma read */ + "write chunk", /* some result via rdma write */ + "reply chunk" /* entire reply via rdma write */ +}; +#endif + +/* + * Chunk assembly from upper layer xdr_buf. + * + * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk + * elements. Segments are then coalesced when registered, if possible + * within the selected memreg mode. + * + * Note, this routine is never called if the connection's memory + * registration strategy is 0 (bounce buffers). + */ + +static int +rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos, + enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) +{ + int len, n = 0, p; + + if (pos == 0 && xdrbuf->head[0].iov_len) { + seg[n].mr_page = NULL; + seg[n].mr_offset = xdrbuf->head[0].iov_base; + seg[n].mr_len = xdrbuf->head[0].iov_len; + pos += xdrbuf->head[0].iov_len; + ++n; + } + + if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { + if (n == nsegs) + return 0; + seg[n].mr_page = xdrbuf->pages[0]; + seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; + seg[n].mr_len = min_t(u32, + PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); + len = xdrbuf->page_len - seg[n].mr_len; + pos += len; + ++n; + p = 1; + while (len > 0) { + if (n == nsegs) + return 0; + seg[n].mr_page = xdrbuf->pages[p]; + seg[n].mr_offset = NULL; + seg[n].mr_len = min_t(u32, PAGE_SIZE, len); + len -= seg[n].mr_len; + ++n; + ++p; + } + } + + if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) { + if (n == nsegs) + return 0; + seg[n].mr_page = NULL; + seg[n].mr_offset = xdrbuf->tail[0].iov_base; + seg[n].mr_len = xdrbuf->tail[0].iov_len; + pos += xdrbuf->tail[0].iov_len; + ++n; + } + + if (pos < xdrbuf->len) + dprintk("RPC: %s: marshaled only %d of %d\n", + __func__, pos, xdrbuf->len); + + return n; +} + +/* + * Create read/write chunk lists, and reply chunks, for RDMA + * + * Assume check against THRESHOLD has been done, and chunks are required. + * Assume only encoding one list entry for read|write chunks. The NFSv3 + * protocol is simple enough to allow this as it only has a single "bulk + * result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The + * RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.) + * + * When used for a single reply chunk (which is a special write + * chunk used for the entire reply, rather than just the data), it + * is used primarily for READDIR and READLINK which would otherwise + * be severely size-limited by a small rdma inline read max. The server + * response will come back as an RDMA Write, followed by a message + * of type RDMA_NOMSG carrying the xid and length. As a result, reply + * chunks do not provide data alignment, however they do not require + * "fixup" (moving the response to the upper layer buffer) either. + * + * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): + * + * Read chunklist (a linked list): + * N elements, position P (same P for all chunks of same arg!): + * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 + * + * Write chunklist (a list of (one) counted array): + * N elements: + * 1 - N - HLOO - HLOO - ... - HLOO - 0 + * + * Reply chunk (a counted array): + * N elements: + * 1 - N - HLOO - HLOO - ... - HLOO + */ + +static unsigned int +rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, + struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) +{ + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); + int nsegs, nchunks = 0; + int pos; + struct rpcrdma_mr_seg *seg = req->rl_segments; + struct rpcrdma_read_chunk *cur_rchunk = NULL; + struct rpcrdma_write_array *warray = NULL; + struct rpcrdma_write_chunk *cur_wchunk = NULL; + __be32 *iptr = headerp->rm_body.rm_chunks; + + if (type == rpcrdma_readch || type == rpcrdma_areadch) { + /* a read chunk - server will RDMA Read our memory */ + cur_rchunk = (struct rpcrdma_read_chunk *) iptr; + } else { + /* a write or reply chunk - server will RDMA Write our memory */ + *iptr++ = xdr_zero; /* encode a NULL read chunk list */ + if (type == rpcrdma_replych) + *iptr++ = xdr_zero; /* a NULL write chunk list */ + warray = (struct rpcrdma_write_array *) iptr; + cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1); + } + + if (type == rpcrdma_replych || type == rpcrdma_areadch) + pos = 0; + else + pos = target->head[0].iov_len; + + nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); + if (nsegs == 0) + return 0; + + do { + /* bind/register the memory, then build chunk from result. */ + int n = rpcrdma_register_external(seg, nsegs, + cur_wchunk != NULL, r_xprt); + if (n <= 0) + goto out; + if (cur_rchunk) { /* read */ + cur_rchunk->rc_discrim = xdr_one; + /* all read chunks have the same "position" */ + cur_rchunk->rc_position = htonl(pos); + cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); + cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); + xdr_encode_hyper( + (__be32 *)&cur_rchunk->rc_target.rs_offset, + seg->mr_base); + dprintk("RPC: %s: read chunk " + "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__, + seg->mr_len, seg->mr_base, seg->mr_rkey, pos, + n < nsegs ? "more" : "last"); + cur_rchunk++; + r_xprt->rx_stats.read_chunk_count++; + } else { /* write/reply */ + cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); + cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); + xdr_encode_hyper( + (__be32 *)&cur_wchunk->wc_target.rs_offset, + seg->mr_base); + dprintk("RPC: %s: %s chunk " + "elem %d@0x%llx:0x%x (%s)\n", __func__, + (type == rpcrdma_replych) ? "reply" : "write", + seg->mr_len, seg->mr_base, seg->mr_rkey, + n < nsegs ? "more" : "last"); + cur_wchunk++; + if (type == rpcrdma_replych) + r_xprt->rx_stats.reply_chunk_count++; + else + r_xprt->rx_stats.write_chunk_count++; + r_xprt->rx_stats.total_rdma_request += seg->mr_len; + } + nchunks++; + seg += n; + nsegs -= n; + } while (nsegs); + + /* success. all failures return above */ + req->rl_nchunks = nchunks; + + BUG_ON(nchunks == 0); + + /* + * finish off header. If write, marshal discrim and nchunks. + */ + if (cur_rchunk) { + iptr = (__be32 *) cur_rchunk; + *iptr++ = xdr_zero; /* finish the read chunk list */ + *iptr++ = xdr_zero; /* encode a NULL write chunk list */ + *iptr++ = xdr_zero; /* encode a NULL reply chunk */ + } else { + warray->wc_discrim = xdr_one; + warray->wc_nchunks = htonl(nchunks); + iptr = (__be32 *) cur_wchunk; + if (type == rpcrdma_writech) { + *iptr++ = xdr_zero; /* finish the write chunk list */ + *iptr++ = xdr_zero; /* encode a NULL reply chunk */ + } + } + + /* + * Return header size. + */ + return (unsigned char *)iptr - (unsigned char *)headerp; + +out: + for (pos = 0; nchunks--;) + pos += rpcrdma_deregister_external( + &req->rl_segments[pos], r_xprt, NULL); + return 0; +} + +/* + * Copy write data inline. + * This function is used for "small" requests. Data which is passed + * to RPC via iovecs (or page list) is copied directly into the + * pre-registered memory buffer for this request. For small amounts + * of data, this is efficient. The cutoff value is tunable. + */ +static int +rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) +{ + int i, npages, curlen; + int copy_len; + unsigned char *srcp, *destp; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + + destp = rqst->rq_svec[0].iov_base; + curlen = rqst->rq_svec[0].iov_len; + destp += curlen; + /* + * Do optional padding where it makes sense. Alignment of write + * payload can help the server, if our setting is accurate. + */ + pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/); + if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH) + pad = 0; /* don't pad this request */ + + dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n", + __func__, pad, destp, rqst->rq_slen, curlen); + + copy_len = rqst->rq_snd_buf.page_len; + r_xprt->rx_stats.pullup_copy_count += copy_len; + npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; + for (i = 0; copy_len && i < npages; i++) { + if (i == 0) + curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base; + else + curlen = PAGE_SIZE; + if (curlen > copy_len) + curlen = copy_len; + dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", + __func__, i, destp, copy_len, curlen); + srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], + KM_SKB_SUNRPC_DATA); + if (i == 0) + memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen); + else + memcpy(destp, srcp, curlen); + kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); + rqst->rq_svec[0].iov_len += curlen; + destp += curlen; + copy_len -= curlen; + } + if (rqst->rq_snd_buf.tail[0].iov_len) { + curlen = rqst->rq_snd_buf.tail[0].iov_len; + if (destp != rqst->rq_snd_buf.tail[0].iov_base) { + memcpy(destp, + rqst->rq_snd_buf.tail[0].iov_base, curlen); + r_xprt->rx_stats.pullup_copy_count += curlen; + } + dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n", + __func__, destp, copy_len, curlen); + rqst->rq_svec[0].iov_len += curlen; + } + /* header now contains entire send message */ + return pad; +} + +/* + * Marshal a request: the primary job of this routine is to choose + * the transfer modes. See comments below. + * + * Uses multiple RDMA IOVs for a request: + * [0] -- RPC RDMA header, which uses memory from the *start* of the + * preregistered buffer that already holds the RPC data in + * its middle. + * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. + * [2] -- optional padding. + * [3] -- if padded, header only in [1] and data here. + */ + +int +rpcrdma_marshal_req(struct rpc_rqst *rqst) +{ + struct rpc_xprt *xprt = rqst->rq_task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + char *base; + size_t hdrlen, rpclen, padlen; + enum rpcrdma_chunktype rtype, wtype; + struct rpcrdma_msg *headerp; + + /* + * rpclen gets amount of data in first buffer, which is the + * pre-registered buffer. + */ + base = rqst->rq_svec[0].iov_base; + rpclen = rqst->rq_svec[0].iov_len; + + /* build RDMA header in private area at front */ + headerp = (struct rpcrdma_msg *) req->rl_base; + /* don't htonl XID, it's already done in request */ + headerp->rm_xid = rqst->rq_xid; + headerp->rm_vers = xdr_one; + headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); + headerp->rm_type = __constant_htonl(RDMA_MSG); + + /* + * Chunks needed for results? + * + * o If the expected result is under the inline threshold, all ops + * return as inline (but see later). + * o Large non-read ops return as a single reply chunk. + * o Large read ops return data as write chunk(s), header as inline. + * + * Note: the NFS code sending down multiple result segments implies + * the op is one of read, readdir[plus], readlink or NFSv4 getacl. + */ + + /* + * This code can handle read chunks, write chunks OR reply + * chunks -- only one type. If the request is too big to fit + * inline, then we will choose read chunks. If the request is + * a READ, then use write chunks to separate the file data + * into pages; otherwise use reply chunks. + */ + if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) + wtype = rpcrdma_noch; + else if (rqst->rq_rcv_buf.page_len == 0) + wtype = rpcrdma_replych; + else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) + wtype = rpcrdma_writech; + else + wtype = rpcrdma_replych; + + /* + * Chunks needed for arguments? + * + * o If the total request is under the inline threshold, all ops + * are sent as inline. + * o Large non-write ops are sent with the entire message as a + * single read chunk (protocol 0-position special case). + * o Large write ops transmit data as read chunk(s), header as + * inline. + * + * Note: the NFS code sending down multiple argument segments + * implies the op is a write. + * TBD check NFSv4 setacl + */ + if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) + rtype = rpcrdma_noch; + else if (rqst->rq_snd_buf.page_len == 0) + rtype = rpcrdma_areadch; + else + rtype = rpcrdma_readch; + + /* The following simplification is not true forever */ + if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) + wtype = rpcrdma_noch; + BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); + + if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && + (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { + /* forced to "pure inline"? */ + dprintk("RPC: %s: too much data (%d/%d) for inline\n", + __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); + return -1; + } + + hdrlen = 28; /*sizeof *headerp;*/ + padlen = 0; + + /* + * Pull up any extra send data into the preregistered buffer. + * When padding is in use and applies to the transfer, insert + * it and change the message type. + */ + if (rtype == rpcrdma_noch) { + + padlen = rpcrdma_inline_pullup(rqst, + RPCRDMA_INLINE_PAD_VALUE(rqst)); + + if (padlen) { + headerp->rm_type = __constant_htonl(RDMA_MSGP); + headerp->rm_body.rm_padded.rm_align = + htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); + headerp->rm_body.rm_padded.rm_thresh = + __constant_htonl(RPCRDMA_INLINE_PAD_THRESH); + headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; + headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; + headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; + hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ + BUG_ON(wtype != rpcrdma_noch); + + } else { + headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; + headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; + headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero; + /* new length after pullup */ + rpclen = rqst->rq_svec[0].iov_len; + /* + * Currently we try to not actually use read inline. + * Reply chunks have the desirable property that + * they land, packed, directly in the target buffers + * without headers, so they require no fixup. The + * additional RDMA Write op sends the same amount + * of data, streams on-the-wire and adds no overhead + * on receive. Therefore, we request a reply chunk + * for non-writes wherever feasible and efficient. + */ + if (wtype == rpcrdma_noch && + r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) + wtype = rpcrdma_replych; + } + } + + /* + * Marshal chunks. This routine will return the header length + * consumed by marshaling. + */ + if (rtype != rpcrdma_noch) { + hdrlen = rpcrdma_create_chunks(rqst, + &rqst->rq_snd_buf, headerp, rtype); + wtype = rtype; /* simplify dprintk */ + + } else if (wtype != rpcrdma_noch) { + hdrlen = rpcrdma_create_chunks(rqst, + &rqst->rq_rcv_buf, headerp, wtype); + } + + if (hdrlen == 0) + return -1; + + dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" + " headerp 0x%p base 0x%p lkey 0x%x\n", + __func__, transfertypes[wtype], hdrlen, rpclen, padlen, + headerp, base, req->rl_iov.lkey); + + /* + * initialize send_iov's - normally only two: rdma chunk header and + * single preregistered RPC header buffer, but if padding is present, + * then use a preregistered (and zeroed) pad buffer between the RPC + * header and any write data. In all non-rdma cases, any following + * data has been copied into the RPC header buffer. + */ + req->rl_send_iov[0].addr = req->rl_iov.addr; + req->rl_send_iov[0].length = hdrlen; + req->rl_send_iov[0].lkey = req->rl_iov.lkey; + + req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base); + req->rl_send_iov[1].length = rpclen; + req->rl_send_iov[1].lkey = req->rl_iov.lkey; + + req->rl_niovs = 2; + + if (padlen) { + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + + req->rl_send_iov[2].addr = ep->rep_pad.addr; + req->rl_send_iov[2].length = padlen; + req->rl_send_iov[2].lkey = ep->rep_pad.lkey; + + req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; + req->rl_send_iov[3].length = rqst->rq_slen - rpclen; + req->rl_send_iov[3].lkey = req->rl_iov.lkey; + + req->rl_niovs = 4; + } + + return 0; +} + +/* + * Chase down a received write or reply chunklist to get length + * RDMA'd by server. See map at rpcrdma_create_chunks()! :-) + */ +static int +rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, __be32 **iptrp) +{ + unsigned int i, total_len; + struct rpcrdma_write_chunk *cur_wchunk; + + i = ntohl(**iptrp); /* get array count */ + if (i > max) + return -1; + cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); + total_len = 0; + while (i--) { + struct rpcrdma_segment *seg = &cur_wchunk->wc_target; + ifdebug(FACILITY) { + u64 off; + xdr_decode_hyper((__be32 *)&seg->rs_offset, &off); + dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", + __func__, + ntohl(seg->rs_length), + off, + ntohl(seg->rs_handle)); + } + total_len += ntohl(seg->rs_length); + ++cur_wchunk; + } + /* check and adjust for properly terminated write chunk */ + if (wrchunk) { + __be32 *w = (__be32 *) cur_wchunk; + if (*w++ != xdr_zero) + return -1; + cur_wchunk = (struct rpcrdma_write_chunk *) w; + } + if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) + return -1; + + *iptrp = (__be32 *) cur_wchunk; + return total_len; +} + +/* + * Scatter inline received data back into provided iov's. + */ +static void +rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) +{ + int i, npages, curlen, olen; + char *destp; + + curlen = rqst->rq_rcv_buf.head[0].iov_len; + if (curlen > copy_len) { /* write chunk header fixup */ + curlen = copy_len; + rqst->rq_rcv_buf.head[0].iov_len = curlen; + } + + dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", + __func__, srcp, copy_len, curlen); + + /* Shift pointer for first receive segment only */ + rqst->rq_rcv_buf.head[0].iov_base = srcp; + srcp += curlen; + copy_len -= curlen; + + olen = copy_len; + i = 0; + rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; + if (copy_len && rqst->rq_rcv_buf.page_len) { + npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + + rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; + for (; i < npages; i++) { + if (i == 0) + curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base; + else + curlen = PAGE_SIZE; + if (curlen > copy_len) + curlen = copy_len; + dprintk("RPC: %s: page %d" + " srcp 0x%p len %d curlen %d\n", + __func__, i, srcp, copy_len, curlen); + destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], + KM_SKB_SUNRPC_DATA); + if (i == 0) + memcpy(destp + rqst->rq_rcv_buf.page_base, + srcp, curlen); + else + memcpy(destp, srcp, curlen); + flush_dcache_page(rqst->rq_rcv_buf.pages[i]); + kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); + srcp += curlen; + copy_len -= curlen; + if (copy_len == 0) + break; + } + rqst->rq_rcv_buf.page_len = olen - copy_len; + } else + rqst->rq_rcv_buf.page_len = 0; + + if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { + curlen = copy_len; + if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) + curlen = rqst->rq_rcv_buf.tail[0].iov_len; + if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) + memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); + dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", + __func__, srcp, copy_len, curlen); + rqst->rq_rcv_buf.tail[0].iov_len = curlen; + copy_len -= curlen; ++i; + } else + rqst->rq_rcv_buf.tail[0].iov_len = 0; + + if (copy_len) + dprintk("RPC: %s: %d bytes in" + " %d extra segments (%d lost)\n", + __func__, olen, i, copy_len); + + /* TBD avoid a warning from call_decode() */ + rqst->rq_private_buf = rqst->rq_rcv_buf; +} + +/* + * This function is called when an async event is posted to + * the connection which changes the connection state. All it + * does at this point is mark the connection up/down, the rpc + * timers do the rest. + */ +void +rpcrdma_conn_func(struct rpcrdma_ep *ep) +{ + struct rpc_xprt *xprt = ep->rep_xprt; + + spin_lock_bh(&xprt->transport_lock); + if (ep->rep_connected > 0) { + if (!xprt_test_and_set_connected(xprt)) + xprt_wake_pending_tasks(xprt, 0); + } else { + if (xprt_test_and_clear_connected(xprt)) + xprt_wake_pending_tasks(xprt, ep->rep_connected); + } + spin_unlock_bh(&xprt->transport_lock); +} + +/* + * This function is called when memory window unbind which we are waiting + * for completes. Just use rr_func (zeroed by upcall) to signal completion. + */ +static void +rpcrdma_unbind_func(struct rpcrdma_rep *rep) +{ + wake_up(&rep->rr_unbind); +} + +/* + * Called as a tasklet to do req/reply match and complete a request + * Errors must result in the RPC task either being awakened, or + * allowed to timeout, to discover the errors at that time. + */ +void +rpcrdma_reply_handler(struct rpcrdma_rep *rep) +{ + struct rpcrdma_msg *headerp; + struct rpcrdma_req *req; + struct rpc_rqst *rqst; + struct rpc_xprt *xprt = rep->rr_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + __be32 *iptr; + int i, rdmalen, status; + + /* Check status. If bad, signal disconnect and return rep to pool */ + if (rep->rr_len == ~0U) { + rpcrdma_recv_buffer_put(rep); + if (r_xprt->rx_ep.rep_connected == 1) { + r_xprt->rx_ep.rep_connected = -EIO; + rpcrdma_conn_func(&r_xprt->rx_ep); + } + return; + } + if (rep->rr_len < 28) { + dprintk("RPC: %s: short/invalid reply\n", __func__); + goto repost; + } + headerp = (struct rpcrdma_msg *) rep->rr_base; + if (headerp->rm_vers != xdr_one) { + dprintk("RPC: %s: invalid version %d\n", + __func__, ntohl(headerp->rm_vers)); + goto repost; + } + + /* Get XID and try for a match. */ + spin_lock(&xprt->transport_lock); + rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); + if (rqst == NULL) { + spin_unlock(&xprt->transport_lock); + dprintk("RPC: %s: reply 0x%p failed " + "to match any request xid 0x%08x len %d\n", + __func__, rep, headerp->rm_xid, rep->rr_len); +repost: + r_xprt->rx_stats.bad_reply_count++; + rep->rr_func = rpcrdma_reply_handler; + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + rpcrdma_recv_buffer_put(rep); + + return; + } + + /* get request object */ + req = rpcr_to_rdmar(rqst); + + dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" + " RPC request 0x%p xid 0x%08x\n", + __func__, rep, req, rqst, headerp->rm_xid); + + BUG_ON(!req || req->rl_reply); + + /* from here on, the reply is no longer an orphan */ + req->rl_reply = rep; + + /* check for expected message types */ + /* The order of some of these tests is important. */ + switch (headerp->rm_type) { + case __constant_htonl(RDMA_MSG): + /* never expect read chunks */ + /* never expect reply chunks (two ways to check) */ + /* never expect write chunks without having offered RDMA */ + if (headerp->rm_body.rm_chunks[0] != xdr_zero || + (headerp->rm_body.rm_chunks[1] == xdr_zero && + headerp->rm_body.rm_chunks[2] != xdr_zero) || + (headerp->rm_body.rm_chunks[1] != xdr_zero && + req->rl_nchunks == 0)) + goto badheader; + if (headerp->rm_body.rm_chunks[1] != xdr_zero) { + /* count any expected write chunks in read reply */ + /* start at write chunk array count */ + iptr = &headerp->rm_body.rm_chunks[2]; + rdmalen = rpcrdma_count_chunks(rep, + req->rl_nchunks, 1, &iptr); + /* check for validity, and no reply chunk after */ + if (rdmalen < 0 || *iptr++ != xdr_zero) + goto badheader; + rep->rr_len -= + ((unsigned char *)iptr - (unsigned char *)headerp); + status = rep->rr_len + rdmalen; + r_xprt->rx_stats.total_rdma_reply += rdmalen; + } else { + /* else ordinary inline */ + iptr = (__be32 *)((unsigned char *)headerp + 28); + rep->rr_len -= 28; /*sizeof *headerp;*/ + status = rep->rr_len; + } + /* Fix up the rpc results for upper layer */ + rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); + break; + + case __constant_htonl(RDMA_NOMSG): + /* never expect read or write chunks, always reply chunks */ + if (headerp->rm_body.rm_chunks[0] != xdr_zero || + headerp->rm_body.rm_chunks[1] != xdr_zero || + headerp->rm_body.rm_chunks[2] != xdr_one || + req->rl_nchunks == 0) + goto badheader; + iptr = (__be32 *)((unsigned char *)headerp + 28); + rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); + if (rdmalen < 0) + goto badheader; + r_xprt->rx_stats.total_rdma_reply += rdmalen; + /* Reply chunk buffer already is the reply vector - no fixup. */ + status = rdmalen; + break; + +badheader: + default: + dprintk("%s: invalid rpcrdma reply header (type %d):" + " chunks[012] == %d %d %d" + " expected chunks <= %d\n", + __func__, ntohl(headerp->rm_type), + headerp->rm_body.rm_chunks[0], + headerp->rm_body.rm_chunks[1], + headerp->rm_body.rm_chunks[2], + req->rl_nchunks); + status = -EIO; + r_xprt->rx_stats.bad_reply_count++; + break; + } + + /* If using mw bind, start the deregister process now. */ + /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ + if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS: + for (i = 0; req->rl_nchunks-- > 1;) + i += rpcrdma_deregister_external( + &req->rl_segments[i], r_xprt, NULL); + /* Optionally wait (not here) for unbinds to complete */ + rep->rr_func = rpcrdma_unbind_func; + (void) rpcrdma_deregister_external(&req->rl_segments[i], + r_xprt, rep); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + for (i = 0; req->rl_nchunks--;) + i += rpcrdma_deregister_external(&req->rl_segments[i], + r_xprt, NULL); + break; + default: + break; + } + + dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", + __func__, xprt, rqst, status); + xprt_complete_rqst(rqst->rq_task, status); + spin_unlock(&xprt->transport_lock); +} diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c new file mode 100644 index 00000000000..dc55cc974c9 --- /dev/null +++ b/net/sunrpc/xprtrdma/transport.c @@ -0,0 +1,800 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * transport.c + * + * This file contains the top-level implementation of an RPC RDMA + * transport. + * + * Naming convention: functions beginning with xprt_ are part of the + * transport switch. All others are RPC RDMA internal. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/seq_file.h> + +#include "xprt_rdma.h" + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +MODULE_LICENSE("Dual BSD/GPL"); + +MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS"); +MODULE_AUTHOR("Network Appliance, Inc."); + +/* + * tunables + */ + +static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; +static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; +static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; +static unsigned int xprt_rdma_inline_write_padding; +#if !RPCRDMA_PERSISTENT_REGISTRATION +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ +#else +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; +#endif + +#ifdef RPC_DEBUG + +static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; +static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; +static unsigned int zero; +static unsigned int max_padding = PAGE_SIZE; +static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; +static unsigned int max_memreg = RPCRDMA_LAST - 1; + +static struct ctl_table_header *sunrpc_table_header; + +static ctl_table xr_tunables_table[] = { + { + .ctl_name = CTL_SLOTTABLE_RDMA, + .procname = "rdma_slot_table_entries", + .data = &xprt_rdma_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_RDMA_MAXINLINEREAD, + .procname = "rdma_max_inline_read", + .data = &xprt_rdma_max_inline_read, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_RDMA_MAXINLINEWRITE, + .procname = "rdma_max_inline_write", + .data = &xprt_rdma_max_inline_write, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_RDMA_WRITEPADDING, + .procname = "rdma_inline_write_padding", + .data = &xprt_rdma_inline_write_padding, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &max_padding, + }, + { + .ctl_name = CTL_RDMA_MEMREG, + .procname = "rdma_memreg_strategy", + .data = &xprt_rdma_memreg_strategy, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_memreg, + .extra2 = &max_memreg, + }, + { + .ctl_name = 0, + }, +}; + +static ctl_table sunrpc_table[] = { + { + .ctl_name = CTL_SUNRPC, + .procname = "sunrpc", + .mode = 0555, + .child = xr_tunables_table + }, + { + .ctl_name = 0, + }, +}; + +#endif + +static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ + +static void +xprt_rdma_format_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) + &rpcx_to_rdmad(xprt).addr; + char *buf; + + buf = kzalloc(20, GFP_KERNEL); + if (buf) + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%u", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; + + buf = kzalloc(48, GFP_KERNEL); + if (buf) + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), "rdma"); + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + /* netid */ + xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; +} + +static void +xprt_rdma_free_addresses(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]); +} + +static void +xprt_rdma_connect_worker(struct work_struct *work) +{ + struct rpcrdma_xprt *r_xprt = + container_of(work, struct rpcrdma_xprt, rdma_connect.work); + struct rpc_xprt *xprt = &r_xprt->xprt; + int rc = 0; + + if (!xprt->shutdown) { + xprt_clear_connected(xprt); + + dprintk("RPC: %s: %sconnect\n", __func__, + r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); + rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + goto out; + } + goto out_clear; + +out: + xprt_wake_pending_tasks(xprt, rc); + +out_clear: + dprintk("RPC: %s: exit\n", __func__); + xprt_clear_connecting(xprt); +} + +/* + * xprt_rdma_destroy + * + * Destroy the xprt. + * Free all memory associated with the object, including its own. + * NOTE: none of the *destroy methods free memory for their top-level + * objects, even though they may have allocated it (they do free + * private memory). It's up to the caller to handle it. In this + * case (RDMA transport), all structure memory is inlined with the + * struct rpcrdma_xprt. + */ +static void +xprt_rdma_destroy(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int rc; + + dprintk("RPC: %s: called\n", __func__); + + cancel_delayed_work(&r_xprt->rdma_connect); + flush_scheduled_work(); + + xprt_clear_connected(xprt); + + rpcrdma_buffer_destroy(&r_xprt->rx_buf); + rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", + __func__, rc); + rpcrdma_ia_close(&r_xprt->rx_ia); + + xprt_rdma_free_addresses(xprt); + + kfree(xprt->slot); + xprt->slot = NULL; + kfree(xprt); + + dprintk("RPC: %s: returning\n", __func__); + + module_put(THIS_MODULE); +} + +/** + * xprt_setup_rdma - Set up transport to use RDMA + * + * @args: rpc transport arguments + */ +static struct rpc_xprt * +xprt_setup_rdma(struct xprt_create *args) +{ + struct rpcrdma_create_data_internal cdata; + struct rpc_xprt *xprt; + struct rpcrdma_xprt *new_xprt; + struct rpcrdma_ep *new_ep; + struct sockaddr_in *sin; + int rc; + + if (args->addrlen > sizeof(xprt->addr)) { + dprintk("RPC: %s: address too large\n", __func__); + return ERR_PTR(-EBADF); + } + + xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); + if (xprt == NULL) { + dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", + __func__); + return ERR_PTR(-ENOMEM); + } + + xprt->max_reqs = xprt_rdma_slot_table_entries; + xprt->slot = kcalloc(xprt->max_reqs, + sizeof(struct rpc_rqst), GFP_KERNEL); + if (xprt->slot == NULL) { + kfree(xprt); + dprintk("RPC: %s: couldn't allocate %d slots\n", + __func__, xprt->max_reqs); + return ERR_PTR(-ENOMEM); + } + + /* 60 second timeout, no retries */ + xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ); + xprt->bind_timeout = (60U * HZ); + xprt->connect_timeout = (60U * HZ); + xprt->reestablish_timeout = (5U * HZ); + xprt->idle_timeout = (5U * 60 * HZ); + + xprt->resvport = 0; /* privileged port not needed */ + xprt->tsh_size = 0; /* RPC-RDMA handles framing */ + xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE; + xprt->ops = &xprt_rdma_procs; + + /* + * Set up RDMA-specific connect data. + */ + + /* Put server RDMA address in local cdata */ + memcpy(&cdata.addr, args->dstaddr, args->addrlen); + + /* Ensure xprt->addr holds valid server TCP (not RDMA) + * address, for any side protocols which peek at it */ + xprt->prot = IPPROTO_TCP; + xprt->addrlen = args->addrlen; + memcpy(&xprt->addr, &cdata.addr, xprt->addrlen); + + sin = (struct sockaddr_in *)&cdata.addr; + if (ntohs(sin->sin_port) != 0) + xprt_set_bound(xprt); + + dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__, + NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); + + /* Set max requests */ + cdata.max_requests = xprt->max_reqs; + + /* Set some length limits */ + cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ + cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ + + cdata.inline_wsize = xprt_rdma_max_inline_write; + if (cdata.inline_wsize > cdata.wsize) + cdata.inline_wsize = cdata.wsize; + + cdata.inline_rsize = xprt_rdma_max_inline_read; + if (cdata.inline_rsize > cdata.rsize) + cdata.inline_rsize = cdata.rsize; + + cdata.padding = xprt_rdma_inline_write_padding; + + /* + * Create new transport instance, which includes initialized + * o ia + * o endpoint + * o buffers + */ + + new_xprt = rpcx_to_rdmax(xprt); + + rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr, + xprt_rdma_memreg_strategy); + if (rc) + goto out1; + + /* + * initialize and create ep + */ + new_xprt->rx_data = cdata; + new_ep = &new_xprt->rx_ep; + new_ep->rep_remote_addr = cdata.addr; + + rc = rpcrdma_ep_create(&new_xprt->rx_ep, + &new_xprt->rx_ia, &new_xprt->rx_data); + if (rc) + goto out2; + + /* + * Allocate pre-registered send and receive buffers for headers and + * any inline data. Also specify any padding which will be provided + * from a preregistered zero buffer. + */ + rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, + &new_xprt->rx_data); + if (rc) + goto out3; + + /* + * Register a callback for connection events. This is necessary because + * connection loss notification is async. We also catch connection loss + * when reaping receives. + */ + INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); + new_ep->rep_func = rpcrdma_conn_func; + new_ep->rep_xprt = xprt; + + xprt_rdma_format_addresses(xprt); + + if (!try_module_get(THIS_MODULE)) + goto out4; + + return xprt; + +out4: + xprt_rdma_free_addresses(xprt); + rc = -EINVAL; +out3: + (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); +out2: + rpcrdma_ia_close(&new_xprt->rx_ia); +out1: + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(rc); +} + +/* + * Close a connection, during shutdown or timeout/reconnect + */ +static void +xprt_rdma_close(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + dprintk("RPC: %s: closing\n", __func__); + xprt_disconnect(xprt); + (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); +} + +static void +xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) +{ + struct sockaddr_in *sap; + + sap = (struct sockaddr_in *)&xprt->addr; + sap->sin_port = htons(port); + sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; + sap->sin_port = htons(port); + dprintk("RPC: %s: %u\n", __func__, port); +} + +static void +xprt_rdma_connect(struct rpc_task *task) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + if (!xprt_test_and_set_connecting(xprt)) { + if (r_xprt->rx_ep.rep_connected != 0) { + /* Reconnect */ + schedule_delayed_work(&r_xprt->rdma_connect, + xprt->reestablish_timeout); + } else { + schedule_delayed_work(&r_xprt->rdma_connect, 0); + if (!RPC_IS_ASYNC(task)) + flush_scheduled_work(); + } + } +} + +static int +xprt_rdma_reserve_xprt(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int credits = atomic_read(&r_xprt->rx_buf.rb_credits); + + /* == RPC_CWNDSCALE @ init, but *after* setup */ + if (r_xprt->rx_buf.rb_cwndscale == 0UL) { + r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; + dprintk("RPC: %s: cwndscale %lu\n", __func__, + r_xprt->rx_buf.rb_cwndscale); + BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); + } + xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; + return xprt_reserve_xprt_cong(task); +} + +/* + * The RDMA allocate/free functions need the task structure as a place + * to hide the struct rpcrdma_req, which is necessary for the actual send/recv + * sequence. For this reason, the recv buffers are attached to send + * buffers for portions of the RPC. Note that the RPC layer allocates + * both send and receive buffers in the same call. We may register + * the receive buffer portion when using reply chunks. + */ +static void * +xprt_rdma_allocate(struct rpc_task *task, size_t size) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_req *req, *nreq; + + req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); + BUG_ON(NULL == req); + + if (size > req->rl_size) { + dprintk("RPC: %s: size %zd too large for buffer[%zd]: " + "prog %d vers %d proc %d\n", + __func__, size, req->rl_size, + task->tk_client->cl_prog, task->tk_client->cl_vers, + task->tk_msg.rpc_proc->p_proc); + /* + * Outgoing length shortage. Our inline write max must have + * been configured to perform direct i/o. + * + * This is therefore a large metadata operation, and the + * allocate call was made on the maximum possible message, + * e.g. containing long filename(s) or symlink data. In + * fact, while these metadata operations *might* carry + * large outgoing payloads, they rarely *do*. However, we + * have to commit to the request here, so reallocate and + * register it now. The data path will never require this + * reallocation. + * + * If the allocation or registration fails, the RPC framework + * will (doggedly) retry. + */ + if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == + RPCRDMA_BOUNCEBUFFERS) { + /* forced to "pure inline" */ + dprintk("RPC: %s: too much data (%zd) for inline " + "(r/w max %d/%d)\n", __func__, size, + rpcx_to_rdmad(xprt).inline_rsize, + rpcx_to_rdmad(xprt).inline_wsize); + size = req->rl_size; + rpc_exit(task, -EIO); /* fail the operation */ + rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + goto out; + } + if (task->tk_flags & RPC_TASK_SWAPPER) + nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); + else + nreq = kmalloc(sizeof *req + size, GFP_NOFS); + if (nreq == NULL) + goto outfail; + + if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia, + nreq->rl_base, size + sizeof(struct rpcrdma_req) + - offsetof(struct rpcrdma_req, rl_base), + &nreq->rl_handle, &nreq->rl_iov)) { + kfree(nreq); + goto outfail; + } + rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size; + nreq->rl_size = size; + nreq->rl_niovs = 0; + nreq->rl_nchunks = 0; + nreq->rl_buffer = (struct rpcrdma_buffer *)req; + nreq->rl_reply = req->rl_reply; + memcpy(nreq->rl_segments, + req->rl_segments, sizeof nreq->rl_segments); + /* flag the swap with an unused field */ + nreq->rl_iov.length = 0; + req->rl_reply = NULL; + req = nreq; + } + dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); +out: + return req->rl_xdr_buf; + +outfail: + rpcrdma_buffer_put(req); + rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + return NULL; +} + +/* + * This function returns all RDMA resources to the pool. + */ +static void +xprt_rdma_free(void *buffer) +{ + struct rpcrdma_req *req; + struct rpcrdma_xprt *r_xprt; + struct rpcrdma_rep *rep; + int i; + + if (buffer == NULL) + return; + + req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); + r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); + rep = req->rl_reply; + + dprintk("RPC: %s: called on 0x%p%s\n", + __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); + + /* + * Finish the deregistration. When using mw bind, this was + * begun in rpcrdma_reply_handler(). In all other modes, we + * do it here, in thread context. The process is considered + * complete when the rr_func vector becomes NULL - this + * was put in place during rpcrdma_reply_handler() - the wait + * call below will not block if the dereg is "done". If + * interrupted, our framework will clean up. + */ + for (i = 0; req->rl_nchunks;) { + --req->rl_nchunks; + i += rpcrdma_deregister_external( + &req->rl_segments[i], r_xprt, NULL); + } + + if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { + rep->rr_func = NULL; /* abandon the callback */ + req->rl_reply = NULL; + } + + if (req->rl_iov.length == 0) { /* see allocate above */ + struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer; + oreq->rl_reply = req->rl_reply; + (void) rpcrdma_deregister_internal(&r_xprt->rx_ia, + req->rl_handle, + &req->rl_iov); + kfree(req); + req = oreq; + } + + /* Put back request+reply buffers */ + rpcrdma_buffer_put(req); +} + +/* + * send_request invokes the meat of RPC RDMA. It must do the following: + * 1. Marshal the RPC request into an RPC RDMA request, which means + * putting a header in front of data, and creating IOVs for RDMA + * from those in the request. + * 2. In marshaling, detect opportunities for RDMA, and use them. + * 3. Post a recv message to set up asynch completion, then send + * the request (rpcrdma_ep_post). + * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). + */ + +static int +xprt_rdma_send_request(struct rpc_task *task) +{ + struct rpc_rqst *rqst = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + /* marshal the send itself */ + if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { + r_xprt->rx_stats.failed_marshal_count++; + dprintk("RPC: %s: rpcrdma_marshal_req failed\n", + __func__); + return -EIO; + } + + if (req->rl_reply == NULL) /* e.g. reconnection */ + rpcrdma_recv_buffer_get(req); + + if (req->rl_reply) { + req->rl_reply->rr_func = rpcrdma_reply_handler; + /* this need only be done once, but... */ + req->rl_reply->rr_xprt = xprt; + } + + if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { + xprt_disconnect(xprt); + return -ENOTCONN; /* implies disconnect */ + } + + rqst->rq_bytes_sent = 0; + return 0; +} + +static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, + "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu " + "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n", + + 0, /* need a local port? */ + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u, + + r_xprt->rx_stats.read_chunk_count, + r_xprt->rx_stats.write_chunk_count, + r_xprt->rx_stats.reply_chunk_count, + r_xprt->rx_stats.total_rdma_request, + r_xprt->rx_stats.total_rdma_reply, + r_xprt->rx_stats.pullup_copy_count, + r_xprt->rx_stats.fixup_copy_count, + r_xprt->rx_stats.hardway_register_count, + r_xprt->rx_stats.failed_marshal_count, + r_xprt->rx_stats.bad_reply_count); +} + +/* + * Plumbing for rpc transport switch and kernel module + */ + +static struct rpc_xprt_ops xprt_rdma_procs = { + .reserve_xprt = xprt_rdma_reserve_xprt, + .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ + .release_request = xprt_release_rqst_cong, /* ditto */ + .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ + .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ + .set_port = xprt_rdma_set_port, + .connect = xprt_rdma_connect, + .buf_alloc = xprt_rdma_allocate, + .buf_free = xprt_rdma_free, + .send_request = xprt_rdma_send_request, + .close = xprt_rdma_close, + .destroy = xprt_rdma_destroy, + .print_stats = xprt_rdma_print_stats +}; + +static struct xprt_class xprt_rdma = { + .list = LIST_HEAD_INIT(xprt_rdma.list), + .name = "rdma", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_RDMA, + .setup = xprt_setup_rdma, +}; + +static void __exit xprt_rdma_cleanup(void) +{ + int rc; + + dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); +#ifdef RPC_DEBUG + if (sunrpc_table_header) { + unregister_sysctl_table(sunrpc_table_header); + sunrpc_table_header = NULL; + } +#endif + rc = xprt_unregister_transport(&xprt_rdma); + if (rc) + dprintk("RPC: %s: xprt_unregister returned %i\n", + __func__, rc); +} + +static int __init xprt_rdma_init(void) +{ + int rc; + + rc = xprt_register_transport(&xprt_rdma); + + if (rc) + return rc; + + dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); + + dprintk(KERN_INFO "Defaults:\n"); + dprintk(KERN_INFO "\tSlots %d\n" + "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", + xprt_rdma_slot_table_entries, + xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); + dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", + xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); + +#ifdef RPC_DEBUG + if (!sunrpc_table_header) + sunrpc_table_header = register_sysctl_table(sunrpc_table); +#endif + return 0; +} + +module_init(xprt_rdma_init); +module_exit(xprt_rdma_cleanup); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c new file mode 100644 index 00000000000..44b0fb942e8 --- /dev/null +++ b/net/sunrpc/xprtrdma/verbs.c @@ -0,0 +1,1627 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * verbs.c + * + * Encapsulates the major functions managing: + * o adapters + * o endpoints + * o connections + * o buffer memory + */ + +#include <linux/pci.h> /* for Tavor hack below */ + +#include "xprt_rdma.h" + +/* + * Globals/Macros + */ + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +/* + * internal functions + */ + +/* + * handle replies in tasklet context, using a single, global list + * rdma tasklet function -- just turn around and call the func + * for all replies on the list + */ + +static DEFINE_SPINLOCK(rpcrdma_tk_lock_g); +static LIST_HEAD(rpcrdma_tasklets_g); + +static void +rpcrdma_run_tasklet(unsigned long data) +{ + struct rpcrdma_rep *rep; + void (*func)(struct rpcrdma_rep *); + unsigned long flags; + + data = data; + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + while (!list_empty(&rpcrdma_tasklets_g)) { + rep = list_entry(rpcrdma_tasklets_g.next, + struct rpcrdma_rep, rr_list); + list_del(&rep->rr_list); + func = rep->rr_func; + rep->rr_func = NULL; + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + + if (func) + func(rep); + else + rpcrdma_recv_buffer_put(rep); + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + } + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); +} + +static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); + +static inline void +rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep) +{ + unsigned long flags; + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g); + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + tasklet_schedule(&rpcrdma_tasklet_g); +} + +static void +rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) +{ + struct rpcrdma_ep *ep = context; + + dprintk("RPC: %s: QP error %X on device %s ep %p\n", + __func__, event->event, event->device->name, context); + if (ep->rep_connected == 1) { + ep->rep_connected = -EIO; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + } +} + +static void +rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) +{ + struct rpcrdma_ep *ep = context; + + dprintk("RPC: %s: CQ error %X on device %s ep %p\n", + __func__, event->event, event->device->name, context); + if (ep->rep_connected == 1) { + ep->rep_connected = -EIO; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + } +} + +static inline +void rpcrdma_event_process(struct ib_wc *wc) +{ + struct rpcrdma_rep *rep = + (struct rpcrdma_rep *)(unsigned long) wc->wr_id; + + dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", + __func__, rep, wc->status, wc->opcode, wc->byte_len); + + if (!rep) /* send or bind completion that we don't care about */ + return; + + if (IB_WC_SUCCESS != wc->status) { + dprintk("RPC: %s: %s WC status %X, connection lost\n", + __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send", + wc->status); + rep->rr_len = ~0U; + rpcrdma_schedule_tasklet(rep); + return; + } + + switch (wc->opcode) { + case IB_WC_RECV: + rep->rr_len = wc->byte_len; + ib_dma_sync_single_for_cpu( + rdmab_to_ia(rep->rr_buffer)->ri_id->device, + rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); + /* Keep (only) the most recent credits, after check validity */ + if (rep->rr_len >= 16) { + struct rpcrdma_msg *p = + (struct rpcrdma_msg *) rep->rr_base; + unsigned int credits = ntohl(p->rm_credit); + if (credits == 0) { + dprintk("RPC: %s: server" + " dropped credits to 0!\n", __func__); + /* don't deadlock */ + credits = 1; + } else if (credits > rep->rr_buffer->rb_max_requests) { + dprintk("RPC: %s: server" + " over-crediting: %d (%d)\n", + __func__, credits, + rep->rr_buffer->rb_max_requests); + credits = rep->rr_buffer->rb_max_requests; + } + atomic_set(&rep->rr_buffer->rb_credits, credits); + } + /* fall through */ + case IB_WC_BIND_MW: + rpcrdma_schedule_tasklet(rep); + break; + default: + dprintk("RPC: %s: unexpected WC event %X\n", + __func__, wc->opcode); + break; + } +} + +static inline int +rpcrdma_cq_poll(struct ib_cq *cq) +{ + struct ib_wc wc; + int rc; + + for (;;) { + rc = ib_poll_cq(cq, 1, &wc); + if (rc < 0) { + dprintk("RPC: %s: ib_poll_cq failed %i\n", + __func__, rc); + return rc; + } + if (rc == 0) + break; + + rpcrdma_event_process(&wc); + } + + return 0; +} + +/* + * rpcrdma_cq_event_upcall + * + * This upcall handles recv, send, bind and unbind events. + * It is reentrant but processes single events in order to maintain + * ordering of receives to keep server credits. + * + * It is the responsibility of the scheduled tasklet to return + * recv buffers to the pool. NOTE: this affects synchronization of + * connection shutdown. That is, the structures required for + * the completion of the reply handler must remain intact until + * all memory has been reclaimed. + * + * Note that send events are suppressed and do not result in an upcall. + */ +static void +rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) +{ + int rc; + + rc = rpcrdma_cq_poll(cq); + if (rc) + return; + + rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + if (rc) { + dprintk("RPC: %s: ib_req_notify_cq failed %i\n", + __func__, rc); + return; + } + + rpcrdma_cq_poll(cq); +} + +#ifdef RPC_DEBUG +static const char * const conn[] = { + "address resolved", + "address error", + "route resolved", + "route error", + "connect request", + "connect response", + "connect error", + "unreachable", + "rejected", + "established", + "disconnected", + "device removal" +}; +#endif + +static int +rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) +{ + struct rpcrdma_xprt *xprt = id->context; + struct rpcrdma_ia *ia = &xprt->rx_ia; + struct rpcrdma_ep *ep = &xprt->rx_ep; + struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; + struct ib_qp_attr attr; + struct ib_qp_init_attr iattr; + int connstate = 0; + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + case RDMA_CM_EVENT_ROUTE_RESOLVED: + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ADDR_ERROR: + ia->ri_async_rc = -EHOSTUNREACH; + dprintk("RPC: %s: CM address resolution error, ep 0x%p\n", + __func__, ep); + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ROUTE_ERROR: + ia->ri_async_rc = -ENETUNREACH; + dprintk("RPC: %s: CM route resolution error, ep 0x%p\n", + __func__, ep); + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ESTABLISHED: + connstate = 1; + ib_query_qp(ia->ri_id->qp, &attr, + IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, + &iattr); + dprintk("RPC: %s: %d responder resources" + " (%d initiator)\n", + __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic); + goto connected; + case RDMA_CM_EVENT_CONNECT_ERROR: + connstate = -ENOTCONN; + goto connected; + case RDMA_CM_EVENT_UNREACHABLE: + connstate = -ENETDOWN; + goto connected; + case RDMA_CM_EVENT_REJECTED: + connstate = -ECONNREFUSED; + goto connected; + case RDMA_CM_EVENT_DISCONNECTED: + connstate = -ECONNABORTED; + goto connected; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + connstate = -ENODEV; +connected: + dprintk("RPC: %s: %s: %u.%u.%u.%u:%u" + " (ep 0x%p event 0x%x)\n", + __func__, + (event->event <= 11) ? conn[event->event] : + "unknown connection error", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + ep, event->event); + atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); + dprintk("RPC: %s: %sconnected\n", + __func__, connstate > 0 ? "" : "dis"); + ep->rep_connected = connstate; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + break; + default: + ia->ri_async_rc = -EINVAL; + dprintk("RPC: %s: unexpected CM event %X\n", + __func__, event->event); + complete(&ia->ri_done); + break; + } + + return 0; +} + +static struct rdma_cm_id * +rpcrdma_create_id(struct rpcrdma_xprt *xprt, + struct rpcrdma_ia *ia, struct sockaddr *addr) +{ + struct rdma_cm_id *id; + int rc; + + id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); + if (IS_ERR(id)) { + rc = PTR_ERR(id); + dprintk("RPC: %s: rdma_create_id() failed %i\n", + __func__, rc); + return id; + } + + ia->ri_async_rc = 0; + rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); + if (rc) { + dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", + __func__, rc); + goto out; + } + wait_for_completion(&ia->ri_done); + rc = ia->ri_async_rc; + if (rc) + goto out; + + ia->ri_async_rc = 0; + rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); + if (rc) { + dprintk("RPC: %s: rdma_resolve_route() failed %i\n", + __func__, rc); + goto out; + } + wait_for_completion(&ia->ri_done); + rc = ia->ri_async_rc; + if (rc) + goto out; + + return id; + +out: + rdma_destroy_id(id); + return ERR_PTR(rc); +} + +/* + * Drain any cq, prior to teardown. + */ +static void +rpcrdma_clean_cq(struct ib_cq *cq) +{ + struct ib_wc wc; + int count = 0; + + while (1 == ib_poll_cq(cq, 1, &wc)) + ++count; + + if (count) + dprintk("RPC: %s: flushed %d events (last 0x%x)\n", + __func__, count, wc.opcode); +} + +/* + * Exported functions. + */ + +/* + * Open and initialize an Interface Adapter. + * o initializes fields of struct rpcrdma_ia, including + * interface and provider attributes and protection zone. + */ +int +rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) +{ + int rc; + struct rpcrdma_ia *ia = &xprt->rx_ia; + + init_completion(&ia->ri_done); + + ia->ri_id = rpcrdma_create_id(xprt, ia, addr); + if (IS_ERR(ia->ri_id)) { + rc = PTR_ERR(ia->ri_id); + goto out1; + } + + ia->ri_pd = ib_alloc_pd(ia->ri_id->device); + if (IS_ERR(ia->ri_pd)) { + rc = PTR_ERR(ia->ri_pd); + dprintk("RPC: %s: ib_alloc_pd() failed %i\n", + __func__, rc); + goto out2; + } + + /* + * Optionally obtain an underlying physical identity mapping in + * order to do a memory window-based bind. This base registration + * is protected from remote access - that is enabled only by binding + * for the specific bytes targeted during each RPC operation, and + * revoked after the corresponding completion similar to a storage + * adapter. + */ + if (memreg > RPCRDMA_REGISTER) { + int mem_priv = IB_ACCESS_LOCAL_WRITE; + switch (memreg) { +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + mem_priv |= IB_ACCESS_REMOTE_WRITE; + mem_priv |= IB_ACCESS_REMOTE_READ; + break; +#endif + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + mem_priv |= IB_ACCESS_MW_BIND; + break; + default: + break; + } + ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); + if (IS_ERR(ia->ri_bind_mem)) { + printk(KERN_ALERT "%s: ib_get_dma_mr for " + "phys register failed with %lX\n\t" + "Will continue with degraded performance\n", + __func__, PTR_ERR(ia->ri_bind_mem)); + memreg = RPCRDMA_REGISTER; + ia->ri_bind_mem = NULL; + } + } + + /* Else will do memory reg/dereg for each chunk */ + ia->ri_memreg_strategy = memreg; + + return 0; +out2: + rdma_destroy_id(ia->ri_id); +out1: + return rc; +} + +/* + * Clean up/close an IA. + * o if event handles and PD have been initialized, free them. + * o close the IA + */ +void +rpcrdma_ia_close(struct rpcrdma_ia *ia) +{ + int rc; + + dprintk("RPC: %s: entering\n", __func__); + if (ia->ri_bind_mem != NULL) { + rc = ib_dereg_mr(ia->ri_bind_mem); + dprintk("RPC: %s: ib_dereg_mr returned %i\n", + __func__, rc); + } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) + rdma_destroy_qp(ia->ri_id); + if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { + rc = ib_dealloc_pd(ia->ri_pd); + dprintk("RPC: %s: ib_dealloc_pd returned %i\n", + __func__, rc); + } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) + rdma_destroy_id(ia->ri_id); +} + +/* + * Create unconnected endpoint. + */ +int +rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + struct rpcrdma_create_data_internal *cdata) +{ + struct ib_device_attr devattr; + int rc; + + rc = ib_query_device(ia->ri_id->device, &devattr); + if (rc) { + dprintk("RPC: %s: ib_query_device failed %d\n", + __func__, rc); + return rc; + } + + /* check provider's send/recv wr limits */ + if (cdata->max_requests > devattr.max_qp_wr) + cdata->max_requests = devattr.max_qp_wr; + + ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; + ep->rep_attr.qp_context = ep; + /* send_cq and recv_cq initialized below */ + ep->rep_attr.srq = NULL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + /* Add room for mw_binds+unbinds - overkill! */ + ep->rep_attr.cap.max_send_wr++; + ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) + return -EINVAL; + break; + default: + break; + } + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); + ep->rep_attr.cap.max_recv_sge = 1; + ep->rep_attr.cap.max_inline_data = 0; + ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + ep->rep_attr.qp_type = IB_QPT_RC; + ep->rep_attr.port_num = ~0; + + dprintk("RPC: %s: requested max: dtos: send %d recv %d; " + "iovs: send %d recv %d\n", + __func__, + ep->rep_attr.cap.max_send_wr, + ep->rep_attr.cap.max_recv_wr, + ep->rep_attr.cap.max_send_sge, + ep->rep_attr.cap.max_recv_sge); + + /* set trigger for requesting send completion */ + ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + ep->rep_cqinit -= RPCRDMA_MAX_SEGS; + break; + default: + break; + } + if (ep->rep_cqinit <= 2) + ep->rep_cqinit = 0; + INIT_CQCOUNT(ep); + ep->rep_ia = ia; + init_waitqueue_head(&ep->rep_connect_wait); + + /* + * Create a single cq for receive dto and mw_bind (only ever + * care about unbind, really). Send completions are suppressed. + * Use single threaded tasklet upcalls to maintain ordering. + */ + ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, + rpcrdma_cq_async_error_upcall, NULL, + ep->rep_attr.cap.max_recv_wr + + ep->rep_attr.cap.max_send_wr + 1, 0); + if (IS_ERR(ep->rep_cq)) { + rc = PTR_ERR(ep->rep_cq); + dprintk("RPC: %s: ib_create_cq failed: %i\n", + __func__, rc); + goto out1; + } + + rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); + if (rc) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + goto out2; + } + + ep->rep_attr.send_cq = ep->rep_cq; + ep->rep_attr.recv_cq = ep->rep_cq; + + /* Initialize cma parameters */ + + /* RPC/RDMA does not use private data */ + ep->rep_remote_cma.private_data = NULL; + ep->rep_remote_cma.private_data_len = 0; + + /* Client offers RDMA Read but does not initiate */ + switch (ia->ri_memreg_strategy) { + case RPCRDMA_BOUNCEBUFFERS: + ep->rep_remote_cma.responder_resources = 0; + break; + case RPCRDMA_MTHCAFMR: + case RPCRDMA_REGISTER: + ep->rep_remote_cma.responder_resources = cdata->max_requests * + (RPCRDMA_MAX_DATA_SEGS / 8); + break; + case RPCRDMA_MEMWINDOWS: + case RPCRDMA_MEMWINDOWS_ASYNC: +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: +#endif + ep->rep_remote_cma.responder_resources = cdata->max_requests * + (RPCRDMA_MAX_DATA_SEGS / 2); + break; + default: + break; + } + if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) + ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; + ep->rep_remote_cma.initiator_depth = 0; + + ep->rep_remote_cma.retry_count = 7; + ep->rep_remote_cma.flow_control = 0; + ep->rep_remote_cma.rnr_retry_count = 0; + + return 0; + +out2: + if (ib_destroy_cq(ep->rep_cq)) + ; +out1: + return rc; +} + +/* + * rpcrdma_ep_destroy + * + * Disconnect and destroy endpoint. After this, the only + * valid operations on the ep are to free it (if dynamically + * allocated) or re-create it. + * + * The caller's error handling must be sure to not leak the endpoint + * if this function fails. + */ +int +rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + int rc; + + dprintk("RPC: %s: entering, connected is %d\n", + __func__, ep->rep_connected); + + if (ia->ri_id->qp) { + rc = rpcrdma_ep_disconnect(ep, ia); + if (rc) + dprintk("RPC: %s: rpcrdma_ep_disconnect" + " returned %i\n", __func__, rc); + } + + ep->rep_func = NULL; + + /* padding - could be done in rpcrdma_buffer_destroy... */ + if (ep->rep_pad_mr) { + rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); + ep->rep_pad_mr = NULL; + } + + if (ia->ri_id->qp) { + rdma_destroy_qp(ia->ri_id); + ia->ri_id->qp = NULL; + } + + rpcrdma_clean_cq(ep->rep_cq); + rc = ib_destroy_cq(ep->rep_cq); + if (rc) + dprintk("RPC: %s: ib_destroy_cq returned %i\n", + __func__, rc); + + return rc; +} + +/* + * Connect unconnected endpoint. + */ +int +rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + struct rdma_cm_id *id; + int rc = 0; + int retry_count = 0; + int reconnect = (ep->rep_connected != 0); + + if (reconnect) { + struct rpcrdma_xprt *xprt; +retry: + rc = rpcrdma_ep_disconnect(ep, ia); + if (rc && rc != -ENOTCONN) + dprintk("RPC: %s: rpcrdma_ep_disconnect" + " status %i\n", __func__, rc); + rpcrdma_clean_cq(ep->rep_cq); + + xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); + id = rpcrdma_create_id(xprt, ia, + (struct sockaddr *)&xprt->rx_data.addr); + if (IS_ERR(id)) { + rc = PTR_ERR(id); + goto out; + } + /* TEMP TEMP TEMP - fail if new device: + * Deregister/remarshal *all* requests! + * Close and recreate adapter, pd, etc! + * Re-determine all attributes still sane! + * More stuff I haven't thought of! + * Rrrgh! + */ + if (ia->ri_id->device != id->device) { + printk("RPC: %s: can't reconnect on " + "different device!\n", __func__); + rdma_destroy_id(id); + rc = -ENETDOWN; + goto out; + } + /* END TEMP */ + rdma_destroy_id(ia->ri_id); + ia->ri_id = id; + } + + rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); + if (rc) { + dprintk("RPC: %s: rdma_create_qp failed %i\n", + __func__, rc); + goto out; + } + +/* XXX Tavor device performs badly with 2K MTU! */ +if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { + struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); + if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && + (pcid->vendor == PCI_VENDOR_ID_MELLANOX || + pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { + struct ib_qp_attr attr = { + .path_mtu = IB_MTU_1024 + }; + rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); + } +} + + /* Theoretically a client initiator_depth > 0 is not needed, + * but many peers fail to complete the connection unless they + * == responder_resources! */ + if (ep->rep_remote_cma.initiator_depth != + ep->rep_remote_cma.responder_resources) + ep->rep_remote_cma.initiator_depth = + ep->rep_remote_cma.responder_resources; + + ep->rep_connected = 0; + + rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); + if (rc) { + dprintk("RPC: %s: rdma_connect() failed with %i\n", + __func__, rc); + goto out; + } + + if (reconnect) + return 0; + + wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); + + /* + * Check state. A non-peer reject indicates no listener + * (ECONNREFUSED), which may be a transient state. All + * others indicate a transport condition which has already + * undergone a best-effort. + */ + if (ep->rep_connected == -ECONNREFUSED + && ++retry_count <= RDMA_CONNECT_RETRY_MAX) { + dprintk("RPC: %s: non-peer_reject, retry\n", __func__); + goto retry; + } + if (ep->rep_connected <= 0) { + /* Sometimes, the only way to reliably connect to remote + * CMs is to use same nonzero values for ORD and IRD. */ + ep->rep_remote_cma.initiator_depth = + ep->rep_remote_cma.responder_resources; + if (ep->rep_remote_cma.initiator_depth == 0) + ++ep->rep_remote_cma.initiator_depth; + if (ep->rep_remote_cma.responder_resources == 0) + ++ep->rep_remote_cma.responder_resources; + if (retry_count++ == 0) + goto retry; + rc = ep->rep_connected; + } else { + dprintk("RPC: %s: connected\n", __func__); + } + +out: + if (rc) + ep->rep_connected = rc; + return rc; +} + +/* + * rpcrdma_ep_disconnect + * + * This is separate from destroy to facilitate the ability + * to reconnect without recreating the endpoint. + * + * This call is not reentrant, and must not be made in parallel + * on the same endpoint. + */ +int +rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + int rc; + + rpcrdma_clean_cq(ep->rep_cq); + rc = rdma_disconnect(ia->ri_id); + if (!rc) { + /* returns without wait if not connected */ + wait_event_interruptible(ep->rep_connect_wait, + ep->rep_connected != 1); + dprintk("RPC: %s: after wait, %sconnected\n", __func__, + (ep->rep_connected == 1) ? "still " : "dis"); + } else { + dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); + ep->rep_connected = rc; + } + return rc; +} + +/* + * Initialize buffer memory + */ +int +rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) +{ + char *p; + size_t len; + int i, rc; + + buf->rb_max_requests = cdata->max_requests; + spin_lock_init(&buf->rb_lock); + atomic_set(&buf->rb_credits, 1); + + /* Need to allocate: + * 1. arrays for send and recv pointers + * 2. arrays of struct rpcrdma_req to fill in pointers + * 3. array of struct rpcrdma_rep for replies + * 4. padding, if any + * 5. mw's, if any + * Send/recv buffers in req/rep need to be registered + */ + + len = buf->rb_max_requests * + (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); + len += cdata->padding; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + /* TBD we are perhaps overallocating here */ + len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; + default: + break; + } + + /* allocate 1, 4 and 5 in one shot */ + p = kzalloc(len, GFP_KERNEL); + if (p == NULL) { + dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", + __func__, len); + rc = -ENOMEM; + goto out; + } + buf->rb_pool = p; /* for freeing it later */ + + buf->rb_send_bufs = (struct rpcrdma_req **) p; + p = (char *) &buf->rb_send_bufs[buf->rb_max_requests]; + buf->rb_recv_bufs = (struct rpcrdma_rep **) p; + p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; + + /* + * Register the zeroed pad buffer, if any. + */ + if (cdata->padding) { + rc = rpcrdma_register_internal(ia, p, cdata->padding, + &ep->rep_pad_mr, &ep->rep_pad); + if (rc) + goto out; + } + p += cdata->padding; + + /* + * Allocate the fmr's, or mw's for mw_bind chunk registration. + * We "cycle" the mw's in order to minimize rkey reuse, + * and also reduce unbind-to-bind collision. + */ + INIT_LIST_HEAD(&buf->rb_mws); + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + { + struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; + struct ib_fmr_attr fa = { + RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT + }; + /* TBD we are perhaps overallocating here */ + for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { + r->r.fmr = ib_alloc_fmr(ia->ri_pd, + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, + &fa); + if (IS_ERR(r->r.fmr)) { + rc = PTR_ERR(r->r.fmr); + dprintk("RPC: %s: ib_alloc_fmr" + " failed %i\n", __func__, rc); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); + ++r; + } + } + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; + /* Allocate one extra request's worth, for full cycling */ + for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { + r->r.mw = ib_alloc_mw(ia->ri_pd); + if (IS_ERR(r->r.mw)) { + rc = PTR_ERR(r->r.mw); + dprintk("RPC: %s: ib_alloc_mw" + " failed %i\n", __func__, rc); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); + ++r; + } + } + break; + default: + break; + } + + /* + * Allocate/init the request/reply buffers. Doing this + * using kmalloc for now -- one for each buf. + */ + for (i = 0; i < buf->rb_max_requests; i++) { + struct rpcrdma_req *req; + struct rpcrdma_rep *rep; + + len = cdata->inline_wsize + sizeof(struct rpcrdma_req); + /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */ + /* Typical ~2400b, so rounding up saves work later */ + if (len < 4096) + len = 4096; + req = kmalloc(len, GFP_KERNEL); + if (req == NULL) { + dprintk("RPC: %s: request buffer %d alloc" + " failed\n", __func__, i); + rc = -ENOMEM; + goto out; + } + memset(req, 0, sizeof(struct rpcrdma_req)); + buf->rb_send_bufs[i] = req; + buf->rb_send_bufs[i]->rl_buffer = buf; + + rc = rpcrdma_register_internal(ia, req->rl_base, + len - offsetof(struct rpcrdma_req, rl_base), + &buf->rb_send_bufs[i]->rl_handle, + &buf->rb_send_bufs[i]->rl_iov); + if (rc) + goto out; + + buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); + + len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); + rep = kmalloc(len, GFP_KERNEL); + if (rep == NULL) { + dprintk("RPC: %s: reply buffer %d alloc failed\n", + __func__, i); + rc = -ENOMEM; + goto out; + } + memset(rep, 0, sizeof(struct rpcrdma_rep)); + buf->rb_recv_bufs[i] = rep; + buf->rb_recv_bufs[i]->rr_buffer = buf; + init_waitqueue_head(&rep->rr_unbind); + + rc = rpcrdma_register_internal(ia, rep->rr_base, + len - offsetof(struct rpcrdma_rep, rr_base), + &buf->rb_recv_bufs[i]->rr_handle, + &buf->rb_recv_bufs[i]->rr_iov); + if (rc) + goto out; + + } + dprintk("RPC: %s: max_requests %d\n", + __func__, buf->rb_max_requests); + /* done */ + return 0; +out: + rpcrdma_buffer_destroy(buf); + return rc; +} + +/* + * Unregister and destroy buffer memory. Need to deal with + * partial initialization, so it's callable from failed create. + * Must be called before destroying endpoint, as registrations + * reference it. + */ +void +rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) +{ + int rc, i; + struct rpcrdma_ia *ia = rdmab_to_ia(buf); + + /* clean up in reverse order from create + * 1. recv mr memory (mr free, then kfree) + * 1a. bind mw memory + * 2. send mr memory (mr free, then kfree) + * 3. padding (if any) [moved to rpcrdma_ep_destroy] + * 4. arrays + */ + dprintk("RPC: %s: entering\n", __func__); + + for (i = 0; i < buf->rb_max_requests; i++) { + if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { + rpcrdma_deregister_internal(ia, + buf->rb_recv_bufs[i]->rr_handle, + &buf->rb_recv_bufs[i]->rr_iov); + kfree(buf->rb_recv_bufs[i]); + } + if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { + while (!list_empty(&buf->rb_mws)) { + struct rpcrdma_mw *r; + r = list_entry(buf->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + rc = ib_dealloc_fmr(r->r.fmr); + if (rc) + dprintk("RPC: %s:" + " ib_dealloc_fmr" + " failed %i\n", + __func__, rc); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + rc = ib_dealloc_mw(r->r.mw); + if (rc) + dprintk("RPC: %s:" + " ib_dealloc_mw" + " failed %i\n", + __func__, rc); + break; + default: + break; + } + } + rpcrdma_deregister_internal(ia, + buf->rb_send_bufs[i]->rl_handle, + &buf->rb_send_bufs[i]->rl_iov); + kfree(buf->rb_send_bufs[i]); + } + } + + kfree(buf->rb_pool); +} + +/* + * Get a set of request/reply buffers. + * + * Reply buffer (if needed) is attached to send buffer upon return. + * Rule: + * rb_send_index and rb_recv_index MUST always be pointing to the + * *next* available buffer (non-NULL). They are incremented after + * removing buffers, and decremented *before* returning them. + */ +struct rpcrdma_req * +rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) +{ + struct rpcrdma_req *req; + unsigned long flags; + + spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_send_index == buffers->rb_max_requests) { + spin_unlock_irqrestore(&buffers->rb_lock, flags); + dprintk("RPC: %s: out of request buffers\n", __func__); + return ((struct rpcrdma_req *)NULL); + } + + req = buffers->rb_send_bufs[buffers->rb_send_index]; + if (buffers->rb_send_index < buffers->rb_recv_index) { + dprintk("RPC: %s: %d extra receives outstanding (ok)\n", + __func__, + buffers->rb_recv_index - buffers->rb_send_index); + req->rl_reply = NULL; + } else { + req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; + buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; + } + buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; + if (!list_empty(&buffers->rb_mws)) { + int i = RPCRDMA_MAX_SEGS - 1; + do { + struct rpcrdma_mw *r; + r = list_entry(buffers->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + req->rl_segments[i].mr_chunk.rl_mw = r; + } while (--i >= 0); + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); + return req; +} + +/* + * Put request/reply buffers back into pool. + * Pre-decrement counter/array index. + */ +void +rpcrdma_buffer_put(struct rpcrdma_req *req) +{ + struct rpcrdma_buffer *buffers = req->rl_buffer; + struct rpcrdma_ia *ia = rdmab_to_ia(buffers); + int i; + unsigned long flags; + + BUG_ON(req->rl_nchunks != 0); + spin_lock_irqsave(&buffers->rb_lock, flags); + buffers->rb_send_bufs[--buffers->rb_send_index] = req; + req->rl_niovs = 0; + if (req->rl_reply) { + buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; + init_waitqueue_head(&req->rl_reply->rr_unbind); + req->rl_reply->rr_func = NULL; + req->rl_reply = NULL; + } + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + /* + * Cycle mw's back in reverse order, and "spin" them. + * This delays and scrambles reuse as much as possible. + */ + i = 1; + do { + struct rpcrdma_mw **mw; + mw = &req->rl_segments[i].mr_chunk.rl_mw; + list_add_tail(&(*mw)->mw_list, &buffers->rb_mws); + *mw = NULL; + } while (++i < RPCRDMA_MAX_SEGS); + list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list, + &buffers->rb_mws); + req->rl_segments[0].mr_chunk.rl_mw = NULL; + break; + default: + break; + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Recover reply buffers from pool. + * This happens when recovering from error conditions. + * Post-increment counter/array index. + */ +void +rpcrdma_recv_buffer_get(struct rpcrdma_req *req) +{ + struct rpcrdma_buffer *buffers = req->rl_buffer; + unsigned long flags; + + if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */ + buffers = ((struct rpcrdma_req *) buffers)->rl_buffer; + spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_recv_index < buffers->rb_max_requests) { + req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; + buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Put reply buffers back into pool when not attached to + * request. This happens in error conditions, and when + * aborting unbinds. Pre-decrement counter/array index. + */ +void +rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) +{ + struct rpcrdma_buffer *buffers = rep->rr_buffer; + unsigned long flags; + + rep->rr_func = NULL; + spin_lock_irqsave(&buffers->rb_lock, flags); + buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Wrappers for internal-use kmalloc memory registration, used by buffer code. + */ + +int +rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, + struct ib_mr **mrp, struct ib_sge *iov) +{ + struct ib_phys_buf ipb; + struct ib_mr *mr; + int rc; + + /* + * All memory passed here was kmalloc'ed, therefore phys-contiguous. + */ + iov->addr = ib_dma_map_single(ia->ri_id->device, + va, len, DMA_BIDIRECTIONAL); + iov->length = len; + + if (ia->ri_bind_mem != NULL) { + *mrp = NULL; + iov->lkey = ia->ri_bind_mem->lkey; + return 0; + } + + ipb.addr = iov->addr; + ipb.size = iov->length; + mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1, + IB_ACCESS_LOCAL_WRITE, &iov->addr); + + dprintk("RPC: %s: phys convert: 0x%llx " + "registered 0x%llx length %d\n", + __func__, (unsigned long long)ipb.addr, + (unsigned long long)iov->addr, len); + + if (IS_ERR(mr)) { + *mrp = NULL; + rc = PTR_ERR(mr); + dprintk("RPC: %s: failed with %i\n", __func__, rc); + } else { + *mrp = mr; + iov->lkey = mr->lkey; + rc = 0; + } + + return rc; +} + +int +rpcrdma_deregister_internal(struct rpcrdma_ia *ia, + struct ib_mr *mr, struct ib_sge *iov) +{ + int rc; + + ib_dma_unmap_single(ia->ri_id->device, + iov->addr, iov->length, DMA_BIDIRECTIONAL); + + if (NULL == mr) + return 0; + + rc = ib_dereg_mr(mr); + if (rc) + dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc); + return rc; +} + +/* + * Wrappers for chunk registration, shared by read/write chunk code. + */ + +static void +rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) +{ + seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + seg->mr_dmalen = seg->mr_len; + if (seg->mr_page) + seg->mr_dma = ib_dma_map_page(ia->ri_id->device, + seg->mr_page, offset_in_page(seg->mr_offset), + seg->mr_dmalen, seg->mr_dir); + else + seg->mr_dma = ib_dma_map_single(ia->ri_id->device, + seg->mr_offset, + seg->mr_dmalen, seg->mr_dir); +} + +static void +rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) +{ + if (seg->mr_page) + ib_dma_unmap_page(ia->ri_id->device, + seg->mr_dma, seg->mr_dmalen, seg->mr_dir); + else + ib_dma_unmap_single(ia->ri_id->device, + seg->mr_dma, seg->mr_dmalen, seg->mr_dir); +} + +int +rpcrdma_register_external(struct rpcrdma_mr_seg *seg, + int nsegs, int writing, struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : + IB_ACCESS_REMOTE_READ); + struct rpcrdma_mr_seg *seg1 = seg; + int i; + int rc = 0; + + switch (ia->ri_memreg_strategy) { + +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + rpcrdma_map_one(ia, seg, writing); + seg->mr_rkey = ia->ri_bind_mem->rkey; + seg->mr_base = seg->mr_dma; + seg->mr_nsegs = 1; + nsegs = 1; + break; +#endif + + /* Registration using fast memory registration */ + case RPCRDMA_MTHCAFMR: + { + u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; + int len, pageoff = offset_in_page(seg->mr_offset); + seg1->mr_offset -= pageoff; /* start of page */ + seg1->mr_len += pageoff; + len = -pageoff; + if (nsegs > RPCRDMA_MAX_DATA_SEGS) + nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < nsegs;) { + rpcrdma_map_one(ia, seg, writing); + physaddrs[i] = seg->mr_dma; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) + break; + } + nsegs = i; + rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, + physaddrs, nsegs, seg1->mr_dma); + if (rc) { + dprintk("RPC: %s: failed ib_map_phys_fmr " + "%u@0x%llx+%i (%d)... status %i\n", __func__, + len, (unsigned long long)seg1->mr_dma, + pageoff, nsegs, rc); + while (nsegs--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; + seg1->mr_base = seg1->mr_dma + pageoff; + seg1->mr_nsegs = nsegs; + seg1->mr_len = len; + } + } + break; + + /* Registration using memory windows */ + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct ib_mw_bind param; + rpcrdma_map_one(ia, seg, writing); + param.mr = ia->ri_bind_mem; + param.wr_id = 0ULL; /* no send cookie */ + param.addr = seg->mr_dma; + param.length = seg->mr_len; + param.send_flags = 0; + param.mw_access_flags = mem_priv; + + DECR_CQCOUNT(&r_xprt->rx_ep); + rc = ib_bind_mw(ia->ri_id->qp, + seg->mr_chunk.rl_mw->r.mw, ¶m); + if (rc) { + dprintk("RPC: %s: failed ib_bind_mw " + "%u@0x%llx status %i\n", + __func__, seg->mr_len, + (unsigned long long)seg->mr_dma, rc); + rpcrdma_unmap_one(ia, seg); + } else { + seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; + seg->mr_base = param.addr; + seg->mr_nsegs = 1; + nsegs = 1; + } + } + break; + + /* Default registration each time */ + default: + { + struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; + int len = 0; + if (nsegs > RPCRDMA_MAX_DATA_SEGS) + nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < nsegs;) { + rpcrdma_map_one(ia, seg, writing); + ipb[i].addr = seg->mr_dma; + ipb[i].size = seg->mr_len; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) + break; + } + nsegs = i; + seg1->mr_base = seg1->mr_dma; + seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, + ipb, nsegs, mem_priv, &seg1->mr_base); + if (IS_ERR(seg1->mr_chunk.rl_mr)) { + rc = PTR_ERR(seg1->mr_chunk.rl_mr); + dprintk("RPC: %s: failed ib_reg_phys_mr " + "%u@0x%llx (%d)... status %i\n", + __func__, len, + (unsigned long long)seg1->mr_dma, nsegs, rc); + while (nsegs--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; + seg1->mr_nsegs = nsegs; + seg1->mr_len = len; + } + } + break; + } + if (rc) + return -1; + + return nsegs; +} + +int +rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_xprt *r_xprt, void *r) +{ + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_mr_seg *seg1 = seg; + int nsegs = seg->mr_nsegs, rc; + + switch (ia->ri_memreg_strategy) { + +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + BUG_ON(nsegs != 1); + rpcrdma_unmap_one(ia, seg); + rc = 0; + break; +#endif + + case RPCRDMA_MTHCAFMR: + { + LIST_HEAD(l); + list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); + rc = ib_unmap_fmr(&l); + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + } + if (rc) + dprintk("RPC: %s: failed ib_unmap_fmr," + " status %i\n", __func__, rc); + break; + + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct ib_mw_bind param; + BUG_ON(nsegs != 1); + param.mr = ia->ri_bind_mem; + param.addr = 0ULL; /* unbind */ + param.length = 0; + param.mw_access_flags = 0; + if (r) { + param.wr_id = (u64) (unsigned long) r; + param.send_flags = IB_SEND_SIGNALED; + INIT_CQCOUNT(&r_xprt->rx_ep); + } else { + param.wr_id = 0ULL; + param.send_flags = 0; + DECR_CQCOUNT(&r_xprt->rx_ep); + } + rc = ib_bind_mw(ia->ri_id->qp, + seg->mr_chunk.rl_mw->r.mw, ¶m); + rpcrdma_unmap_one(ia, seg); + } + if (rc) + dprintk("RPC: %s: failed ib_(un)bind_mw," + " status %i\n", __func__, rc); + else + r = NULL; /* will upcall on completion */ + break; + + default: + rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); + seg1->mr_chunk.rl_mr = NULL; + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + if (rc) + dprintk("RPC: %s: failed ib_dereg_mr," + " status %i\n", __func__, rc); + break; + } + if (r) { + struct rpcrdma_rep *rep = r; + void (*func)(struct rpcrdma_rep *) = rep->rr_func; + rep->rr_func = NULL; + func(rep); /* dereg done, callback now */ + } + return nsegs; +} + +/* + * Prepost any receive buffer, then post send. + * + * Receive buffer is donated to hardware, reclaimed upon recv completion. + */ +int +rpcrdma_ep_post(struct rpcrdma_ia *ia, + struct rpcrdma_ep *ep, + struct rpcrdma_req *req) +{ + struct ib_send_wr send_wr, *send_wr_fail; + struct rpcrdma_rep *rep = req->rl_reply; + int rc; + + if (rep) { + rc = rpcrdma_ep_post_recv(ia, ep, rep); + if (rc) + goto out; + req->rl_reply = NULL; + } + + send_wr.next = NULL; + send_wr.wr_id = 0ULL; /* no send cookie */ + send_wr.sg_list = req->rl_send_iov; + send_wr.num_sge = req->rl_niovs; + send_wr.opcode = IB_WR_SEND; + send_wr.imm_data = 0; + if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[3].addr, req->rl_send_iov[3].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[1].addr, req->rl_send_iov[1].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[0].addr, req->rl_send_iov[0].length, + DMA_TO_DEVICE); + + if (DECR_CQCOUNT(ep) > 0) + send_wr.send_flags = 0; + else { /* Provider must take a send completion every now and then */ + INIT_CQCOUNT(ep); + send_wr.send_flags = IB_SEND_SIGNALED; + } + + rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); + if (rc) + dprintk("RPC: %s: ib_post_send returned %i\n", __func__, + rc); +out: + return rc; +} + +/* + * (Re)post a receive buffer. + */ +int +rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, + struct rpcrdma_ep *ep, + struct rpcrdma_rep *rep) +{ + struct ib_recv_wr recv_wr, *recv_wr_fail; + int rc; + + recv_wr.next = NULL; + recv_wr.wr_id = (u64) (unsigned long) rep; + recv_wr.sg_list = &rep->rr_iov; + recv_wr.num_sge = 1; + + ib_dma_sync_single_for_cpu(ia->ri_id->device, + rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + + DECR_CQCOUNT(ep); + rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); + + if (rc) + dprintk("RPC: %s: ib_post_recv returned %i\n", __func__, + rc); + return rc; +} diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h new file mode 100644 index 00000000000..2427822f8bd --- /dev/null +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_XPRT_RDMA_H +#define _LINUX_SUNRPC_XPRT_RDMA_H + +#include <linux/wait.h> /* wait_queue_head_t, etc */ +#include <linux/spinlock.h> /* spinlock_t, etc */ +#include <asm/atomic.h> /* atomic_t, etc */ + +#include <rdma/rdma_cm.h> /* RDMA connection api */ +#include <rdma/ib_verbs.h> /* RDMA verbs api */ + +#include <linux/sunrpc/clnt.h> /* rpc_xprt */ +#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ +#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ + +/* + * Interface Adapter -- one per transport instance + */ +struct rpcrdma_ia { + struct rdma_cm_id *ri_id; + struct ib_pd *ri_pd; + struct ib_mr *ri_bind_mem; + struct completion ri_done; + int ri_async_rc; + enum rpcrdma_memreg ri_memreg_strategy; +}; + +/* + * RDMA Endpoint -- one per transport instance + */ + +struct rpcrdma_ep { + atomic_t rep_cqcount; + int rep_cqinit; + int rep_connected; + struct rpcrdma_ia *rep_ia; + struct ib_cq *rep_cq; + struct ib_qp_init_attr rep_attr; + wait_queue_head_t rep_connect_wait; + struct ib_sge rep_pad; /* holds zeroed pad */ + struct ib_mr *rep_pad_mr; /* holds zeroed pad */ + void (*rep_func)(struct rpcrdma_ep *); + struct rpc_xprt *rep_xprt; /* for rep_func */ + struct rdma_conn_param rep_remote_cma; + struct sockaddr_storage rep_remote_addr; +}; + +#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) +#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) + +/* + * struct rpcrdma_rep -- this structure encapsulates state required to recv + * and complete a reply, asychronously. It needs several pieces of + * state: + * o recv buffer (posted to provider) + * o ib_sge (also donated to provider) + * o status of reply (length, success or not) + * o bookkeeping state to get run by tasklet (list, etc) + * + * These are allocated during initialization, per-transport instance; + * however, the tasklet execution list itself is global, as it should + * always be pretty short. + * + * N of these are associated with a transport instance, and stored in + * struct rpcrdma_buffer. N is the max number of outstanding requests. + */ + +/* temporary static scatter/gather max */ +#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ +#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ +#define MAX_RPCRDMAHDR (\ + /* max supported RPC/RDMA header */ \ + sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \ + (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32)) + +struct rpcrdma_buffer; + +struct rpcrdma_rep { + unsigned int rr_len; /* actual received reply length */ + struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ + struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ + void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ + struct list_head rr_list; /* tasklet list */ + wait_queue_head_t rr_unbind; /* optional unbind wait */ + struct ib_sge rr_iov; /* for posting */ + struct ib_mr *rr_handle; /* handle for mem in rr_iov */ + char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ +}; + +/* + * struct rpcrdma_req -- structure central to the request/reply sequence. + * + * N of these are associated with a transport instance, and stored in + * struct rpcrdma_buffer. N is the max number of outstanding requests. + * + * It includes pre-registered buffer memory for send AND recv. + * The recv buffer, however, is not owned by this structure, and + * is "donated" to the hardware when a recv is posted. When a + * reply is handled, the recv buffer used is given back to the + * struct rpcrdma_req associated with the request. + * + * In addition to the basic memory, this structure includes an array + * of iovs for send operations. The reason is that the iovs passed to + * ib_post_{send,recv} must not be modified until the work request + * completes. + * + * NOTES: + * o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we + * marshal. The number needed varies depending on the iov lists that + * are passed to us, the memory registration mode we are in, and if + * physical addressing is used, the layout. + */ + +struct rpcrdma_mr_seg { /* chunk descriptors */ + union { /* chunk memory handles */ + struct ib_mr *rl_mr; /* if registered directly */ + struct rpcrdma_mw { /* if registered from region */ + union { + struct ib_mw *mw; + struct ib_fmr *fmr; + } r; + struct list_head mw_list; + } *rl_mw; + } mr_chunk; + u64 mr_base; /* registration result */ + u32 mr_rkey; /* registration result */ + u32 mr_len; /* length of chunk or segment */ + int mr_nsegs; /* number of segments in chunk or 0 */ + enum dma_data_direction mr_dir; /* segment mapping direction */ + dma_addr_t mr_dma; /* segment mapping address */ + size_t mr_dmalen; /* segment mapping length */ + struct page *mr_page; /* owning page, if any */ + char *mr_offset; /* kva if no page, else offset */ +}; + +struct rpcrdma_req { + size_t rl_size; /* actual length of buffer */ + unsigned int rl_niovs; /* 0, 2 or 4 */ + unsigned int rl_nchunks; /* non-zero if chunks */ + struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ + struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ + struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ + struct ib_sge rl_send_iov[4]; /* for active requests */ + struct ib_sge rl_iov; /* for posting */ + struct ib_mr *rl_handle; /* handle for mem in rl_iov */ + char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ + __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */ +}; +#define rpcr_to_rdmar(r) \ + container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) + +/* + * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for + * inline requests/replies, and client/server credits. + * + * One of these is associated with a transport instance + */ +struct rpcrdma_buffer { + spinlock_t rb_lock; /* protects indexes */ + atomic_t rb_credits; /* most recent server credits */ + unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ + int rb_max_requests;/* client max requests */ + struct list_head rb_mws; /* optional memory windows/fmrs */ + int rb_send_index; + struct rpcrdma_req **rb_send_bufs; + int rb_recv_index; + struct rpcrdma_rep **rb_recv_bufs; + char *rb_pool; +}; +#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) + +/* + * Internal structure for transport instance creation. This + * exists primarily for modularity. + * + * This data should be set with mount options + */ +struct rpcrdma_create_data_internal { + struct sockaddr_storage addr; /* RDMA server address */ + unsigned int max_requests; /* max requests (slots) in flight */ + unsigned int rsize; /* mount rsize - max read hdr+data */ + unsigned int wsize; /* mount wsize - max write hdr+data */ + unsigned int inline_rsize; /* max non-rdma read data payload */ + unsigned int inline_wsize; /* max non-rdma write data payload */ + unsigned int padding; /* non-rdma write header padding */ +}; + +#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ + (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize) + +#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ + (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize) + +#define RPCRDMA_INLINE_PAD_VALUE(rq)\ + rpcx_to_rdmad(rq->rq_task->tk_xprt).padding + +/* + * Statistics for RPCRDMA + */ +struct rpcrdma_stats { + unsigned long read_chunk_count; + unsigned long write_chunk_count; + unsigned long reply_chunk_count; + + unsigned long long total_rdma_request; + unsigned long long total_rdma_reply; + + unsigned long long pullup_copy_count; + unsigned long long fixup_copy_count; + unsigned long hardway_register_count; + unsigned long failed_marshal_count; + unsigned long bad_reply_count; +}; + +/* + * RPCRDMA transport -- encapsulates the structures above for + * integration with RPC. + * + * The contained structures are embedded, not pointers, + * for convenience. This structure need not be visible externally. + * + * It is allocated and initialized during mount, and released + * during unmount. + */ +struct rpcrdma_xprt { + struct rpc_xprt xprt; + struct rpcrdma_ia rx_ia; + struct rpcrdma_ep rx_ep; + struct rpcrdma_buffer rx_buf; + struct rpcrdma_create_data_internal rx_data; + struct delayed_work rdma_connect; + struct rpcrdma_stats rx_stats; +}; + +#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) +#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) + +/* + * Interface Adapter calls - xprtrdma/verbs.c + */ +int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); +void rpcrdma_ia_close(struct rpcrdma_ia *); + +/* + * Endpoint calls - xprtrdma/verbs.c + */ +int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); + +int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, + struct rpcrdma_req *); +int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, + struct rpcrdma_rep *); + +/* + * Buffer calls - xprtrdma/verbs.c + */ +int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, + struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); + +struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); +void rpcrdma_buffer_put(struct rpcrdma_req *); +void rpcrdma_recv_buffer_get(struct rpcrdma_req *); +void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); + +int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int, + struct ib_mr **, struct ib_sge *); +int rpcrdma_deregister_internal(struct rpcrdma_ia *, + struct ib_mr *, struct ib_sge *); + +int rpcrdma_register_external(struct rpcrdma_mr_seg *, + int, int, struct rpcrdma_xprt *); +int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, + struct rpcrdma_xprt *, void *); + +/* + * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c + */ +void rpcrdma_conn_func(struct rpcrdma_ep *); +void rpcrdma_reply_handler(struct rpcrdma_rep *); + +/* + * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c + */ +int rpcrdma_marshal_req(struct rpc_rqst *); + +#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4ae7eed7f61..02298f529da 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -13,10 +13,14 @@ * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> * * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> + * + * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. + * <gilles.quillard@bull.net> */ #include <linux/types.h> #include <linux/slab.h> +#include <linux/module.h> #include <linux/capability.h> #include <linux/pagemap.h> #include <linux/errno.h> @@ -28,6 +32,7 @@ #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> +#include <linux/sunrpc/xprtsock.h> #include <linux/file.h> #include <net/sock.h> @@ -260,14 +265,29 @@ struct sock_xprt { #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) -static void xs_format_peer_addresses(struct rpc_xprt *xprt) +static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) +{ + return (struct sockaddr *) &xprt->addr; +} + +static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + return (struct sockaddr_in *) &xprt->addr; +} + +static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) +{ + return (struct sockaddr_in6 *) &xprt->addr; +} + +static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = xs_addr_in(xprt); char *buf; buf = kzalloc(20, GFP_KERNEL); if (buf) { - snprintf(buf, 20, "%u.%u.%u.%u", + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -279,26 +299,123 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - if (xprt->prot == IPPROTO_UDP) - xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; - else - xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; buf = kzalloc(48, GFP_KERNEL); if (buf) { - snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) { + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) { + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + xprt->address_strings[RPC_DISPLAY_NETID] = + kstrdup(xprt->prot == IPPROTO_UDP ? + RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL); +} + +static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in6 *addr = xs_addr_in6(xprt); + char *buf; + + buf = kzalloc(40, GFP_KERNEL); + if (buf) { + snprintf(buf, 40, NIP6_FMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + + buf = kzalloc(64, GFP_KERNEL); + if (buf) { + snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port), + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(36, GFP_KERNEL); + if (buf) { + snprintf(buf, 36, NIP6_SEQFMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(50, GFP_KERNEL); + if (buf) { + snprintf(buf, 50, NIP6_FMT".%u.%u", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port) >> 8, + ntohs(addr->sin6_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + xprt->address_strings[RPC_DISPLAY_NETID] = + kstrdup(xprt->prot == IPPROTO_UDP ? + RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL); } static void xs_free_peer_addresses(struct rpc_xprt *xprt) { - kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + int i; + + for (i = 0; i < RPC_DISPLAY_MAX; i++) + kfree(xprt->address_strings[i]); } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) @@ -463,19 +580,20 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, - (struct sockaddr *) &xprt->addr, + xs_addr(xprt), xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); - if (likely(status >= (int) req->rq_slen)) - return 0; - - /* Still some bytes left; set up for a retry later. */ - if (status > 0) + if (status >= 0) { + task->tk_bytes_sent += status; + if (status >= req->rq_slen) + return 0; + /* Still some bytes left; set up for a retry later. */ status = -EAGAIN; + } switch (status) { case -ENETUNREACH: @@ -523,7 +641,8 @@ static int xs_tcp_send_request(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; - int status, retry = 0; + int status; + unsigned int retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -661,6 +780,7 @@ static void xs_destroy(struct rpc_xprt *xprt) xs_free_peer_addresses(xprt); kfree(xprt->slot); kfree(xprt); + module_put(THIS_MODULE); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -1139,14 +1259,23 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + struct sockaddr *addr = xs_addr(xprt); dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - sap->sin_port = htons(port); + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)addr)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); + break; + default: + BUG(); + } } -static int xs_bind(struct sock_xprt *transport, struct socket *sock) +static int xs_bind4(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, @@ -1174,8 +1303,42 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) else port--; } while (err == -EADDRINUSE && port != transport->port); - dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", - NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); + dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", + __FUNCTION__, NIPQUAD(myaddr.sin_addr), + port, err ? "failed" : "ok", err); + return err; +} + +static int xs_bind6(struct sock_xprt *transport, struct socket *sock) +{ + struct sockaddr_in6 myaddr = { + .sin6_family = AF_INET6, + }; + struct sockaddr_in6 *sa; + int err; + unsigned short port = transport->port; + + if (!transport->xprt.resvport) + port = 0; + sa = (struct sockaddr_in6 *)&transport->addr; + myaddr.sin6_addr = sa->sin6_addr; + do { + myaddr.sin6_port = htons(port); + err = kernel_bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (!transport->xprt.resvport) + break; + if (err == 0) { + transport->port = port; + break; + } + if (port <= xprt_min_resvport) + port = xprt_max_resvport; + else + port--; + } while (err == -EADDRINUSE && port != transport->port); + dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", + NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err); return err; } @@ -1183,38 +1346,69 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sk->sk_lock.owner != NULL); - switch (sk->sk_family) { - case AF_INET: - sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", - &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); - break; - case AF_INET6: - sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", - &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); - break; + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", + &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); +} - default: - BUG(); - } +static inline void xs_reclassify_socket6(struct socket *sock) +{ + struct sock *sk = sock->sk; + + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", + &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } #else -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) +{ +} + +static inline void xs_reclassify_socket6(struct socket *sock) { } #endif +static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; + + xprt_set_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + xs_udp_do_set_buffer_size(xprt); +} + /** - * xs_udp_connect_worker - set up a UDP socket + * xs_udp_connect_worker4 - set up a UDP socket * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_udp_connect_worker(struct work_struct *work) +static void xs_udp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1232,9 +1426,9 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock)) { sock_release(sock); goto out; } @@ -1242,29 +1436,48 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; + xs_udp_finish_connecting(xprt, sock); + status = 0; +out: + xprt_wake_pending_tasks(xprt, status); + xprt_clear_connecting(xprt); +} - write_lock_bh(&sk->sk_callback_lock); +/** + * xs_udp_connect_worker6 - set up a UDP socket + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_udp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_udp_data_ready; - sk->sk_write_space = xs_udp_write_space; - sk->sk_no_check = UDP_CSUM_NORCV; - sk->sk_allocation = GFP_ATOMIC; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_set_connected(xprt); + /* Start by resetting any existing state */ + xs_close(xprt); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - write_unlock_bh(&sk->sk_callback_lock); + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; } - xs_udp_do_set_buffer_size(xprt); + + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + + xs_udp_finish_connecting(xprt, sock); status = 0; out: xprt_wake_pending_tasks(xprt, status); @@ -1295,13 +1508,52 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) result); } +static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; + sk->sk_allocation = GFP_ATOMIC; + + /* socket options */ + sk->sk_userlocks |= SOCK_BINDPORT_LOCK; + sock_reset_flag(sk, SOCK_LINGER); + tcp_sk(sk)->linger2 = 0; + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); +} + /** - * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1315,13 +1567,12 @@ static void xs_tcp_connect_worker(struct work_struct *work) if (!sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport " - "socket (%d).\n", -err); + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock) < 0) { sock_release(sock); goto out; } @@ -1332,43 +1583,70 @@ static void xs_tcp_connect_worker(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); + status = xs_tcp_finish_connecting(xprt, sock); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, -status, xprt_connected(xprt), + sock->sk->sk_state); + if (status < 0) { + switch (status) { + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_close(xprt); + break; + } + } +out: + xprt_wake_pending_tasks(xprt, status); +out_clear: + xprt_clear_connecting(xprt); +} - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_tcp_data_ready; - sk->sk_state_change = xs_tcp_state_change; - sk->sk_write_space = xs_tcp_write_space; - sk->sk_allocation = GFP_ATOMIC; +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - /* socket options */ - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - sock_reset_flag(sk, SOCK_LINGER); - tcp_sk(sk)->linger2 = 0; - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_clear_connected(xprt); + if (!sock) { + /* start from scratch */ + if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; + } + } else + /* "close" the socket, preserving the local port */ + xs_tcp_reuse_connection(xprt); - write_unlock_bh(&sk->sk_callback_lock); - } + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; - status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, - xprt->addrlen, O_NONBLOCK); + status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), - sock->sk->sk_state); + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { switch (status) { case -EINPROGRESS: @@ -1508,7 +1786,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size) +static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, + unsigned int slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; @@ -1549,8 +1828,9 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) +struct rpc_xprt *xs_setup_udp(struct xprt_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1559,15 +1839,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1580,11 +1856,37 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return ERR_PTR(-EAFNOSUPPORT); + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); - return xprt; + if (try_module_get(THIS_MODULE)) + return xprt; + + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(-EINVAL); } /** @@ -1592,8 +1894,9 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) +struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1602,14 +1905,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; @@ -1622,15 +1921,55 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return ERR_PTR(-EAFNOSUPPORT); + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); - return xprt; + if (try_module_get(THIS_MODULE)) + return xprt; + + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(-EINVAL); } +static struct xprt_class xs_udp_transport = { + .list = LIST_HEAD_INIT(xs_udp_transport.list), + .name = "udp", + .owner = THIS_MODULE, + .ident = IPPROTO_UDP, + .setup = xs_setup_udp, +}; + +static struct xprt_class xs_tcp_transport = { + .list = LIST_HEAD_INIT(xs_tcp_transport.list), + .name = "tcp", + .owner = THIS_MODULE, + .ident = IPPROTO_TCP, + .setup = xs_setup_tcp, +}; + /** - * init_socket_xprt - set up xprtsock's sysctls + * init_socket_xprt - set up xprtsock's sysctls, register with RPC client * */ int init_socket_xprt(void) @@ -1640,11 +1979,14 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_udp_transport); + xprt_register_transport(&xs_tcp_transport); + return 0; } /** - * cleanup_socket_xprt - remove xprtsock's sysctls + * cleanup_socket_xprt - remove xprtsock's sysctls, unregister * */ void cleanup_socket_xprt(void) @@ -1655,4 +1997,7 @@ void cleanup_socket_xprt(void) sunrpc_table_header = NULL; } #endif + + xprt_unregister_transport(&xs_udp_transport); + xprt_unregister_transport(&xs_tcp_transport); } diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 711ca4b1f05..3bbef2ab22a 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -38,6 +38,7 @@ #include <net/tipc/tipc_bearer.h> #include <net/tipc/tipc_msg.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> #define MAX_ETH_BEARERS 2 #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI @@ -76,7 +77,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, skb_reset_network_header(clone); dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; clone->dev = dev; - dev->hard_header(clone, dev, ETH_P_TIPC, + dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr, dev->dev_addr, clone->len); dev_queue_xmit(clone); @@ -100,6 +101,11 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; u32 size; + if (dev->nd_net != &init_net) { + kfree_skb(buf); + return 0; + } + if (likely(eb_ptr->bearer)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { size = msg_size((struct tipc_msg *)buf->data); @@ -129,7 +135,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Find device with specified name */ - for_each_netdev(pdev){ + for_each_netdev(&init_net, pdev){ if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { dev = pdev; break; @@ -192,6 +198,9 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + while ((eb_ptr->dev != dev)) { if (++eb_ptr == stop) return NOTIFY_DONE; /* couldn't find device */ @@ -234,12 +243,12 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) { unchar *addr = (unchar *)&a->dev_addr; + DECLARE_MAC_BUF(mac); if (str_size < 18) *str_buf = '\0'; else - sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + sprintf(str_buf, "%s", print_mac(mac, addr)); return str_buf; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 84110172031..e36b4b5a522 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -162,13 +162,16 @@ static void advance_queue(struct tipc_sock *tsock) * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct socket *sock, int protocol) +static int tipc_create(struct net *net, struct socket *sock, int protocol) { struct tipc_sock *tsock; struct tipc_port *port; struct sock *sk; u32 ref; + if (net != &init_net) + return -EAFNOSUPPORT; + if (unlikely(protocol != 0)) return -EPROTONOSUPPORT; @@ -198,7 +201,7 @@ static int tipc_create(struct socket *sock, int protocol) return -EPROTOTYPE; } - sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1); if (!sk) { tipc_deleteport(ref); return -ENOMEM; @@ -1372,7 +1375,7 @@ static int accept(struct socket *sock, struct socket *newsock, int flags) } buf = skb_peek(&sock->sk->sk_receive_queue); - res = tipc_create(newsock, 0); + res = tipc_create(sock->sk->sk_net, newsock, 0); if (!res) { struct tipc_sock *new_tsock = tipc_sk(newsock->sk); struct tipc_portid id; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a05c34260e7..9163ec526c2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -103,6 +103,7 @@ #include <asm/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/af_unix.h> @@ -333,7 +334,7 @@ static void unix_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_sync(sk->sk_sleep); sk_wake_async(sk, 2, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -482,7 +483,7 @@ static int unix_listen(struct socket *sock, int backlog) sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; /* set credentials so connect can copy them */ - sk->sk_peercred.pid = current->tgid; + sk->sk_peercred.pid = task_tgid_vnr(current); sk->sk_peercred.uid = current->euid; sk->sk_peercred.gid = current->egid; err = 0; @@ -593,7 +594,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock * unix_create1(struct socket *sock) +static struct sock * unix_create1(struct net *net, struct socket *sock) { struct sock *sk = NULL; struct unix_sock *u; @@ -601,7 +602,7 @@ static struct sock * unix_create1(struct socket *sock) if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; - sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1); if (!sk) goto out; @@ -627,8 +628,11 @@ out: return sk; } -static int unix_create(struct socket *sock, int protocol) +static int unix_create(struct net *net, struct socket *sock, int protocol) { + if (net != &init_net) + return -EAFNOSUPPORT; + if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -654,7 +658,7 @@ static int unix_create(struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - return unix_create1(sock) ? 0 : -ENOMEM; + return unix_create1(net, sock) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -1038,7 +1042,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(NULL); + newsk = unix_create1(sk->sk_net, NULL); if (newsk == NULL) goto out; @@ -1129,7 +1133,7 @@ restart: unix_peer(newsk) = sk; newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; - newsk->sk_peercred.pid = current->tgid; + newsk->sk_peercred.pid = task_tgid_vnr(current); newsk->sk_peercred.uid = current->euid; newsk->sk_peercred.gid = current->egid; newu = unix_sk(newsk); @@ -1190,7 +1194,7 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) sock_hold(skb); unix_peer(ska)=skb; unix_peer(skb)=ska; - ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid; + ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current); ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid; ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid; @@ -1635,7 +1639,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) goto out_unlock; - wake_up_interruptible(&u->peer_wait); + wake_up_interruptible_sync(&u->peer_wait); if (msg->msg_name) unix_copy_addr(msg, skb->sk); @@ -2082,25 +2086,7 @@ static const struct seq_operations unix_seq_ops = { static int unix_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - int *iter = kmalloc(sizeof(int), GFP_KERNEL); - - if (!iter) - goto out; - - rc = seq_open(file, &unix_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = iter; - *iter = 0; -out: - return rc; -out_kfree: - kfree(iter); - goto out; + return seq_open_private(file, &unix_seq_ops, sizeof(int)); } static const struct file_operations unix_seq_fops = { @@ -2135,7 +2121,7 @@ static int __init af_unix_init(void) sock_register(&unix_family_ops); #ifdef CONFIG_PROC_FS - proc_net_fops_create("unix", 0, &unix_seq_fops); + proc_net_fops_create(&init_net, "unix", 0, &unix_seq_fops); #endif unix_sysctl_register(); out: @@ -2146,7 +2132,7 @@ static void __exit af_unix_exit(void) { sock_unregister(PF_UNIX); unix_sysctl_unregister(); - proc_net_remove("unix"); + proc_net_remove(&init_net, "unix"); proto_unregister(&unix_proto); } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 236e7eaf1b7..f2e54c3f064 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -29,6 +29,7 @@ #include <linux/seq_file.h> #include <linux/smp_lock.h> +#include <net/net_namespace.h> #include <asm/io.h> #define PROC_STATS_FORMAT "%30s: %12lu\n" @@ -287,7 +288,7 @@ static const struct file_operations wandev_fops = { int __init wanrouter_proc_init(void) { struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, proc_net); + proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); if (!proc_router) goto fail; @@ -303,7 +304,7 @@ int __init wanrouter_proc_init(void) fail_stat: remove_proc_entry("config", proc_router); fail_config: - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, init_net.proc_net); fail: return -ENOMEM; } @@ -316,7 +317,7 @@ void wanrouter_proc_cleanup(void) { remove_proc_entry("config", proc_router); remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, init_net.proc_net); } /* diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index a228d56a91b..6426055a8be 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -1,6 +1,19 @@ config CFG80211 tristate "Improved wireless configuration API" +config NL80211 + bool "nl80211 new netlink interface support" + depends on CFG80211 + default y + ---help--- + This option turns on the new netlink interface + (nl80211) support in cfg80211. + + If =n, drivers using mac80211 will be configured via + wireless extension support provided by that subsystem. + + If unsure, say Y. + config WIRELESS_EXT bool "Wireless extensions" default n @@ -10,7 +23,9 @@ config WIRELESS_EXT Wireless extensions will be replaced by cfg80211 and will be required only by legacy drivers that implement - wireless extension handlers. + wireless extension handlers. This option does not + affect the wireless-extension backward compatibility + code in cfg80211. Say N (if you can) unless you know you need wireless extensions for external modules. diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 092116e390b..65710a42e5a 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o cfg80211-y += core.o sysfs.o radiotap.o +cfg80211-$(CONFIG_NL80211) += nl80211.o diff --git a/net/wireless/core.c b/net/wireless/core.c index 9771451eae2..febc33bc9c0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -16,6 +16,7 @@ #include <net/genetlink.h> #include <net/cfg80211.h> #include <net/wireless.h> +#include "nl80211.h" #include "core.h" #include "sysfs.h" @@ -36,6 +37,146 @@ static int wiphy_counter; /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; +/* requires cfg80211_drv_mutex to be held! */ +static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy) +{ + struct cfg80211_registered_device *result = NULL, *drv; + + list_for_each_entry(drv, &cfg80211_drv_list, list) { + if (drv->idx == wiphy) { + result = drv; + break; + } + } + + return result; +} + +/* requires cfg80211_drv_mutex to be held! */ +static struct cfg80211_registered_device * +__cfg80211_drv_from_info(struct genl_info *info) +{ + int ifindex; + struct cfg80211_registered_device *bywiphy = NULL, *byifidx = NULL; + struct net_device *dev; + int err = -EINVAL; + + if (info->attrs[NL80211_ATTR_WIPHY]) { + bywiphy = cfg80211_drv_by_wiphy( + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY])); + err = -ENODEV; + } + + if (info->attrs[NL80211_ATTR_IFINDEX]) { + ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); + dev = dev_get_by_index(&init_net, ifindex); + if (dev) { + if (dev->ieee80211_ptr) + byifidx = + wiphy_to_dev(dev->ieee80211_ptr->wiphy); + dev_put(dev); + } + err = -ENODEV; + } + + if (bywiphy && byifidx) { + if (bywiphy != byifidx) + return ERR_PTR(-EINVAL); + else + return bywiphy; /* == byifidx */ + } + if (bywiphy) + return bywiphy; + + if (byifidx) + return byifidx; + + return ERR_PTR(err); +} + +struct cfg80211_registered_device * +cfg80211_get_dev_from_info(struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + + mutex_lock(&cfg80211_drv_mutex); + drv = __cfg80211_drv_from_info(info); + + /* if it is not an error we grab the lock on + * it to assure it won't be going away while + * we operate on it */ + if (!IS_ERR(drv)) + mutex_lock(&drv->mtx); + + mutex_unlock(&cfg80211_drv_mutex); + + return drv; +} + +struct cfg80211_registered_device * +cfg80211_get_dev_from_ifindex(int ifindex) +{ + struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV); + struct net_device *dev; + + mutex_lock(&cfg80211_drv_mutex); + dev = dev_get_by_index(&init_net, ifindex); + if (!dev) + goto out; + if (dev->ieee80211_ptr) { + drv = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + mutex_lock(&drv->mtx); + } else + drv = ERR_PTR(-ENODEV); + dev_put(dev); + out: + mutex_unlock(&cfg80211_drv_mutex); + return drv; +} + +void cfg80211_put_dev(struct cfg80211_registered_device *drv) +{ + BUG_ON(IS_ERR(drv)); + mutex_unlock(&drv->mtx); +} + +int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, + char *newname) +{ + int idx, taken = -1, result, digits; + + /* prohibit calling the thing phy%d when %d is not its number */ + sscanf(newname, PHY_NAME "%d%n", &idx, &taken); + if (taken == strlen(newname) && idx != rdev->idx) { + /* count number of places needed to print idx */ + digits = 1; + while (idx /= 10) + digits++; + /* + * deny the name if it is phy<idx> where <idx> is printed + * without leading zeroes. taken == strlen(newname) here + */ + if (taken == strlen(PHY_NAME) + digits) + return -EINVAL; + } + + /* this will check for collisions */ + result = device_rename(&rdev->wiphy.dev, newname); + if (result) + return result; + + if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent, + rdev->wiphy.debugfsdir, + rdev->wiphy.debugfsdir->d_parent, + newname)) + printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", + newname); + + nl80211_notify_dev_rename(rdev); + + return 0; +} + /* exported functions */ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) @@ -204,10 +345,16 @@ static int cfg80211_init(void) if (err) goto out_fail_notifier; + err = nl80211_init(); + if (err) + goto out_fail_nl80211; + ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL); return 0; +out_fail_nl80211: + unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: wiphy_sysfs_exit(); out_fail_sysfs: @@ -218,6 +365,7 @@ subsys_initcall(cfg80211_init); static void cfg80211_exit(void) { debugfs_remove(ieee80211_debugfs_dir); + nl80211_exit(); unregister_netdevice_notifier(&cfg80211_netdev_notifier); wiphy_sysfs_exit(); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 158db1edb92..eb0f846b40d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -43,7 +43,39 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) extern struct mutex cfg80211_drv_mutex; extern struct list_head cfg80211_drv_list; +/* + * This function returns a pointer to the driver + * that the genl_info item that is passed refers to. + * If successful, it returns non-NULL and also locks + * the driver's mutex! + * + * This means that you need to call cfg80211_put_dev() + * before being allowed to acquire &cfg80211_drv_mutex! + * + * This is necessary because we need to lock the global + * mutex to get an item off the list safely, and then + * we lock the drv mutex so it doesn't go away under us. + * + * We don't want to keep cfg80211_drv_mutex locked + * for all the time in order to allow requests on + * other interfaces to go through at the same time. + * + * The result of this can be a PTR_ERR and hence must + * be checked with IS_ERR() for errors. + */ +extern struct cfg80211_registered_device * +cfg80211_get_dev_from_info(struct genl_info *info); + +/* identical to cfg80211_get_dev_from_info but only operate on ifindex */ +extern struct cfg80211_registered_device * +cfg80211_get_dev_from_ifindex(int ifindex); + +extern void cfg80211_put_dev(struct cfg80211_registered_device *drv); + /* free object */ extern void cfg80211_dev_free(struct cfg80211_registered_device *drv); +extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv, + char *newname); + #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c new file mode 100644 index 00000000000..48b0d453e4e --- /dev/null +++ b/net/wireless/nl80211.c @@ -0,0 +1,431 @@ +/* + * This is the new netlink-based wireless configuration interface. + * + * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net> + */ + +#include <linux/if.h> +#include <linux/module.h> +#include <linux/err.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/if_ether.h> +#include <linux/ieee80211.h> +#include <linux/nl80211.h> +#include <linux/rtnetlink.h> +#include <linux/netlink.h> +#include <net/genetlink.h> +#include <net/cfg80211.h> +#include "core.h" +#include "nl80211.h" + +/* the netlink family */ +static struct genl_family nl80211_fam = { + .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ + .name = "nl80211", /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL80211_ATTR_MAX, +}; + +/* internal helper: get drv and dev */ +static int get_drv_dev_by_info_ifindex(struct genl_info *info, + struct cfg80211_registered_device **drv, + struct net_device **dev) +{ + int ifindex; + + if (!info->attrs[NL80211_ATTR_IFINDEX]) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); + *dev = dev_get_by_index(&init_net, ifindex); + if (!*dev) + return -ENODEV; + + *drv = cfg80211_get_dev_from_ifindex(ifindex); + if (IS_ERR(*drv)) { + dev_put(*dev); + return PTR_ERR(*drv); + } + + return 0; +} + +/* policy for the attributes */ +static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { + [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, + .len = BUS_ID_SIZE-1 }, + + [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, + [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, + [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, +}; + +/* message building helper */ +static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq, + int flags, u8 cmd) +{ + /* since there is no private header just add the generic one */ + return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd); +} + +/* netlink command implementations */ + +static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, + struct cfg80211_registered_device *dev) +{ + void *hdr; + + hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); + NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + return genlmsg_end(msg, hdr); + + nla_put_failure: + return genlmsg_cancel(msg, hdr); +} + +static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx = 0; + int start = cb->args[0]; + struct cfg80211_registered_device *dev; + + mutex_lock(&cfg80211_drv_mutex); + list_for_each_entry(dev, &cfg80211_drv_list, list) { + if (++idx < start) + continue; + if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + dev) < 0) + break; + } + mutex_unlock(&cfg80211_drv_mutex); + + cb->args[0] = idx; + + return skb->len; +} + +static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct cfg80211_registered_device *dev; + + dev = cfg80211_get_dev_from_info(info); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_err; + + if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) + goto out_free; + + cfg80211_put_dev(dev); + + return genlmsg_unicast(msg, info->snd_pid); + + out_free: + nlmsg_free(msg); + out_err: + cfg80211_put_dev(dev); + return -ENOBUFS; +} + +static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + int result; + + if (!info->attrs[NL80211_ATTR_WIPHY_NAME]) + return -EINVAL; + + rdev = cfg80211_get_dev_from_info(info); + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + result = cfg80211_dev_rename(rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + + cfg80211_put_dev(rdev); + return result; +} + + +static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, + struct net_device *dev) +{ + void *hdr; + + hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name); + /* TODO: interface type */ + return genlmsg_end(msg, hdr); + + nla_put_failure: + return genlmsg_cancel(msg, hdr); +} + +static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) +{ + int wp_idx = 0; + int if_idx = 0; + int wp_start = cb->args[0]; + int if_start = cb->args[1]; + struct cfg80211_registered_device *dev; + struct wireless_dev *wdev; + + mutex_lock(&cfg80211_drv_mutex); + list_for_each_entry(dev, &cfg80211_drv_list, list) { + if (++wp_idx < wp_start) + continue; + if_idx = 0; + + mutex_lock(&dev->devlist_mtx); + list_for_each_entry(wdev, &dev->netdev_list, list) { + if (++if_idx < if_start) + continue; + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + wdev->netdev) < 0) + break; + } + mutex_unlock(&dev->devlist_mtx); + } + mutex_unlock(&cfg80211_drv_mutex); + + cb->args[0] = wp_idx; + cb->args[1] = if_idx; + + return skb->len; +} + +static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct cfg80211_registered_device *dev; + struct net_device *netdev; + int err; + + err = get_drv_dev_by_info_ifindex(info, &dev, &netdev); + if (err) + return err; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_err; + + if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, netdev) < 0) + goto out_free; + + dev_put(netdev); + cfg80211_put_dev(dev); + + return genlmsg_unicast(msg, info->snd_pid); + + out_free: + nlmsg_free(msg); + out_err: + dev_put(netdev); + cfg80211_put_dev(dev); + return -ENOBUFS; +} + +static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err, ifindex; + enum nl80211_iftype type; + struct net_device *dev; + + if (info->attrs[NL80211_ATTR_IFTYPE]) { + type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); + if (type > NL80211_IFTYPE_MAX) + return -EINVAL; + } else + return -EINVAL; + + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + if (err) + return err; + ifindex = dev->ifindex; + dev_put(dev); + + if (!drv->ops->change_virtual_intf) { + err = -EOPNOTSUPP; + goto unlock; + } + + rtnl_lock(); + err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, type); + rtnl_unlock(); + + unlock: + cfg80211_put_dev(drv); + return err; +} + +static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; + + if (!info->attrs[NL80211_ATTR_IFNAME]) + return -EINVAL; + + if (info->attrs[NL80211_ATTR_IFTYPE]) { + type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); + if (type > NL80211_IFTYPE_MAX) + return -EINVAL; + } + + drv = cfg80211_get_dev_from_info(info); + if (IS_ERR(drv)) + return PTR_ERR(drv); + + if (!drv->ops->add_virtual_intf) { + err = -EOPNOTSUPP; + goto unlock; + } + + rtnl_lock(); + err = drv->ops->add_virtual_intf(&drv->wiphy, + nla_data(info->attrs[NL80211_ATTR_IFNAME]), type); + rtnl_unlock(); + + unlock: + cfg80211_put_dev(drv); + return err; +} + +static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int ifindex, err; + struct net_device *dev; + + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + if (err) + return err; + ifindex = dev->ifindex; + dev_put(dev); + + if (!drv->ops->del_virtual_intf) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->del_virtual_intf(&drv->wiphy, ifindex); + rtnl_unlock(); + + out: + cfg80211_put_dev(drv); + return err; +} + +static struct genl_ops nl80211_ops[] = { + { + .cmd = NL80211_CMD_GET_WIPHY, + .doit = nl80211_get_wiphy, + .dumpit = nl80211_dump_wiphy, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = NL80211_CMD_SET_WIPHY, + .doit = nl80211_set_wiphy, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_GET_INTERFACE, + .doit = nl80211_get_interface, + .dumpit = nl80211_dump_interface, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = NL80211_CMD_SET_INTERFACE, + .doit = nl80211_set_interface, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_NEW_INTERFACE, + .doit = nl80211_new_interface, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DEL_INTERFACE, + .doit = nl80211_del_interface, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +/* multicast groups */ +static struct genl_multicast_group nl80211_config_mcgrp = { + .name = "config", +}; + +/* notification functions */ + +void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL); +} + +/* initialisation/exit functions */ + +int nl80211_init(void) +{ + int err, i; + + err = genl_register_family(&nl80211_fam); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(nl80211_ops); i++) { + err = genl_register_ops(&nl80211_fam, &nl80211_ops[i]); + if (err) + goto err_out; + } + + err = genl_register_mc_group(&nl80211_fam, &nl80211_config_mcgrp); + if (err) + goto err_out; + + return 0; + err_out: + genl_unregister_family(&nl80211_fam); + return err; +} + +void nl80211_exit(void) +{ + genl_unregister_family(&nl80211_fam); +} diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h new file mode 100644 index 00000000000..f3ea5c029ae --- /dev/null +++ b/net/wireless/nl80211.h @@ -0,0 +1,24 @@ +#ifndef __NET_WIRELESS_NL80211_H +#define __NET_WIRELESS_NL80211_H + +#include "core.h" + +#ifdef CONFIG_NL80211 +extern int nl80211_init(void); +extern void nl80211_exit(void); +extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); +#else +static inline int nl80211_init(void) +{ + return 0; +} +static inline void nl80211_exit(void) +{ +} +static inline void nl80211_notify_dev_rename( + struct cfg80211_registered_device *rdev) +{ +} +#endif /* CONFIG_NL80211 */ + +#endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 68c11d09991..28fbd0b0b56 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -161,7 +161,11 @@ int ieee80211_radiotap_iterator_next( [IEEE80211_RADIOTAP_DBM_TX_POWER] = 0x11, [IEEE80211_RADIOTAP_ANTENNA] = 0x11, [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = 0x11, - [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11 + [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11, + [IEEE80211_RADIOTAP_RX_FLAGS] = 0x22, + [IEEE80211_RADIOTAP_TX_FLAGS] = 0x22, + [IEEE80211_RADIOTAP_RTS_RETRIES] = 0x11, + [IEEE80211_RADIOTAP_DATA_RETRIES] = 0x11, /* * add more here as they are defined in * include/net/ieee80211_radiotap.h diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 2d5d2255a27..29f820e1825 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -53,8 +53,7 @@ static void wiphy_dev_release(struct device *dev) } #ifdef CONFIG_HOTPLUG -static int wiphy_uevent(struct device *dev, char **envp, - int num_envp, char *buf, int size) +static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) { /* TODO, we probably need stuff here */ return 0; diff --git a/net/wireless/wext.c b/net/wireless/wext.c index d6aaf65192e..85e5f9dd0d8 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -93,6 +93,7 @@ #include <linux/if_arp.h> /* ARPHRD_ETHER */ #include <linux/etherdevice.h> /* compare_ether_addr */ #include <linux/interrupt.h> +#include <net/net_namespace.h> #include <linux/wireless.h> /* Pretty obvious */ #include <net/iw_handler.h> /* New driver API */ @@ -672,7 +673,26 @@ static const struct seq_operations wireless_seq_ops = { static int wireless_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &wireless_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &wireless_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_proc_net(inode); + if (!seq->private) { + seq_release(inode, file); + res = -ENXIO; + } + } + return res; +} + +static int wireless_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations wireless_seq_fops = { @@ -680,17 +700,22 @@ static const struct file_operations wireless_seq_fops = { .open = wireless_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = wireless_seq_release, }; -int __init wext_proc_init(void) +int wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; return 0; } + +void wext_proc_exit(struct net *net) +{ + proc_net_remove(net, "wireless"); +} #endif /* CONFIG_PROC_FS */ /************************** IOCTL SUPPORT **************************/ @@ -1010,7 +1035,7 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr, * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... */ -static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) +static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd) { struct net_device *dev; iw_handler handler; @@ -1019,7 +1044,7 @@ static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) * The copy_to/from_user() of ifr is also dealt with in there */ /* Make sure the device exist */ - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) return -ENODEV; /* A bunch of special cases, then the generic case... @@ -1053,7 +1078,7 @@ static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) } /* entry point from dev ioctl */ -int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg) { int ret; @@ -1065,9 +1090,9 @@ int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, && !capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr->ifr_name); + dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = wireless_process_ioctl(ifr, cmd); + ret = wireless_process_ioctl(net, ifr, cmd); rtnl_unlock(); if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq))) return -EFAULT; @@ -1129,10 +1154,12 @@ static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev, { struct ifinfomsg *r; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r)); - r = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, 0, 0, type, sizeof(*r), 0); + if (nlh == NULL) + return -EMSGSIZE; + + r = nlmsg_data(nlh); r->ifi_family = AF_UNSPEC; r->__ifi_pad = 0; r->ifi_type = dev->type; @@ -1141,15 +1168,13 @@ static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev, r->ifi_change = 0; /* Wireless changes don't affect those flags */ /* Add the wireless events in the netlink packet */ - RTA_PUT(skb, IFLA_WIRELESS, event_len, event); + NLA_PUT(skb, IFLA_WIRELESS, event_len, event); - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } /* ---------------------------------------------------------------- */ @@ -1162,17 +1187,19 @@ rtattr_failure: static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) { struct sk_buff *skb; - int size = NLMSG_GOODSIZE; + int err; - skb = alloc_skb(size, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; - if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK, - event, event_len) < 0) { + err = rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK, event, event_len); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); kfree_skb(skb); return; } + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; skb_queue_tail(&wireless_nlevent_queue, skb); tasklet_schedule(&wireless_nlevent_tasklet); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 479927cb45c..fc416f9606a 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -191,6 +191,9 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct x25_neigh *nb; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->type == ARPHRD_X25 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) || dev->type == ARPHRD_ETHER @@ -466,10 +469,10 @@ static struct proto x25_proto = { .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(void) +static struct sock *x25_alloc_socket(struct net *net) { struct x25_sock *x25; - struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1); if (!sk) goto out; @@ -485,17 +488,20 @@ out: return sk; } -static int x25_create(struct socket *sock, int protocol) +static int x25_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct x25_sock *x25; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol) goto out; rc = -ENOMEM; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(net)) == NULL) goto out; x25 = x25_sk(sk); @@ -543,7 +549,7 @@ static struct sock *x25_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(osk->sk_net)) == NULL) goto out; x25 = x25_sk(sk); diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 848a6b6f90a..f0679d28311 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -95,6 +95,9 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, struct sk_buff *nskb; struct x25_neigh *nb; + if (dev->nd_net != &init_net) + goto drop; + nskb = skb_copy(skb, GFP_ATOMIC); if (!nskb) goto drop; diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 7405b9c5b7f..7d55e50c936 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <net/x25.h> @@ -301,7 +302,7 @@ int __init x25_proc_init(void) struct proc_dir_entry *p; int rc = -ENOMEM; - x25_proc_dir = proc_mkdir("x25", proc_net); + x25_proc_dir = proc_mkdir("x25", init_net.proc_net); if (!x25_proc_dir) goto out; @@ -328,7 +329,7 @@ out_forward: out_socket: remove_proc_entry("route", x25_proc_dir); out_route: - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", init_net.proc_net); goto out; } @@ -337,7 +338,7 @@ void __exit x25_proc_exit(void) remove_proc_entry("forward", x25_proc_dir); remove_proc_entry("route", x25_proc_dir); remove_proc_entry("socket", x25_proc_dir); - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 060fcfaa2f4..86b5b4da097 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -129,7 +129,7 @@ void x25_route_device_down(struct net_device *dev) */ struct net_device *x25_dev_get(char *devname) { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(&init_net, devname); if (dev && (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25 diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index de3c1a625a4..45744a3d3a5 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,6 +3,6 @@ # obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ - xfrm_input.o xfrm_algo.o + xfrm_input.o xfrm_output.o xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 5ced62c19c6..0426388d351 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/crypto.h> +#include <linux/scatterlist.h> #include <net/xfrm.h> #if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) #include <net/ah.h> @@ -552,9 +553,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, if (copy > len) copy = len; - sg.page = virt_to_page(skb->data + offset); - sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; - sg.length = copy; + sg_init_one(&sg, skb->data + offset, copy); err = icv_update(desc, &sg, copy); if (unlikely(err)) @@ -577,9 +576,9 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, if (copy > len) copy = len; - sg.page = frag->page; - sg.offset = frag->page_offset + offset-start; - sg.length = copy; + sg_init_table(&sg, 1); + sg_set_page(&sg, frag->page, copy, + frag->page_offset + offset-start); err = icv_update(desc, &sg, copy); if (unlikely(err)) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 113f4442998..cb97fda1b6d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -49,13 +49,16 @@ EXPORT_SYMBOL(secpath_dup); int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { int offset, offset_seq; + int hlen; switch (nexthdr) { case IPPROTO_AH: + hlen = sizeof(struct ip_auth_hdr); offset = offsetof(struct ip_auth_hdr, spi); offset_seq = offsetof(struct ip_auth_hdr, seq_no); break; case IPPROTO_ESP: + hlen = sizeof(struct ip_esp_hdr); offset = offsetof(struct ip_esp_hdr, spi); offset_seq = offsetof(struct ip_esp_hdr, seq_no); break; @@ -69,7 +72,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) return 1; } - if (!pskb_may_pull(skb, 16)) + if (!pskb_may_pull(skb, hlen)) return -EINVAL; *spi = *(__be32*)(skb_transport_header(skb) + offset); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c new file mode 100644 index 00000000000..f4bfd6c4565 --- /dev/null +++ b/net/xfrm/xfrm_output.c @@ -0,0 +1,95 @@ +/* + * xfrm_output.c - Common IPsec encapsulation code. + * + * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <net/dst.h> +#include <net/xfrm.h> + +static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) +{ + int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) + - skb_headroom(skb); + + if (nhead > 0) + return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + + /* Check tail too... */ + return 0; +} + +static int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = xfrm_state_check_expire(x); + if (err < 0) + goto err; + err = xfrm_state_check_space(x, skb); +err: + return err; +} + +int xfrm_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct xfrm_state *x = dst->xfrm; + int err; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (err) + goto error_nolock; + } + + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq = ++x->replay.oseq; + if (xfrm_aevent_is_on()) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + } + + err = x->outer_mode->output(x, skb); + if (err) + goto error; + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock_bh(&x->lock); + + err = x->type->output(x, skb); + if (err) + goto error_nolock; + + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); + + err = 0; + +error_nolock: + return err; +error: + spin_unlock_bh(&x->lock); + goto error_nolock; +} +EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7012891d39f..b702bd8a389 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -23,7 +23,6 @@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> -#include <linux/audit.h> #include <net/xfrm.h> #include <net/ip.h> @@ -50,8 +49,6 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); -static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family); -static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo); static inline int __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) @@ -87,72 +84,6 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } -int xfrm_register_type(struct xfrm_type *type, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); - struct xfrm_type **typemap; - int err = 0; - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; - - if (likely(typemap[type->proto] == NULL)) - typemap[type->proto] = type; - else - err = -EEXIST; - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_register_type); - -int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); - struct xfrm_type **typemap; - int err = 0; - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; - - if (unlikely(typemap[type->proto] != type)) - err = -ENOENT; - else - typemap[type->proto] = NULL; - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_type); - -struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_type **typemap; - struct xfrm_type *type; - int modload_attempted = 0; - -retry: - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - typemap = afinfo->type_map; - - type = typemap[proto]; - if (unlikely(type && !try_module_get(type->owner))) - type = NULL; - if (!type && !modload_attempted) { - xfrm_policy_put_afinfo(afinfo); - request_module("xfrm-type-%d-%d", - (int) family, (int) proto); - modload_attempted = 1; - goto retry; - } - - xfrm_policy_put_afinfo(afinfo); - return type; -} - int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family) { @@ -171,94 +102,6 @@ int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, } EXPORT_SYMBOL(xfrm_dst_lookup); -void xfrm_put_type(struct xfrm_type *type) -{ - module_put(type->owner); -} - -int xfrm_register_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_policy_lock_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -EEXIST; - modemap = afinfo->mode_map; - if (likely(modemap[mode->encap] == NULL)) { - modemap[mode->encap] = mode; - err = 0; - } - - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_register_mode); - -int xfrm_unregister_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_policy_lock_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -ENOENT; - modemap = afinfo->mode_map; - if (likely(modemap[mode->encap] == mode)) { - modemap[mode->encap] = NULL; - err = 0; - } - - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_mode); - -struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode *mode; - int modload_attempted = 0; - - if (unlikely(encap >= XFRM_MODE_MAX)) - return NULL; - -retry: - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - - mode = afinfo->mode_map[encap]; - if (unlikely(mode && !try_module_get(mode->owner))) - mode = NULL; - if (!mode && !modload_attempted) { - xfrm_policy_put_afinfo(afinfo); - request_module("xfrm-mode-%d-%d", family, encap); - modload_attempted = 1; - goto retry; - } - - xfrm_policy_put_afinfo(afinfo); - return mode; -} - -void xfrm_put_mode(struct xfrm_mode *mode) -{ - module_put(mode->owner); -} - static inline unsigned long make_jiffies(long secs) { if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) @@ -850,10 +693,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) continue; err = security_xfrm_policy_delete(pol); if (err) { - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSPD, 0, - pol, NULL); + xfrm_audit_policy_delete(pol, 0, + audit_info->loginuid, + audit_info->secid); return err; } } @@ -865,10 +707,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) continue; err = security_xfrm_policy_delete(pol); if (err) { - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSPD, - 0, pol, NULL); + xfrm_audit_policy_delete(pol, 0, + audit_info->loginuid, + audit_info->secid); return err; } } @@ -909,8 +750,8 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_audit_log(audit_info->loginuid, audit_info->secid, - AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL); + xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, + audit_info->secid); xfrm_policy_kill(pol); killed++; @@ -930,11 +771,9 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSPD, 1, - pol, NULL); - + xfrm_audit_policy_delete(pol, 1, + audit_info->loginuid, + audit_info->secid); xfrm_policy_kill(pol); killed++; @@ -1477,7 +1316,7 @@ restart: pol_dead = 0; xfrm_nr = 0; - if (sk && sk->sk_policy[1]) { + if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (IS_ERR(policy)) return PTR_ERR(policy); @@ -1687,17 +1526,13 @@ static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) { struct xfrm_state *x; - int err; if (!skb->sp || idx < 0 || idx >= skb->sp->len) return 0; x = skb->sp->xvec[idx]; if (!x->type->reject) return 0; - xfrm_state_hold(x); - err = x->type->reject(x, skb, fl); - xfrm_state_put(x); - return err; + return x->type->reject(x, skb, fl); } /* When skb is transformed back to its "native" form, we have to @@ -1954,8 +1789,8 @@ static int stale_bundle(struct dst_entry *dst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = init_net.loopback_dev; + dev_hold(dst->dev); dev_put(dev); } } @@ -2105,7 +1940,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (xdst->genid != dst->xfrm->genid) return 0; - if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + if (strict && fl && + !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) return 0; @@ -2150,123 +1986,6 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, EXPORT_SYMBOL(xfrm_bundle_ok); -#ifdef CONFIG_AUDITSYSCALL -/* Audit addition and deletion of SAs and ipsec policy */ - -void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, - struct xfrm_policy *xp, struct xfrm_state *x) -{ - - char *secctx; - u32 secctx_len; - struct xfrm_sec_ctx *sctx = NULL; - struct audit_buffer *audit_buf; - int family; - extern int audit_enabled; - - if (audit_enabled == 0) - return; - - BUG_ON((type == AUDIT_MAC_IPSEC_ADDSA || - type == AUDIT_MAC_IPSEC_DELSA) && !x); - BUG_ON((type == AUDIT_MAC_IPSEC_ADDSPD || - type == AUDIT_MAC_IPSEC_DELSPD) && !xp); - - audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type); - if (audit_buf == NULL) - return; - - switch(type) { - case AUDIT_MAC_IPSEC_ADDSA: - audit_log_format(audit_buf, "SAD add: auid=%u", auid); - break; - case AUDIT_MAC_IPSEC_DELSA: - audit_log_format(audit_buf, "SAD delete: auid=%u", auid); - break; - case AUDIT_MAC_IPSEC_ADDSPD: - audit_log_format(audit_buf, "SPD add: auid=%u", auid); - break; - case AUDIT_MAC_IPSEC_DELSPD: - audit_log_format(audit_buf, "SPD delete: auid=%u", auid); - break; - default: - return; - } - - if (sid != 0 && - security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) { - audit_log_format(audit_buf, " subj=%s", secctx); - security_release_secctx(secctx, secctx_len); - } else - audit_log_task_context(audit_buf); - - if (xp) { - family = xp->selector.family; - if (xp->security) - sctx = xp->security; - } else { - family = x->props.family; - if (x->security) - sctx = x->security; - } - - if (sctx) - audit_log_format(audit_buf, - " sec_alg=%u sec_doi=%u sec_obj=%s", - sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str); - - switch(family) { - case AF_INET: - { - struct in_addr saddr, daddr; - if (xp) { - saddr.s_addr = xp->selector.saddr.a4; - daddr.s_addr = xp->selector.daddr.a4; - } else { - saddr.s_addr = x->props.saddr.a4; - daddr.s_addr = x->id.daddr.a4; - } - audit_log_format(audit_buf, - " src=%u.%u.%u.%u dst=%u.%u.%u.%u", - NIPQUAD(saddr), NIPQUAD(daddr)); - } - break; - case AF_INET6: - { - struct in6_addr saddr6, daddr6; - if (xp) { - memcpy(&saddr6, xp->selector.saddr.a6, - sizeof(struct in6_addr)); - memcpy(&daddr6, xp->selector.daddr.a6, - sizeof(struct in6_addr)); - } else { - memcpy(&saddr6, x->props.saddr.a6, - sizeof(struct in6_addr)); - memcpy(&daddr6, x->id.daddr.a6, - sizeof(struct in6_addr)); - } - audit_log_format(audit_buf, - " src=" NIP6_FMT " dst=" NIP6_FMT, - NIP6(saddr6), NIP6(daddr6)); - } - break; - } - - if (x) - audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s", - (unsigned long)ntohl(x->id.spi), - (unsigned long)ntohl(x->id.spi), - x->id.proto == IPPROTO_AH ? "AH" : - (x->id.proto == IPPROTO_ESP ? - "ESP" : "IPCOMP")); - - audit_log_format(audit_buf, " res=%u", result); - audit_log_end(audit_buf); -} - -EXPORT_SYMBOL(xfrm_audit_log); -#endif /* CONFIG_AUDITSYSCALL */ - int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; @@ -2339,25 +2058,13 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) read_unlock(&xfrm_policy_afinfo_lock); } -static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family) +static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - write_lock_bh(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; - if (unlikely(!afinfo)) - write_unlock_bh(&xfrm_policy_afinfo_lock); - return afinfo; -} + struct net_device *dev = ptr; -static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - write_unlock_bh(&xfrm_policy_afinfo_lock); -} + if (dev->nd_net != &init_net) + return NOTIFY_DONE; -static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) -{ switch (event) { case NETDEV_DOWN: xfrm_flush_bundles(); @@ -2412,6 +2119,72 @@ void __init xfrm_init(void) xfrm_input_init(); } +#ifdef CONFIG_AUDITSYSCALL +static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, + struct audit_buffer *audit_buf) +{ + if (xp->security) + audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", + xp->security->ctx_alg, xp->security->ctx_doi, + xp->security->ctx_str); + + switch(xp->selector.family) { + case AF_INET: + audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u", + NIPQUAD(xp->selector.saddr.a4), + NIPQUAD(xp->selector.daddr.a4)); + break; + case AF_INET6: + { + struct in6_addr saddr6, daddr6; + + memcpy(&saddr6, xp->selector.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, xp->selector.daddr.a6, + sizeof(struct in6_addr)); + audit_log_format(audit_buf, + " src=" NIP6_FMT " dst=" NIP6_FMT, + NIP6(saddr6), NIP6(daddr6)); + } + break; + } +} + +void +xfrm_audit_policy_add(struct xfrm_policy *xp, int result, u32 auid, u32 sid) +{ + struct audit_buffer *audit_buf; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + audit_buf = xfrm_audit_start(sid, auid); + if (audit_buf == NULL) + return; + audit_log_format(audit_buf, " op=SPD-add res=%u", result); + xfrm_audit_common_policyinfo(xp, audit_buf); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); + +void +xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, u32 auid, u32 sid) +{ + struct audit_buffer *audit_buf; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + audit_buf = xfrm_audit_start(sid, auid); + if (audit_buf == NULL) + return; + audit_log_format(audit_buf, " op=SPD-delete res=%u", result); + xfrm_audit_common_policyinfo(xp, audit_buf); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); +#endif + #ifdef CONFIG_XFRM_MIGRATE static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, struct xfrm_selector *sel_tgt) @@ -2519,7 +2292,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) continue; n++; - if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL) + if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && + pol->xfrm_vec[i].mode != XFRM_MODE_BEET) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d4356e6f7f9..224b44e31a0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -19,7 +19,6 @@ #include <linux/ipsec.h> #include <linux/module.h> #include <linux/cache.h> -#include <linux/audit.h> #include <asm/uaccess.h> #include "xfrm_hash.h" @@ -58,6 +57,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); + static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, @@ -188,6 +190,184 @@ int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) +{ + struct xfrm_state_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + write_lock_bh(&xfrm_state_afinfo_lock); + afinfo = xfrm_state_afinfo[family]; + if (unlikely(!afinfo)) + write_unlock_bh(&xfrm_state_afinfo_lock); + return afinfo; +} + +static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) +{ + write_unlock_bh(&xfrm_state_afinfo_lock); +} + +int xfrm_register_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_type **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + if (likely(typemap[type->proto] == NULL)) + typemap[type->proto] = type; + else + err = -EEXIST; + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_type); + +int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_type **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + if (unlikely(typemap[type->proto] != type)) + err = -ENOENT; + else + typemap[type->proto] = NULL; + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_type); + +static struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_type **typemap; + struct xfrm_type *type; + int modload_attempted = 0; + +retry: + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + typemap = afinfo->type_map; + + type = typemap[proto]; + if (unlikely(type && !try_module_get(type->owner))) + type = NULL; + if (!type && !modload_attempted) { + xfrm_state_put_afinfo(afinfo); + request_module("xfrm-type-%d-%d", family, proto); + modload_attempted = 1; + goto retry; + } + + xfrm_state_put_afinfo(afinfo); + return type; +} + +static void xfrm_put_type(struct xfrm_type *type) +{ + module_put(type->owner); +} + +int xfrm_register_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_state_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -EEXIST; + modemap = afinfo->mode_map; + if (modemap[mode->encap]) + goto out; + + err = -ENOENT; + if (!try_module_get(afinfo->owner)) + goto out; + + mode->afinfo = afinfo; + modemap[mode->encap] = mode; + err = 0; + +out: + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_mode); + +int xfrm_unregister_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_state_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -ENOENT; + modemap = afinfo->mode_map; + if (likely(modemap[mode->encap] == mode)) { + modemap[mode->encap] = NULL; + module_put(mode->afinfo->owner); + err = 0; + } + + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_mode); + +static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *mode; + int modload_attempted = 0; + + if (unlikely(encap >= XFRM_MODE_MAX)) + return NULL; + +retry: + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + + mode = afinfo->mode_map[encap]; + if (unlikely(mode && !try_module_get(mode->owner))) + mode = NULL; + if (!mode && !modload_attempted) { + xfrm_state_put_afinfo(afinfo); + request_module("xfrm-mode-%d-%d", family, encap); + modload_attempted = 1; + goto retry; + } + + xfrm_state_put_afinfo(afinfo); + return mode; +} + +static void xfrm_put_mode(struct xfrm_mode *mode) +{ + module_put(mode->owner); +} + static void xfrm_state_gc_destroy(struct xfrm_state *x) { del_timer_sync(&x->timer); @@ -197,8 +377,10 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->calg); kfree(x->encap); kfree(x->coaddr); - if (x->mode) - xfrm_put_mode(x->mode); + if (x->inner_mode) + xfrm_put_mode(x->inner_mode); + if (x->outer_mode) + xfrm_put_mode(x->outer_mode); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -301,8 +483,8 @@ expired: if (!err && x->id.spi) km_state_expired(x, 1, 0); - xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, - AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_delete(x, err ? 0 : 1, + audit_get_loginuid(current->audit_context), 0); out: spin_unlock(&x->lock); @@ -403,11 +585,9 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSA, - 0, NULL, x); - + xfrm_audit_state_delete(x, 0, + audit_info->loginuid, + audit_info->secid); return err; } } @@ -443,10 +623,9 @@ restart: spin_unlock_bh(&xfrm_state_lock); err = xfrm_state_delete(x); - xfrm_audit_log(audit_info->loginuid, - audit_info->secid, - AUDIT_MAC_IPSEC_DELSA, - err ? 0 : 1, NULL, x); + xfrm_audit_state_delete(x, err ? 0 : 1, + audit_info->loginuid, + audit_info->secid); xfrm_state_put(x); spin_lock_bh(&xfrm_state_lock); @@ -852,7 +1031,6 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - wake_up(&km_waitq); xfrm_state_num++; @@ -1156,29 +1334,6 @@ int xfrm_state_check_expire(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_check_expire); -static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) -{ - int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) - - skb_headroom(skb); - - if (nhead > 0) - return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); - - /* Check tail too... */ - return 0; -} - -int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb) -{ - int err = xfrm_state_check_expire(x); - if (err < 0) - goto err; - err = xfrm_state_check_space(x, skb); -err: - return err; -} -EXPORT_SYMBOL(xfrm_state_check); - struct xfrm_state * xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) @@ -1303,26 +1458,33 @@ u32 xfrm_get_acqseq(void) } EXPORT_SYMBOL(xfrm_get_acqseq); -void -xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi) +int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { unsigned int h; struct xfrm_state *x0; + int err = -ENOENT; + __be32 minspi = htonl(low); + __be32 maxspi = htonl(high); + spin_lock_bh(&x->lock); + if (x->km.state == XFRM_STATE_DEAD) + goto unlock; + + err = 0; if (x->id.spi) - return; + goto unlock; + + err = -ENOENT; if (minspi == maxspi) { x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); - return; + goto unlock; } x->id.spi = minspi; } else { u32 spi = 0; - u32 low = ntohl(minspi); - u32 high = ntohl(maxspi); for (h=0; h<high-low+1; h++) { spi = low + net_random()%(high-low+1); x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family); @@ -1338,8 +1500,14 @@ xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi) h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); + + err = 0; } + +unlock: + spin_unlock_bh(&x->lock); + + return err; } EXPORT_SYMBOL(xfrm_alloc_spi); @@ -1424,7 +1592,6 @@ void xfrm_replay_notify(struct xfrm_state *x, int event) !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; } -EXPORT_SYMBOL(xfrm_replay_notify); static void xfrm_replay_timer_handler(unsigned long data) { @@ -1715,7 +1882,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1727,14 +1894,11 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) return afinfo; } -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { read_unlock(&xfrm_state_afinfo_lock); } -EXPORT_SYMBOL(xfrm_state_get_afinfo); -EXPORT_SYMBOL(xfrm_state_put_afinfo); - /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { @@ -1785,6 +1949,14 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; + x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (x->inner_mode == NULL) + goto error; + + if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) + goto error; + x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) goto error; @@ -1793,8 +1965,8 @@ int xfrm_init_state(struct xfrm_state *x) if (err) goto error; - x->mode = xfrm_get_mode(x->props.mode, family); - if (x->mode == NULL) + x->outer_mode = xfrm_get_mode(x->props.mode, family); + if (x->outer_mode == NULL) goto error; x->km.state = XFRM_STATE_VALID; @@ -1821,3 +1993,72 @@ void __init xfrm_state_init(void) INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); } +#ifdef CONFIG_AUDITSYSCALL +static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x, + struct audit_buffer *audit_buf) +{ + if (x->security) + audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", + x->security->ctx_alg, x->security->ctx_doi, + x->security->ctx_str); + + switch(x->props.family) { + case AF_INET: + audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u", + NIPQUAD(x->props.saddr.a4), + NIPQUAD(x->id.daddr.a4)); + break; + case AF_INET6: + { + struct in6_addr saddr6, daddr6; + + memcpy(&saddr6, x->props.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, x->id.daddr.a6, + sizeof(struct in6_addr)); + audit_log_format(audit_buf, + " src=" NIP6_FMT " dst=" NIP6_FMT, + NIP6(saddr6), NIP6(daddr6)); + } + break; + } +} + +void +xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid) +{ + struct audit_buffer *audit_buf; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + audit_buf = xfrm_audit_start(sid, auid); + if (audit_buf == NULL) + return; + audit_log_format(audit_buf, " op=SAD-add res=%u",result); + xfrm_audit_common_stateinfo(x, audit_buf); + audit_log_format(audit_buf, " spi=%lu(0x%lx)", + (unsigned long)x->id.spi, (unsigned long)x->id.spi); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_add); + +void +xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid) +{ + struct audit_buffer *audit_buf; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + audit_buf = xfrm_audit_start(sid, auid); + if (audit_buf == NULL) + return; + audit_log_format(audit_buf, " op=SAD-delete res=%u",result); + xfrm_audit_common_stateinfo(x, audit_buf); + audit_log_format(audit_buf, " spi=%lu(0x%lx)", + (unsigned long)x->id.spi, (unsigned long)x->id.spi); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_delete); +#endif /* CONFIG_AUDITSYSCALL */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 61339e17a0f..d41588d101d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -19,7 +19,6 @@ #include <linux/string.h> #include <linux/net.h> #include <linux/skbuff.h> -#include <linux/rtnetlink.h> #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/init.h> @@ -31,25 +30,22 @@ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include <linux/in6.h> #endif -#include <linux/audit.h> -static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) +static inline int alg_len(struct xfrm_algo *alg) { - struct rtattr *rt = xfrma[type - 1]; + return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); +} + +static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) +{ + struct nlattr *rt = attrs[type]; struct xfrm_algo *algp; - int len; if (!rt) return 0; - len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp); - if (len < 0) - return -EINVAL; - - algp = RTA_DATA(rt); - - len -= (algp->alg_key_len + 7U) / 8; - if (len < 0) + algp = nla_data(rt); + if (nla_len(rt) < alg_len(algp)) return -EINVAL; switch (type) { @@ -77,55 +73,25 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) return 0; } -static int verify_encap_tmpl(struct rtattr **xfrma) -{ - struct rtattr *rt = xfrma[XFRMA_ENCAP - 1]; - struct xfrm_encap_tmpl *encap; - - if (!rt) - return 0; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap)) - return -EINVAL; - - return 0; -} - -static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, +static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type, xfrm_address_t **addrp) { - struct rtattr *rt = xfrma[type - 1]; - - if (!rt) - return 0; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) - return -EINVAL; - - if (addrp) - *addrp = RTA_DATA(rt); + struct nlattr *rt = attrs[type]; - return 0; + if (rt && addrp) + *addrp = nla_data(rt); } -static inline int verify_sec_ctx_len(struct rtattr **xfrma) +static inline int verify_sec_ctx_len(struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; - int len = 0; if (!rt) return 0; - if (rt->rta_len < sizeof(*uctx)) - return -EINVAL; - - uctx = RTA_DATA(rt); - - len += sizeof(struct xfrm_user_sec_ctx); - len += uctx->ctx_len; - - if (uctx->len != len) + uctx = nla_data(rt); + if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) return -EINVAL; return 0; @@ -133,7 +99,7 @@ static inline int verify_sec_ctx_len(struct rtattr **xfrma) static int verify_newsa_info(struct xfrm_usersa_info *p, - struct rtattr **xfrma) + struct nlattr **attrs) { int err; @@ -157,35 +123,35 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: - if (!xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ALG_COMP-1]) + if (!attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_ALG_COMP]) goto out; break; case IPPROTO_ESP: - if ((!xfrma[XFRMA_ALG_AUTH-1] && - !xfrma[XFRMA_ALG_CRYPT-1]) || - xfrma[XFRMA_ALG_COMP-1]) + if ((!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_CRYPT]) || + attrs[XFRMA_ALG_COMP]) goto out; break; case IPPROTO_COMP: - if (!xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1]) + if (!attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT]) goto out; break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: - if (xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ENCAP-1] || - xfrma[XFRMA_SEC_CTX-1] || - !xfrma[XFRMA_COADDR-1]) + if (attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_ENCAP] || + attrs[XFRMA_SEC_CTX] || + !attrs[XFRMA_COADDR]) goto out; break; #endif @@ -194,17 +160,13 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; } - if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH))) - goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT))) - goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) goto out; - if ((err = verify_encap_tmpl(xfrma))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) goto out; - if ((err = verify_sec_ctx_len(xfrma))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP))) goto out; - if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) + if ((err = verify_sec_ctx_len(attrs))) goto out; err = -EINVAL; @@ -227,25 +189,22 @@ out: static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, struct xfrm_algo_desc *(*get_byname)(char *, int), - struct rtattr *u_arg) + struct nlattr *rta) { - struct rtattr *rta = u_arg; struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; - int len; if (!rta) return 0; - ualg = RTA_DATA(rta); + ualg = nla_data(rta); algo = get_byname(ualg->alg_name, 1); if (!algo) return -ENOSYS; *props = algo->desc.sadb_alg_id; - len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; - p = kmemdup(ualg, len, GFP_KERNEL); + p = kmemdup(ualg, alg_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; @@ -254,24 +213,6 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return 0; } -static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg) -{ - struct rtattr *rta = u_arg; - struct xfrm_encap_tmpl *p, *uencap; - - if (!rta) - return 0; - - uencap = RTA_DATA(rta); - p = kmemdup(uencap, sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - *encapp = p; - return 0; -} - - static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { int len = 0; @@ -283,34 +224,6 @@ static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) return len; } -static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) -{ - struct xfrm_user_sec_ctx *uctx; - - if (!u_arg) - return 0; - - uctx = RTA_DATA(u_arg); - return security_xfrm_state_alloc(x, uctx); -} - -static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) -{ - struct rtattr *rta = u_arg; - xfrm_address_t *p, *uaddrp; - - if (!rta) - return 0; - - uaddrp = RTA_DATA(rta); - p = kmemdup(uaddrp, sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - *addrpp = p; - return 0; -} - static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -336,53 +249,38 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static int xfrm_update_ae_params(struct xfrm_state *x, struct rtattr **xfrma) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) { - int err = - EINVAL; - struct rtattr *rp = xfrma[XFRMA_REPLAY_VAL-1]; - struct rtattr *lt = xfrma[XFRMA_LTIME_VAL-1]; - struct rtattr *et = xfrma[XFRMA_ETIMER_THRESH-1]; - struct rtattr *rt = xfrma[XFRMA_REPLAY_THRESH-1]; + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; + struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; + struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; if (rp) { struct xfrm_replay_state *replay; - if (RTA_PAYLOAD(rp) < sizeof(*replay)) - goto error; - replay = RTA_DATA(rp); + replay = nla_data(rp); memcpy(&x->replay, replay, sizeof(*replay)); memcpy(&x->preplay, replay, sizeof(*replay)); } if (lt) { struct xfrm_lifetime_cur *ltime; - if (RTA_PAYLOAD(lt) < sizeof(*ltime)) - goto error; - ltime = RTA_DATA(lt); + ltime = nla_data(lt); x->curlft.bytes = ltime->bytes; x->curlft.packets = ltime->packets; x->curlft.add_time = ltime->add_time; x->curlft.use_time = ltime->use_time; } - if (et) { - if (RTA_PAYLOAD(et) < sizeof(u32)) - goto error; - x->replay_maxage = *(u32*)RTA_DATA(et); - } - - if (rt) { - if (RTA_PAYLOAD(rt) < sizeof(u32)) - goto error; - x->replay_maxdiff = *(u32*)RTA_DATA(rt); - } + if (et) + x->replay_maxage = nla_get_u32(et); - return 0; -error: - return err; + if (rt) + x->replay_maxdiff = nla_get_u32(rt); } static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, - struct rtattr **xfrma, + struct nlattr **attrs, int *errp) { struct xfrm_state *x = xfrm_state_alloc(); @@ -395,25 +293,37 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, xfrm_aalg_get_byname, - xfrma[XFRMA_ALG_AUTH-1]))) + attrs[XFRMA_ALG_AUTH]))) goto error; if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, xfrm_ealg_get_byname, - xfrma[XFRMA_ALG_CRYPT-1]))) + attrs[XFRMA_ALG_CRYPT]))) goto error; if ((err = attach_one_algo(&x->calg, &x->props.calgo, xfrm_calg_get_byname, - xfrma[XFRMA_ALG_COMP-1]))) - goto error; - if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) - goto error; - if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) + attrs[XFRMA_ALG_COMP]))) goto error; + + if (attrs[XFRMA_ENCAP]) { + x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), + sizeof(*x->encap), GFP_KERNEL); + if (x->encap == NULL) + goto error; + } + + if (attrs[XFRMA_COADDR]) { + x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), + sizeof(*x->coaddr), GFP_KERNEL); + if (x->coaddr == NULL) + goto error; + } + err = xfrm_init_state(x); if (err) goto error; - if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1]))) + if (attrs[XFRMA_SEC_CTX] && + security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) goto error; x->km.seq = p->seq; @@ -426,9 +336,7 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, /* override default values from above */ - err = xfrm_update_ae_params(x, (struct rtattr **)xfrma); - if (err < 0) - goto error; + xfrm_update_ae_params(x, attrs); return x; @@ -441,18 +349,18 @@ error_no_put: } static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_usersa_info *p = NLMSG_DATA(nlh); + struct xfrm_usersa_info *p = nlmsg_data(nlh); struct xfrm_state *x; int err; struct km_event c; - err = verify_newsa_info(p, xfrma); + err = verify_newsa_info(p, attrs); if (err) return err; - x = xfrm_state_construct(p, xfrma, &err); + x = xfrm_state_construct(p, attrs, &err); if (!x) return err; @@ -462,8 +370,8 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_add(x, err ? 0 : 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -482,7 +390,7 @@ out: } static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, - struct rtattr **xfrma, + struct nlattr **attrs, int *errp) { struct xfrm_state *x = NULL; @@ -494,10 +402,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } else { xfrm_address_t *saddr = NULL; - err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); - if (err) - goto out; - + verify_one_addr(attrs, XFRMA_SRCADDR, &saddr); if (!saddr) { err = -EINVAL; goto out; @@ -515,14 +420,14 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; int err = -ESRCH; struct km_event c; - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_usersa_id *p = nlmsg_data(nlh); - x = xfrm_user_state_lookup(p, xfrma, &err); + x = xfrm_user_state_lookup(p, attrs, &err); if (x == NULL) return err; @@ -545,8 +450,8 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_notify(x, &c); out: - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + xfrm_audit_state_delete(x, err ? 0 : 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); xfrm_state_put(x); return err; } @@ -576,67 +481,92 @@ struct xfrm_dump_info { int this_idx; }; -static int dump_one_state(struct xfrm_state *x, int count, void *ptr) +static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) { - struct xfrm_dump_info *sp = ptr; - struct sk_buff *in_skb = sp->in_skb; - struct sk_buff *skb = sp->out_skb; - struct xfrm_usersa_info *p; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); + struct xfrm_user_sec_ctx *uctx; + struct nlattr *attr; + int ctx_size = sizeof(*uctx) + s->ctx_len; - if (sp->this_idx < sp->start_idx) - goto out; + attr = nla_reserve(skb, XFRMA_SEC_CTX, ctx_size); + if (attr == NULL) + return -EMSGSIZE; + + uctx = nla_data(attr); + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = s->ctx_doi; + uctx->ctx_alg = s->ctx_alg; + uctx->ctx_len = s->ctx_len; + memcpy(uctx + 1, s->ctx_str, s->ctx_len); - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWSA, sizeof(*p)); - nlh->nlmsg_flags = sp->nlmsg_flags; + return 0; +} - p = NLMSG_DATA(nlh); +/* Don't change this without updating xfrm_sa_len! */ +static int copy_to_user_state_extra(struct xfrm_state *x, + struct xfrm_usersa_info *p, + struct sk_buff *skb) +{ + spin_lock_bh(&x->lock); copy_to_user_state(x, p); + if (x->coaddr) + NLA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + + if (x->lastused) + NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused); + spin_unlock_bh(&x->lock); + if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); + NLA_PUT(skb, XFRMA_ALG_AUTH, alg_len(x->aalg), x->aalg); if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); + NLA_PUT(skb, XFRMA_ALG_CRYPT, alg_len(x->ealg), x->ealg); if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); if (x->encap) - RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); - if (x->security) { - int ctx_size = sizeof(struct xfrm_sec_ctx) + - x->security->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = x->security->ctx_doi; - uctx->ctx_alg = x->security->ctx_alg; - uctx->ctx_len = x->security->ctx_len; - memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); - } + if (x->security && copy_sec_ctx(x->security, skb) < 0) + goto nla_put_failure; - if (x->coaddr) - RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + return 0; - if (x->lastused) - RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); +nla_put_failure: + return -EMSGSIZE; +} - nlh->nlmsg_len = skb_tail_pointer(skb) - b; +static int dump_one_state(struct xfrm_state *x, int count, void *ptr) +{ + struct xfrm_dump_info *sp = ptr; + struct sk_buff *in_skb = sp->in_skb; + struct sk_buff *skb = sp->out_skb; + struct xfrm_usersa_info *p; + struct nlmsghdr *nlh; + int err; + + if (sp->this_idx < sp->start_idx) + goto out; + + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); + if (nlh == NULL) + return -EMSGSIZE; + + p = nlmsg_data(nlh); + + err = copy_to_user_state_extra(x, p, skb); + if (err) + goto nla_put_failure; + + nlmsg_end(skb, nlh); out: sp->this_idx++; return 0; -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return err; } static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) @@ -661,7 +591,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, struct xfrm_dump_info info; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); @@ -679,6 +609,13 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, return skb; } +static inline size_t xfrm_spdinfo_msgsize(void) +{ + return NLMSG_ALIGN(4) + + nla_total_size(sizeof(struct xfrmu_spdinfo)) + + nla_total_size(sizeof(struct xfrmu_spdhinfo)); +} + static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; @@ -714,18 +651,14 @@ nla_put_failure: } static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct sk_buff *r_skb; - u32 *flags = NLMSG_DATA(nlh); + u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; u32 seq = nlh->nlmsg_seq; - int len = NLMSG_LENGTH(sizeof(u32)); - len += RTA_SPACE(sizeof(struct xfrmu_spdinfo)); - len += RTA_SPACE(sizeof(struct xfrmu_spdhinfo)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -735,6 +668,13 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, return nlmsg_unicast(xfrm_nl, r_skb, spid); } +static inline size_t xfrm_sadinfo_msgsize(void) +{ + return NLMSG_ALIGN(4) + + nla_total_size(sizeof(struct xfrmu_sadhinfo)) + + nla_total_size(4); /* XFRMA_SAD_CNT */ +} + static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; @@ -764,19 +704,14 @@ nla_put_failure: } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct sk_buff *r_skb; - u32 *flags = NLMSG_DATA(nlh); + u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; u32 seq = nlh->nlmsg_seq; - int len = NLMSG_LENGTH(sizeof(u32)); - - len += RTA_SPACE(sizeof(struct xfrmu_sadhinfo)); - len += RTA_SPACE(sizeof(u32)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -787,14 +722,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; - x = xfrm_user_state_lookup(p, xfrma, &err); + x = xfrm_user_state_lookup(p, attrs, &err); if (x == NULL) goto out_noput; @@ -802,8 +737,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); } xfrm_state_put(x); out_noput: @@ -834,7 +768,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p) } static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct xfrm_userspi_info *p; @@ -843,7 +777,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, int family; int err; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); err = verify_userspi_info(p); if (err) goto out_noput; @@ -869,23 +803,17 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, if (x == NULL) goto out_noput; - resp_skb = ERR_PTR(-ENOENT); - - spin_lock_bh(&x->lock); - if (x->km.state != XFRM_STATE_DEAD) { - xfrm_alloc_spi(x, htonl(p->min), htonl(p->max)); - if (x->id.spi) - resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); - } - spin_unlock_bh(&x->lock); + err = xfrm_alloc_spi(x, p->min, p->max); + if (err) + goto out; + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); goto out; } - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); out: xfrm_state_put(x); @@ -964,15 +892,15 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) return verify_policy_dir(p->dir); } -static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma) +static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; if (!rt) return 0; - uctx = RTA_DATA(rt); + uctx = nla_data(rt); return security_xfrm_policy_alloc(pol, uctx); } @@ -1032,38 +960,35 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) return 0; } -static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) +static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_TMPL-1]; + struct nlattr *rt = attrs[XFRMA_TMPL]; if (!rt) { pol->xfrm_nr = 0; } else { - struct xfrm_user_tmpl *utmpl = RTA_DATA(rt); - int nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); + struct xfrm_user_tmpl *utmpl = nla_data(rt); + int nr = nla_len(rt) / sizeof(*utmpl); int err; err = validate_tmpl(nr, utmpl, pol->family); if (err) return err; - copy_templates(pol, RTA_DATA(rt), nr); + copy_templates(pol, utmpl, nr); } return 0; } -static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) +static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; + struct nlattr *rt = attrs[XFRMA_POLICY_TYPE]; struct xfrm_userpolicy_type *upt; u8 type = XFRM_POLICY_TYPE_MAIN; int err; if (rt) { - if (rt->rta_len < sizeof(*upt)) - return -EINVAL; - - upt = RTA_DATA(rt); + upt = nla_data(rt); type = upt->type; } @@ -1101,7 +1026,7 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i p->share = XFRM_SHARE_ANY; /* XXX xp->share */ } -static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp) +static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) { struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); int err; @@ -1113,12 +1038,12 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, copy_from_user_policy(xp, p); - err = copy_from_user_policy_type(&xp->type, xfrma); + err = copy_from_user_policy_type(&xp->type, attrs); if (err) goto error; - if (!(err = copy_from_user_tmpl(xp, xfrma))) - err = copy_from_user_sec_ctx(xp, xfrma); + if (!(err = copy_from_user_tmpl(xp, attrs))) + err = copy_from_user_sec_ctx(xp, attrs); if (err) goto error; @@ -1130,9 +1055,9 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); + struct xfrm_userpolicy_info *p = nlmsg_data(nlh); struct xfrm_policy *xp; struct km_event c; int err; @@ -1141,11 +1066,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, err = verify_newpolicy_info(p); if (err) return err; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; - xp = xfrm_policy_construct(p, xfrma, &err); + xp = xfrm_policy_construct(p, attrs, &err); if (!xp) return err; @@ -1155,8 +1080,8 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_add(xp, err ? 0 : 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); if (err) { security_xfrm_policy_free(xp); @@ -1197,32 +1122,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) up->ealgos = kp->ealgos; up->calgos = kp->calgos; } - RTA_PUT(skb, XFRMA_TMPL, - (sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr), - vec); - return 0; - -rtattr_failure: - return -1; -} - -static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) -{ - int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = s->ctx_doi; - uctx->ctx_alg = s->ctx_alg; - uctx->ctx_len = s->ctx_len; - memcpy(uctx + 1, s->ctx_str, s->ctx_len); - return 0; - - rtattr_failure: - return -1; + return nla_put(skb, XFRMA_TMPL, + sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr, vec); } static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb) @@ -1240,21 +1142,23 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s } return 0; } +static inline size_t userpolicy_type_attrsize(void) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + return nla_total_size(sizeof(struct xfrm_userpolicy_type)); +#else + return 0; +#endif +} #ifdef CONFIG_XFRM_SUB_POLICY static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { - struct xfrm_userpolicy_type upt; - - memset(&upt, 0, sizeof(upt)); - upt.type = type; - - RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); + struct xfrm_userpolicy_type upt = { + .type = type, + }; - return 0; - -rtattr_failure: - return -1; + return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); } #else @@ -1271,17 +1175,16 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); if (sp->this_idx < sp->start_idx) goto out; - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWPOLICY, sizeof(*p)); - p = NLMSG_DATA(nlh); - nlh->nlmsg_flags = sp->nlmsg_flags; + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); + if (nlh == NULL) + return -EMSGSIZE; + p = nlmsg_data(nlh); copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; @@ -1290,14 +1193,14 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; + nlmsg_end(skb, nlh); out: sp->this_idx++; return 0; nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) @@ -1326,7 +1229,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, struct xfrm_dump_info info; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -1345,7 +1248,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; @@ -1354,10 +1257,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int delete; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; @@ -1368,16 +1271,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err); else { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; memset(&tmp, 0, sizeof(struct xfrm_policy)); if (rt) { - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + struct xfrm_user_sec_ctx *uctx = nla_data(rt); if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; @@ -1396,13 +1299,13 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, - MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid); } } else { - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + xfrm_audit_policy_delete(xp, err ? 0 : 1, + NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); if (err != 0) goto out; @@ -1420,10 +1323,10 @@ out: } static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct km_event c; - struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); + struct xfrm_usersa_flush *p = nlmsg_data(nlh); struct xfrm_audit audit_info; int err; @@ -1441,18 +1344,25 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static inline size_t xfrm_aevent_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + + nla_total_size(sizeof(struct xfrm_replay_state)) + + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size(4) /* XFRM_AE_RTHR */ + + nla_total_size(4); /* XFRM_AE_ETHR */ +} static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) { struct xfrm_aevent_id *id; struct nlmsghdr *nlh; - struct xfrm_lifetime_cur ltime; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id)); - id = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + if (nlh == NULL) + return -EMSGSIZE; + id = nlmsg_data(nlh); memcpy(&id->sa_id.daddr, &x->id.daddr,sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; @@ -1461,54 +1371,34 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve id->reqid = x->props.reqid; id->flags = c->data.aevent; - RTA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); - ltime.bytes = x->curlft.bytes; - ltime.packets = x->curlft.packets; - ltime.add_time = x->curlft.add_time; - ltime.use_time = x->curlft.use_time; + if (id->flags & XFRM_AE_RTHR) + NLA_PUT_U32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); - RTA_PUT(skb, XFRMA_LTIME_VAL, sizeof(struct xfrm_lifetime_cur), <ime); + if (id->flags & XFRM_AE_ETHR) + NLA_PUT_U32(skb, XFRMA_ETIMER_THRESH, + x->replay_maxage * 10 / HZ); - if (id->flags&XFRM_AE_RTHR) { - RTA_PUT(skb,XFRMA_REPLAY_THRESH,sizeof(u32),&x->replay_maxdiff); - } - - if (id->flags&XFRM_AE_ETHR) { - u32 etimer = x->replay_maxage*10/HZ; - RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer); - } - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -rtattr_failure: -nlmsg_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct sk_buff *r_skb; int err; struct km_event c; - struct xfrm_aevent_id *p = NLMSG_DATA(nlh); - int len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id)); + struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; - len += RTA_SPACE(sizeof(struct xfrm_replay_state)); - len += RTA_SPACE(sizeof(struct xfrm_lifetime_cur)); - - if (p->flags&XFRM_AE_RTHR) - len+=RTA_SPACE(sizeof(u32)); - - if (p->flags&XFRM_AE_ETHR) - len+=RTA_SPACE(sizeof(u32)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -1530,22 +1420,21 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = netlink_unicast(xfrm_nl, r_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, r_skb, NETLINK_CB(skb).pid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; } static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct km_event c; int err = - EINVAL; - struct xfrm_aevent_id *p = NLMSG_DATA(nlh); - struct rtattr *rp = xfrma[XFRMA_REPLAY_VAL-1]; - struct rtattr *lt = xfrma[XFRMA_LTIME_VAL-1]; + struct xfrm_aevent_id *p = nlmsg_data(nlh); + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; if (!lt && !rp) return err; @@ -1562,10 +1451,8 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; spin_lock_bh(&x->lock); - err = xfrm_update_ae_params(x, xfrma); + xfrm_update_ae_params(x, attrs); spin_unlock_bh(&x->lock); - if (err < 0) - goto out; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1579,14 +1466,14 @@ out: } static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct xfrm_audit audit_info; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; @@ -1604,31 +1491,31 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; - struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); + struct xfrm_user_polexpire *up = nlmsg_data(nlh); struct xfrm_userpolicy_info *p = &up->pol; u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err); else { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; memset(&tmp, 0, sizeof(struct xfrm_policy)); if (rt) { - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + struct xfrm_user_sec_ctx *uctx = nla_data(rt); if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; @@ -1650,8 +1537,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { xfrm_policy_delete(xp, p->dir); - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSPD, 1, xp, NULL); + xfrm_audit_policy_delete(xp, 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); } else { // reset the timers here? @@ -1665,11 +1552,11 @@ out: } static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; int err; - struct xfrm_user_expire *ue = NLMSG_DATA(nlh); + struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); @@ -1686,8 +1573,8 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (ue->hard) { __xfrm_state_delete(x); - xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, - AUDIT_MAC_IPSEC_DELSA, 1, NULL, x); + xfrm_audit_state_delete(x, 1, NETLINK_CB(skb).loginuid, + NETLINK_CB(skb).sid); } err = 0; out: @@ -1697,14 +1584,14 @@ out: } static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; int i; - struct rtattr *rt = xfrma[XFRMA_TMPL-1]; + struct nlattr *rt = attrs[XFRMA_TMPL]; - struct xfrm_user_acquire *ua = NLMSG_DATA(nlh); + struct xfrm_user_acquire *ua = nlmsg_data(nlh); struct xfrm_state *x = xfrm_state_alloc(); int err = -ENOMEM; @@ -1719,7 +1606,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, } /* build an XP */ - xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err); + xp = xfrm_policy_construct(&ua->policy, attrs, &err); if (!xp) { kfree(x); return err; @@ -1729,7 +1616,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); memcpy(&x->sel, &ua->sel, sizeof(ua->sel)); - ut = RTA_DATA(rt); + ut = nla_data(rt); /* extract the templates and for each call km_key */ for (i = 0; i < xp->xfrm_nr; i++, ut++) { struct xfrm_tmpl *t = &xp->xfrm_vec[i]; @@ -1751,29 +1638,15 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, } #ifdef CONFIG_XFRM_MIGRATE -static int verify_user_migrate(struct rtattr **xfrma) -{ - struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; - struct xfrm_user_migrate *um; - - if (!rt) - return -EINVAL; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(*um)) - return -EINVAL; - - return 0; -} - static int copy_from_user_migrate(struct xfrm_migrate *ma, - struct rtattr **xfrma, int *num) + struct nlattr **attrs, int *num) { - struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; + struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; - um = RTA_DATA(rt); - num_migrate = (rt->rta_len - sizeof(*rt)) / sizeof(*um); + um = nla_data(rt); + num_migrate = nla_len(rt) / sizeof(*um); if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) return -EINVAL; @@ -1797,24 +1670,23 @@ static int copy_from_user_migrate(struct xfrm_migrate *ma, } static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_userpolicy_id *pi = NLMSG_DATA(nlh); + struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; u8 type; int err; int n = 0; - err = verify_user_migrate((struct rtattr **)xfrma); - if (err) - return err; + if (attrs[XFRMA_MIGRATE] == NULL) + return -EINVAL; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; err = copy_from_user_migrate((struct xfrm_migrate *)m, - (struct rtattr **)xfrma, &n); + attrs, &n); if (err) return err; @@ -1827,7 +1699,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, } #else static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { return -ENOPROTOOPT; } @@ -1849,11 +1721,14 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) memcpy(&um.new_daddr, &m->new_daddr, sizeof(um.new_daddr)); memcpy(&um.new_saddr, &m->new_saddr, sizeof(um.new_saddr)); - RTA_PUT(skb, XFRMA_MIGRATE, sizeof(um), &um); - return 0; + return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); +} -rtattr_failure: - return -1; +static inline size_t xfrm_migrate_msgsize(int num_migrate) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, @@ -1863,13 +1738,13 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); int i; - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id)); - pol_id = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0); + if (nlh == NULL) + return -EMSGSIZE; + pol_id = nlmsg_data(nlh); /* copy data from selector, dir, and type to the pol_id */ memset(pol_id, 0, sizeof(*pol_id)); memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); @@ -1883,25 +1758,18 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, goto nlmsg_failure; } - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_migrate) * num_migrate); - len += NLMSG_SPACE(sizeof(struct xfrm_userpolicy_id)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -1909,9 +1777,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_MIGRATE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, - GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, @@ -1921,7 +1787,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } #endif -#define XMSGSIZE(type) NLMSG_LENGTH(sizeof(struct type)) +#define XMSGSIZE(type) sizeof(struct type) static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), @@ -1937,19 +1803,36 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), - [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), + [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), - [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)), - [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)), + [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), }; #undef XMSGSIZE +static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, + [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, + [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) }, + [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, + [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, + [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, + [XFRMA_ETIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SRCADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, + [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, +}; + static struct xfrm_link { - int (*doit)(struct sk_buff *, struct nlmsghdr *, struct rtattr **); + int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, @@ -1977,9 +1860,9 @@ static struct xfrm_link { static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct rtattr *xfrma[XFRMA_MAX]; + struct nlattr *attrs[XFRMA_MAX+1]; struct xfrm_link *link; - int type, min_len; + int type, err; type = nlh->nlmsg_type; if (type > XFRM_MSG_MAX) @@ -2001,98 +1884,71 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL); } - memset(xfrma, 0, sizeof(xfrma)); - - if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type])) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned short flavor = attr->rta_type; - if (flavor) { - if (flavor > XFRMA_MAX) - return -EINVAL; - xfrma[flavor - 1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } + err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, + xfrma_policy); + if (err < 0) + return err; if (link->doit == NULL) return -EINVAL; - return link->doit(skb, nlh, xfrma); + return link->doit(skb, nlh, attrs); } -static void xfrm_netlink_rcv(struct sock *sk, int len) +static void xfrm_netlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&xfrm_cfg_mutex); - netlink_run_queue(sk, &qlen, &xfrm_user_rcv_msg); - mutex_unlock(&xfrm_cfg_mutex); + mutex_lock(&xfrm_cfg_mutex); + netlink_rcv_skb(skb, &xfrm_user_rcv_msg); + mutex_unlock(&xfrm_cfg_mutex); +} - } while (qlen); +static inline size_t xfrm_expire_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)); } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE, - sizeof(*ue)); - ue = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + if (nlh == NULL) + return -EMSGSIZE; + ue = nlmsg_data(nlh); copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + return nlmsg_end(skb, nlh); } static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) { struct sk_buff *skb; - int len = NLMSG_LENGTH(sizeof(struct xfrm_user_expire)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_expire(skb, x, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) { struct sk_buff *skb; - int len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id)); - len += RTA_SPACE(sizeof(struct xfrm_replay_state)); - len += RTA_SPACE(sizeof(struct xfrm_lifetime_cur)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_aevent(skb, x, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_AEVENTS; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) @@ -2100,42 +1956,45 @@ static int xfrm_notify_sa_flush(struct km_event *c) struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)); + int len = NLMSG_ALIGN(sizeof(struct xfrm_usersa_flush)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, - XFRM_MSG_FLUSHSA, sizeof(*p)); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + if (nlh == NULL) { + kfree_skb(skb); + return -EMSGSIZE; + } - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); p->proto = c->data.proto; - nlh->nlmsg_len = skb->tail - b; - - NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + nlmsg_end(skb, nlh); -nlmsg_failure: - kfree_skb(skb); - return -1; + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); } -static inline int xfrm_sa_len(struct xfrm_state *x) +static inline size_t xfrm_sa_len(struct xfrm_state *x) { - int l = 0; + size_t l = 0; if (x->aalg) - l += RTA_SPACE(sizeof(*x->aalg) + (x->aalg->alg_key_len+7)/8); + l += nla_total_size(alg_len(x->aalg)); if (x->ealg) - l += RTA_SPACE(sizeof(*x->ealg) + (x->ealg->alg_key_len+7)/8); + l += nla_total_size(alg_len(x->ealg)); if (x->calg) - l += RTA_SPACE(sizeof(*x->calg)); + l += nla_total_size(sizeof(*x->calg)); if (x->encap) - l += RTA_SPACE(sizeof(*x->encap)); + l += nla_total_size(sizeof(*x->encap)); + if (x->security) + l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + + x->security->ctx_len); + if (x->coaddr) + l += nla_total_size(sizeof(*x->coaddr)); + + /* Must count this as this may become non-zero behind our back. */ + l += nla_total_size(sizeof(x->lastused)); return l; } @@ -2146,57 +2005,51 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; int len = xfrm_sa_len(x); int headlen; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { - len += RTA_SPACE(headlen); + len += nla_total_size(headlen); headlen = sizeof(*id); } - len += NLMSG_SPACE(headlen); + len += NLMSG_ALIGN(headlen); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + if (nlh == NULL) + goto nla_put_failure; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELSA) { - id = NLMSG_DATA(nlh); + struct nlattr *attr; + + id = nlmsg_data(nlh); memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr)); id->spi = x->id.spi; id->family = x->props.family; id->proto = x->id.proto; - p = RTA_DATA(__RTA_PUT(skb, XFRMA_SA, sizeof(*p))); - } - - copy_to_user_state(x, p); + attr = nla_reserve(skb, XFRMA_SA, sizeof(*p)); + if (attr == NULL) + goto nla_put_failure; - if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); - if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); - if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + p = nla_data(attr); + } - if (x->encap) - RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (copy_to_user_state_extra(x, p, skb)) + goto nla_put_failure; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); -nlmsg_failure: -rtattr_failure: +nla_put_failure: + /* Somebody screwed up with xfrm_sa_len! */ + WARN_ON(1); kfree_skb(skb); return -1; } @@ -2224,20 +2077,28 @@ static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) } +static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, + struct xfrm_policy *xp) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + + userpolicy_type_attrsize(); +} + static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); __u32 seq = xfrm_get_acqseq(); - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, - sizeof(*ua)); - ua = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0); + if (nlh == NULL) + return -EMSGSIZE; + ua = nlmsg_data(nlh); memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); @@ -2254,35 +2115,26 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); - len += RTA_SPACE(xfrm_user_sec_ctx_size(x->security)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2344,18 +2196,26 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, return xp; } +static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(xfrm_user_sec_ctx_size(xp->security)) + + userpolicy_type_attrsize(); +} + static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, int dir, struct km_event *c) { struct xfrm_user_polexpire *upe; struct nlmsghdr *nlh; int hard = c->data.hard; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe)); - upe = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + if (nlh == NULL) + return -EMSGSIZE; + upe = nlmsg_data(nlh); copy_to_user_policy(xp, &upe->pol, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; @@ -2365,34 +2225,25 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; upe->hard = !!hard; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); - len += RTA_SPACE(xfrm_user_sec_ctx_size(xp->security)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) @@ -2401,30 +2252,30 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); int headlen; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { - len += RTA_SPACE(headlen); + len += nla_total_size(headlen); headlen = sizeof(*id); } -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - len += NLMSG_SPACE(headlen); + len += userpolicy_type_attrsize(); + len += NLMSG_ALIGN(headlen); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen); + nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + if (nlh == NULL) + goto nlmsg_failure; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELPOLICY) { - id = NLMSG_DATA(nlh); + struct nlattr *attr; + + id = nlmsg_data(nlh); memset(id, 0, sizeof(*id)); id->dir = dir; if (c->data.byid) @@ -2432,10 +2283,12 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * else memcpy(&id->sel, &xp->selector, sizeof(id->sel)); - p = RTA_DATA(__RTA_PUT(skb, XFRMA_POLICY, sizeof(*p))); - } + attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); + if (attr == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = 0; + p = nla_data(attr); + } copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) @@ -2443,13 +2296,11 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: -rtattr_failure: kfree_skb(skb); return -1; } @@ -2458,28 +2309,20 @@ static int xfrm_notify_policy_flush(struct km_event *c) { struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = 0; -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - len += NLMSG_LENGTH(0); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - - nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + if (nlh == NULL) + goto nlmsg_failure; if (copy_to_user_policy_type(c->data.type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2506,48 +2349,48 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev } +static inline size_t xfrm_report_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); +} + static int build_report(struct sk_buff *skb, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct xfrm_user_report *ur; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); - ur = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur), 0); + if (nlh == NULL) + return -EMSGSIZE; + ur = nlmsg_data(nlh); ur->proto = proto; memcpy(&ur->sel, sel, sizeof(ur->sel)); if (addr) - RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); + NLA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; - size_t len; - len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_report(skb, proto, sel, addr) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); } static struct xfrm_mgr netlink_mgr = { @@ -2566,7 +2409,7 @@ static int __init xfrm_user_init(void) printk(KERN_INFO "Initializing XFRM netlink socket\n"); - nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; |