aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c34
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_dev.c5
-rw-r--r--net/9p/Kconfig10
-rw-r--r--net/9p/Makefile3
-rw-r--r--net/9p/client.c32
-rw-r--r--net/9p/conv.c128
-rw-r--r--net/9p/error.c15
-rw-r--r--net/9p/fcprint.c8
-rw-r--r--net/9p/mod.c8
-rw-r--r--net/9p/trans_fd.c204
-rw-r--r--net/9p/trans_virtio.c175
-rw-r--r--net/9p/util.c36
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/sock.c2
-rw-r--r--net/econet/af_econet.c2
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/cipso_ipv4.c4
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/ipconfig.c6
-rw-r--r--net/ipv4/raw.c10
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp_input.c17
-rw-r--r--net/ipv6/addrconf.c75
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/mcast.c4
-rw-r--r--net/ipv6/ndisc.c12
-rw-r--r--net/ipv6/raw.c10
-rw-r--r--net/ipv6/route.c12
-rw-r--r--net/irda/discovery.c8
-rw-r--r--net/irda/irlmp.c5
-rw-r--r--net/irda/irnet/irnet_irda.c5
-rw-r--r--net/mac80211/debugfs_key.c15
-rw-r--r--net/mac80211/iface.c9
-rw-r--r--net/mac80211/mesh.c2
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c17
-rw-r--r--net/mac80211/mlme.c78
-rw-r--r--net/mac80211/rx.c12
-rw-r--r--net/mac80211/tx.c5
-rw-r--r--net/mac80211/util.c47
-rw-r--r--net/mac80211/wme.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/xt_iprange.c2
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sctp/sm_make_chunk.c16
-rw-r--r--net/sunrpc/auth_generic.c8
-rw-r--r--net/sunrpc/svc_xprt.c23
-rw-r--r--net/sunrpc/svcauth_unix.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c102
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c11
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c290
-rw-r--r--net/xfrm/xfrm_output.c6
56 files changed, 1071 insertions, 444 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2a739adaa92..51961300b58 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -382,6 +382,24 @@ static void vlan_sync_address(struct net_device *dev,
memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
}
+static void vlan_transfer_features(struct net_device *dev,
+ struct net_device *vlandev)
+{
+ unsigned long old_features = vlandev->features;
+
+ if (dev->features & NETIF_F_VLAN_TSO) {
+ vlandev->features &= ~VLAN_TSO_FEATURES;
+ vlandev->features |= dev->features & VLAN_TSO_FEATURES;
+ }
+ if (dev->features & NETIF_F_VLAN_CSUM) {
+ vlandev->features &= ~NETIF_F_ALL_CSUM;
+ vlandev->features |= dev->features & NETIF_F_ALL_CSUM;
+ }
+
+ if (old_features != vlandev->features)
+ netdev_features_change(vlandev);
+}
+
static void __vlan_device_event(struct net_device *dev, unsigned long event)
{
switch (event) {
@@ -410,10 +428,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
int i, flgs;
struct net_device *vlandev;
- if (is_vlan_dev(dev)) {
+ if (is_vlan_dev(dev))
__vlan_device_event(dev, event);
- goto out;
- }
grp = __vlan_find_group(dev);
if (!grp)
@@ -450,6 +466,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
}
break;
+ case NETDEV_FEAT_CHANGE:
+ /* Propagate device features to underlying device */
+ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ vlandev = vlan_group_get_device(grp, i);
+ if (!vlandev)
+ continue;
+
+ vlan_transfer_features(dev, vlandev);
+ }
+
+ break;
+
case NETDEV_DOWN:
/* Put all VLANs for this dev in the down state too. */
for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5229a72c7ea..79625696e86 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -7,6 +7,8 @@
#define VLAN_GRP_HASH_SIZE (1 << VLAN_GRP_HASH_SHIFT)
#define VLAN_GRP_HASH_MASK (VLAN_GRP_HASH_SIZE - 1)
+#define VLAN_TSO_FEATURES (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG)
+
/* Find a VLAN device by the MAC address of its Ethernet device, and
* it's VLAN ID. The default configuration is to have VLAN's scope
* to be box-wide, so the MAC will be ignored. The mac will only be
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c961f082600..b1cfbaa88db 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -663,6 +663,11 @@ static int vlan_dev_init(struct net_device *dev)
(1<<__LINK_STATE_DORMANT))) |
(1<<__LINK_STATE_PRESENT);
+ if (real_dev->features & NETIF_F_VLAN_TSO)
+ dev->features |= real_dev->features & VLAN_TSO_FEATURES;
+ if (real_dev->features & NETIF_F_VLAN_CSUM)
+ dev->features |= real_dev->features & NETIF_F_ALL_CSUM;
+
/* ipv6 shared card related stuff */
dev->dev_id = real_dev->dev_id;
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index bafc50c9e6f..ff34c5acc13 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -13,16 +13,6 @@ menuconfig NET_9P
If unsure, say N.
-config NET_9P_FD
- depends on NET_9P
- default y if NET_9P
- tristate "9P File Descriptor Transports (Experimental)"
- help
- This builds support for file descriptor transports for 9p
- which includes support for TCP/IP, named pipes, or passed
- file descriptors. TCP/IP is the default transport for 9p,
- so if you are going to use 9p, you'll likely want this.
-
config NET_9P_VIRTIO
depends on NET_9P && EXPERIMENTAL && VIRTIO
tristate "9P Virtio Transport (Experimental)"
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 8a105110189..519219480db 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,5 +1,4 @@
obj-$(CONFIG_NET_9P) := 9pnet.o
-obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
9pnet-objs := \
@@ -9,8 +8,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
error.o \
fcprint.o \
util.o \
-
-9pnet_fd-objs := \
trans_fd.o \
9pnet_virtio-objs := \
diff --git a/net/9p/client.c b/net/9p/client.c
index 84e087e2414..2ffe40cf2f0 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -64,21 +64,30 @@ static match_table_t tokens = {
* @options: options string passed from mount
* @v9ses: existing v9fs session information
*
+ * Return 0 upon success, -ERRNO upon failure
*/
-static void parse_opts(char *options, struct p9_client *clnt)
+static int parse_opts(char *opts, struct p9_client *clnt)
{
+ char *options;
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
- int ret;
+ int ret = 0;
clnt->trans_mod = v9fs_default_trans();
clnt->dotu = 1;
clnt->msize = 8192;
- if (!options)
- return;
+ if (!opts)
+ return 0;
+
+ options = kstrdup(opts, GFP_KERNEL);
+ if (!options) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "failed to allocate copy of option string\n");
+ return -ENOMEM;
+ }
while ((p = strsep(&options, ",")) != NULL) {
int token;
@@ -86,10 +95,11 @@ static void parse_opts(char *options, struct p9_client *clnt)
continue;
token = match_token(p, tokens, args);
if (token < Opt_trans) {
- ret = match_int(&args[0], &option);
- if (ret < 0) {
+ int r = match_int(&args[0], &option);
+ if (r < 0) {
P9_DPRINTK(P9_DEBUG_ERROR,
"integer field, but no integer?\n");
+ ret = r;
continue;
}
}
@@ -107,6 +117,8 @@ static void parse_opts(char *options, struct p9_client *clnt)
continue;
}
}
+ kfree(options);
+ return ret;
}
@@ -138,16 +150,20 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (!clnt)
return ERR_PTR(-ENOMEM);
+ clnt->trans = NULL;
spin_lock_init(&clnt->lock);
INIT_LIST_HEAD(&clnt->fidlist);
clnt->fidpool = p9_idpool_create();
- if (!clnt->fidpool) {
+ if (IS_ERR(clnt->fidpool)) {
err = PTR_ERR(clnt->fidpool);
clnt->fidpool = NULL;
goto error;
}
- parse_opts(options, clnt);
+ err = parse_opts(options, clnt);
+ if (err < 0)
+ goto error;
+
if (clnt->trans_mod == NULL) {
err = -EPROTONOSUPPORT;
P9_DPRINTK(P9_DEBUG_ERROR,
diff --git a/net/9p/conv.c b/net/9p/conv.c
index 3fe35d532c8..44547201f5b 100644
--- a/net/9p/conv.c
+++ b/net/9p/conv.c
@@ -197,7 +197,7 @@ static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid)
/**
* p9_size_wstat - calculate the size of a variable length stat struct
- * @stat: metadata (stat) structure
+ * @wstat: metadata (stat) structure
* @dotu: non-zero if 9P2000.u
*
*/
@@ -511,6 +511,12 @@ p9_create_common(struct cbuf *bufp, u32 size, u8 id)
return fc;
}
+/**
+ * p9_set_tag - set the tag field of an &p9_fcall structure
+ * @fc: fcall structure to set tag within
+ * @tag: tag id to set
+ */
+
void p9_set_tag(struct p9_fcall *fc, u16 tag)
{
fc->tag = tag;
@@ -518,6 +524,12 @@ void p9_set_tag(struct p9_fcall *fc, u16 tag)
}
EXPORT_SYMBOL(p9_set_tag);
+/**
+ * p9_create_tversion - allocates and creates a T_VERSION request
+ * @msize: requested maximum data size
+ * @version: version string to negotiate
+ *
+ */
struct p9_fcall *p9_create_tversion(u32 msize, char *version)
{
int size;
@@ -542,6 +554,16 @@ error:
}
EXPORT_SYMBOL(p9_create_tversion);
+/**
+ * p9_create_tauth - allocates and creates a T_AUTH request
+ * @afid: handle to use for authentication protocol
+ * @uname: user name attempting to authenticate
+ * @aname: mount specifier for remote server
+ * @n_uname: numeric id for user attempting to authneticate
+ * @dotu: 9P2000.u extension flag
+ *
+ */
+
struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname,
u32 n_uname, int dotu)
{
@@ -580,6 +602,18 @@ error:
}
EXPORT_SYMBOL(p9_create_tauth);
+/**
+ * p9_create_tattach - allocates and creates a T_ATTACH request
+ * @fid: handle to use for the new mount point
+ * @afid: handle to use for authentication protocol
+ * @uname: user name attempting to attach
+ * @aname: mount specifier for remote server
+ * @n_uname: numeric id for user attempting to attach
+ * @n_uname: numeric id for user attempting to attach
+ * @dotu: 9P2000.u extension flag
+ *
+ */
+
struct p9_fcall *
p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname,
u32 n_uname, int dotu)
@@ -616,6 +650,12 @@ error:
}
EXPORT_SYMBOL(p9_create_tattach);
+/**
+ * p9_create_tflush - allocates and creates a T_FLUSH request
+ * @oldtag: tag id for the transaction we are attempting to cancel
+ *
+ */
+
struct p9_fcall *p9_create_tflush(u16 oldtag)
{
int size;
@@ -639,6 +679,15 @@ error:
}
EXPORT_SYMBOL(p9_create_tflush);
+/**
+ * p9_create_twalk - allocates and creates a T_FLUSH request
+ * @fid: handle we are traversing from
+ * @newfid: a new handle for this transaction
+ * @nwname: number of path elements to traverse
+ * @wnames: array of path elements
+ *
+ */
+
struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname,
char **wnames)
{
@@ -677,6 +726,13 @@ error:
}
EXPORT_SYMBOL(p9_create_twalk);
+/**
+ * p9_create_topen - allocates and creates a T_OPEN request
+ * @fid: handle we are trying to open
+ * @mode: what mode we are trying to open the file in
+ *
+ */
+
struct p9_fcall *p9_create_topen(u32 fid, u8 mode)
{
int size;
@@ -701,6 +757,19 @@ error:
}
EXPORT_SYMBOL(p9_create_topen);
+/**
+ * p9_create_tcreate - allocates and creates a T_CREATE request
+ * @fid: handle of directory we are trying to create in
+ * @name: name of the file we are trying to create
+ * @perm: permissions for the file we are trying to create
+ * @mode: what mode we are trying to open the file in
+ * @extension: 9p2000.u extension string (for special files)
+ * @dotu: 9p2000.u enabled flag
+ *
+ * Note: Plan 9 create semantics include opening the resulting file
+ * which is why mode is included.
+ */
+
struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
char *extension, int dotu)
{
@@ -736,6 +805,13 @@ error:
}
EXPORT_SYMBOL(p9_create_tcreate);
+/**
+ * p9_create_tread - allocates and creates a T_READ request
+ * @fid: handle of the file we are trying to read
+ * @offset: offset to start reading from
+ * @count: how many bytes to read
+ */
+
struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count)
{
int size;
@@ -761,6 +837,17 @@ error:
}
EXPORT_SYMBOL(p9_create_tread);
+/**
+ * p9_create_twrite - allocates and creates a T_WRITE request from the kernel
+ * @fid: handle of the file we are trying to write
+ * @offset: offset to start writing at
+ * @count: how many bytes to write
+ * @data: data to write
+ *
+ * This function will create a requst with data buffers from the kernel
+ * such as the page cache.
+ */
+
struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count,
const char *data)
{
@@ -794,6 +881,16 @@ error:
}
EXPORT_SYMBOL(p9_create_twrite);
+/**
+ * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace
+ * @fid: handle of the file we are trying to write
+ * @offset: offset to start writing at
+ * @count: how many bytes to write
+ * @data: data to write
+ *
+ * This function will create a request with data buffers from userspace
+ */
+
struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count,
const char __user *data)
{
@@ -827,6 +924,14 @@ error:
}
EXPORT_SYMBOL(p9_create_twrite_u);
+/**
+ * p9_create_tclunk - allocate a request to forget about a file handle
+ * @fid: handle of the file we closing or forgetting about
+ *
+ * clunk is used both to close open files and to discard transient handles
+ * which may be created during meta-data operations and hierarchy traversal.
+ */
+
struct p9_fcall *p9_create_tclunk(u32 fid)
{
int size;
@@ -850,6 +955,12 @@ error:
}
EXPORT_SYMBOL(p9_create_tclunk);
+/**
+ * p9_create_tremove - allocate and create a request to remove a file
+ * @fid: handle of the file or directory we are removing
+ *
+ */
+
struct p9_fcall *p9_create_tremove(u32 fid)
{
int size;
@@ -873,6 +984,12 @@ error:
}
EXPORT_SYMBOL(p9_create_tremove);
+/**
+ * p9_create_tstat - allocate and populate a request for attributes
+ * @fid: handle of the file or directory we are trying to get the attributes of
+ *
+ */
+
struct p9_fcall *p9_create_tstat(u32 fid)
{
int size;
@@ -896,6 +1013,14 @@ error:
}
EXPORT_SYMBOL(p9_create_tstat);
+/**
+ * p9_create_tstat - allocate and populate a request to change attributes
+ * @fid: handle of the file or directory we are trying to change
+ * @wstat: &p9_stat structure with attributes we wish to set
+ * @dotu: 9p2000.u enabled flag
+ *
+ */
+
struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat,
int dotu)
{
@@ -922,3 +1047,4 @@ error:
return fc;
}
EXPORT_SYMBOL(p9_create_twstat);
+
diff --git a/net/9p/error.c b/net/9p/error.c
index 64104b9cb42..fdebe431406 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -33,6 +33,13 @@
#include <linux/errno.h>
#include <net/9p/9p.h>
+/**
+ * struct errormap - map string errors from Plan 9 to Linux numeric ids
+ * @name: string sent over 9P
+ * @val: numeric id most closely representing @name
+ * @namelen: length of string
+ * @list: hash-table list for string lookup
+ */
struct errormap {
char *name;
int val;
@@ -177,8 +184,7 @@ static struct errormap errmap[] = {
};
/**
- * p9_error_init - preload
- * @errstr: error string
+ * p9_error_init - preload mappings into hash list
*
*/
@@ -206,6 +212,7 @@ EXPORT_SYMBOL(p9_error_init);
/**
* errstr2errno - convert error string to error number
* @errstr: error string
+ * @len: length of error string
*
*/
@@ -230,8 +237,8 @@ int p9_errstr2errno(char *errstr, int len)
if (errno == 0) {
/* TODO: if error isn't found, add it dynamically */
errstr[len] = 0;
- printk(KERN_ERR "%s: errstr :%s: not found\n", __func__,
- errstr);
+ printk(KERN_ERR "%s: server reported unknown error %s\n",
+ __func__, errstr);
errno = 1;
}
diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c
index 40244fbd9b0..53dd8e28dd8 100644
--- a/net/9p/fcprint.c
+++ b/net/9p/fcprint.c
@@ -142,6 +142,14 @@ p9_printdata(char *buf, int buflen, u8 *data, int datalen)
return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16);
}
+/**
+ * p9_printfcall - decode and print a protocol structure into a buffer
+ * @buf: buffer to deposit decoded structure into
+ * @buflen: available space in buffer
+ * @fc: protocol rpc structure of type &p9_fcall
+ * @extended: whether or not session is operating with extended protocol
+ */
+
int
p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended)
{
diff --git a/net/9p/mod.c b/net/9p/mod.c
index c285aab2af0..bdee1fb7cc6 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -39,9 +39,6 @@ module_param_named(debug, p9_debug_level, uint, 0);
MODULE_PARM_DESC(debug, "9P debugging level");
#endif
-extern int p9_mux_global_init(void);
-extern void p9_mux_global_exit(void);
-
/*
* Dynamic Transport Registration Routines
*
@@ -52,7 +49,7 @@ static struct p9_trans_module *v9fs_default_transport;
/**
* v9fs_register_trans - register a new transport with 9p
- * @m - structure describing the transport module and entry points
+ * @m: structure describing the transport module and entry points
*
*/
void v9fs_register_trans(struct p9_trans_module *m)
@@ -65,7 +62,7 @@ EXPORT_SYMBOL(v9fs_register_trans);
/**
* v9fs_match_trans - match transport versus registered transports
- * @arg: string identifying transport
+ * @name: string identifying transport
*
*/
struct p9_trans_module *v9fs_match_trans(const substring_t *name)
@@ -110,6 +107,7 @@ static int __init init_p9(void)
p9_error_init();
printk(KERN_INFO "Installing 9P2000 support\n");
+ p9_trans_fd_init();
return ret;
}
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f624dff7685..4507f744f44 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -47,12 +47,29 @@
#define SCHED_TIMEOUT 10
#define MAXPOLLWADDR 2
+/**
+ * struct p9_fd_opts - per-transport options
+ * @rfd: file descriptor for reading (trans=fd)
+ * @wfd: file descriptor for writing (trans=fd)
+ * @port: port to connect to (trans=tcp)
+ *
+ */
+
struct p9_fd_opts {
int rfd;
int wfd;
u16 port;
};
+
+/**
+ * struct p9_trans_fd - transport state
+ * @rd: reference to file to read from
+ * @wr: reference of file to write to
+ * @conn: connection state reference
+ *
+ */
+
struct p9_trans_fd {
struct file *rd;
struct file *wr;
@@ -90,10 +107,24 @@ enum {
};
struct p9_req;
-
typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a);
+
+/**
+ * struct p9_req - fd mux encoding of an rpc transaction
+ * @lock: protects req_list
+ * @tag: numeric tag for rpc transaction
+ * @tcall: request &p9_fcall structure
+ * @rcall: response &p9_fcall structure
+ * @err: error state
+ * @cb: callback for when response is received
+ * @cba: argument to pass to callback
+ * @flush: flag to indicate RPC has been flushed
+ * @req_list: list link for higher level objects to chain requests
+ *
+ */
+
struct p9_req {
- spinlock_t lock; /* protect request structure */
+ spinlock_t lock;
int tag;
struct p9_fcall *tcall;
struct p9_fcall *rcall;
@@ -104,7 +135,39 @@ struct p9_req {
struct list_head req_list;
};
-struct p9_mux_poll_task;
+struct p9_mux_poll_task {
+ struct task_struct *task;
+ struct list_head mux_list;
+ int muxnum;
+};
+
+/**
+ * struct p9_conn - fd mux connection state information
+ * @lock: protects mux_list (?)
+ * @mux_list: list link for mux to manage multiple connections (?)
+ * @poll_task: task polling on this connection
+ * @msize: maximum size for connection (dup)
+ * @extended: 9p2000.u flag (dup)
+ * @trans: reference to transport instance for this connection
+ * @tagpool: id accounting for transactions
+ * @err: error state
+ * @equeue: event wait_q (?)
+ * @req_list: accounting for requests which have been sent
+ * @unsent_req_list: accounting for requests that haven't been sent
+ * @rcall: current response &p9_fcall structure
+ * @rpos: read position in current frame
+ * @rbuf: current read buffer
+ * @wpos: write position for current frame
+ * @wsize: amount of data to write for current frame
+ * @wbuf: current write buffer
+ * @poll_wait: array of wait_q's for various worker threads
+ * @poll_waddr: ????
+ * @pt: poll state
+ * @rq: current read work
+ * @wq: current write work
+ * @wsched: ????
+ *
+ */
struct p9_conn {
spinlock_t lock; /* protect lock structure */
@@ -132,11 +195,16 @@ struct p9_conn {
unsigned long wsched;
};
-struct p9_mux_poll_task {
- struct task_struct *task;
- struct list_head mux_list;
- int muxnum;
-};
+/**
+ * struct p9_mux_rpc - fd mux rpc accounting structure
+ * @m: connection this request was issued on
+ * @err: error state
+ * @tcall: request &p9_fcall
+ * @rcall: response &p9_fcall
+ * @wqueue: wait queue that client is blocked on for this rpc
+ *
+ * Bug: isn't this information duplicated elsewhere like &p9_req
+ */
struct p9_mux_rpc {
struct p9_conn *m;
@@ -207,10 +275,12 @@ static void p9_mux_put_tag(struct p9_conn *m, u16 tag)
/**
* p9_mux_calc_poll_procs - calculates the number of polling procs
- * based on the number of mounted v9fs filesystems.
+ * @muxnum: number of mounts
*
+ * Calculation is based on the number of mounted v9fs filesystems.
* The current implementation returns sqrt of the number of mounts.
*/
+
static int p9_mux_calc_poll_procs(int muxnum)
{
int n;
@@ -331,12 +401,11 @@ static void p9_mux_poll_stop(struct p9_conn *m)
/**
* p9_conn_create - allocate and initialize the per-session mux data
- * Creates the polling task if this is the first session.
+ * @trans: transport structure
*
- * @trans - transport structure
- * @msize - maximum message size
- * @extended - extended flag
+ * Note: Creates the polling task if this is the first session.
*/
+
static struct p9_conn *p9_conn_create(struct p9_trans *trans)
{
int i, n;
@@ -406,7 +475,10 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
/**
* p9_mux_destroy - cancels all pending requests and frees mux resources
+ * @m: mux to destroy
+ *
*/
+
static void p9_conn_destroy(struct p9_conn *m)
{
P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
@@ -429,9 +501,14 @@ static void p9_conn_destroy(struct p9_conn *m)
}
/**
- * p9_pollwait - called by files poll operation to add v9fs-poll task
- * to files wait queue
+ * p9_pollwait - add poll task to the wait queue
+ * @filp: file pointer being polled
+ * @wait_address: wait_q to block on
+ * @p: poll state
+ *
+ * called by files poll operation to add v9fs-poll task to files wait queue
*/
+
static void
p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
{
@@ -462,7 +539,10 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
/**
* p9_poll_mux - polls a mux and schedules read or write works if necessary
+ * @m: connection to poll
+ *
*/
+
static void p9_poll_mux(struct p9_conn *m)
{
int n;
@@ -499,9 +579,14 @@ static void p9_poll_mux(struct p9_conn *m)
}
/**
- * p9_poll_proc - polls all v9fs transports for new events and queues
- * the appropriate work to the work queue
+ * p9_poll_proc - poll worker thread
+ * @a: thread state and arguments
+ *
+ * polls all v9fs transports for new events and queues the appropriate
+ * work to the work queue
+ *
*/
+
static int p9_poll_proc(void *a)
{
struct p9_conn *m, *mtmp;
@@ -527,7 +612,10 @@ static int p9_poll_proc(void *a)
/**
* p9_write_work - called when a transport can send some data
+ * @work: container for work to be done
+ *
*/
+
static void p9_write_work(struct work_struct *work)
{
int n, err;
@@ -638,7 +726,10 @@ static void process_request(struct p9_conn *m, struct p9_req *req)
/**
* p9_read_work - called when there is some data to be read from a transport
+ * @work: container of work to be done
+ *
*/
+
static void p9_read_work(struct work_struct *work)
{
int n, err;
@@ -793,7 +884,9 @@ error:
* @tc: request to be sent
* @cb: callback function to call when response is received
* @cba: parameter to pass to the callback function
+ *
*/
+
static struct p9_req *p9_send_request(struct p9_conn *m,
struct p9_fcall *tc,
p9_conn_req_callback cb, void *cba)
@@ -961,10 +1054,12 @@ p9_conn_rpc_cb(struct p9_req *req, void *a)
/**
* p9_fd_rpc- sends 9P request and waits until a response is available.
* The function can be interrupted.
- * @m: mux data
+ * @t: transport data
* @tc: request to be sent
* @rc: pointer where a pointer to the response is stored
+ *
*/
+
int
p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
{
@@ -1041,8 +1136,10 @@ p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
* @m: mux data
* @tc: request to be sent
* @cb: callback function to be called when response arrives
- * @cba: value to pass to the callback function
+ * @a: value to pass to the callback function
+ *
*/
+
int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
p9_conn_req_callback cb, void *a)
{
@@ -1065,7 +1162,9 @@ int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
* p9_conn_cancel - cancel all pending requests with error
* @m: mux data
* @err: error code
+ *
*/
+
void p9_conn_cancel(struct p9_conn *m, int err)
{
struct p9_req *req, *rtmp;
@@ -1097,35 +1196,46 @@ void p9_conn_cancel(struct p9_conn *m, int err)
}
/**
- * v9fs_parse_options - parse mount options into session structure
+ * parse_options - parse mount options into session structure
* @options: options string passed from mount
- * @v9ses: existing v9fs session information
+ * @opts: transport-specific structure to parse options into
*
+ * Returns 0 upon success, -ERRNO upon failure
*/
-static void parse_opts(char *options, struct p9_fd_opts *opts)
+static int parse_opts(char *params, struct p9_fd_opts *opts)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
+ char *options;
int ret;
opts->port = P9_PORT;
opts->rfd = ~0;
opts->wfd = ~0;
- if (!options)
- return;
+ if (!params)
+ return 0;
+
+ options = kstrdup(params, GFP_KERNEL);
+ if (!options) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "failed to allocate copy of option string\n");
+ return -ENOMEM;
+ }
while ((p = strsep(&options, ",")) != NULL) {
int token;
+ int r;
if (!*p)
continue;
token = match_token(p, tokens, args);
- ret = match_int(&args[0], &option);
- if (ret < 0) {
+ r = match_int(&args[0], &option);
+ if (r < 0) {
P9_DPRINTK(P9_DEBUG_ERROR,
"integer field, but no integer?\n");
+ ret = r;
continue;
}
switch (token) {
@@ -1142,6 +1252,8 @@ static void parse_opts(char *options, struct p9_fd_opts *opts)
continue;
}
}
+ kfree(options);
+ return 0;
}
static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd)
@@ -1193,11 +1305,12 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
/**
* p9_fd_read- read from a fd
- * @v9ses: session information
+ * @trans: transport instance state
* @v: buffer to receive data into
* @len: size of receive buffer
*
*/
+
static int p9_fd_read(struct p9_trans *trans, void *v, int len)
{
int ret;
@@ -1220,11 +1333,12 @@ static int p9_fd_read(struct p9_trans *trans, void *v, int len)
/**
* p9_fd_write - write to a socket
- * @v9ses: session information
+ * @trans: transport instance state
* @v: buffer to send data from
* @len: size of send buffer
*
*/
+
static int p9_fd_write(struct p9_trans *trans, void *v, int len)
{
int ret;
@@ -1296,6 +1410,7 @@ end:
* @trans: private socket structure
*
*/
+
static void p9_fd_close(struct p9_trans *trans)
{
struct p9_trans_fd *ts;
@@ -1318,6 +1433,23 @@ static void p9_fd_close(struct p9_trans *trans)
kfree(ts);
}
+/*
+ * stolen from NFS - maybe should be made a generic function?
+ */
+static inline int valid_ipaddr4(const char *buf)
+{
+ int rc, count, in[4];
+
+ rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
+ if (rc != 4)
+ return -EINVAL;
+ for (count = 0; count < 4; count++) {
+ if (in[count] > 255)
+ return -EINVAL;
+ }
+ return 0;
+}
+
static struct p9_trans *
p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
{
@@ -1328,7 +1460,12 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
struct p9_fd_opts opts;
struct p9_trans_fd *p;
- parse_opts(args, &opts);
+ err = parse_opts(args, &opts);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (valid_ipaddr4(addr) < 0)
+ return ERR_PTR(-EINVAL);
csocket = NULL;
trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
@@ -1508,7 +1645,7 @@ static struct p9_trans_module p9_fd_trans = {
.create = p9_trans_create_fd,
};
-static int __init p9_trans_fd_init(void)
+int p9_trans_fd_init(void)
{
int ret = p9_mux_global_init();
if (ret) {
@@ -1522,9 +1659,4 @@ static int __init p9_trans_fd_init(void)
return 0;
}
-
-module_init(p9_trans_fd_init);
-
-MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
-MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL(p9_trans_fd_init);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index de7a9f532ed..42adc052b14 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -49,29 +49,75 @@
#define VIRTQUEUE_NUM 128
/* a single mutex to manage channel initialization and attachment */
-static DECLARE_MUTEX(virtio_9p_lock);
+static DEFINE_MUTEX(virtio_9p_lock);
/* global which tracks highest initialized channel */
static int chan_index;
#define P9_INIT_MAXTAG 16
-#define REQ_STATUS_IDLE 0
-#define REQ_STATUS_SENT 1
-#define REQ_STATUS_RCVD 2
-#define REQ_STATUS_FLSH 3
+
+/**
+ * enum p9_req_status_t - virtio request status
+ * @REQ_STATUS_IDLE: request slot unused
+ * @REQ_STATUS_SENT: request sent to server
+ * @REQ_STATUS_RCVD: response received from server
+ * @REQ_STATUS_FLSH: request has been flushed
+ *
+ * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
+ * but use is actually tracked by the idpool structure which handles tag
+ * id allocation.
+ *
+ */
+
+enum p9_req_status_t {
+ REQ_STATUS_IDLE,
+ REQ_STATUS_SENT,
+ REQ_STATUS_RCVD,
+ REQ_STATUS_FLSH,
+};
+
+/**
+ * struct p9_req_t - virtio request slots
+ * @status: status of this request slot
+ * @wq: wait_queue for the client to block on for this request
+ *
+ * The virtio transport uses an array to track outstanding requests
+ * instead of a list. While this may incurr overhead during initial
+ * allocation or expansion, it makes request lookup much easier as the
+ * tag id is a index into an array. (We use tag+1 so that we can accomodate
+ * the -1 tag for the T_VERSION request).
+ * This also has the nice effect of only having to allocate wait_queues
+ * once, instead of constantly allocating and freeing them. Its possible
+ * other resources could benefit from this scheme as well.
+ *
+ */
struct p9_req_t {
int status;
wait_queue_head_t *wq;
};
-/* We keep all per-channel information in a structure.
+/**
+ * struct virtio_chan - per-instance transport information
+ * @initialized: whether the channel is initialized
+ * @inuse: whether the channel is in use
+ * @lock: protects multiple elements within this structure
+ * @vdev: virtio dev associated with this channel
+ * @vq: virtio queue associated with this channel
+ * @tagpool: accounting for tag ids (and request slots)
+ * @reqs: array of request slots
+ * @max_tag: current number of request_slots allocated
+ * @sg: scatter gather list which is used to pack a request (protected?)
+ *
+ * We keep all per-channel information in a structure.
* This structure is allocated within the devices dev->mem space.
* A pointer to the structure will get put in the transport private.
+ *
*/
+
static struct virtio_chan {
- bool initialized; /* channel is initialized */
- bool inuse; /* channel is in use */
+ bool initialized;
+ bool inuse;
spinlock_t lock;
@@ -86,7 +132,19 @@ static struct virtio_chan {
struct scatterlist sg[VIRTQUEUE_NUM];
} channels[MAX_9P_CHAN];
-/* Lookup requests by tag */
+/**
+ * p9_lookup_tag - Lookup requests by tag
+ * @c: virtio channel to lookup tag within
+ * @tag: numeric id for transaction
+ *
+ * this is a simple array lookup, but will grow the
+ * request_slots as necessary to accomodate transaction
+ * ids which did not previously have a slot.
+ *
+ * Bugs: there is currently no upper limit on request slots set
+ * here, but that should be constrained by the id accounting.
+ */
+
static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag)
{
/* This looks up the original request by tag so we know which
@@ -130,11 +188,20 @@ static unsigned int rest_of_page(void *data)
return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
}
+/**
+ * p9_virtio_close - reclaim resources of a channel
+ * @trans: transport state
+ *
+ * This reclaims a channel by freeing its resources and
+ * reseting its inuse flag.
+ *
+ */
+
static void p9_virtio_close(struct p9_trans *trans)
{
struct virtio_chan *chan = trans->priv;
int count;
- unsigned int flags;
+ unsigned long flags;
spin_lock_irqsave(&chan->lock, flags);
p9_idpool_destroy(chan->tagpool);
@@ -144,13 +211,26 @@ static void p9_virtio_close(struct p9_trans *trans)
chan->max_tag = 0;
spin_unlock_irqrestore(&chan->lock, flags);
- down(&virtio_9p_lock);
+ mutex_lock(&virtio_9p_lock);
chan->inuse = false;
- up(&virtio_9p_lock);
+ mutex_unlock(&virtio_9p_lock);
kfree(trans);
}
+/**
+ * req_done - callback which signals activity from the server
+ * @vq: virtio queue activity was received on
+ *
+ * This notifies us that the server has triggered some activity
+ * on the virtio channel - most likely a response to request we
+ * sent. Figure out which requests now have responses and wake up
+ * those threads.
+ *
+ * Bugs: could do with some additional sanity checking, but appears to work.
+ *
+ */
+
static void req_done(struct virtqueue *vq)
{
struct virtio_chan *chan = vq->vdev->priv;
@@ -169,6 +249,20 @@ static void req_done(struct virtqueue *vq)
spin_unlock_irqrestore(&chan->lock, flags);
}
+/**
+ * pack_sg_list - pack a scatter gather list from a linear buffer
+ * @sg: scatter/gather list to pack into
+ * @start: which segment of the sg_list to start at
+ * @limit: maximum segment to pack data to
+ * @data: data to pack into scatter/gather list
+ * @count: amount of data to pack into the scatter/gather list
+ *
+ * sg_lists have multiple segments of various sizes. This will pack
+ * arbitrary data into an existing scatter gather list, segmenting the
+ * data as necessary within constraints.
+ *
+ */
+
static int
pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
int count)
@@ -189,6 +283,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
return index-start;
}
+/**
+ * p9_virtio_rpc - issue a request and wait for a response
+ * @t: transport state
+ * @tc: &p9_fcall request to transmit
+ * @rc: &p9_fcall to put reponse into
+ *
+ */
+
static int
p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
{
@@ -263,16 +365,26 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
return 0;
}
+/**
+ * p9_virtio_probe - probe for existence of 9P virtio channels
+ * @vdev: virtio device to probe
+ *
+ * This probes for existing virtio channels. At present only
+ * a single channel is in use, so in the future more work may need
+ * to be done here.
+ *
+ */
+
static int p9_virtio_probe(struct virtio_device *vdev)
{
int err;
struct virtio_chan *chan;
int index;
- down(&virtio_9p_lock);
+ mutex_lock(&virtio_9p_lock);
index = chan_index++;
chan = &channels[index];
- up(&virtio_9p_lock);
+ mutex_unlock(&virtio_9p_lock);
if (chan_index > MAX_9P_CHAN) {
printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n");
@@ -301,17 +413,34 @@ static int p9_virtio_probe(struct virtio_device *vdev)
out_free_vq:
vdev->config->del_vq(chan->vq);
fail:
- down(&virtio_9p_lock);
+ mutex_lock(&virtio_9p_lock);
chan_index--;
- up(&virtio_9p_lock);
+ mutex_unlock(&virtio_9p_lock);
return err;
}
-/* This sets up a transport channel for 9p communication. Right now
+
+/**
+ * p9_virtio_create - allocate a new virtio channel
+ * @devname: string identifying the channel to connect to (unused)
+ * @args: args passed from sys_mount() for per-transport options (unused)
+ * @msize: requested maximum packet size
+ * @extended: 9p2000.u enabled flag
+ *
+ * This sets up a transport channel for 9p communication. Right now
* we only match the first available channel, but eventually we couldlook up
* alternate channels by matching devname versus a virtio_config entry.
* We use a simple reference count mechanism to ensure that only a single
- * mount has a channel open at a time. */
+ * mount has a channel open at a time.
+ *
+ * Bugs: doesn't allow identification of a specific channel
+ * to allocate, channels are allocated sequentially. This was
+ * a pragmatic decision to get things rolling, but ideally some
+ * way of identifying the channel to attach to would be nice
+ * if we are going to support multiple channels.
+ *
+ */
+
static struct p9_trans *
p9_virtio_create(const char *devname, char *args, int msize,
unsigned char extended)
@@ -320,7 +449,7 @@ p9_virtio_create(const char *devname, char *args, int msize,
struct virtio_chan *chan = channels;
int index = 0;
- down(&virtio_9p_lock);
+ mutex_lock(&virtio_9p_lock);
while (index < MAX_9P_CHAN) {
if (chan->initialized && !chan->inuse) {
chan->inuse = true;
@@ -330,7 +459,7 @@ p9_virtio_create(const char *devname, char *args, int msize,
chan = &channels[index];
}
}
- up(&virtio_9p_lock);
+ mutex_unlock(&virtio_9p_lock);
if (index >= MAX_9P_CHAN) {
printk(KERN_ERR "9p: no channels available\n");
@@ -360,6 +489,12 @@ p9_virtio_create(const char *devname, char *args, int msize,
return trans;
}
+/**
+ * p9_virtio_remove - clean up resources associated with a virtio device
+ * @vdev: virtio device to remove
+ *
+ */
+
static void p9_virtio_remove(struct virtio_device *vdev)
{
struct virtio_chan *chan = vdev->priv;
diff --git a/net/9p/util.c b/net/9p/util.c
index ef7215565d8..958fc58cd1f 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -32,11 +32,23 @@
#include <linux/idr.h>
#include <net/9p/9p.h>
+/**
+ * struct p9_idpool - per-connection accounting for tag idpool
+ * @lock: protects the pool
+ * @pool: idr to allocate tag id from
+ *
+ */
+
struct p9_idpool {
spinlock_t lock;
struct idr pool;
};
+/**
+ * p9_idpool_create - create a new per-connection id pool
+ *
+ */
+
struct p9_idpool *p9_idpool_create(void)
{
struct p9_idpool *p;
@@ -52,6 +64,11 @@ struct p9_idpool *p9_idpool_create(void)
}
EXPORT_SYMBOL(p9_idpool_create);
+/**
+ * p9_idpool_destroy - create a new per-connection id pool
+ * @p: idpool to destory
+ */
+
void p9_idpool_destroy(struct p9_idpool *p)
{
idr_destroy(&p->pool);
@@ -61,9 +78,9 @@ EXPORT_SYMBOL(p9_idpool_destroy);
/**
* p9_idpool_get - allocate numeric id from pool
- * @p - pool to allocate from
+ * @p: pool to allocate from
*
- * XXX - This seems to be an awful generic function, should it be in idr.c with
+ * Bugs: This seems to be an awful generic function, should it be in idr.c with
* the lock included in struct idr?
*/
@@ -71,7 +88,7 @@ int p9_idpool_get(struct p9_idpool *p)
{
int i = 0;
int error;
- unsigned int flags;
+ unsigned long flags;
retry:
if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
@@ -94,15 +111,16 @@ EXPORT_SYMBOL(p9_idpool_get);
/**
* p9_idpool_put - release numeric id from pool
- * @p - pool to allocate from
+ * @id: numeric id which is being released
+ * @p: pool to release id into
*
- * XXX - This seems to be an awful generic function, should it be in idr.c with
+ * Bugs: This seems to be an awful generic function, should it be in idr.c with
* the lock included in struct idr?
*/
void p9_idpool_put(int id, struct p9_idpool *p)
{
- unsigned int flags;
+ unsigned long flags;
spin_lock_irqsave(&p->lock, flags);
idr_remove(&p->pool, id);
spin_unlock_irqrestore(&p->lock, flags);
@@ -111,11 +129,13 @@ EXPORT_SYMBOL(p9_idpool_put);
/**
* p9_idpool_check - check if the specified id is available
- * @id - id to check
- * @p - pool
+ * @id: id to check
+ * @p: pool to check
*/
+
int p9_idpool_check(int id, struct p9_idpool *p)
{
return idr_find(&p->pool, id) != NULL;
}
EXPORT_SYMBOL(p9_idpool_check);
+
diff --git a/net/core/dev.c b/net/core/dev.c
index a1607bc0cd4..58296307787 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -903,7 +903,11 @@ int dev_change_name(struct net_device *dev, char *newname)
strlcpy(dev->name, newname, IFNAMSIZ);
rollback:
- device_rename(&dev->dev, dev->name);
+ err = device_rename(&dev->dev, dev->name);
+ if (err) {
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ return err;
+ }
write_lock_bh(&dev_base_lock);
hlist_del(&dev->name_hlist);
@@ -3137,7 +3141,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
* Load in the correct multicast list now the flags have changed.
*/
- if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
+ if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
dev->change_rx_flags(dev, IFF_MULTICAST);
dev_set_rx_mode(dev);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b04d643fc3c..8fb134da034 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -419,7 +419,7 @@ static void arp_reply(struct sk_buff *skb)
return;
size = arp_hdr_len(skb->dev);
- send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
+ send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
LL_RESERVED_SPACE(np->dev));
if (!send_skb)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8dca2111049..fdf537707e5 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -390,6 +390,7 @@ struct pktgen_thread {
int cpu;
wait_queue_head_t queue;
+ struct completion start_done;
};
#define REMOVE 1
@@ -3414,6 +3415,7 @@ static int pktgen_thread_worker(void *arg)
BUG_ON(smp_processor_id() != cpu);
init_waitqueue_head(&t->queue);
+ complete(&t->start_done);
pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
@@ -3615,6 +3617,7 @@ static int __init pktgen_create_thread(int cpu)
INIT_LIST_HEAD(&t->if_list);
list_add_tail(&t->th_list, &pktgen_threads);
+ init_completion(&t->start_done);
p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
if (IS_ERR(p)) {
@@ -3639,6 +3642,7 @@ static int __init pktgen_create_thread(int cpu)
}
wake_up_process(p);
+ wait_for_completion(&t->start_done);
return 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index fa76f04fa9c..88094cb09c0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -270,7 +270,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
int err = 0;
int skb_len;
- /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
+ /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
number of warnings when compiling with -W --ANK
*/
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 68d15448004..7c9bb13b153 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -340,7 +340,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
dev_hold(dev);
- skb = sock_alloc_send_skb(sk, len+LL_RESERVED_SPACE(dev),
+ skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
msg->msg_flags & MSG_DONTWAIT, &err);
if (skb==NULL)
goto out_unlock;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 68b72a7a180..418862f1bf2 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -570,7 +570,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
* Allocate a buffer
*/
- skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+ skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
if (skb == NULL)
return NULL;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 05afb576d93..2c0e4572cc9 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -338,7 +338,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
return -ENOENT;
hash = cipso_v4_map_cache_hash(key, key_len);
- bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+ bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1);
spin_lock_bh(&cipso_v4_cache[bkt].lock);
list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
if (entry->hash == hash &&
@@ -417,7 +417,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
atomic_inc(&secattr->cache->refcount);
entry->lsm_data = secattr->cache;
- bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+ bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
spin_lock_bh(&cipso_v4_cache[bkt].lock);
if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
list_add(&entry->list, &cipso_v4_cache[bkt].list);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6250f4239b6..2769dc4a4c8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -292,7 +292,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct iphdr *pip;
struct igmpv3_report *pig;
- skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+ skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
if (skb == NULL)
return NULL;
@@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
return -1;
}
- skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+ skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
if (skb == NULL) {
ip_rt_put(rt);
return -1;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 89dee4346f6..ed45037ce9b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -710,14 +710,14 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
struct net_device *dev = d->dev;
struct sk_buff *skb;
struct bootp_pkt *b;
- int hh_len = LL_RESERVED_SPACE(dev);
struct iphdr *h;
/* Allocate packet */
- skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL);
+ skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15,
+ GFP_KERNEL);
if (!skb)
return;
- skb_reserve(skb, hh_len);
+ skb_reserve(skb, LL_RESERVED_SPACE(dev));
b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
memset(b, 0, sizeof(struct bootp_pkt));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 11d7f753a82..fead049daf4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -322,7 +322,6 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
- int hh_len;
struct iphdr *iph;
struct sk_buff *skb;
unsigned int iphlen;
@@ -336,13 +335,12 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
if (flags&MSG_PROBE)
goto out;
- hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
- skb = sock_alloc_send_skb(sk, length+hh_len+15,
- flags&MSG_DONTWAIT, &err);
+ skb = sock_alloc_send_skb(sk,
+ length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+ flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto error;
- skb_reserve(skb, hh_len);
+ skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92f90ae46f4..df41026b60d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = {
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
.update_pmtu = ip_rt_update_pmtu,
- .local_out = ip_local_out,
+ .local_out = __ip_local_out,
.entry_size = sizeof(struct rtable),
.entries = ATOMIC_INIT(0),
};
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 26c936930e9..b54d9d37b63 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1842,9 +1842,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
}
- /* Don't lost mark skbs that were fwd transmitted after RTO */
- if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
- !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
+ /* Marking forward transmissions that were made after RTO lost
+ * can cause unnecessary retransmissions in some scenarios,
+ * SACK blocks will mitigate that in some but not in all cases.
+ * We used to not mark them but it was causing break-ups with
+ * receivers that do only in-order receival.
+ *
+ * TODO: we could detect presence of such receiver and select
+ * different behavior per flow.
+ */
+ if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
}
@@ -1860,7 +1867,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
tp->reordering = min_t(unsigned int, tp->reordering,
sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss);
- tp->high_seq = tp->frto_highmark;
+ tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
tcp_clear_retrans_hints_partial(tp);
@@ -2482,7 +2489,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
tcp_verify_left_out(tp);
- if (tp->retrans_out == 0)
+ if (!tp->frto_counter && tp->retrans_out == 0)
tp->retrans_stamp = 0;
if (flag & FLAG_ECE)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e591e09e5e4..3a835578fd1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1764,14 +1764,16 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
* 2) Configure prefixes with the auto flag set
*/
- /* Avoid arithmetic overflow. Really, we could
- save rt_expires in seconds, likely valid_lft,
- but it would require division in fib gc, that it
- not good.
- */
- if (valid_lft >= 0x7FFFFFFF/HZ)
+ if (valid_lft == INFINITY_LIFE_TIME)
+ rt_expires = ~0UL;
+ else if (valid_lft >= 0x7FFFFFFF/HZ) {
+ /* Avoid arithmetic overflow. Really, we could
+ * save rt_expires in seconds, likely valid_lft,
+ * but it would require division in fib gc, that it
+ * not good.
+ */
rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
- else
+ } else
rt_expires = valid_lft * HZ;
/*
@@ -1779,7 +1781,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
* Avoid arithmetic overflow there as well.
* Overflow can happen only if HZ < USER_HZ.
*/
- if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
+ if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ)
rt_expires = 0x7FFFFFFF / USER_HZ;
if (pinfo->onlink) {
@@ -1788,17 +1790,28 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
dev->ifindex, 1);
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
- if (rt->rt6i_flags&RTF_EXPIRES) {
- if (valid_lft == 0) {
- ip6_del_rt(rt);
- rt = NULL;
- } else {
- rt->rt6i_expires = jiffies + rt_expires;
- }
+ /* Autoconf prefix route */
+ if (valid_lft == 0) {
+ ip6_del_rt(rt);
+ rt = NULL;
+ } else if (~rt_expires) {
+ /* not infinity */
+ rt->rt6i_expires = jiffies + rt_expires;
+ rt->rt6i_flags |= RTF_EXPIRES;
+ } else {
+ rt->rt6i_flags &= ~RTF_EXPIRES;
+ rt->rt6i_expires = 0;
}
} else if (valid_lft) {
+ int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+ clock_t expires = 0;
+ if (~rt_expires) {
+ /* not infinity */
+ flags |= RTF_EXPIRES;
+ expires = jiffies_to_clock_t(rt_expires);
+ }
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
- dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+ dev, expires, flags);
}
if (rt)
dst_release(&rt->u.dst);
@@ -2021,7 +2034,8 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
struct inet6_dev *idev;
struct net_device *dev;
int scope;
- u32 flags = RTF_EXPIRES;
+ u32 flags;
+ clock_t expires;
ASSERT_RTNL();
@@ -2041,8 +2055,13 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
if (valid_lft == INFINITY_LIFE_TIME) {
ifa_flags |= IFA_F_PERMANENT;
flags = 0;
- } else if (valid_lft >= 0x7FFFFFFF/HZ)
- valid_lft = 0x7FFFFFFF/HZ;
+ expires = 0;
+ } else {
+ if (valid_lft >= 0x7FFFFFFF/HZ)
+ valid_lft = 0x7FFFFFFF/HZ;
+ flags = RTF_EXPIRES;
+ expires = jiffies_to_clock_t(valid_lft * HZ);
+ }
if (prefered_lft == 0)
ifa_flags |= IFA_F_DEPRECATED;
@@ -2060,7 +2079,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
spin_unlock_bh(&ifp->lock);
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
- jiffies_to_clock_t(valid_lft * HZ), flags);
+ expires, flags);
/*
* Note that section 3.1 of RFC 4429 indicates
* that the Optimistic flag should not be set for
@@ -3148,7 +3167,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
u32 prefered_lft, u32 valid_lft)
{
- u32 flags = RTF_EXPIRES;
+ u32 flags;
+ clock_t expires;
if (!valid_lft || (prefered_lft > valid_lft))
return -EINVAL;
@@ -3156,8 +3176,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
if (valid_lft == INFINITY_LIFE_TIME) {
ifa_flags |= IFA_F_PERMANENT;
flags = 0;
- } else if (valid_lft >= 0x7FFFFFFF/HZ)
- valid_lft = 0x7FFFFFFF/HZ;
+ expires = 0;
+ } else {
+ if (valid_lft >= 0x7FFFFFFF/HZ)
+ valid_lft = 0x7FFFFFFF/HZ;
+ flags = RTF_EXPIRES;
+ expires = jiffies_to_clock_t(valid_lft * HZ);
+ }
if (prefered_lft == 0)
ifa_flags |= IFA_F_DEPRECATED;
@@ -3176,7 +3201,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
ipv6_ifa_notify(0, ifp);
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
- jiffies_to_clock_t(valid_lft * HZ), flags);
+ expires, flags);
addrconf_verify(0);
return 0;
@@ -4242,7 +4267,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev)
neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
NET_IPV6_NEIGH, "ipv6",
&ndisc_ifinfo_sysctl_change,
- NULL);
+ ndisc_ifinfo_sysctl_strategy);
__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
idev->dev->ifindex, idev, &idev->cnf);
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0af2e055f88..48cdce9c696 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -780,7 +780,7 @@ slow_path:
* Allocate buffer.
*/
- if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+ if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
IP6_INC_STATS(ip6_dst_idev(skb->dst),
IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 54f91efdae5..fd632dd7f98 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
IPV6_TLV_PADN, 0 };
/* we assume size > sizeof(ra) here */
- skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err);
+ skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err);
if (!skb)
return NULL;
@@ -1790,7 +1790,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
payload_len = len + sizeof(ra);
full_len = sizeof(struct ipv6hdr) + payload_len;
- skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err);
+ skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err);
if (skb == NULL) {
rcu_read_lock();
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2c74885f835..282fdb31f8e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -479,7 +479,7 @@ static void __ndisc_send(struct net_device *dev,
skb = sock_alloc_send_skb(sk,
(MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_RESERVED_SPACE(dev)),
+ len + LL_ALLOCATED_SPACE(dev)),
1, &err);
if (!skb) {
ND_PRINTK0(KERN_ERR
@@ -1521,7 +1521,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
buff = sock_alloc_send_skb(sk,
(MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_RESERVED_SPACE(dev)),
+ len + LL_ALLOCATED_SPACE(dev)),
1, &err);
if (buff == NULL) {
ND_PRINTK0(KERN_ERR
@@ -1727,10 +1727,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
return ret;
}
-static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
- int nlen, void __user *oldval,
- size_t __user *oldlenp,
- void __user *newval, size_t newlen)
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
+ int nlen, void __user *oldval,
+ size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
{
struct net_device *dev = ctl->extra1;
struct inet6_dev *idev;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 396f0ea1109..232e0dc45bf 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -609,7 +609,6 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6hdr *iph;
struct sk_buff *skb;
- unsigned int hh_len;
int err;
if (length > rt->u.dst.dev->mtu) {
@@ -619,13 +618,12 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
if (flags&MSG_PROBE)
goto out;
- hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
- skb = sock_alloc_send_skb(sk, length+hh_len+15,
- flags&MSG_DONTWAIT, &err);
+ skb = sock_alloc_send_skb(sk,
+ length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+ flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto error;
- skb_reserve(skb, hh_len);
+ skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 12bba088034..48534c6c073 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -109,7 +109,7 @@ static struct dst_ops ip6_dst_ops_template = {
.negative_advice = ip6_negative_advice,
.link_failure = ip6_link_failure,
.update_pmtu = ip6_rt_update_pmtu,
- .local_out = ip6_local_out,
+ .local_out = __ip6_local_out,
.entry_size = sizeof(struct rt6_info),
.entries = ATOMIC_INIT(0),
};
@@ -475,7 +475,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
lifetime = ntohl(rinfo->lifetime);
if (lifetime == 0xffffffff) {
/* infinity */
- } else if (lifetime > 0x7fffffff/HZ) {
+ } else if (lifetime > 0x7fffffff/HZ - 1) {
/* Avoid arithmetic overflow */
lifetime = 0x7fffffff/HZ - 1;
}
@@ -1106,7 +1106,9 @@ int ip6_route_add(struct fib6_config *cfg)
}
rt->u.dst.obsolete = -1;
- rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
+ rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
+ jiffies + clock_t_to_jiffies(cfg->fc_expires) :
+ 0;
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -2200,7 +2202,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
- expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
+ expires = (rt->rt6i_flags & RTF_EXPIRES) ?
+ rt->rt6i_expires - jiffies : 0;
+
if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
expires, rt->u.dst.error) < 0)
goto nla_put_failure;
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index bfacef8b76f..a6f99b5a149 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -40,6 +40,8 @@
#include <net/irda/discovery.h>
+#include <asm/unaligned.h>
+
/*
* Function irlmp_add_discovery (cachelog, discovery)
*
@@ -87,7 +89,7 @@ void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new)
*/
hashbin_remove_this(cachelog, (irda_queue_t *) node);
/* Check if hints bits are unchanged */
- if(u16ho(node->data.hints) == u16ho(new->data.hints))
+ if (get_unaligned((__u16 *)node->data.hints) == get_unaligned((__u16 *)new->data.hints))
/* Set time of first discovery for this node */
new->firststamp = node->firststamp;
kfree(node);
@@ -281,9 +283,9 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
/* Mask out the ones we don't want :
* We want to match the discovery mask, and to get only
* the most recent one (unless we want old ones) */
- if ((u16ho(discovery->data.hints) & mask) &&
+ if ((get_unaligned((__u16 *)discovery->data.hints) & mask) &&
((old_entries) ||
- ((jiffies - discovery->firststamp) < j_timeout)) ) {
+ ((jiffies - discovery->firststamp) < j_timeout))) {
/* Create buffer as needed.
* As this function get called a lot and most time
* we don't have anything to put in the log (we are
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 1f81f8e7c61..7bf5b913828 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1062,7 +1062,8 @@ void irlmp_discovery_expiry(discinfo_t *expiries, int number)
for(i = 0; i < number; i++) {
/* Check if we should notify client */
if ((client->expir_callback) &&
- (client->hint_mask.word & u16ho(expiries[i].hints)
+ (client->hint_mask.word &
+ get_unaligned((__u16 *)expiries[i].hints)
& 0x7f7f) )
client->expir_callback(&(expiries[i]),
EXPIRY_TIMEOUT,
@@ -1086,7 +1087,7 @@ discovery_t *irlmp_get_discovery_response(void)
IRDA_ASSERT(irlmp != NULL, return NULL;);
- u16ho(irlmp->discovery_rsp.data.hints) = irlmp->hints.word;
+ put_unaligned(irlmp->hints.word, (__u16 *)irlmp->discovery_rsp.data.hints);
/*
* Set character set for device name (we use ASCII), and
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index 75497e55927..cf9a4b531a9 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -10,6 +10,7 @@
#include "irnet_irda.h" /* Private header */
#include <linux/seq_file.h>
+#include <asm/unaligned.h>
/*
* PPP disconnect work: we need to make sure we're in
@@ -1673,7 +1674,7 @@ irnet_discovery_indication(discinfo_t * discovery,
/* Notify the control channel */
irnet_post_event(NULL, IRNET_DISCOVER,
discovery->saddr, discovery->daddr, discovery->info,
- u16ho(discovery->hints));
+ get_unaligned((__u16 *)discovery->hints));
DEXIT(IRDA_OCB_TRACE, "\n");
}
@@ -1704,7 +1705,7 @@ irnet_expiry_indication(discinfo_t * expiry,
/* Notify the control channel */
irnet_post_event(NULL, IRNET_EXPIRE,
expiry->saddr, expiry->daddr, expiry->info,
- u16ho(expiry->hints));
+ get_unaligned((__u16 *)expiry->hints));
DEXIT(IRDA_OCB_TRACE, "\n");
}
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 879e7210458..19efc3a6a93 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -255,14 +255,23 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata)
{
char buf[50];
+ struct ieee80211_key *key;
if (!sdata->debugfsdir)
return;
- sprintf(buf, "../keys/%d", sdata->default_key->debugfs.cnt);
- sdata->debugfs.default_key =
- debugfs_create_symlink("default_key", sdata->debugfsdir, buf);
+ /* this is running under the key lock */
+
+ key = sdata->default_key;
+ if (key) {
+ sprintf(buf, "../keys/%d", key->debugfs.cnt);
+ sdata->debugfs.default_key =
+ debugfs_create_symlink("default_key",
+ sdata->debugfsdir, buf);
+ } else
+ ieee80211_debugfs_key_remove_default(sdata);
}
+
void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata)
{
if (!sdata)
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 80954a51218..06e88a5a036 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -54,6 +54,15 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
if (!ndev)
return -ENOMEM;
+ ndev->needed_headroom = local->tx_headroom +
+ 4*6 /* four MAC addresses */
+ + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */
+ + 6 /* mesh */
+ + 8 /* rfc1042/bridge tunnel */
+ - ETH_HLEN /* ethernet hard_header_len */
+ + IEEE80211_ENCRYPT_HEADROOM;
+ ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM;
+
ret = dev_alloc_name(ndev, ndev->name);
if (ret < 0)
goto fail;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index f76bc26ae4d..697ef67f96b 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -397,7 +397,7 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
put_unaligned(cpu_to_le32(sdata->u.sta.mesh_seqnum), &meshhdr->seqnum);
sdata->u.sta.mesh_seqnum++;
- return 5;
+ return 6;
}
void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 3df809222d1..af0cd1e3e21 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -120,7 +120,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
*pos++ = WLAN_EID_PREP;
break;
default:
- kfree(skb);
+ kfree_skb(skb);
return -ENOTSUPP;
break;
}
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 5845dc21ce8..99c2d360888 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -158,19 +158,25 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
if (atomic_add_unless(&sdata->u.sta.mpaths, 1, MESH_MAX_MPATHS) == 0)
return -ENOSPC;
- read_lock(&pathtbl_resize_lock);
-
new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL);
if (!new_mpath) {
atomic_dec(&sdata->u.sta.mpaths);
err = -ENOMEM;
goto endadd2;
}
+ new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
+ if (!new_node) {
+ kfree(new_mpath);
+ atomic_dec(&sdata->u.sta.mpaths);
+ err = -ENOMEM;
+ goto endadd2;
+ }
+
+ read_lock(&pathtbl_resize_lock);
memcpy(new_mpath->dst, dst, ETH_ALEN);
new_mpath->dev = dev;
new_mpath->flags = 0;
skb_queue_head_init(&new_mpath->frame_queue);
- new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
new_node->mpath = new_mpath;
new_mpath->timer.data = (unsigned long) new_mpath;
new_mpath->timer.function = mesh_path_timer;
@@ -202,7 +208,6 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
endadd:
spin_unlock(&mesh_paths->hashwlock[hash_idx]);
-endadd2:
read_unlock(&pathtbl_resize_lock);
if (!err && grow) {
struct mesh_table *oldtbl, *newtbl;
@@ -215,10 +220,12 @@ endadd2:
return -ENOMEM;
}
rcu_assign_pointer(mesh_paths, newtbl);
+ write_unlock(&pathtbl_resize_lock);
+
synchronize_rcu();
mesh_table_free(oldtbl, false);
- write_unlock(&pathtbl_resize_lock);
}
+endadd2:
return err;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a5e5c31c23a..e470bf12b76 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -665,6 +665,26 @@ static void ieee80211_authenticate(struct net_device *dev,
mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
}
+static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss,
+ struct ieee80211_supported_band *sband,
+ u64 *rates)
+{
+ int i, j, count;
+ *rates = 0;
+ count = 0;
+ for (i = 0; i < bss->supp_rates_len; i++) {
+ int rate = (bss->supp_rates[i] & 0x7F) * 5;
+
+ for (j = 0; j < sband->n_bitrates; j++)
+ if (sband->bitrates[j].bitrate == rate) {
+ *rates |= BIT(j);
+ count++;
+ break;
+ }
+ }
+
+ return count;
+}
static void ieee80211_send_assoc(struct net_device *dev,
struct ieee80211_if_sta *ifsta)
@@ -673,11 +693,12 @@ static void ieee80211_send_assoc(struct net_device *dev,
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
u8 *pos, *ies;
- int i, len;
+ int i, len, count, rates_len, supp_rates_len;
u16 capab;
struct ieee80211_sta_bss *bss;
int wmm = 0;
struct ieee80211_supported_band *sband;
+ u64 rates = 0;
skb = dev_alloc_skb(local->hw.extra_tx_headroom +
sizeof(*mgmt) + 200 + ifsta->extra_ie_len +
@@ -740,24 +761,39 @@ static void ieee80211_send_assoc(struct net_device *dev,
*pos++ = ifsta->ssid_len;
memcpy(pos, ifsta->ssid, ifsta->ssid_len);
+ /* all supported rates should be added here but some APs
+ * (e.g. D-Link DAP 1353 in b-only mode) don't like that
+ * Therefore only add rates the AP supports */
+ rates_len = ieee80211_compatible_rates(bss, sband, &rates);
+ supp_rates_len = rates_len;
+ if (supp_rates_len > 8)
+ supp_rates_len = 8;
+
len = sband->n_bitrates;
- if (len > 8)
- len = 8;
- pos = skb_put(skb, len + 2);
+ pos = skb_put(skb, supp_rates_len + 2);
*pos++ = WLAN_EID_SUPP_RATES;
- *pos++ = len;
- for (i = 0; i < len; i++) {
- int rate = sband->bitrates[i].bitrate;
- *pos++ = (u8) (rate / 5);
- }
+ *pos++ = supp_rates_len;
- if (sband->n_bitrates > len) {
- pos = skb_put(skb, sband->n_bitrates - len + 2);
- *pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = sband->n_bitrates - len;
- for (i = len; i < sband->n_bitrates; i++) {
+ count = 0;
+ for (i = 0; i < sband->n_bitrates; i++) {
+ if (BIT(i) & rates) {
int rate = sband->bitrates[i].bitrate;
*pos++ = (u8) (rate / 5);
+ if (++count == 8)
+ break;
+ }
+ }
+
+ if (count == 8) {
+ pos = skb_put(skb, rates_len - count + 2);
+ *pos++ = WLAN_EID_EXT_SUPP_RATES;
+ *pos++ = rates_len - count;
+
+ for (i++; i < sband->n_bitrates; i++) {
+ if (BIT(i) & rates) {
+ int rate = sband->bitrates[i].bitrate;
+ *pos++ = (u8) (rate / 5);
+ }
}
}
@@ -3410,21 +3446,17 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
struct ieee80211_sta_bss *bss, *selected = NULL;
int top_rssi = 0, freq;
- if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
- IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) {
- ifsta->state = IEEE80211_AUTHENTICATE;
- ieee80211_sta_reset_auth(dev, ifsta);
- return 0;
- }
-
spin_lock_bh(&local->sta_bss_lock);
freq = local->oper_channel->center_freq;
list_for_each_entry(bss, &local->sta_bss_list, list) {
if (!(bss->capability & WLAN_CAPABILITY_ESS))
continue;
- if (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
- !!sdata->default_key)
+ if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
+ IEEE80211_STA_AUTO_BSSID_SEL |
+ IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
+ (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
+ !!sdata->default_key))
continue;
if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 02f436a8606..1958bfb361c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1305,11 +1305,11 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if (is_multicast_ether_addr(skb->data)) {
if (*mesh_ttl > 0) {
xmit_skb = skb_copy(skb, GFP_ATOMIC);
- if (!xmit_skb && net_ratelimit())
+ if (xmit_skb)
+ xmit_skb->pkt_type = PACKET_OTHERHOST;
+ else if (net_ratelimit())
printk(KERN_DEBUG "%s: failed to clone "
"multicast frame\n", dev->name);
- else
- xmit_skb->pkt_type = PACKET_OTHERHOST;
} else
IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.sta,
dropped_frames_ttl);
@@ -1395,7 +1395,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
padding = ((4 - subframe_len) & 0x3);
/* the last MSDU has no padding */
if (subframe_len > remaining) {
- printk(KERN_DEBUG "%s: wrong buffer size", dev->name);
+ printk(KERN_DEBUG "%s: wrong buffer size\n", dev->name);
return RX_DROP_UNUSABLE;
}
@@ -1418,7 +1418,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
eth = (struct ethhdr *) skb_pull(skb, ntohs(len) +
padding);
if (!eth) {
- printk(KERN_DEBUG "%s: wrong buffer size ",
+ printk(KERN_DEBUG "%s: wrong buffer size\n",
dev->name);
dev_kfree_skb(frame);
return RX_DROP_UNUSABLE;
@@ -1952,7 +1952,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (!skb_new) {
if (net_ratelimit())
printk(KERN_DEBUG "%s: failed to copy "
- "multicast frame for %s",
+ "multicast frame for %s\n",
wiphy_name(local->hw.wiphy),
prev->dev->name);
continue;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f35eaea98e7..1d7dd54aace 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1562,13 +1562,13 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
* be cloned. This could happen, e.g., with Linux bridge code passing
* us broadcast frames. */
- if (head_need > 0 || skb_cloned(skb)) {
+ if (head_need > 0 || skb_header_cloned(skb)) {
#if 0
printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes "
"of headroom\n", dev->name, head_need);
#endif
- if (skb_cloned(skb))
+ if (skb_header_cloned(skb))
I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
else
I802_DEBUG_INC(local->tx_expand_skb_head);
@@ -1898,6 +1898,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
control->flags |= IEEE80211_TXCTL_NO_ACK;
+ control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
control->retry_limit = 1;
control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index cc9f715c7bf..131e9e6c8a3 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -153,15 +153,15 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
/* 7.1.3.5a.2 */
switch (ae) {
case 0:
- return 5;
+ return 6;
case 1:
- return 11;
+ return 12;
case 2:
- return 17;
+ return 18;
case 3:
- return 23;
+ return 24;
default:
- return 5;
+ return 6;
}
}
@@ -389,6 +389,41 @@ void ieee80211_iterate_active_interfaces(
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata;
+ rtnl_lock();
+
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ switch (sdata->vif.type) {
+ case IEEE80211_IF_TYPE_INVALID:
+ case IEEE80211_IF_TYPE_MNTR:
+ case IEEE80211_IF_TYPE_VLAN:
+ continue;
+ case IEEE80211_IF_TYPE_AP:
+ case IEEE80211_IF_TYPE_STA:
+ case IEEE80211_IF_TYPE_IBSS:
+ case IEEE80211_IF_TYPE_WDS:
+ case IEEE80211_IF_TYPE_MESH_POINT:
+ break;
+ }
+ if (sdata->dev == local->mdev)
+ continue;
+ if (netif_running(sdata->dev))
+ iterator(data, sdata->dev->dev_addr,
+ &sdata->vif);
+ }
+
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+
+void ieee80211_iterate_active_interfaces_atomic(
+ struct ieee80211_hw *hw,
+ void (*iterator)(void *data, u8 *mac,
+ struct ieee80211_vif *vif),
+ void *data)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_sub_if_data *sdata;
+
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -413,4 +448,4 @@ void ieee80211_iterate_active_interfaces(
rcu_read_unlock();
}
-EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 64faa3dc488..dc1598b8600 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -394,7 +394,8 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
qd->handle);
if (!q->queues[i]) {
q->queues[i] = &noop_qdisc;
- printk(KERN_ERR "%s child qdisc %i creation failed", dev->name, i);
+ printk(KERN_ERR "%s child qdisc %i creation failed\n",
+ dev->name, i);
}
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 16774ecd1c4..0edefcfc594 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -472,6 +472,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
+ if (ctnetlink_dump_id(skb, ct) < 0)
+ goto nla_put_failure;
+
if (events & IPCT_DESTROY) {
if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 500528d60cd..c63e9333c75 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -179,3 +179,5 @@ module_exit(iprange_mt_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>, Jan Engelhardt <jengelh@computergmbh.de>");
MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
+MODULE_ALIAS("ipt_iprange");
+MODULE_ALIAS("ip6t_iprange");
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 25070240d4a..2cee87da444 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -743,7 +743,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
if (len > dev->mtu+reserve)
goto out_unlock;
- skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
+ skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
msg->msg_flags & MSG_DONTWAIT, &err);
if (skb==NULL)
goto out_unlock;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1086df7478b..9360fc81e8c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -220,7 +220,7 @@ replay:
tp = kzalloc(sizeof(*tp), GFP_KERNEL);
if (tp == NULL)
goto errout;
- err = -EINVAL;
+ err = -ENOENT;
tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
if (tp_ops == NULL) {
#ifdef CONFIG_KMOD
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 81b606424e1..bbc7107c86c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2418,7 +2418,8 @@ static int sctp_process_param(struct sctp_association *asoc,
break;
case SCTP_PARAM_IPV6_ADDRESS:
- asoc->peer.ipv6_address = 1;
+ if (PF_INET6 == asoc->base.sk->sk_family)
+ asoc->peer.ipv6_address = 1;
break;
case SCTP_PARAM_HOST_NAME_ADDRESS:
@@ -2829,6 +2830,19 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
addr_param = (union sctp_addr_param *)
((void *)asconf_param + sizeof(sctp_addip_param_t));
+ switch (addr_param->v4.param_hdr.type) {
+ case SCTP_PARAM_IPV6_ADDRESS:
+ if (!asoc->peer.ipv6_address)
+ return SCTP_ERROR_INV_PARAM;
+ break;
+ case SCTP_PARAM_IPV4_ADDRESS:
+ if (!asoc->peer.ipv4_address)
+ return SCTP_ERROR_INV_PARAM;
+ break;
+ default:
+ return SCTP_ERROR_INV_PARAM;
+ }
+
af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type));
if (unlikely(!af))
return SCTP_ERROR_INV_PARAM;
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d927d9f5741..744b79fdcb1 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -17,8 +17,8 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-#define RPC_ANONYMOUS_USERID ((uid_t)-2)
-#define RPC_ANONYMOUS_GROUPID ((gid_t)-2)
+#define RPC_MACHINE_CRED_USERID ((uid_t)0)
+#define RPC_MACHINE_CRED_GROUPID ((gid_t)0)
struct generic_cred {
struct rpc_cred gc_base;
@@ -44,8 +44,8 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred);
struct rpc_cred *rpc_lookup_machine_cred(void)
{
struct auth_cred acred = {
- .uid = RPC_ANONYMOUS_USERID,
- .gid = RPC_ANONYMOUS_GROUPID,
+ .uid = RPC_MACHINE_CRED_USERID,
+ .gid = RPC_MACHINE_CRED_GROUPID,
.machine_cred = 1,
};
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d8e8d79a845..e46c825f495 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,30 +6,9 @@
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <net/sock.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <asm/ioctls.h>
-
-#include <linux/sunrpc/types.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
@@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
if (!(xprt->xpt_flags &
((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
return;
- if (test_bit(XPT_DEAD, &xprt->xpt_flags))
- return;
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 3f30ee6006a..f24800f2c09 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -278,7 +278,7 @@ static int ip_map_show(struct seq_file *m,
dom = im->m_client->h.name;
if (ipv6_addr_v4mapped(&addr)) {
- seq_printf(m, "%s" NIPQUAD_FMT "%s\n",
+ seq_printf(m, "%s " NIPQUAD_FMT " %s\n",
im->m_class,
ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
@@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m,
ntohl(addr.s6_addr32[3]) >> 0 & 0xff,
dom);
} else {
- seq_printf(m, "%s" NIP6_FMT "%s\n",
+ seq_printf(m, "%s " NIP6_FMT " %s\n",
im->m_class, NIP6(addr), dom);
}
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c22d6b6f2db..06ab4841537 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -260,11 +260,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
* On our side, we need to read into a pagelist. The first page immediately
* follows the RPC header.
*
- * This function returns 1 to indicate success. The data is not yet in
+ * This function returns:
+ * 0 - No error and no read-list found.
+ *
+ * 1 - Successful read-list processing. The data is not yet in
* the pagelist and therefore the RPC request must be deferred. The
* I/O completion will enqueue the transport again and
* svc_rdma_recvfrom will complete the request.
*
+ * <0 - Error processing/posting read-list.
+ *
* NOTE: The ctxt must not be touched after the last WR has been posted
* because the I/O completion processing may occur on another
* processor and free / modify the context. Ne touche pas!
@@ -284,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
u64 sgl_offset;
struct rpcrdma_read_chunk *ch;
struct svc_rdma_op_ctxt *ctxt = NULL;
- struct svc_rdma_op_ctxt *head;
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct svc_rdma_op_ctxt *tmp_ch_ctxt;
struct chunk_sge *ch_sge_ary;
@@ -302,25 +306,19 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
+ if (ch_count > RPCSVC_MAXPAGES)
+ return -EINVAL;
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
sge, ch_sge_ary,
ch_count, byte_count);
- head = svc_rdma_get_context(xprt);
sgl_offset = 0;
ch_no = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch->rc_discrim != 0; ch++, ch_no++) {
next_sge:
- if (!ctxt)
- ctxt = head;
- else {
- ctxt->next = svc_rdma_get_context(xprt);
- ctxt = ctxt->next;
- }
- ctxt->next = NULL;
+ ctxt = svc_rdma_get_context(xprt);
ctxt->direction = DMA_FROM_DEVICE;
- clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
/* Prepare READ WR */
@@ -347,20 +345,15 @@ next_sge:
* the client and the RPC needs to be enqueued.
*/
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- ctxt->next = hdr_ctxt;
- hdr_ctxt->next = head;
+ ctxt->read_hdr = hdr_ctxt;
}
/* Post the read */
err = svc_rdma_send(xprt, &read_wr);
if (err) {
- printk(KERN_ERR "svcrdma: Error posting send = %d\n",
+ printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
err);
- /*
- * Break the circular list so free knows when
- * to stop if the error happened to occur on
- * the last read
- */
- ctxt->next = NULL;
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ svc_rdma_put_context(ctxt, 0);
goto out;
}
atomic_inc(&rdma_stat_read);
@@ -371,7 +364,7 @@ next_sge:
goto next_sge;
}
sgl_offset = 0;
- err = 0;
+ err = 1;
}
out:
@@ -389,25 +382,12 @@ next_sge:
while (rqstp->rq_resused)
rqstp->rq_respages[--rqstp->rq_resused] = NULL;
- if (err) {
- printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- /* Free the linked list of read contexts */
- while (head != NULL) {
- ctxt = head->next;
- svc_rdma_put_context(head, 1);
- head = ctxt;
- }
- return 0;
- }
-
- return 1;
+ return err;
}
static int rdma_read_complete(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *data)
+ struct svc_rdma_op_ctxt *head)
{
- struct svc_rdma_op_ctxt *head = data->next;
int page_no;
int ret;
@@ -433,21 +413,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_arg.len = head->arg.len;
rqstp->rq_arg.buflen = head->arg.buflen;
+ /* Free the context */
+ svc_rdma_put_context(head, 0);
+
/* XXX: What should this be? */
rqstp->rq_prot = IPPROTO_MAX;
-
- /*
- * Free the contexts we used to build the RDMA_READ. We have
- * to be careful here because the context list uses the same
- * next pointer used to chain the contexts associated with the
- * RDMA_READ
- */
- data->next = NULL; /* terminate circular list */
- do {
- data = head->next;
- svc_rdma_put_context(head, 0);
- head = data;
- } while (head != NULL);
+ svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
@@ -457,8 +428,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
- /* Indicate that we've consumed an RQ credit */
- rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
svc_xprt_received(rqstp->rq_xprt);
return ret;
}
@@ -480,13 +449,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
dprintk("svcrdma: rqstp=%p\n", rqstp);
- /*
- * The rq_xprt_ctxt indicates if we've consumed an RQ credit
- * or not. It is used in the rdma xpo_release_rqst function to
- * determine whether or not to return an RQ WQE to the RQ.
- */
- rqstp->rq_xprt_ctxt = NULL;
-
spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
@@ -537,21 +499,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
/* If the request is invalid, reply with an error */
if (len < 0) {
if (len == -ENOSYS)
- (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
+ svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
goto close_out;
}
- /* Read read-list data. If we would need to wait, defer
- * it. Not that in this case, we don't return the RQ credit
- * until after the read completes.
- */
- if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
+ /* Read read-list data. */
+ ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+ if (ret > 0) {
+ /* read-list posted, defer until data received from client. */
svc_xprt_received(xprt);
return 0;
}
-
- /* Indicate we've consumed an RQ credit */
- rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
+ if (ret < 0) {
+ /* Post of read-list failed, free context. */
+ svc_rdma_put_context(ctxt, 1);
+ return 0;
+ }
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
@@ -569,11 +532,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
return ret;
close_out:
- if (ctxt) {
+ if (ctxt)
svc_rdma_put_context(ctxt, 1);
- /* Indicate we've consumed an RQ credit */
- rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
- }
dprintk("svcrdma: transport %p is closing\n", xprt);
/*
* Set the close bit and enqueue it. svc_recv will see the
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 981f190c1b3..fb82b1b683f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -389,6 +389,17 @@ static int send_reply(struct svcxprt_rdma *rdma,
int page_no;
int ret;
+ /* Post a recv buffer to handle another request. */
+ ret = svc_rdma_post_recv(rdma);
+ if (ret) {
+ printk(KERN_INFO
+ "svcrdma: could not post a receive buffer, err=%d."
+ "Closing transport %p.\n", ret, rdma);
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ svc_rdma_put_context(ctxt, 0);
+ return -ENOTCONN;
+ }
+
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index af408fc1263..e132509d1db 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
spin_lock_bh(&xprt->sc_ctxt_lock);
if (ctxt) {
at_least_one = 1;
- ctxt->next = xprt->sc_ctxt_head;
- xprt->sc_ctxt_head = ctxt;
+ INIT_LIST_HEAD(&ctxt->free_list);
+ list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
} else {
/* kmalloc failed...give up for now */
xprt->sc_ctxt_cnt--;
@@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
while (1) {
spin_lock_bh(&xprt->sc_ctxt_lock);
- if (unlikely(xprt->sc_ctxt_head == NULL)) {
+ if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
/* Try to bump my cache. */
spin_unlock_bh(&xprt->sc_ctxt_lock);
@@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
continue;
}
- ctxt = xprt->sc_ctxt_head;
- xprt->sc_ctxt_head = ctxt->next;
+ ctxt = list_entry(xprt->sc_ctxt_free.next,
+ struct svc_rdma_op_ctxt,
+ free_list);
+ list_del_init(&ctxt->free_list);
spin_unlock_bh(&xprt->sc_ctxt_lock);
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q);
ctxt->count = 0;
+ atomic_inc(&xprt->sc_ctxt_used);
break;
}
return ctxt;
@@ -159,14 +162,15 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
put_page(ctxt->pages[i]);
for (i = 0; i < ctxt->count; i++)
- dma_unmap_single(xprt->sc_cm_id->device->dma_device,
- ctxt->sge[i].addr,
- ctxt->sge[i].length,
- ctxt->direction);
+ ib_dma_unmap_single(xprt->sc_cm_id->device,
+ ctxt->sge[i].addr,
+ ctxt->sge[i].length,
+ ctxt->direction);
+
spin_lock_bh(&xprt->sc_ctxt_lock);
- ctxt->next = xprt->sc_ctxt_head;
- xprt->sc_ctxt_head = ctxt;
+ list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
spin_unlock_bh(&xprt->sc_ctxt_lock);
+ atomic_dec(&xprt->sc_ctxt_used);
}
/* ib_cq event handler */
@@ -228,23 +232,8 @@ static void dto_tasklet_func(unsigned long data)
list_del_init(&xprt->sc_dto_q);
spin_unlock_irqrestore(&dto_lock, flags);
- if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) {
- ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
- rq_cq_reap(xprt);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- /*
- * If data arrived before established event,
- * don't enqueue. This defers RPC I/O until the
- * RDMA connection is complete.
- */
- if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
- svc_xprt_enqueue(&xprt->sc_xprt);
- }
-
- if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) {
- ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
- sq_cq_reap(xprt);
- }
+ rq_cq_reap(xprt);
+ sq_cq_reap(xprt);
svc_xprt_put(&xprt->sc_xprt);
spin_lock_irqsave(&dto_lock, flags);
@@ -263,11 +252,15 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
struct svcxprt_rdma *xprt = cq_context;
unsigned long flags;
+ /* Guard against unconditional flush call for destroyed QP */
+ if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
+ return;
+
/*
* Set the bit regardless of whether or not it's on the list
* because it may be on the list already due to an SQ
* completion.
- */
+ */
set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags);
/*
@@ -290,6 +283,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
*
* Take all completing WC off the CQE and enqueue the associated DTO
* context on the dto_q for the transport.
+ *
+ * Note that caller must hold a transport reference.
*/
static void rq_cq_reap(struct svcxprt_rdma *xprt)
{
@@ -297,29 +292,47 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
struct ib_wc wc;
struct svc_rdma_op_ctxt *ctxt = NULL;
+ if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
+ return;
+
+ ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_rq_poll);
- spin_lock_bh(&xprt->sc_rq_dto_lock);
while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
ctxt->wc_status = wc.status;
ctxt->byte_len = wc.byte_len;
if (wc.status != IB_WC_SUCCESS) {
/* Close the transport */
+ dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1);
+ svc_xprt_put(&xprt->sc_xprt);
continue;
}
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ svc_xprt_put(&xprt->sc_xprt);
}
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
if (ctxt)
atomic_inc(&rdma_stat_rq_prod);
+
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ /*
+ * If data arrived before established event,
+ * don't enqueue. This defers RPC I/O until the
+ * RDMA connection is complete.
+ */
+ if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
+ svc_xprt_enqueue(&xprt->sc_xprt);
}
/*
* Send Queue Completion Handler - potentially called on interrupt context.
+ *
+ * Note that caller must hold a transport reference.
*/
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
@@ -328,6 +341,11 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
struct ib_cq *cq = xprt->sc_sq_cq;
int ret;
+
+ if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
+ return;
+
+ ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_sq_poll);
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
@@ -349,14 +367,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
case IB_WR_RDMA_READ:
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+ struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+ BUG_ON(!read_hdr);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
spin_lock_bh(&xprt->sc_read_complete_lock);
- list_add_tail(&ctxt->dto_q,
+ list_add_tail(&read_hdr->dto_q,
&xprt->sc_read_complete_q);
spin_unlock_bh(&xprt->sc_read_complete_lock);
svc_xprt_enqueue(&xprt->sc_xprt);
}
+ svc_rdma_put_context(ctxt, 0);
break;
default:
@@ -365,6 +385,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
wc.opcode, wc.status);
break;
}
+ svc_xprt_put(&xprt->sc_xprt);
}
if (ctxt)
@@ -376,11 +397,15 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
struct svcxprt_rdma *xprt = cq_context;
unsigned long flags;
+ /* Guard against unconditional flush call for destroyed QP */
+ if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
+ return;
+
/*
* Set the bit regardless of whether or not it's on the list
* because it may be on the list already due to an RQ
* completion.
- */
+ */
set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
/*
@@ -407,28 +432,29 @@ static void create_context_cache(struct svcxprt_rdma *xprt,
xprt->sc_ctxt_max = ctxt_max;
xprt->sc_ctxt_bump = ctxt_bump;
xprt->sc_ctxt_cnt = 0;
- xprt->sc_ctxt_head = NULL;
+ atomic_set(&xprt->sc_ctxt_used, 0);
+
+ INIT_LIST_HEAD(&xprt->sc_ctxt_free);
for (i = 0; i < ctxt_count; i++) {
ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
if (ctxt) {
- ctxt->next = xprt->sc_ctxt_head;
- xprt->sc_ctxt_head = ctxt;
+ INIT_LIST_HEAD(&ctxt->free_list);
+ list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
xprt->sc_ctxt_cnt++;
}
}
}
-static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt)
+static void destroy_context_cache(struct svcxprt_rdma *xprt)
{
- struct svc_rdma_op_ctxt *next;
- if (!ctxt)
- return;
-
- do {
- next = ctxt->next;
+ while (!list_empty(&xprt->sc_ctxt_free)) {
+ struct svc_rdma_op_ctxt *ctxt;
+ ctxt = list_entry(xprt->sc_ctxt_free.next,
+ struct svc_rdma_op_ctxt,
+ free_list);
+ list_del_init(&ctxt->free_list);
kfree(ctxt);
- ctxt = next;
- } while (next);
+ }
}
static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
@@ -465,7 +491,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
reqs +
cma_xprt->sc_sq_depth +
RPCRDMA_MAX_THREADS + 1); /* max */
- if (!cma_xprt->sc_ctxt_head) {
+ if (list_empty(&cma_xprt->sc_ctxt_free)) {
kfree(cma_xprt);
return NULL;
}
@@ -520,7 +546,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
recv_wr.num_sge = ctxt->count;
recv_wr.wr_id = (u64)(unsigned long)ctxt;
+ svc_xprt_get(&xprt->sc_xprt);
ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
+ if (ret) {
+ svc_xprt_put(&xprt->sc_xprt);
+ svc_rdma_put_context(ctxt, 1);
+ }
return ret;
}
@@ -539,6 +570,7 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
{
struct svcxprt_rdma *listen_xprt = new_cma_id->context;
struct svcxprt_rdma *newxprt;
+ struct sockaddr *sa;
/* Create a new transport */
newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
@@ -551,6 +583,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
newxprt, newxprt->sc_cm_id, listen_xprt);
+ /* Set the local and remote addresses in the transport */
+ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
+ svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
+ svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+
/*
* Enqueue the new transport on the accept queue of the listening
* transport
@@ -627,6 +665,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
if (xprt) {
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
+ svc_xprt_put(xprt);
}
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
@@ -661,31 +700,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
cma_xprt = rdma_create_xprt(serv, 1);
if (!cma_xprt)
- return ERR_PTR(ENOMEM);
+ return ERR_PTR(-ENOMEM);
xprt = &cma_xprt->sc_xprt;
listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
if (IS_ERR(listen_id)) {
- svc_xprt_put(&cma_xprt->sc_xprt);
- dprintk("svcrdma: rdma_create_id failed = %ld\n",
- PTR_ERR(listen_id));
- return (void *)listen_id;
+ ret = PTR_ERR(listen_id);
+ dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
+ goto err0;
}
+
ret = rdma_bind_addr(listen_id, sa);
if (ret) {
- rdma_destroy_id(listen_id);
- svc_xprt_put(&cma_xprt->sc_xprt);
dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
- return ERR_PTR(ret);
+ goto err1;
}
cma_xprt->sc_cm_id = listen_id;
ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
if (ret) {
- rdma_destroy_id(listen_id);
- svc_xprt_put(&cma_xprt->sc_xprt);
dprintk("svcrdma: rdma_listen failed = %d\n", ret);
- return ERR_PTR(ret);
+ goto err1;
}
/*
@@ -696,6 +731,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
return &cma_xprt->sc_xprt;
+
+ err1:
+ rdma_destroy_id(listen_id);
+ err0:
+ kfree(cma_xprt);
+ return ERR_PTR(ret);
}
/*
@@ -716,7 +757,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct ib_device_attr devattr;
- struct sockaddr *sa;
int ret;
int i;
@@ -826,7 +866,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
}
- svc_xprt_get(&newxprt->sc_xprt);
newxprt->sc_qp = newxprt->sc_cm_id->qp;
/* Register all of physical memory */
@@ -850,6 +889,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Swap out the handler */
newxprt->sc_cm_id->event_handler = rdma_cma_handler;
+ /*
+ * Arm the CQs for the SQ and RQ before accepting so we can't
+ * miss the first message
+ */
+ ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
+ ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
+
/* Accept Connection */
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
memset(&conn_param, 0, sizeof conn_param);
@@ -886,58 +932,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_requests,
newxprt->sc_ord);
- /* Set the local and remote addresses in the transport */
- sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
- svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
- sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
- svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
-
- ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
- ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
return &newxprt->sc_xprt;
errout:
dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
/* Take a reference in case the DTO handler runs */
svc_xprt_get(&newxprt->sc_xprt);
- if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) {
+ if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
ib_destroy_qp(newxprt->sc_qp);
- svc_xprt_put(&newxprt->sc_xprt);
- }
rdma_destroy_id(newxprt->sc_cm_id);
/* This call to put will destroy the transport */
svc_xprt_put(&newxprt->sc_xprt);
return NULL;
}
-/*
- * Post an RQ WQE to the RQ when the rqst is being released. This
- * effectively returns an RQ credit to the client. The rq_xprt_ctxt
- * will be null if the request is deferred due to an RDMA_READ or the
- * transport had no data ready (EAGAIN). Note that an RPC deferred in
- * svc_process will still return the credit, this is because the data
- * is copied and no longer consume a WQE/WC.
- */
static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
{
- int err;
- struct svcxprt_rdma *rdma =
- container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
- if (rqstp->rq_xprt_ctxt) {
- BUG_ON(rqstp->rq_xprt_ctxt != rdma);
- err = svc_rdma_post_recv(rdma);
- if (err)
- dprintk("svcrdma: failed to post an RQ WQE error=%d\n",
- err);
- }
- rqstp->rq_xprt_ctxt = NULL;
}
/*
- * When connected, an svc_xprt has at least three references:
- *
- * - A reference held by the QP. We still hold that here because this
- * code deletes the QP and puts the reference.
+ * When connected, an svc_xprt has at least two references:
*
* - A reference held by the cm_id between the ESTABLISHED and
* DISCONNECTED events. If the remote peer disconnected first, this
@@ -946,7 +960,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
* - A reference held by the svc_recv code that called this function
* as part of close processing.
*
- * At a minimum two references should still be held.
+ * At a minimum one references should still be held.
*/
static void svc_rdma_detach(struct svc_xprt *xprt)
{
@@ -956,23 +970,53 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
/* Disconnect and flush posted WQE */
rdma_disconnect(rdma->sc_cm_id);
-
- /* Destroy the QP if present (not a listener) */
- if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
- ib_destroy_qp(rdma->sc_qp);
- svc_xprt_put(xprt);
- }
-
- /* Destroy the CM ID */
- rdma_destroy_id(rdma->sc_cm_id);
}
-static void svc_rdma_free(struct svc_xprt *xprt)
+static void __svc_rdma_free(struct work_struct *work)
{
- struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
+ struct svcxprt_rdma *rdma =
+ container_of(work, struct svcxprt_rdma, sc_work);
dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+
/* We should only be called from kref_put */
- BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
+ BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
+
+ /*
+ * Destroy queued, but not processed read completions. Note
+ * that this cleanup has to be done before destroying the
+ * cm_id because the device ptr is needed to unmap the dma in
+ * svc_rdma_put_context.
+ */
+ spin_lock_bh(&rdma->sc_read_complete_lock);
+ while (!list_empty(&rdma->sc_read_complete_q)) {
+ struct svc_rdma_op_ctxt *ctxt;
+ ctxt = list_entry(rdma->sc_read_complete_q.next,
+ struct svc_rdma_op_ctxt,
+ dto_q);
+ list_del_init(&ctxt->dto_q);
+ svc_rdma_put_context(ctxt, 1);
+ }
+ spin_unlock_bh(&rdma->sc_read_complete_lock);
+
+ /* Destroy queued, but not processed recv completions */
+ spin_lock_bh(&rdma->sc_rq_dto_lock);
+ while (!list_empty(&rdma->sc_rq_dto_q)) {
+ struct svc_rdma_op_ctxt *ctxt;
+ ctxt = list_entry(rdma->sc_rq_dto_q.next,
+ struct svc_rdma_op_ctxt,
+ dto_q);
+ list_del_init(&ctxt->dto_q);
+ svc_rdma_put_context(ctxt, 1);
+ }
+ spin_unlock_bh(&rdma->sc_rq_dto_lock);
+
+ /* Warn if we leaked a resource or under-referenced */
+ WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
+
+ /* Destroy the QP if present (not a listener) */
+ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
+ ib_destroy_qp(rdma->sc_qp);
+
if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
ib_destroy_cq(rdma->sc_sq_cq);
@@ -985,10 +1029,21 @@ static void svc_rdma_free(struct svc_xprt *xprt)
if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
ib_dealloc_pd(rdma->sc_pd);
- destroy_context_cache(rdma->sc_ctxt_head);
+ /* Destroy the CM ID */
+ rdma_destroy_id(rdma->sc_cm_id);
+
+ destroy_context_cache(rdma);
kfree(rdma);
}
+static void svc_rdma_free(struct svc_xprt *xprt)
+{
+ struct svcxprt_rdma *rdma =
+ container_of(xprt, struct svcxprt_rdma, sc_xprt);
+ INIT_WORK(&rdma->sc_work, __svc_rdma_free);
+ schedule_work(&rdma->sc_work);
+}
+
static int svc_rdma_has_wspace(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
@@ -1018,7 +1073,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
int ret;
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
- return 0;
+ return -ENOTCONN;
BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
@@ -1029,7 +1084,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
spin_unlock_bh(&xprt->sc_lock);
atomic_inc(&rdma_stat_sq_starve);
- /* See if we can reap some SQ WR */
+
+ /* See if we can opportunistically reap SQ WR to make room */
sq_cq_reap(xprt);
/* Wait until SQ WR available if SQ still full */
@@ -1041,22 +1097,25 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
continue;
}
/* Bumped used SQ WR count and post */
+ svc_xprt_get(&xprt->sc_xprt);
ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
if (!ret)
atomic_inc(&xprt->sc_sq_count);
- else
+ else {
+ svc_xprt_put(&xprt->sc_xprt);
dprintk("svcrdma: failed to post SQ WR rc=%d, "
"sc_sq_count=%d, sc_sq_depth=%d\n",
ret, atomic_read(&xprt->sc_sq_count),
xprt->sc_sq_depth);
+ }
spin_unlock_bh(&xprt->sc_lock);
break;
}
return ret;
}
-int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
- enum rpcrdma_errcode err)
+void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
+ enum rpcrdma_errcode err)
{
struct ib_send_wr err_wr;
struct ib_sge sge;
@@ -1094,9 +1153,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
/* Post It */
ret = svc_rdma_send(xprt, &err_wr);
if (ret) {
- dprintk("svcrdma: Error posting send = %d\n", ret);
+ dprintk("svcrdma: Error %d posting send for protocol error\n",
+ ret);
svc_rdma_put_context(ctxt, 1);
}
-
- return ret;
}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 09cd9c0c2d8..3f964db908a 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -25,11 +25,11 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
struct dst_entry *dst = skb->dst;
int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
- skb_headroom(skb);
+ int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);
- if (nhead > 0)
- return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
+ if (nhead > 0 || ntail > 0)
+ return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
- /* Check tail too... */
return 0;
}