aboutsummaryrefslogtreecommitdiff
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/svc.c158
-rw-r--r--net/sunrpc/svcsock.c101
2 files changed, 183 insertions, 76 deletions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4ab137403e1..b4db53ff143 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -27,22 +27,26 @@
#define RPCDBG_FACILITY RPCDBG_SVCDSP
+#define svc_serv_is_pooled(serv) ((serv)->sv_function)
+
/*
* Mode for mapping cpus to pools.
*/
enum {
- SVC_POOL_NONE = -1, /* uninitialised, choose one of the others */
+ SVC_POOL_AUTO = -1, /* choose one of the others */
SVC_POOL_GLOBAL, /* no mapping, just a single global pool
* (legacy & UP mode) */
SVC_POOL_PERCPU, /* one pool per cpu */
SVC_POOL_PERNODE /* one pool per numa node */
};
+#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
*/
static struct svc_pool_map {
+ int count; /* How many svc_servs use us */
int mode; /* Note: int not enum to avoid
* warnings about "enumeration value
* not handled in switch" */
@@ -50,9 +54,63 @@ static struct svc_pool_map {
unsigned int *pool_to; /* maps pool id to cpu or node */
unsigned int *to_pool; /* maps cpu or node to pool id */
} svc_pool_map = {
- .mode = SVC_POOL_NONE
+ .count = 0,
+ .mode = SVC_POOL_DEFAULT
};
+static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
+
+static int
+param_set_pool_mode(const char *val, struct kernel_param *kp)
+{
+ int *ip = (int *)kp->arg;
+ struct svc_pool_map *m = &svc_pool_map;
+ int err;
+
+ mutex_lock(&svc_pool_map_mutex);
+
+ err = -EBUSY;
+ if (m->count)
+ goto out;
+
+ err = 0;
+ if (!strncmp(val, "auto", 4))
+ *ip = SVC_POOL_AUTO;
+ else if (!strncmp(val, "global", 6))
+ *ip = SVC_POOL_GLOBAL;
+ else if (!strncmp(val, "percpu", 6))
+ *ip = SVC_POOL_PERCPU;
+ else if (!strncmp(val, "pernode", 7))
+ *ip = SVC_POOL_PERNODE;
+ else
+ err = -EINVAL;
+
+out:
+ mutex_unlock(&svc_pool_map_mutex);
+ return err;
+}
+static int
+param_get_pool_mode(char *buf, struct kernel_param *kp)
+{
+ int *ip = (int *)kp->arg;
+
+ switch (*ip)
+ {
+ case SVC_POOL_AUTO:
+ return strlcpy(buf, "auto", 20);
+ case SVC_POOL_GLOBAL:
+ return strlcpy(buf, "global", 20);
+ case SVC_POOL_PERCPU:
+ return strlcpy(buf, "percpu", 20);
+ case SVC_POOL_PERNODE:
+ return strlcpy(buf, "pernode", 20);
+ default:
+ return sprintf(buf, "%d", *ip);
+ }
+}
+
+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+ &svc_pool_map.mode, 0644);
/*
* Detect best pool mapping mode heuristically,
@@ -115,7 +173,7 @@ fail:
static int
svc_pool_map_init_percpu(struct svc_pool_map *m)
{
- unsigned int maxpools = highest_possible_processor_id()+1;
+ unsigned int maxpools = nr_cpu_ids;
unsigned int pidx = 0;
unsigned int cpu;
int err;
@@ -143,7 +201,7 @@ svc_pool_map_init_percpu(struct svc_pool_map *m)
static int
svc_pool_map_init_pernode(struct svc_pool_map *m)
{
- unsigned int maxpools = highest_possible_node_id()+1;
+ unsigned int maxpools = nr_node_ids;
unsigned int pidx = 0;
unsigned int node;
int err;
@@ -166,18 +224,25 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
/*
- * Build the global map of cpus to pools and vice versa.
+ * Add a reference to the global map of cpus to pools (and
+ * vice versa). Initialise the map if we're the first user.
+ * Returns the number of pools.
*/
static unsigned int
-svc_pool_map_init(void)
+svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
int npools = -1;
- if (m->mode != SVC_POOL_NONE)
+ mutex_lock(&svc_pool_map_mutex);
+
+ if (m->count++) {
+ mutex_unlock(&svc_pool_map_mutex);
return m->npools;
+ }
- m->mode = svc_pool_map_choose_mode();
+ if (m->mode == SVC_POOL_AUTO)
+ m->mode = svc_pool_map_choose_mode();
switch (m->mode) {
case SVC_POOL_PERCPU:
@@ -195,9 +260,36 @@ svc_pool_map_init(void)
}
m->npools = npools;
+ mutex_unlock(&svc_pool_map_mutex);
return m->npools;
}
+
+/*
+ * Drop a reference to the global map of cpus to pools.
+ * When the last reference is dropped, the map data is
+ * freed; this allows the sysadmin to change the pool
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ */
+static void
+svc_pool_map_put(void)
+{
+ struct svc_pool_map *m = &svc_pool_map;
+
+ mutex_lock(&svc_pool_map_mutex);
+
+ if (!--m->count) {
+ m->mode = SVC_POOL_DEFAULT;
+ kfree(m->to_pool);
+ kfree(m->pool_to);
+ m->npools = 0;
+ }
+
+ mutex_unlock(&svc_pool_map_mutex);
+}
+
+
/*
* Set the current thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
@@ -212,10 +304,9 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
/*
* The caller checks for sv_nrpools > 1, which
- * implies that we've been initialized and the
- * map mode is not NONE.
+ * implies that we've been initialized.
*/
- BUG_ON(m->mode == SVC_POOL_NONE);
+ BUG_ON(m->count == 0);
switch (m->mode)
{
@@ -246,18 +337,19 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
unsigned int pidx = 0;
/*
- * SVC_POOL_NONE happens in a pure client when
+ * An uninitialised map happens in a pure client when
* lockd is brought up, so silently treat it the
* same as SVC_POOL_GLOBAL.
*/
-
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
+ if (svc_serv_is_pooled(serv)) {
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ pidx = m->to_pool[cpu];
+ break;
+ case SVC_POOL_PERNODE:
+ pidx = m->to_pool[cpu_to_node(cpu)];
+ break;
+ }
}
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
@@ -347,7 +439,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
svc_thread_fn func, int sig, struct module *mod)
{
struct svc_serv *serv;
- unsigned int npools = svc_pool_map_init();
+ unsigned int npools = svc_pool_map_get();
serv = __svc_create(prog, bufsize, npools, shutdown);
@@ -367,6 +459,7 @@ void
svc_destroy(struct svc_serv *serv)
{
struct svc_sock *svsk;
+ struct svc_sock *tmp;
dprintk("svc: svc_destroy(%s, %d)\n",
serv->sv_program->pg_name,
@@ -382,24 +475,23 @@ svc_destroy(struct svc_serv *serv)
del_timer_sync(&serv->sv_temptimer);
- while (!list_empty(&serv->sv_tempsocks)) {
- svsk = list_entry(serv->sv_tempsocks.next,
- struct svc_sock,
- sk_list);
- svc_close_socket(svsk);
- }
+ list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
+ svc_force_close_socket(svsk);
+
if (serv->sv_shutdown)
serv->sv_shutdown(serv);
- while (!list_empty(&serv->sv_permsocks)) {
- svsk = list_entry(serv->sv_permsocks.next,
- struct svc_sock,
- sk_list);
- svc_close_socket(svsk);
- }
+ list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
+ svc_force_close_socket(svsk);
+
+ BUG_ON(!list_empty(&serv->sv_permsocks));
+ BUG_ON(!list_empty(&serv->sv_tempsocks));
cache_clean_deferred(serv);
+ if (svc_serv_is_pooled(serv))
+ svc_pool_map_put();
+
/* Unregister service with the portmapper */
svc_register(serv, 0, 0);
kfree(serv->sv_pools);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 63ae94771b8..f6e1eb1ea72 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -82,6 +82,7 @@ static void svc_delete_socket(struct svc_sock *svsk);
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
+static void svc_close_socket(struct svc_sock *svsk);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
@@ -131,13 +132,13 @@ static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
htons(((struct sockaddr_in *) addr)->sin_port));
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6:
snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
htons(((struct sockaddr_in6 *) addr)->sin6_port));
break;
-#endif
+
default:
snprintf(buf, len, "unknown address type: %d", addr->sa_family);
break;
@@ -449,9 +450,7 @@ svc_wake_up(struct svc_serv *serv)
union svc_pktinfo_u {
struct in_pktinfo pkti;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_pktinfo pkti6;
-#endif
};
static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
@@ -467,7 +466,7 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6: {
struct in6_pktinfo *pki = CMSG_DATA(cmh);
@@ -479,7 +478,6 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
-#endif
}
return;
}
@@ -721,45 +719,21 @@ svc_write_space(struct sock *sk)
}
}
-static void svc_udp_get_sender_address(struct svc_rqst *rqstp,
- struct sk_buff *skb)
+static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
+ struct cmsghdr *cmh)
{
switch (rqstp->rq_sock->sk_sk->sk_family) {
case AF_INET: {
- /* this seems to come from net/ipv4/udp.c:udp_recvmsg */
- struct sockaddr_in *sin = svc_addr_in(rqstp);
-
- sin->sin_family = AF_INET;
- sin->sin_port = skb->h.uh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
- rqstp->rq_addrlen = sizeof(struct sockaddr_in);
- /* Remember which interface received this request */
- rqstp->rq_daddr.addr.s_addr = skb->nh.iph->daddr;
- }
+ struct in_pktinfo *pki = CMSG_DATA(cmh);
+ rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6: {
- /* this is derived from net/ipv6/udp.c:udpv6_recvmesg */
- struct sockaddr_in6 *sin6 = svc_addr_in6(rqstp);
-
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = skb->h.uh->source;
- sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (ipv6_addr_type(&sin6->sin6_addr) &
- IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = IP6CB(skb)->iif;
- ipv6_addr_copy(&sin6->sin6_addr,
- &skb->nh.ipv6h->saddr);
- rqstp->rq_addrlen = sizeof(struct sockaddr_in);
- /* Remember which interface received this request */
- ipv6_addr_copy(&rqstp->rq_daddr.addr6,
- &skb->nh.ipv6h->saddr);
}
+ case AF_INET6: {
+ struct in6_pktinfo *pki = CMSG_DATA(cmh);
+ ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
break;
-#endif
+ }
}
- return;
}
/*
@@ -771,7 +745,15 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb;
+ char buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
+ struct cmsghdr *cmh = (struct cmsghdr *)buffer;
int err, len;
+ struct msghdr msg = {
+ .msg_name = svc_addr(rqstp),
+ .msg_control = cmh,
+ .msg_controllen = sizeof(buffer),
+ .msg_flags = MSG_DONTWAIT,
+ };
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
/* udp sockets need large rcvbuf as all pending
@@ -797,7 +779,9 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
}
clear_bit(SK_DATA, &svsk->sk_flags);
- while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
+ while ((err == kernel_recvmsg(svsk->sk_sock, &msg, NULL,
+ 0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
+ (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
if (err == -EAGAIN) {
svc_sock_received(svsk);
return err;
@@ -805,6 +789,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
/* possibly an icmp error */
dprintk("svc: recvfrom returned error %d\n", -err);
}
+ rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
if (skb->tstamp.off_sec == 0) {
struct timeval tv;
@@ -827,7 +812,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_prot = IPPROTO_UDP;
- svc_udp_get_sender_address(rqstp, skb);
+ if (cmh->cmsg_level != IPPROTO_IP ||
+ cmh->cmsg_type != IP_PKTINFO) {
+ if (net_ratelimit())
+ printk("rpcsvc: received unknown control message:"
+ "%d/%d\n",
+ cmh->cmsg_level, cmh->cmsg_type);
+ skb_free_datagram(svsk->sk_sk, skb);
+ return 0;
+ }
+ svc_udp_get_dest_address(rqstp, cmh);
if (skb_is_nonlinear(skb)) {
/* we have to copy */
@@ -884,6 +878,9 @@ svc_udp_sendto(struct svc_rqst *rqstp)
static void
svc_udp_init(struct svc_sock *svsk)
{
+ int one = 1;
+ mm_segment_t oldfs;
+
svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
svsk->sk_recvfrom = svc_udp_recvfrom;
@@ -899,6 +896,13 @@ svc_udp_init(struct svc_sock *svsk)
set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
set_bit(SK_CHNGBUF, &svsk->sk_flags);
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ /* make sure we get destination address info */
+ svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
+ (char __user *)&one, sizeof(one));
+ set_fs(oldfs);
}
/*
@@ -977,11 +981,9 @@ static inline int svc_port_is_privileged(struct sockaddr *sin)
case AF_INET:
return ntohs(((struct sockaddr_in *)sin)->sin_port)
< PROT_SOCK;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
< PROT_SOCK;
-#endif
default:
return 0;
}
@@ -1786,7 +1788,7 @@ svc_delete_socket(struct svc_sock *svsk)
spin_unlock_bh(&serv->sv_lock);
}
-void svc_close_socket(struct svc_sock *svsk)
+static void svc_close_socket(struct svc_sock *svsk)
{
set_bit(SK_CLOSE, &svsk->sk_flags);
if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
@@ -1799,6 +1801,19 @@ void svc_close_socket(struct svc_sock *svsk)
svc_sock_put(svsk);
}
+void svc_force_close_socket(struct svc_sock *svsk)
+{
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+ /* Waiting to be processed, but no threads left,
+ * So just remove it from the waiting list
+ */
+ list_del_init(&svsk->sk_ready);
+ clear_bit(SK_BUSY, &svsk->sk_flags);
+ }
+ svc_close_socket(svsk);
+}
+
/**
* svc_makesock - Make a socket for nfsd and lockd
* @serv: RPC server structure