diff options
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/svc.c | 162 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 101 |
2 files changed, 187 insertions, 76 deletions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4ab137403e1..b4db53ff143 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -27,22 +27,26 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCDSP
 
+#define svc_serv_is_pooled(serv)    ((serv)->sv_function)
+
 /*
  * Mode for mapping cpus to pools.
  */
 enum {
-	SVC_POOL_NONE = -1,	/* uninitialised, choose one of the others */
+	SVC_POOL_AUTO = -1,	/* choose one of the others */
 	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
 				 * (legacy & UP mode) */
 	SVC_POOL_PERCPU,	/* one pool per cpu */
 	SVC_POOL_PERNODE	/* one pool per numa node */
 };
+#define SVC_POOL_DEFAULT	SVC_POOL_GLOBAL
 
 /*
  * Structure for mapping cpus to pools and vice versa.
  * Setup once during sunrpc initialisation.
  */
 static struct svc_pool_map {
+	int count;			/* How many svc_servs use us */
 	int mode;			/* Note: int not enum to avoid
 					 * warnings about "enumeration value
 					 * not handled in switch" */
@@ -50,9 +54,63 @@ static struct svc_pool_map {
 	unsigned int *pool_to;		/* maps pool id to cpu or node */
 	unsigned int *to_pool;		/* maps cpu or node to pool id */
 } svc_pool_map = {
-	.mode = SVC_POOL_NONE
+	.count = 0,
+	.mode = SVC_POOL_DEFAULT
 };
 
+static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
+
+static int
+param_set_pool_mode(const char *val, struct kernel_param *kp)
+{
+	int *ip = (int *)kp->arg;
+	struct svc_pool_map *m = &svc_pool_map;
+	int err;
+
+	mutex_lock(&svc_pool_map_mutex);
+
+	err = -EBUSY;
+	if (m->count)
+		goto out;
+
+	err = 0;
+	if (!strncmp(val, "auto", 4))
+		*ip = SVC_POOL_AUTO;
+	else if (!strncmp(val, "global", 6))
+		*ip = SVC_POOL_GLOBAL;
+	else if (!strncmp(val, "percpu", 6))
+		*ip = SVC_POOL_PERCPU;
+	else if (!strncmp(val, "pernode", 7))
+		*ip = SVC_POOL_PERNODE;
+	else
+		err = -EINVAL;
+
+out:
+	mutex_unlock(&svc_pool_map_mutex);
+	return err;
+}
+static int
+param_get_pool_mode(char *buf, struct kernel_param *kp)
+{
+	int *ip = (int *)kp->arg;
+
+	switch (*ip)
+	{
+	case SVC_POOL_AUTO:
+		return strlcpy(buf, "auto", 20);
+	case SVC_POOL_GLOBAL:
+		return strlcpy(buf, "global", 20);
+	case SVC_POOL_PERCPU:
+		return strlcpy(buf, "percpu", 20);
+	case SVC_POOL_PERNODE:
+		return strlcpy(buf, "pernode", 20);
+	default:
+		return sprintf(buf, "%d", *ip);
+	}
+}
+
+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+		 &svc_pool_map.mode, 0644);
 
 /*
  * Detect best pool mapping mode heuristically,
@@ -115,7 +173,7 @@ fail:
 static int
 svc_pool_map_init_percpu(struct svc_pool_map *m)
 {
-	unsigned int maxpools = highest_possible_processor_id()+1;
+	unsigned int maxpools = nr_cpu_ids;
 	unsigned int pidx = 0;
 	unsigned int cpu;
 	int err;
@@ -143,7 +201,7 @@ svc_pool_map_init_percpu(struct svc_pool_map *m)
 static int
 svc_pool_map_init_pernode(struct svc_pool_map *m)
 {
-	unsigned int maxpools = highest_possible_node_id()+1;
+	unsigned int maxpools = nr_node_ids;
 	unsigned int pidx = 0;
 	unsigned int node;
 	int err;
@@ -166,18 +224,25 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
 
 
 /*
- * Build the global map of cpus to pools and vice versa.
+ * Add a reference to the global map of cpus to pools (and
+ * vice versa).  Initialise the map if we're the first user.
+ * Returns the number of pools.
  */
 static unsigned int
-svc_pool_map_init(void)
+svc_pool_map_get(void)
 {
 	struct svc_pool_map *m = &svc_pool_map;
 	int npools = -1;
 
-	if (m->mode != SVC_POOL_NONE)
+	mutex_lock(&svc_pool_map_mutex);
+
+	if (m->count++) {
+		mutex_unlock(&svc_pool_map_mutex);
 		return m->npools;
+	}
 
-	m->mode = svc_pool_map_choose_mode();
+	if (m->mode == SVC_POOL_AUTO)
+		m->mode = svc_pool_map_choose_mode();
 
 	switch (m->mode) {
 	case SVC_POOL_PERCPU:
@@ -195,9 +260,40 @@ svc_pool_map_init(void)
 	}
 
 	m->npools = npools;
+	mutex_unlock(&svc_pool_map_mutex);
 	return m->npools;
 }
 
+
+/*
+ * Drop a reference to the global map of cpus to pools.
+ * When the last reference is dropped, the map data is
+ * freed; this allows the sysadmin to change the pool
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ */
+static void
+svc_pool_map_put(void)
+{
+	struct svc_pool_map *m = &svc_pool_map;
+
+	mutex_lock(&svc_pool_map_mutex);
+
+	if (!--m->count) {
+		m->mode = SVC_POOL_DEFAULT;
+		/* Clear the pointers: the next user may run in a mode
+		 * that allocates nothing, and must not free these again. */
+		kfree(m->to_pool);
+		m->to_pool = NULL;
+		kfree(m->pool_to);
+		m->pool_to = NULL;
+		m->npools = 0;
+	}
+
+	mutex_unlock(&svc_pool_map_mutex);
+}
+
+
 /*
  * Set the current thread's cpus_allowed mask so that it
  * will only run on cpus in the given pool.
@@ -212,10 +308,9 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
 
 	/*
 	 * The caller checks for sv_nrpools > 1, which
-	 * implies that we've been initialized and the
-	 * map mode is not NONE.
+	 * implies that we've been initialized.
 	 */
-	BUG_ON(m->mode == SVC_POOL_NONE);
+	BUG_ON(m->count == 0);
 
 	switch (m->mode)
 	{
@@ -246,18 +341,19 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
 	unsigned int pidx = 0;
 
 	/*
-	 * SVC_POOL_NONE happens in a pure client when
+	 * An uninitialised map happens in a pure client when
 	 * lockd is brought up, so silently treat it the
 	 * same as SVC_POOL_GLOBAL.
 	 */
-
-	switch (m->mode) {
-	case SVC_POOL_PERCPU:
-		pidx = m->to_pool[cpu];
-		break;
-	case SVC_POOL_PERNODE:
-		pidx = m->to_pool[cpu_to_node(cpu)];
-		break;
+	if (svc_serv_is_pooled(serv)) {
+		switch (m->mode) {
+		case SVC_POOL_PERCPU:
+			pidx = m->to_pool[cpu];
+			break;
+		case SVC_POOL_PERNODE:
+			pidx = m->to_pool[cpu_to_node(cpu)];
+			break;
+		}
 	}
 	return &serv->sv_pools[pidx % serv->sv_nrpools];
 }
@@ -347,7 +443,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 		  svc_thread_fn func, int sig, struct module *mod)
 {
 	struct svc_serv *serv;
-	unsigned int npools = svc_pool_map_init();
+	unsigned int npools = svc_pool_map_get();
 
 	serv = __svc_create(prog, bufsize, npools, shutdown);
 
@@ -367,6 +463,7 @@ void
 svc_destroy(struct svc_serv *serv)
 {
 	struct svc_sock	*svsk;
+	struct svc_sock *tmp;
 
 	dprintk("svc: svc_destroy(%s, %d)\n",
 				serv->sv_program->pg_name,
@@ -382,24 +479,23 @@ svc_destroy(struct svc_serv *serv)
 
 	del_timer_sync(&serv->sv_temptimer);
 
-	while (!list_empty(&serv->sv_tempsocks)) {
-		svsk = list_entry(serv->sv_tempsocks.next,
-				  struct svc_sock,
-				  sk_list);
-		svc_close_socket(svsk);
-	}
+	list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
+		svc_force_close_socket(svsk);
+
 	if (serv->sv_shutdown)
 		serv->sv_shutdown(serv);
 
-	while (!list_empty(&serv->sv_permsocks)) {
-		svsk = list_entry(serv->sv_permsocks.next,
-				  struct svc_sock,
-				  sk_list);
-		svc_close_socket(svsk);
-	}
+	list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
+		svc_force_close_socket(svsk);
+
+	BUG_ON(!list_empty(&serv->sv_permsocks));
+	BUG_ON(!list_empty(&serv->sv_tempsocks));
 
 	cache_clean_deferred(serv);
 
+	if (svc_serv_is_pooled(serv))
+		svc_pool_map_put();
+
 	/* Unregister service with the portmapper */
 	svc_register(serv, 0, 0);
 	kfree(serv->sv_pools);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 63ae94771b8..f6e1eb1ea72 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -82,6 +82,7 @@ static void		svc_delete_socket(struct svc_sock *svsk);
 static void		svc_udp_data_ready(struct sock *, int);
 static int		svc_udp_recvfrom(struct svc_rqst *);
 static int		svc_udp_sendto(struct svc_rqst *);
+static void		svc_close_socket(struct svc_sock *svsk);
 
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
@@ -131,13 +132,13 @@ static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
 			NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
 			htons(((struct sockaddr_in *) addr)->sin_port));
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
 	case AF_INET6:
 		snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
 			NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
 			htons(((struct sockaddr_in6 *) addr)->sin6_port));
 		break;
-#endif
+
 	default:
 		snprintf(buf, len, "unknown address type: %d", addr->sa_family);
 		break;
@@ -449,9 +450,7 @@ svc_wake_up(struct svc_serv *serv)
 
 union svc_pktinfo_u {
 	struct in_pktinfo pkti;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct in6_pktinfo pkti6;
-#endif
 };
 
 static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
@@ -467,7 +466,7 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
 	case AF_INET6: {
 			struct in6_pktinfo *pki = CMSG_DATA(cmh);
 
@@ -479,7 +478,6 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 		}
 		break;
-#endif
 	}
 	return;
 }
@@ -721,45 +719,21 @@ svc_write_space(struct sock *sk)
 	}
 }
 
-static void svc_udp_get_sender_address(struct svc_rqst *rqstp,
-					struct sk_buff *skb)
+static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
+					    struct cmsghdr *cmh)
 {
 	switch (rqstp->rq_sock->sk_sk->sk_family) {
 	case AF_INET: {
-		/* this seems to come from net/ipv4/udp.c:udp_recvmsg */
-			struct sockaddr_in *sin = svc_addr_in(rqstp);
-
-			sin->sin_family = AF_INET;
-			sin->sin_port = skb->h.uh->source;
-			sin->sin_addr.s_addr = skb->nh.iph->saddr;
-			rqstp->rq_addrlen = sizeof(struct sockaddr_in);
-			/* Remember which interface received this request */
-			rqstp->rq_daddr.addr.s_addr = skb->nh.iph->daddr;
-		}
+		struct in_pktinfo *pki = CMSG_DATA(cmh);
+		rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case AF_INET6: {
-		/* this is derived from net/ipv6/udp.c:udpv6_recvmesg */
-			struct sockaddr_in6 *sin6 = svc_addr_in6(rqstp);
-
-			sin6->sin6_family = AF_INET6;
-			sin6->sin6_port = skb->h.uh->source;
-			sin6->sin6_flowinfo = 0;
-			sin6->sin6_scope_id = 0;
-			if (ipv6_addr_type(&sin6->sin6_addr) &
-							IPV6_ADDR_LINKLOCAL)
-				sin6->sin6_scope_id = IP6CB(skb)->iif;
-			ipv6_addr_copy(&sin6->sin6_addr,
-							&skb->nh.ipv6h->saddr);
-			rqstp->rq_addrlen = sizeof(struct sockaddr_in);
-			/* Remember which interface received this request */
-			ipv6_addr_copy(&rqstp->rq_daddr.addr6,
-							&skb->nh.ipv6h->saddr);
 		}
+	case AF_INET6: {
+		struct in6_pktinfo *pki = CMSG_DATA(cmh);
+		ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
 		break;
-#endif
+		}
 	}
-	return;
 }
 
 /*
@@ -771,7 +745,15 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 	struct svc_sock	*svsk = rqstp->rq_sock;
 	struct svc_serv	*serv = svsk->sk_server;
 	struct sk_buff	*skb;
+	char		buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
+	struct cmsghdr *cmh = (struct cmsghdr *)buffer;
 	int		err, len;
+	struct msghdr msg = {
+		.msg_name = svc_addr(rqstp),
+		.msg_control = cmh,
+		.msg_controllen = sizeof(buffer),
+		.msg_flags = MSG_DONTWAIT,
+	};
 
 	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
 	    /* udp sockets need large rcvbuf as all pending
@@ -797,7 +779,9 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 	}
 
 	clear_bit(SK_DATA, &svsk->sk_flags);
-	while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
+	while ((err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
+				     0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
+	       (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
 		if (err == -EAGAIN) {
 			svc_sock_received(svsk);
 			return err;
@@ -805,6 +789,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		/* possibly an icmp error */
 		dprintk("svc: recvfrom returned error %d\n", -err);
 	}
+	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
 	if (skb->tstamp.off_sec == 0) {
 		struct timeval tv;
 
@@ -827,7 +812,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 
 	rqstp->rq_prot = IPPROTO_UDP;
 
-	svc_udp_get_sender_address(rqstp, skb);
+	if (cmh->cmsg_level != IPPROTO_IP ||
+	    cmh->cmsg_type != IP_PKTINFO) {
+		if (net_ratelimit())
+			printk("rpcsvc: received unknown control message:"
+			       "%d/%d\n",
+			       cmh->cmsg_level, cmh->cmsg_type);
+		skb_free_datagram(svsk->sk_sk, skb);
+		return 0;
+	}
+	svc_udp_get_dest_address(rqstp, cmh);
 
 	if (skb_is_nonlinear(skb)) {
 		/* we have to copy */
@@ -884,6 +878,9 @@ svc_udp_sendto(struct svc_rqst *rqstp)
 static void
 svc_udp_init(struct svc_sock *svsk)
 {
+	int one = 1;
+	mm_segment_t oldfs;
+
 	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
 	svsk->sk_recvfrom = svc_udp_recvfrom;
@@ -899,6 +896,13 @@ svc_udp_init(struct svc_sock *svsk)
 
 	set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
 	set_bit(SK_CHNGBUF, &svsk->sk_flags);
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	/* make sure we get destination address info */
+	svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
+				       (char __user *)&one, sizeof(one));
+	set_fs(oldfs);
 }
 
 /*
@@ -977,11 +981,9 @@ static inline int svc_port_is_privileged(struct sockaddr *sin)
 	case AF_INET:
 		return ntohs(((struct sockaddr_in *)sin)->sin_port)
 			< PROT_SOCK;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
 		return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
 			< PROT_SOCK;
-#endif
 	default:
 		return 0;
 	}
@@ -1786,7 +1788,7 @@ svc_delete_socket(struct svc_sock *svsk)
 	spin_unlock_bh(&serv->sv_lock);
 }
 
-void svc_close_socket(struct svc_sock *svsk)
+static void svc_close_socket(struct svc_sock *svsk)
 {
 	set_bit(SK_CLOSE, &svsk->sk_flags);
 	if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
@@ -1799,6 +1801,19 @@ void svc_close_socket(struct svc_sock *svsk)
 	svc_sock_put(svsk);
 }
 
+void svc_force_close_socket(struct svc_sock *svsk)
+{
+	set_bit(SK_CLOSE, &svsk->sk_flags);
+	if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+		/* Waiting to be processed, but no threads left,
+		 * So just remove it from the waiting list
+		 */
+		list_del_init(&svsk->sk_ready);
+		clear_bit(SK_BUSY, &svsk->sk_flags);
+	}
+	svc_close_socket(svsk);
+}
+
 /**
  * svc_makesock - Make a socket for nfsd and lockd
  * @serv: RPC server structure