X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fsunrpc%2Fsvcsock.c;h=e6bb1b0563ec50af63c61facd1eaba156afcb796;hb=d7c9f1ed972b4a468dd24a2457721704dfe9ca70;hp=c75bffeb89eb705831585ba1447c0ba77dd06a9c;hpb=e3d18658d4f28e4783e1bb1c41e9134c9e5db0a9;p=linux-2.6 diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c75bffeb89..e6bb1b0563 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -75,7 +75,7 @@ * */ -#define RPCDBG_FACILITY RPCDBG_SVCSOCK +#define RPCDBG_FACILITY RPCDBG_SVCXPRT static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, @@ -85,10 +85,14 @@ static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); static void svc_close_socket(struct svc_sock *svsk); +static void svc_sock_detach(struct svc_xprt *); +static void svc_sock_free(struct svc_xprt *); static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); +static struct svc_xprt *svc_create_socket(struct svc_serv *, int, + struct sockaddr *, int, int); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -185,14 +189,13 @@ svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) /* * Release an skbuff after use */ -static inline void -svc_release_skb(struct svc_rqst *rqstp) +static void svc_release_skb(struct svc_rqst *rqstp) { - struct sk_buff *skb = rqstp->rq_skbuff; + struct sk_buff *skb = rqstp->rq_xprt_ctxt; struct svc_deferred_req *dr = rqstp->rq_deferred; if (skb) { - rqstp->rq_skbuff = NULL; + rqstp->rq_xprt_ctxt = NULL; dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); skb_free_datagram(rqstp->rq_sock->sk_sk, skb); @@ -203,22 +206,6 @@ svc_release_skb(struct svc_rqst *rqstp) } } -/* - * Any space to write? - */ -static inline unsigned long -svc_sock_wspace(struct svc_sock *svsk) -{ - int wspace; - - if (svsk->sk_sock->type == SOCK_STREAM) - wspace = sk_stream_wspace(svsk->sk_sk); - else - wspace = sock_wspace(svsk->sk_sk); - - return wspace; -} - /* * Queue up a socket with data pending. If there are idle nfsd * processes, wake 'em up. @@ -268,22 +255,24 @@ svc_sock_enqueue(struct svc_sock *svsk) BUG_ON(svsk->sk_pool != NULL); svsk->sk_pool = pool; - set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2 - > svc_sock_wspace(svsk)) - && !test_bit(SK_CLOSE, &svsk->sk_flags) - && !test_bit(SK_CONN, &svsk->sk_flags)) { + /* Handle pending connection */ + if (test_bit(SK_CONN, &svsk->sk_flags)) + goto process; + + /* Handle close in-progress */ + if (test_bit(SK_CLOSE, &svsk->sk_flags)) + goto process; + + /* Check if we have space to reply to a request */ + if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) { /* Don't enqueue while not enough space for reply */ - dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", - svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg, - svc_sock_wspace(svsk)); + dprintk("svc: no write space, socket %p not enqueued\n", svsk); svsk->sk_pool = NULL; clear_bit(SK_BUSY, &svsk->sk_flags); goto out_unlock; } - clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - + process: if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, @@ -377,16 +366,9 @@ static inline void svc_sock_put(struct svc_sock *svsk) { if (atomic_dec_and_test(&svsk->sk_inuse)) { - BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); - - dprintk("svc: releasing dead socket\n"); - if (svsk->sk_sock->file) - sockfd_put(svsk->sk_sock); - else - sock_release(svsk->sk_sock); - if (svsk->sk_info_authunix != NULL) - svcauth_unix_info_release(svsk->sk_info_authunix); - kfree(svsk); + BUG_ON(!test_bit(SK_DEAD, &svsk->sk_flags)); + module_put(svsk->sk_xprt.xpt_class->xcl_owner); + svsk->sk_xprt.xpt_ops->xpo_free(&svsk->sk_xprt); } } @@ -395,7 +377,7 @@ svc_sock_release(struct svc_rqst *rqstp) { struct svc_sock *svsk = rqstp->rq_sock; - svc_release_skb(rqstp); + rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); svc_free_res_pages(rqstp); rqstp->rq_res.page_len = 0; @@ -797,11 +779,6 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) return svc_deferred_recv(rqstp); } - if (test_bit(SK_CLOSE, &svsk->sk_flags)) { - svc_delete_socket(svsk); - return 0; - } - clear_bit(SK_DATA, &svsk->sk_flags); skb = NULL; err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, @@ -867,7 +844,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) skb_free_datagram(svsk->sk_sk, skb); return 0; } - rqstp->rq_skbuff = skb; + rqstp->rq_xprt_ctxt = skb; } rqstp->rq_arg.page_base = 0; @@ -900,16 +877,69 @@ svc_udp_sendto(struct svc_rqst *rqstp) return error; } +static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp) +{ +} + +static int svc_udp_has_wspace(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct svc_serv *serv = svsk->sk_server; + unsigned long required; + + /* + * Set the SOCK_NOSPACE flag before checking the available + * sock space. + */ + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); + required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg; + if (required*2 > sock_wspace(svsk->sk_sk)) + return 0; + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); + return 1; +} + +static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) +{ + BUG(); + return NULL; +} + +static struct svc_xprt *svc_udp_create(struct svc_serv *serv, + struct sockaddr *sa, int salen, + int flags) +{ + return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); +} + +static struct svc_xprt_ops svc_udp_ops = { + .xpo_create = svc_udp_create, + .xpo_recvfrom = svc_udp_recvfrom, + .xpo_sendto = svc_udp_sendto, + .xpo_release_rqst = svc_release_skb, + .xpo_detach = svc_sock_detach, + .xpo_free = svc_sock_free, + .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, + .xpo_has_wspace = svc_udp_has_wspace, + .xpo_accept = svc_udp_accept, +}; + +static struct svc_xprt_class svc_udp_class = { + .xcl_name = "udp", + .xcl_owner = THIS_MODULE, + .xcl_ops = &svc_udp_ops, + .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, +}; + static void svc_udp_init(struct svc_sock *svsk) { int one = 1; mm_segment_t oldfs; + svc_xprt_init(&svc_udp_class, &svsk->sk_xprt); svsk->sk_sk->sk_data_ready = svc_udp_data_ready; svsk->sk_sk->sk_write_space = svc_write_space; - svsk->sk_recvfrom = svc_udp_recvfrom; - svsk->sk_sendto = svc_udp_sendto; /* initialise setting must have enough space to * receive and respond to one request. @@ -1017,9 +1047,9 @@ static inline int svc_port_is_privileged(struct sockaddr *sin) /* * Accept a TCP connection */ -static void -svc_tcp_accept(struct svc_sock *svsk) +static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) { + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sockaddr_storage addr; struct sockaddr *sin = (struct sockaddr *) &addr; struct svc_serv *serv = svsk->sk_server; @@ -1031,7 +1061,7 @@ svc_tcp_accept(struct svc_sock *svsk) dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) - return; + return NULL; clear_bit(SK_CONN, &svsk->sk_flags); err = kernel_accept(sock, &newsock, O_NONBLOCK); @@ -1042,11 +1072,10 @@ svc_tcp_accept(struct svc_sock *svsk) else if (err != -EAGAIN && net_ratelimit()) printk(KERN_WARNING "%s: accept failed (err %d)!\n", serv->sv_name, -err); - return; + return NULL; } set_bit(SK_CONN, &svsk->sk_flags); - svc_sock_enqueue(svsk); err = kernel_getpeername(newsock, sin, &slen); if (err < 0) { @@ -1088,17 +1117,30 @@ svc_tcp_accept(struct svc_sock *svsk) svc_sock_received(newsvsk); - /* make sure that we don't have too many active connections. - * If we have, something must be dropped. - * - * There's no point in trying to do random drop here for - * DoS prevention. The NFS clients does 1 reconnect in 15 - * seconds. An attacker can easily beat that. - * - * The only somewhat efficient mechanism would be if drop - * old connections from the same IP first. But right now - * we don't even record the client IP in svc_sock. - */ + if (serv->sv_stats) + serv->sv_stats->nettcpconn++; + + return &newsvsk->sk_xprt; + +failed: + sock_release(newsock); + return NULL; +} + +/* + * Make sure that we don't have too many active connections. If we + * have, something must be dropped. + * + * There's no point in trying to do random drop here for DoS + * prevention. The NFS clients does 1 reconnect in 15 seconds. An + * attacker can easily beat that. + * + * The only somewhat efficient mechanism would be if drop old + * connections from the same IP first. But right now we don't even + * record the client IP in svc_sock. + */ +static void svc_check_conn_limits(struct svc_serv *serv) +{ if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { struct svc_sock *svsk = NULL; spin_lock_bh(&serv->sv_lock); @@ -1106,13 +1148,9 @@ svc_tcp_accept(struct svc_sock *svsk) if (net_ratelimit()) { /* Try to help the admin */ printk(KERN_NOTICE "%s: too many open TCP " - "sockets, consider increasing the " - "number of nfsd threads\n", - serv->sv_name); - printk(KERN_NOTICE - "%s: last TCP connect from %s\n", - serv->sv_name, __svc_print_addr(sin, - buf, sizeof(buf))); + "sockets, consider increasing the " + "number of nfsd threads\n", + serv->sv_name); } /* * Always select the oldest socket. It's not fair, @@ -1130,17 +1168,7 @@ svc_tcp_accept(struct svc_sock *svsk) svc_sock_enqueue(svsk); svc_sock_put(svsk); } - } - - if (serv->sv_stats) - serv->sv_stats->nettcpconn++; - - return; - -failed: - sock_release(newsock); - return; } /* @@ -1165,17 +1193,6 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) return svc_deferred_recv(rqstp); } - if (test_bit(SK_CLOSE, &svsk->sk_flags)) { - svc_delete_socket(svsk); - return 0; - } - - if (svsk->sk_sk->sk_state == TCP_LISTEN) { - svc_tcp_accept(svsk); - svc_sock_received(svsk); - return 0; - } - if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) /* sndbuf needs to have room for one request * per thread, otherwise we can stall even when the @@ -1281,7 +1298,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; } - rqstp->rq_skbuff = NULL; + rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; /* Reset TCP read info */ @@ -1295,7 +1312,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) return len; err_delete: - svc_delete_socket(svsk); + set_bit(SK_CLOSE, &svsk->sk_flags); return -EAGAIN; error: @@ -1344,17 +1361,90 @@ svc_tcp_sendto(struct svc_rqst *rqstp) return sent; } +/* + * Setup response header. TCP has a 4B record length field. + */ +static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) +{ + struct kvec *resv = &rqstp->rq_res.head[0]; + + /* tcp needs a space for the record length... */ + svc_putnl(resv, 0); +} + +static int svc_tcp_has_wspace(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct svc_serv *serv = svsk->sk_server; + int required; + int wspace; + + /* + * Set the SOCK_NOSPACE flag before checking the available + * sock space. + */ + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); + required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg; + wspace = sk_stream_wspace(svsk->sk_sk); + + if (wspace < sk_stream_min_wspace(svsk->sk_sk)) + return 0; + if (required * 2 > wspace) + return 0; + + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); + return 1; +} + +static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, + struct sockaddr *sa, int salen, + int flags) +{ + return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); +} + +static struct svc_xprt_ops svc_tcp_ops = { + .xpo_create = svc_tcp_create, + .xpo_recvfrom = svc_tcp_recvfrom, + .xpo_sendto = svc_tcp_sendto, + .xpo_release_rqst = svc_release_skb, + .xpo_detach = svc_sock_detach, + .xpo_free = svc_sock_free, + .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, + .xpo_has_wspace = svc_tcp_has_wspace, + .xpo_accept = svc_tcp_accept, +}; + +static struct svc_xprt_class svc_tcp_class = { + .xcl_name = "tcp", + .xcl_owner = THIS_MODULE, + .xcl_ops = &svc_tcp_ops, + .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, +}; + +void svc_init_xprt_sock(void) +{ + svc_reg_xprt_class(&svc_tcp_class); + svc_reg_xprt_class(&svc_udp_class); +} + +void svc_cleanup_xprt_sock(void) +{ + svc_unreg_xprt_class(&svc_tcp_class); + svc_unreg_xprt_class(&svc_udp_class); +} + static void svc_tcp_init(struct svc_sock *svsk) { struct sock *sk = svsk->sk_sk; struct tcp_sock *tp = tcp_sk(sk); - svsk->sk_recvfrom = svc_tcp_recvfrom; - svsk->sk_sendto = svc_tcp_sendto; + svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt); if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); + set_bit(SK_LISTENER, &svsk->sk_flags); sk->sk_data_ready = svc_tcp_listen_data_ready; set_bit(SK_CONN, &svsk->sk_flags); } else { @@ -1497,10 +1587,28 @@ svc_recv(struct svc_rqst *rqstp, long timeout) } spin_unlock_bh(&pool->sp_lock); - dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", - rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); - len = svsk->sk_recvfrom(rqstp); - dprintk("svc: got len=%d\n", len); + len = 0; + if (test_bit(SK_CLOSE, &svsk->sk_flags)) { + dprintk("svc_recv: found SK_CLOSE\n"); + svc_delete_socket(svsk); + } else if (test_bit(SK_LISTENER, &svsk->sk_flags)) { + struct svc_xprt *newxpt; + newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt); + if (newxpt) { + /* + * We know this module_get will succeed because the + * listener holds a reference too + */ + __module_get(newxpt->xpt_class->xcl_owner); + svc_check_conn_limits(svsk->sk_server); + } + svc_sock_received(svsk); + } else { + dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", + rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); + len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp); + dprintk("svc: got len=%d\n", len); + } /* No data, incomplete (TCP) read, or accept() */ if (len == 0 || len == -EAGAIN) { @@ -1546,7 +1654,7 @@ svc_send(struct svc_rqst *rqstp) } /* release the receive skb before sending the reply */ - svc_release_skb(rqstp); + rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); /* calculate over-all length */ xb = & rqstp->rq_res; @@ -1559,7 +1667,7 @@ svc_send(struct svc_rqst *rqstp) if (test_bit(SK_DEAD, &svsk->sk_flags)) len = -ENOTCONN; else - len = svsk->sk_sendto(rqstp); + len = svsk->sk_xprt.xpt_ops->xpo_sendto(rqstp); mutex_unlock(&svsk->sk_mutex); svc_sock_release(rqstp); @@ -1733,8 +1841,10 @@ EXPORT_SYMBOL_GPL(svc_addsock); /* * Create socket for RPC service. */ -static int svc_create_socket(struct svc_serv *serv, int protocol, - struct sockaddr *sin, int len, int flags) +static struct svc_xprt *svc_create_socket(struct svc_serv *serv, + int protocol, + struct sockaddr *sin, int len, + int flags) { struct svc_sock *svsk; struct socket *sock; @@ -1749,13 +1859,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { printk(KERN_WARNING "svc: only UDP and TCP " "sockets supported\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; error = sock_create_kern(sin->sa_family, type, protocol, &sock); if (error < 0) - return error; + return ERR_PTR(error); svc_reclassify_socket(sock); @@ -1772,13 +1882,47 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { svc_sock_received(svsk); - return ntohs(inet_sk(svsk->sk_sk)->sport); + return (struct svc_xprt *)svsk; } bummer: dprintk("svc: svc_create_socket error = %d\n", -error); sock_release(sock); - return error; + return ERR_PTR(error); +} + +/* + * Detach the svc_sock from the socket so that no + * more callbacks occur. + */ +static void svc_sock_detach(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct sock *sk = svsk->sk_sk; + + dprintk("svc: svc_sock_detach(%p)\n", svsk); + + /* put back the old socket callbacks */ + sk->sk_state_change = svsk->sk_ostate; + sk->sk_data_ready = svsk->sk_odata; + sk->sk_write_space = svsk->sk_owspace; +} + +/* + * Free the svc_sock's socket resources and the svc_sock itself. + */ +static void svc_sock_free(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + dprintk("svc: svc_sock_free(%p)\n", svsk); + + if (svsk->sk_info_authunix != NULL) + svcauth_unix_info_release(svsk->sk_info_authunix); + if (svsk->sk_sock->file) + sockfd_put(svsk->sk_sock); + else + sock_release(svsk->sk_sock); + kfree(svsk); } /* @@ -1795,9 +1939,7 @@ svc_delete_socket(struct svc_sock *svsk) serv = svsk->sk_server; sk = svsk->sk_sk; - sk->sk_state_change = svsk->sk_ostate; - sk->sk_data_ready = svsk->sk_odata; - sk->sk_write_space = svsk->sk_owspace; + svsk->sk_xprt.xpt_ops->xpo_detach(&svsk->sk_xprt); spin_lock_bh(&serv->sv_lock); @@ -1846,28 +1988,6 @@ void svc_force_close_socket(struct svc_sock *svsk) svc_close_socket(svsk); } -/** - * svc_makesock - Make a socket for nfsd and lockd - * @serv: RPC server structure - * @protocol: transport protocol to use - * @port: port to use - * @flags: requested socket characteristics - * - */ -int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port, - int flags) -{ - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = INADDR_ANY, - .sin_port = htons(port), - }; - - dprintk("svc: creating socket proto = %d\n", protocol); - return svc_create_socket(serv, protocol, (struct sockaddr *) &sin, - sizeof(sin), flags); -} - /* * Handle defer and revisit of requests */