X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fsunrpc%2Fxprtsock.c;h=4486c59c3aca87d6102a372255246a326b9efdc6;hb=8842413aa4c3220ce9313791f99808fc149ca16d;hp=b5544514ee57d825526b864774887a7d3d4c42d8;hpb=67a391d72ca7efb387c30ec761a487e50a3ff085;p=linux-2.6 diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b5544514ee..4486c59c3a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -135,12 +135,6 @@ static ctl_table sunrpc_table[] = { #endif -/* - * How many times to try sending a request on a socket before waiting - * for the socket buffer to clear. - */ -#define XS_SENDMSG_RETRY (10U) - /* * Time out for an RPC UDP socket connect. UDP socket connects are * synchronous, but we set a timeout anyway in case of resource @@ -280,7 +274,9 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) return (struct sockaddr_in6 *) &xprt->addr; } -static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) +static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) { struct sockaddr_in *addr = xs_addr_in(xprt); char *buf; @@ -299,21 +295,14 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - if (xprt->prot == IPPROTO_UDP) - snprintf(buf, 8, "udp"); - else - snprintf(buf, 8, "tcp"); - } - xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; buf = kzalloc(48, GFP_KERNEL); if (buf) { snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), - xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + protocol); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; @@ -340,12 +329,12 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xprt->address_strings[RPC_DISPLAY_NETID] = - kstrdup(xprt->prot == IPPROTO_UDP ? - RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL); + xprt->address_strings[RPC_DISPLAY_NETID] = netid; } -static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) +static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) { struct sockaddr_in6 *addr = xs_addr_in6(xprt); char *buf; @@ -364,21 +353,14 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - if (xprt->prot == IPPROTO_UDP) - snprintf(buf, 8, "udp"); - else - snprintf(buf, 8, "tcp"); - } - xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; buf = kzalloc(64, GFP_KERNEL); if (buf) { snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", NIP6(addr->sin6_addr), ntohs(addr->sin6_port), - xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + protocol); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; @@ -405,17 +387,21 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xprt->address_strings[RPC_DISPLAY_NETID] = - kstrdup(xprt->prot == IPPROTO_UDP ? - RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL); + xprt->address_strings[RPC_DISPLAY_NETID] = netid; } static void xs_free_peer_addresses(struct rpc_xprt *xprt) { - int i; + unsigned int i; for (i = 0; i < RPC_DISPLAY_MAX; i++) - kfree(xprt->address_strings[i]); + switch (i) { + case RPC_DISPLAY_PROTO: + case RPC_DISPLAY_NETID: + continue; + default: + kfree(xprt->address_strings[i]); + } } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) @@ -524,6 +510,14 @@ out: return sent; } +static void xs_nospace_callback(struct rpc_task *task) +{ + struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); + + transport->inet->sk_write_pending--; + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); +} + /** * xs_nospace - place task on wait queue if transmit was incomplete * @task: task to put to sleep @@ -539,20 +533,27 @@ static void xs_nospace(struct rpc_task *task) task->tk_pid, req->rq_slen - req->rq_bytes_sent, req->rq_slen); - if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { - /* Protect against races with write_space */ - spin_lock_bh(&xprt->transport_lock); - - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) - xprt_wait_for_buffer_space(task); + /* Protect against races with write_space */ + spin_lock_bh(&xprt->transport_lock); + + /* Don't race with disconnect */ + if (xprt_connected(xprt)) { + if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + /* + * Notify TCP that we're limited by the application + * window size + */ + set_bit(SOCK_NOSPACE, &transport->sock->flags); + transport->inet->sk_write_pending++; + /* ...and wait for more buffer space */ + xprt_wait_for_buffer_space(task, xs_nospace_callback); + } + } else { + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + task->tk_status = -ENOTCONN; + } - spin_unlock_bh(&xprt->transport_lock); - } else - /* Keep holding the socket if it is blocked */ - rpc_delay(task, HZ>>4); + spin_unlock_bh(&xprt->transport_lock); } /** @@ -578,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, @@ -596,24 +596,41 @@ static int xs_udp_send_request(struct rpc_task *task) } switch (status) { + case -EAGAIN: + xs_nospace(task); + break; case -ENETUNREACH: case -EPIPE: case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ - break; - case -EAGAIN: - xs_nospace(task); + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); break; default: + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); - break; } return status; } +/** + * xs_tcp_shutdown - gracefully shut down a TCP socket + * @xprt: transport + * + * Initiates a graceful shutdown of the TCP socket by calling the + * equivalent of shutdown(SHUT_WR); + */ +static void xs_tcp_shutdown(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + + if (sock != NULL) + kernel_sock_shutdown(sock, SHUT_WR); +} + static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) { u32 reclen = buf->len - sizeof(rpc_fraghdr); @@ -642,7 +659,6 @@ static int xs_tcp_send_request(struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; int status; - unsigned int retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -654,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task) * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ while (1) { - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent); @@ -673,9 +688,10 @@ static int xs_tcp_send_request(struct rpc_task *task) return 0; } + if (status != 0) + continue; status = -EAGAIN; - if (retry++ > XS_SENDMSG_RETRY) - break; + break; } switch (status) { @@ -687,12 +703,13 @@ static int xs_tcp_send_request(struct rpc_task *task) case -ENOTCONN: case -EPIPE: status = -ENOTCONN; + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); - xprt_disconnect(xprt); - break; + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + xs_tcp_shutdown(xprt); } return status; @@ -759,7 +776,9 @@ static void xs_close(struct rpc_xprt *xprt) clear_close_wait: smp_mb__before_clear_bit(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); + xprt_disconnect_done(xprt); } /** @@ -775,7 +794,6 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_rearming_delayed_work(&transport->connect_worker); - xprt_disconnect(xprt); xs_close(xprt); xs_free_peer_addresses(xprt); kfree(xprt->slot); @@ -886,7 +904,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea /* Sanity check of the record length */ if (unlikely(transport->tcp_reclen < 4)) { dprintk("RPC: invalid TCP record fragment length\n"); - xprt_disconnect(xprt); + xprt_force_disconnect(xprt); return; } dprintk("RPC: reading TCP record fragment of length %d\n", @@ -1064,6 +1082,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) { struct rpc_xprt *xprt; read_descriptor_t rd_desc; + int read; dprintk("RPC: xs_tcp_data_ready...\n"); @@ -1075,8 +1094,10 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ rd_desc.arg.data = xprt; - rd_desc.count = 65536; - tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + do { + rd_desc.count = 65536; + read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + } while (read > 0); out: read_unlock(&sk->sk_callback_lock); } @@ -1113,18 +1134,46 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; - xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; xprt_wake_pending_tasks(xprt, 0); } spin_unlock_bh(&xprt->transport_lock); break; - case TCP_SYN_SENT: - case TCP_SYN_RECV: + case TCP_FIN_WAIT1: + /* The client initiated a shutdown of the socket */ + xprt->connect_cookie++; + xprt->reestablish_timeout = 0; + set_bit(XPRT_CLOSING, &xprt->state); + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTED, &xprt->state); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + smp_mb__after_clear_bit(); break; case TCP_CLOSE_WAIT: + /* The server initiated a shutdown of the socket */ + set_bit(XPRT_CLOSING, &xprt->state); xprt_force_disconnect(xprt); - default: - xprt_disconnect(xprt); + case TCP_SYN_SENT: + xprt->connect_cookie++; + case TCP_CLOSING: + /* + * If the server closed down the connection, make sure that + * we back off before reconnecting + */ + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + break; + case TCP_LAST_ACK: + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTED, &xprt->state); + smp_mb__after_clear_bit(); + break; + case TCP_CLOSE: + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); } out: read_unlock(&sk->sk_callback_lock); @@ -1151,9 +1200,11 @@ static void xs_udp_write_space(struct sock *sk) if (unlikely(!(sock = sk->sk_socket))) goto out; + clear_bit(SOCK_NOSPACE, &sock->flags); + if (unlikely(!(xprt = xprt_from_sock(sk)))) goto out; - if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) goto out; xprt_write_space(xprt); @@ -1184,9 +1235,11 @@ static void xs_tcp_write_space(struct sock *sk) if (unlikely(!(sock = sk->sk_socket))) goto out; + clear_bit(SOCK_NOSPACE, &sock->flags); + if (unlikely(!(xprt = xprt_from_sock(sk)))) goto out; - if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) goto out; xprt_write_space(xprt); @@ -1324,7 +1377,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) nloop++; } while (err == -EADDRINUSE && nloop != 2); dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", - __FUNCTION__, NIPQUAD(myaddr.sin_addr), + __func__, NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); return err; } @@ -1617,8 +1670,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) break; default: /* get rid of existing socket, and retry */ - xs_close(xprt); - break; + xs_tcp_shutdown(xprt); } } out: @@ -1677,8 +1729,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) break; default: /* get rid of existing socket, and retry */ - xs_close(xprt); - break; + xs_tcp_shutdown(xprt); } } out: @@ -1725,6 +1776,19 @@ static void xs_connect(struct rpc_task *task) } } +static void xs_tcp_connect(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + /* Initiate graceful shutdown of the socket if not already done */ + if (test_bit(XPRT_CONNECTED, &xprt->state)) + xs_tcp_shutdown(xprt); + /* Exit if we need to wait for socket shutdown to complete */ + if (test_bit(XPRT_CLOSING, &xprt->state)) + return; + xs_connect(task); +} + /** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics @@ -1795,12 +1859,12 @@ static struct rpc_xprt_ops xs_tcp_ops = { .release_xprt = xs_tcp_release_xprt, .rpcbind = rpcb_getport_async, .set_port = xs_set_port, - .connect = xs_connect, + .connect = xs_tcp_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_close, + .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; @@ -1841,6 +1905,13 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return xprt; } +static const struct rpc_timeout xs_udp_default_timeout = { + .to_initval = 5 * HZ, + .to_maxval = 30 * HZ, + .to_increment = 5 * HZ, + .to_retries = 5, +}; + /** * xs_setup_udp - Set up transport to use a UDP socket * @args: rpc transport creation arguments @@ -1869,10 +1940,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt->ops = &xs_udp_ops; - if (args->timeout) - xprt->timeout = *args->timeout; - else - xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); + xprt->timeout = &xs_udp_default_timeout; switch (addr->sa_family) { case AF_INET: @@ -1881,7 +1949,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt); + xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) @@ -1889,7 +1957,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt); + xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: kfree(xprt); @@ -1907,6 +1975,12 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) return ERR_PTR(-EINVAL); } +static const struct rpc_timeout xs_tcp_default_timeout = { + .to_initval = 60 * HZ, + .to_maxval = 60 * HZ, + .to_retries = 2, +}; + /** * xs_setup_tcp - Set up transport to use a TCP socket * @args: rpc transport creation arguments @@ -1933,11 +2007,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->idle_timeout = XS_IDLE_DISC_TO; xprt->ops = &xs_tcp_ops; - - if (args->timeout) - xprt->timeout = *args->timeout; - else - xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); + xprt->timeout = &xs_tcp_default_timeout; switch (addr->sa_family) { case AF_INET: @@ -1945,14 +2015,14 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt); + xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt); + xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: kfree(xprt);