* after a clear, the socket must be read/accepted
* if this succeeds, it must be set again.
* SK_CLOSE can set at any time. It is never cleared.
- * sk_inuse contains a bias of '1' until SK_DEAD is set.
- * so when sk_inuse hits zero, we know the socket is dead
+ * xpt_ref contains a bias of '1' until SK_DEAD is set.
+ * so when xpt_ref hits zero, we know the transport is dead
* and no-one is using it.
* SK_DEAD can only be set while SK_BUSY is held which ensures
* no other thread will be using the socket or will try to
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
+static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
+ struct sockaddr *, int, int);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
"svc_sock_enqueue: server %p, rq_sock=%p!\n",
rqstp, rqstp->rq_sock);
rqstp->rq_sock = svsk;
- atomic_inc(&svsk->sk_inuse);
+ svc_xprt_get(&svsk->sk_xprt);
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
BUG_ON(svsk->sk_pool != pool);
list_del_init(&svsk->sk_ready);
dprintk("svc: socket %p dequeued, inuse=%d\n",
- svsk->sk_sk, atomic_read(&svsk->sk_inuse));
+ svsk->sk_sk, atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
return svsk;
}
}
}
-/*
- * Release a socket after use.
- */
-static inline void
-svc_sock_put(struct svc_sock *svsk)
-{
- if (atomic_dec_and_test(&svsk->sk_inuse)) {
- BUG_ON(!test_bit(SK_DEAD, &svsk->sk_flags));
- svsk->sk_xprt.xpt_ops->xpo_free(&svsk->sk_xprt);
- }
-}
-
static void
svc_sock_release(struct svc_rqst *rqstp)
{
svc_reserve(rqstp, 0);
rqstp->rq_sock = NULL;
- svc_sock_put(svsk);
+ svc_xprt_put(&svsk->sk_xprt);
}
/*
return svc_deferred_recv(rqstp);
}
- if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
- svc_delete_socket(svsk);
- return 0;
- }
-
clear_bit(SK_DATA, &svsk->sk_flags);
skb = NULL;
err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
return 1;
}
+static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
+{
+ BUG(); /* xpo_accept hook for UDP. In the visible code xpo_accept is
+         * only invoked for transports flagged SK_LISTENER, and that
+         * flag is only set for sockets in TCP_LISTEN state, so this
+         * hook is not expected to run. */
+ return NULL;
+}
+
+static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
+ struct sockaddr *sa, int salen,
+ int flags)
+{ /* xpo_create hook for UDP: forwards all arguments to
+   * svc_create_socket() with IPPROTO_UDP and returns its result,
+   * which is either a transport pointer or an ERR_PTR() value. */
+ return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags);
+}
+
static struct svc_xprt_ops svc_udp_ops = {
+ .xpo_create = svc_udp_create, /* wraps svc_create_socket(IPPROTO_UDP) */
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
.xpo_release_rqst = svc_release_skb,
.xpo_free = svc_sock_free,
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
.xpo_has_wspace = svc_udp_has_wspace,
+ .xpo_accept = svc_udp_accept, /* BUG()s if it is ever called */
};
static struct svc_xprt_class svc_udp_class = {
.xcl_name = "udp",
+ .xcl_owner = THIS_MODULE, /* the accept path does __module_get() on xpt_class->xcl_owner */
.xcl_ops = &svc_udp_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
};
/*
* Accept a TCP connection
*/
-static void
-svc_tcp_accept(struct svc_sock *svsk)
+static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sockaddr_storage addr;
struct sockaddr *sin = (struct sockaddr *) &addr;
struct svc_serv *serv = svsk->sk_server;
dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
if (!sock)
- return;
+ return NULL;
clear_bit(SK_CONN, &svsk->sk_flags);
err = kernel_accept(sock, &newsock, O_NONBLOCK);
else if (err != -EAGAIN && net_ratelimit())
printk(KERN_WARNING "%s: accept failed (err %d)!\n",
serv->sv_name, -err);
- return;
+ return NULL;
}
set_bit(SK_CONN, &svsk->sk_flags);
- svc_sock_enqueue(svsk);
err = kernel_getpeername(newsock, sin, &slen);
if (err < 0) {
svc_sock_received(newsvsk);
- /* make sure that we don't have too many active connections.
- * If we have, something must be dropped.
- *
- * There's no point in trying to do random drop here for
- * DoS prevention. The NFS clients does 1 reconnect in 15
- * seconds. An attacker can easily beat that.
- *
- * The only somewhat efficient mechanism would be if drop
- * old connections from the same IP first. But right now
- * we don't even record the client IP in svc_sock.
- */
- if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
- struct svc_sock *svsk = NULL;
- spin_lock_bh(&serv->sv_lock);
- if (!list_empty(&serv->sv_tempsocks)) {
- if (net_ratelimit()) {
- /* Try to help the admin */
- printk(KERN_NOTICE "%s: too many open TCP "
- "sockets, consider increasing the "
- "number of nfsd threads\n",
- serv->sv_name);
- printk(KERN_NOTICE
- "%s: last TCP connect from %s\n",
- serv->sv_name, __svc_print_addr(sin,
- buf, sizeof(buf)));
- }
- /*
- * Always select the oldest socket. It's not fair,
- * but so is life
- */
- svsk = list_entry(serv->sv_tempsocks.prev,
- struct svc_sock,
- sk_list);
- set_bit(SK_CLOSE, &svsk->sk_flags);
- atomic_inc(&svsk->sk_inuse);
- }
- spin_unlock_bh(&serv->sv_lock);
-
- if (svsk) {
- svc_sock_enqueue(svsk);
- svc_sock_put(svsk);
- }
-
- }
-
if (serv->sv_stats)
serv->sv_stats->nettcpconn++;
- return;
+ return &newsvsk->sk_xprt;
failed:
sock_release(newsock);
- return;
+ return NULL;
}
/*
return svc_deferred_recv(rqstp);
}
- if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
- svc_delete_socket(svsk);
- return 0;
- }
-
- if (svsk->sk_sk->sk_state == TCP_LISTEN) {
- svc_tcp_accept(svsk);
- svc_sock_received(svsk);
- return 0;
- }
-
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
/* sndbuf needs to have room for one request
* per thread, otherwise we can stall even when the
return len;
err_delete:
- svc_delete_socket(svsk);
+ set_bit(SK_CLOSE, &svsk->sk_flags);
return -EAGAIN;
error:
return 1;
}
+static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
+ struct sockaddr *sa, int salen,
+ int flags)
+{ /* xpo_create hook for TCP: forwards all arguments to
+   * svc_create_socket() with IPPROTO_TCP and returns its result,
+   * which is either a transport pointer or an ERR_PTR() value. */
+ return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags);
+}
+
static struct svc_xprt_ops svc_tcp_ops = {
+ .xpo_create = svc_tcp_create, /* wraps svc_create_socket(IPPROTO_TCP) */
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
.xpo_release_rqst = svc_release_skb,
.xpo_free = svc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_has_wspace = svc_tcp_has_wspace,
+ .xpo_accept = svc_tcp_accept, /* returns the new connection's transport, or NULL */
};
static struct svc_xprt_class svc_tcp_class = {
.xcl_name = "tcp",
+ .xcl_owner = THIS_MODULE, /* the accept path does __module_get() on xpt_class->xcl_owner */
.xcl_ops = &svc_tcp_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
};
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
+ set_bit(SK_LISTENER, &svsk->sk_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(SK_CONN, &svsk->sk_flags);
} else {
spin_unlock_bh(&serv->sv_lock);
}
+/*
+ * Make sure that we don't have too many active connections. If we
+ * have, something must be dropped.
+ *
+ * When serv->sv_tmpcnt exceeds (sv_nrthreads + 3) * 20 and
+ * serv->sv_tempsocks is not empty, the entry at the tail of that
+ * list is flagged SK_CLOSE and enqueued. This function does not
+ * close the socket itself. A rate-limited notice is printed when a
+ * victim is picked.
+ *
+ * There's no point in trying to do a random drop here for DoS
+ * prevention. An NFS client does 1 reconnect in 15 seconds. An
+ * attacker can easily beat that.
+ *
+ * The only somewhat efficient mechanism would be to drop old
+ * connections from the same IP first. But right now we don't even
+ * record the client IP in svc_sock.
+ */
+static void svc_check_conn_limits(struct svc_serv *serv)
+{
+ if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
+ struct svc_sock *svsk = NULL;
+ spin_lock_bh(&serv->sv_lock);
+ if (!list_empty(&serv->sv_tempsocks)) {
+ if (net_ratelimit()) {
+ /* Try to help the admin */
+ printk(KERN_NOTICE "%s: too many open TCP "
+ "sockets, consider increasing the "
+ "number of nfsd threads\n",
+ serv->sv_name);
+ }
+ /*
+ * Always select the oldest socket (taken from the tail
+ * of sv_tempsocks). It's not fair, but so is life.
+ *
+ * The reference taken here, under sv_lock, is dropped
+ * again after the enqueue below.
+ */
+ svsk = list_entry(serv->sv_tempsocks.prev,
+ struct svc_sock,
+ sk_list);
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ svc_xprt_get(&svsk->sk_xprt);
+ }
+ spin_unlock_bh(&serv->sv_lock);
+
+ if (svsk) {
+ svc_sock_enqueue(svsk);
+ svc_xprt_put(&svsk->sk_xprt);
+ }
+ }
+}
+
/*
* Receive the next request on any socket. This code is carefully
* organised not to touch any cachelines in the shared svc_serv
spin_lock_bh(&pool->sp_lock);
if ((svsk = svc_sock_dequeue(pool)) != NULL) {
rqstp->rq_sock = svsk;
- atomic_inc(&svsk->sk_inuse);
+ svc_xprt_get(&svsk->sk_xprt);
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
} else {
}
spin_unlock_bh(&pool->sp_lock);
- dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
- rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
- len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
- dprintk("svc: got len=%d\n", len);
+ len = 0;
+ if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+ dprintk("svc_recv: found SK_CLOSE\n");
+ svc_delete_socket(svsk);
+ } else if (test_bit(SK_LISTENER, &svsk->sk_flags)) {
+ struct svc_xprt *newxpt;
+ newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
+ if (newxpt) {
+ /*
+ * We know this module_get will succeed because the
+ * listener holds a reference too
+ */
+ __module_get(newxpt->xpt_class->xcl_owner);
+ svc_check_conn_limits(svsk->sk_server);
+ }
+ svc_sock_received(svsk);
+ } else {
+ dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
+ rqstp, pool->sp_id, svsk,
+ atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
+ len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
+ dprintk("svc: got len=%d\n", len);
+ }
/* No data, incomplete (TCP) read, or accept() */
if (len == 0 || len == -EAGAIN) {
if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
continue;
- if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags))
+ if (atomic_read(&svsk->sk_xprt.xpt_ref.refcount) > 1
+ || test_bit(SK_BUSY, &svsk->sk_flags))
continue;
- atomic_inc(&svsk->sk_inuse);
+ svc_xprt_get(&svsk->sk_xprt);
list_move(le, &to_be_aged);
set_bit(SK_CLOSE, &svsk->sk_flags);
set_bit(SK_DETACHED, &svsk->sk_flags);
/* a thread will dequeue and close it soon */
svc_sock_enqueue(svsk);
- svc_sock_put(svsk);
+ svc_xprt_put(&svsk->sk_xprt);
}
mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
svsk->sk_server = serv;
- atomic_set(&svsk->sk_inuse, 1);
svsk->sk_lastrecv = get_seconds();
spin_lock_init(&svsk->sk_lock);
INIT_LIST_HEAD(&svsk->sk_deferred);
/*
* Create socket for RPC service.
*/
-static int svc_create_socket(struct svc_serv *serv, int protocol,
- struct sockaddr *sin, int len, int flags)
+static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
+ int protocol,
+ struct sockaddr *sin, int len,
+ int flags)
{
struct svc_sock *svsk;
struct socket *sock;
if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
"sockets supported\n");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
error = sock_create_kern(sin->sa_family, type, protocol, &sock);
if (error < 0)
- return error;
+ return ERR_PTR(error);
svc_reclassify_socket(sock);
if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
svc_sock_received(svsk);
- return ntohs(inet_sk(svsk->sk_sk)->sport);
+ return (struct svc_xprt *)svsk;
}
bummer:
dprintk("svc: svc_create_socket error = %d\n", -error);
sock_release(sock);
- return error;
+ return ERR_PTR(error);
}
/*
* is about to be destroyed (in svc_destroy).
*/
if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
- BUG_ON(atomic_read(&svsk->sk_inuse)<2);
- atomic_dec(&svsk->sk_inuse);
+ BUG_ON(atomic_read(&svsk->sk_xprt.xpt_ref.refcount) < 2);
if (test_bit(SK_TEMP, &svsk->sk_flags))
serv->sv_tmpcnt--;
+ svc_xprt_put(&svsk->sk_xprt);
}
spin_unlock_bh(&serv->sv_lock);
/* someone else will have to effect the close */
return;
- atomic_inc(&svsk->sk_inuse);
+ svc_xprt_get(&svsk->sk_xprt);
svc_delete_socket(svsk);
clear_bit(SK_BUSY, &svsk->sk_flags);
- svc_sock_put(svsk);
+ svc_xprt_put(&svsk->sk_xprt);
}
void svc_force_close_socket(struct svc_sock *svsk)
svc_close_socket(svsk);
}
-/**
- * svc_makesock - Make a socket for nfsd and lockd
- * @serv: RPC server structure
- * @protocol: transport protocol to use
- * @port: port to use
- * @flags: requested socket characteristics
- *
- */
-int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port,
- int flags)
-{
- struct sockaddr_in sin = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = INADDR_ANY,
- .sin_port = htons(port),
- };
-
- dprintk("svc: creating socket proto = %d\n", protocol);
- return svc_create_socket(serv, protocol, (struct sockaddr *) &sin,
- sizeof(sin), flags);
-}
-
/*
* Handle defer and revisit of requests
*/
struct svc_sock *svsk;
if (too_many) {
- svc_sock_put(dr->svsk);
+ svc_xprt_put(&dr->svsk->sk_xprt);
kfree(dr);
return;
}
spin_unlock(&svsk->sk_lock);
set_bit(SK_DEFERRED, &svsk->sk_flags);
svc_sock_enqueue(svsk);
- svc_sock_put(svsk);
+ svc_xprt_put(&svsk->sk_xprt);
}
static struct cache_deferred_req *
dr->argslen = rqstp->rq_arg.len >> 2;
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
}
- atomic_inc(&rqstp->rq_sock->sk_inuse);
+ svc_xprt_get(rqstp->rq_xprt);
dr->svsk = rqstp->rq_sock;
dr->handle.revisit = svc_revisit;