From: phk Date: Tue, 19 Feb 2008 11:10:12 +0000 (+0000) Subject: Revise the kqueue_acceptor with a sledgehammer. X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ba89f5cd6d45bea720709e630273811bb84dbb9d;p=varnish Revise the kqueue_acceptor with a sledgehammer. Use EV_ONESHOT for all session events to make the kernel delete the event when it fires, to remove any chance of any race with session state and kqueue event arming. For sessions with acceptfilter, this does just what we want (and I kick myself for not realizing this much sooner). For sessions where the acceptfilter is not enabled or has given up, this results in an extra kevent arming operation for each read, but this is still a much lower overhead than synchrnously deleting the event when before passing the session on. Delete all the workarounds and band-aids that had accumulated. All in all, a win-win-win situation. I have no idea how many tickets this will close. git-svn-id: svn+ssh://projects.linpro.no/svn/varnish/trunk@2514 d4fa192b-c00b-0410-8231-f00ffab90ce4 --- diff --git a/varnish-cache/bin/varnishd/cache_acceptor_kqueue.c b/varnish-cache/bin/varnishd/cache_acceptor_kqueue.c index 2852fc16..362fa957 100644 --- a/varnish-cache/bin/varnishd/cache_acceptor_kqueue.c +++ b/varnish-cache/bin/varnishd/cache_acceptor_kqueue.c @@ -51,80 +51,6 @@ #include "cache.h" #include "cache_acceptor.h" -/********************************************************************** - * Generic bitmap functions, may be generalized at some point. - */ - -#define VBITMAP_TYPE unsigned /* Our preferred wordsize */ -#define VBITMAP_LUMP (32*1024) /* How many bits we alloc at a time */ -#define VBITMAP_WORD (sizeof(VBITMAP_TYPE) * 8) -#define VBITMAP_IDX(n) (n / VBITMAP_WORD) -#define VBITMAP_BIT(n) (1U << (n % VBITMAP_WORD)) - -struct vbitmap { - VBITMAP_TYPE *bits; - unsigned nbits; -}; - -static void -vbit_expand(struct vbitmap *vb, unsigned bit) -{ - unsigned char *p; - - bit += VBITMAP_LUMP - 1; - bit -= (bit % VBITMAP_LUMP); - VSL(SLT_Debug, 0, "Expanding KQ VBIT to %u", bit); - p = realloc(vb->bits, bit / 8); - AN(p); - memset(p + vb->nbits / 8, 0, (bit - vb->nbits) / 8); - vb->bits = (void*)p; - vb->nbits = bit; -} - -static struct vbitmap * -vbit_init(unsigned initial) -{ - struct vbitmap *vb; - - vb = calloc(sizeof *vb, 1); - AN(vb); - if (initial == 0) { -#ifdef HAVE_GETDTABLESIZE - initial = getdtablesize(); -#else - initial = VBITMAP_LUMP; -#endif - } - vbit_expand(vb, initial); - return (vb); -} - -static void -vbit_set(struct vbitmap *vb, unsigned bit) -{ - - if (bit >= vb->nbits) - vbit_expand(vb, bit); - vb->bits[VBITMAP_IDX(bit)] |= VBITMAP_BIT(bit); -} - -static void -vbit_clr(struct vbitmap *vb, unsigned bit) -{ - - if (bit >= vb->nbits) - vbit_expand(vb, bit); - vb->bits[VBITMAP_IDX(bit)] &= ~VBITMAP_BIT(bit); -} - -static int -vbit_test(struct vbitmap *vb, unsigned bit) -{ - - if (bit >= vb->nbits) - vbit_expand(vb, bit); - return (vb->bits[VBITMAP_IDX(bit)] & VBITMAP_BIT(bit)); -} /**********************************************************************/ @@ -132,7 +58,6 @@ vbit_test(struct vbitmap *vb, unsigned bit) static pthread_t vca_kqueue_thread; static int kq = -1; -static struct vbitmap *vca_kqueue_bits; static VTAILQ_HEAD(,sess) sesshead = VTAILQ_HEAD_INITIALIZER(sesshead); @@ -149,21 +74,7 @@ vca_kq_flush(void) if (nki == 0) return; i = kevent(kq, ki, nki, NULL, 0, NULL); - assert(i <= 0); - if (i < 0) { - /* - * We do not push kevents into the kernel before - * passing the session off to a worker thread, so - * by the time we get around to delete the event - * the fd may be closed and we get an ENOENT back - * once we do flush. - * We can get EBADF the same way if the client closes - * on us. In that case, we get no kevent on that - * socket, but the TAILQ still has it, and it will - * be GC'ed there after the timeout. - */ - assert(errno == ENOENT || errno == EBADF); - } + assert(i == 0); nki = 0; } @@ -172,8 +83,9 @@ vca_kq_sess(struct sess *sp, short arm) { CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); - if (sp->fd < 0) - return; + assert(sp->fd >= 0); + if (params->diag_bitmap & 4) + VSL(SLT_Debug, sp->fd, "KQ: EV_SET sp %p arm %x", sp, arm); EV_SET(&ki[nki], sp->fd, EVFILT_READ, arm, 0, 0, sp); if (++nki == NKEV) vca_kq_flush(); @@ -196,52 +108,40 @@ vca_kev(const struct kevent *kp) CHECK_OBJ_NOTNULL(ss[j], SESS_MAGIC); assert(ss[j]->fd >= 0); AZ(ss[j]->obj); - AZ(vbit_test(vca_kqueue_bits, ss[j]->fd)); - vbit_set(vca_kqueue_bits, ss[j]->fd); VTAILQ_INSERT_TAIL(&sesshead, ss[j], list); - vca_kq_sess(ss[j], EV_ADD); + vca_kq_sess(ss[j], EV_ADD | EV_ONESHOT); j++; i -= sizeof ss[0]; } assert(i == 0); return; } - if (!vbit_test(vca_kqueue_bits, kp->ident)) { - if (params->diag_bitmap & 0x4) - VSL(SLT_Debug, kp->ident, - "KQ: not my fd %d, sp %p kev data %lu flags 0x%x%s", - kp->ident, kp->udata, (unsigned long)kp->data, - kp->flags, (kp->flags & EV_EOF) ? " EOF" : ""); - return; - } CAST_OBJ_NOTNULL(sp, kp->udata, SESS_MAGIC); if (params->diag_bitmap & 0x4) - VSL(SLT_Debug, sp->id, "sp %p kev data %lu flags 0x%x%s", + VSL(SLT_Debug, sp->id, "KQ: sp %p kev data %lu flags 0x%x%s", sp, (unsigned long)kp->data, kp->flags, (kp->flags & EV_EOF) ? " EOF" : ""); - if (sp->fd == -1 || kp->fflags == 0) { - if (params->diag_bitmap & 0x4) - VSL(SLT_Debug, sp->id, "KQ: got event 0x%04x on fd %d", - kp->fflags, kp->ident); - return; - } + spassert(sp->id == kp->ident); spassert(sp->fd == sp->id); if (kp->data > 0) { i = HTC_Rx(sp->htc); - if (i == 0) + if (i == 0) { + vca_kq_sess(sp, EV_ADD | EV_ONESHOT); return; /* more needed */ - vbit_clr(vca_kqueue_bits, sp->fd); + } VTAILQ_REMOVE(&sesshead, sp, list); - vca_kq_sess(sp, EV_DELETE); vca_handover(sp, i); return; } else if (kp->flags == EV_EOF) { - vbit_clr(vca_kqueue_bits, sp->fd); VTAILQ_REMOVE(&sesshead, sp, list); vca_close_session(sp, "EOF"); SES_Delete(sp); return; + } else { + VSL(SLT_Debug, sp->id, "KQ: sp %p kev data %lu flags 0x%x%s", + sp, (unsigned long)kp->data, kp->flags, + (kp->flags & EV_EOF) ? " EOF" : ""); } } @@ -273,10 +173,6 @@ vca_kqueue_main(void *arg) assert(n >= 1 && n <= NKEV); nki = 0; for (kp = ke, j = 0; j < n; j++, kp++) { - if (kp->flags & EV_ERROR) { - /* See comment in vca_kq_sess() */ - continue; - } if (kp->filter == EVFILT_TIMER) { dotimer = 1; continue; @@ -301,7 +197,6 @@ vca_kqueue_main(void *arg) break; if (sp->t_open > deadline) break; - vbit_clr(vca_kqueue_bits, sp->fd); VTAILQ_REMOVE(&sesshead, sp, list); vca_close_session(sp, "timeout"); SES_Delete(sp); @@ -320,7 +215,6 @@ vca_kqueue_init(void) i |= O_NONBLOCK; i = fcntl(vca_pipes[0], F_SETFL, i); - vca_kqueue_bits = vbit_init(0); AZ(pthread_create(&vca_kqueue_thread, NULL, vca_kqueue_main, NULL)); }