From: cecilihf Date: Fri, 31 Aug 2007 12:14:06 +0000 (+0000) Subject: Added support for load balancing among backends in varnish. It is still experimental X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fa0f40915d5115e6092a1bb7d3a8f649398d43eb;p=varnish Added support for load balancing among backends in varnish. It is still experimental and very basic, but it should be ready for testing. Two strategies for load balancing are implemented: a simple round robin, and a simple weighted random. The following is an example configuration in vcl. The weight parameter for random is optional. Default is equal weight. backend foo { set backend.host = "foo.bar.com"; set backend.port = "http"; } backend_round_robin rr { set backend.set = { { "foo1.bar.com", "http" } { "foo2.bar.com", "http" } { "foo3.bar.com", "http" } }; } backend_random rrr { set backend.set = { { "foo1.bar.com", "http", 0.3 } { "foo2.bar.com", "http", 0.6 } { "foo3.bar.com", "http", 0.1 } }; } sub vcl_recv { if {req.http.host ~ "foo"} { req.backend = foo; } elseif {req.http.host ~ "bar"} { req.backend = rr; } else { req.backend = rrr; } } git-svn-id: svn+ssh://projects.linpro.no/svn/varnish/trunk@1931 d4fa192b-c00b-0410-8231-f00ffab90ce4 --- diff --git a/varnish-cache/bin/varnishd/Makefile.am b/varnish-cache/bin/varnishd/Makefile.am index b3dba993..9d8e4ed3 100644 --- a/varnish-cache/bin/varnishd/Makefile.am +++ b/varnish-cache/bin/varnishd/Makefile.am @@ -12,6 +12,8 @@ varnishd_SOURCES = \ cache_acceptor_poll.c \ cache_acceptor_kqueue.c \ cache_backend.c \ + cache_backend_random.c \ + cache_backend_round_robin.c \ cache_backend_simple.c \ cache_ban.c \ cache_center.c \ diff --git a/varnish-cache/bin/varnishd/cache.h b/varnish-cache/bin/varnishd/cache.h index 08e79896..6e4bdaf9 100644 --- a/varnish-cache/bin/varnishd/cache.h +++ b/varnish-cache/bin/varnishd/cache.h @@ -326,6 +326,7 @@ struct vbe_conn { TAILQ_ENTRY(vbe_conn) list; struct backend *backend; int fd; + void *priv; }; diff --git 
a/varnish-cache/bin/varnishd/cache_backend_random.c b/varnish-cache/bin/varnishd/cache_backend_random.c new file mode 100644 index 00000000..ff49c980 --- /dev/null +++ b/varnish-cache/bin/varnishd/cache_backend_random.c @@ -0,0 +1,471 @@ +/*- + * Copyright (c) 2006 Verdens Gang AS + * Copyright (c) 2006-2007 Linpro AS + * All rights reserved. + * + * Author: Cecilie Fritzvold + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "shmlog.h" +#include "cache.h" +#include "vrt.h" + + +struct ber { + unsigned magic; +#define BER_MAGIC 0x66f05894 + struct brspec *blist; +#if 0 + /* Store a hash of the backend info given in + * vcl for comparison when a new vcl file is + * uploaded. Not in use yet. + */ + unsigned hash; +#endif +}; + +struct brspec { + unsigned magic; +#define BRSPEC_MAGIC 0x761d69c2 + struct brspec *next; + double limit; + char *hostname; + char *portname; + struct addrinfo *addr; + struct addrinfo *last_addr; + double dnsttl; + double dnstime; + unsigned dnsseq; + TAILQ_HEAD(, vbe_conn) connlist; +}; + +/*--------------------------------------------------------------------*/ + +static int +ber_conn_try_list(struct sess *sp, struct brspec *bs) +{ + struct addrinfo *ai, *from; + struct sockaddr_storage ss; + int fam, sockt, proto; + socklen_t alen; + int s, loops; + char abuf1[TCP_ADDRBUFSIZE], abuf2[TCP_ADDRBUFSIZE]; + char pbuf1[TCP_PORTBUFSIZE], pbuf2[TCP_PORTBUFSIZE]; + unsigned myseq; + + /* Called with lock held */ + myseq = bs->dnsseq; + loops = 0; + from = bs->last_addr; + for (ai = from; ai != NULL && (loops != 1 || ai != from);) { + fam = ai->ai_family; + sockt = ai->ai_socktype; + proto = ai->ai_protocol; + alen = ai->ai_addrlen; + assert(alen <= sizeof ss); + memcpy(&ss, ai->ai_addr, alen); + UNLOCK(&sp->backend->mtx); + s = socket(fam, sockt, proto); + if (s >= 0 && connect(s, (void *)&ss, alen)) { + AZ(close(s)); + s = -1; + } + if (s >= 0) { + TCP_myname(s, abuf1, sizeof abuf1, pbuf1, sizeof pbuf1); + TCP_name((void*)&ss, alen, + abuf2, sizeof abuf2, pbuf2, sizeof pbuf2); + WSL(sp->wrk, SLT_BackendOpen, s, "%s %s %s %s %s", + sp->backend->vcl_name, abuf1, pbuf1, abuf2, pbuf2); + } + LOCK(&sp->backend->mtx); + if (s >= 0) { + if (myseq == bs->dnsseq) + bs->last_addr = ai; + return (s); + } + if (myseq != bs->dnsseq) { + loops = 0; + from = 
bs->last_addr; + ai = from; + } else { + ai = ai->ai_next; + if (ai == NULL) { + loops++; + ai = bs->addr; + } + } + } + return (-1); +} + +/*--------------------------------------------------------------------*/ + +static int +ber_conn_try(struct sess *sp, struct backend *bp, struct brspec *bs) +{ + int s; + struct addrinfo *res, hint, *old; + int error; + + LOCK(&bp->mtx); + + s = ber_conn_try_list(sp, bs); + if (s >= 0) { + bp->refcount++; + UNLOCK(&bp->mtx); + return (s); + } + + if (bs->dnstime + bs->dnsttl >= TIM_mono()) { + UNLOCK(&bp->mtx); + return (-1); + } + + /* Then do another lookup to catch DNS changes */ + bs->dnstime = TIM_mono(); + UNLOCK(&bp->mtx); + + memset(&hint, 0, sizeof hint); + hint.ai_family = PF_UNSPEC; + hint.ai_socktype = SOCK_STREAM; + res = NULL; + error = getaddrinfo(bs->hostname, + bs->portname == NULL ? "http" : bs->portname, + &hint, &res); + if (error) { + if (res != NULL) + freeaddrinfo(res); + printf("getaddrinfo: %s\n", gai_strerror(error)); /* XXX */ + LOCK(&bp->mtx); + } else { + LOCK(&bp->mtx); + bs->dnsseq++; + old = bs->addr; + bs->last_addr = res; + bs->addr = res; + if (old != NULL) + freeaddrinfo(old); + } + + /* And try the entire list */ + s = ber_conn_try_list(sp, bs); + if (s >= 0) { + bp->refcount++; + UNLOCK(&bp->mtx); + return (s); + } + + UNLOCK(&bp->mtx); + return (-1); +} + + +/* Get a backend connection ------------------------------------------ + * + * Get the next backend in the round-robin list, and connect to this. + * + * Try all cached backend connections for this backend, and use the + * first one that is looks like it is still connected. + * If that fails to get us a connection, create a new one, reusing a + * connection from the freelist, if possible. + * + * This function is slightly complicated by optimizations on bermtx. 
+ */ + +static struct vbe_conn * +ber_nextfd(struct sess *sp) +{ + struct vbe_conn *vc; + struct pollfd pfd; + struct backend *bp; + int reuse = 0; + struct ber *ber; + struct brspec *bs; + double r; + + CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); + CHECK_OBJ_NOTNULL(sp->backend, BACKEND_MAGIC); + bp = sp->backend; + CAST_OBJ_NOTNULL(ber, bp->priv, BER_MAGIC); + + r = (double)rand() / ((double)(RAND_MAX)+1.0); + bs = ber->blist; + CHECK_OBJ_NOTNULL(bs, BRSPEC_MAGIC); + while (r > bs->limit) { + bs = bs->next; + CHECK_OBJ_NOTNULL(bs, BRSPEC_MAGIC); + } + while (1) { + LOCK(&bp->mtx); + vc = TAILQ_FIRST(&bs->connlist); + if (vc != NULL) { + bp->refcount++; + assert(vc->backend == bp); + assert(vc->fd >= 0); + TAILQ_REMOVE(&bs->connlist, vc, list); + vc->priv = bs; + } + UNLOCK(&bp->mtx); + if (vc == NULL) + break; + + /* Test the connection for remote close before we use it */ + pfd.fd = vc->fd; + pfd.events = POLLIN; + pfd.revents = 0; + if (!poll(&pfd, 1, 0)) { + /* XXX locking of stats */ + VSL_stats->backend_reuse += reuse; + VSL_stats->backend_conn++; + return (vc); + } + VBE_ClosedFd(sp->wrk, vc); + } + + vc = VBE_NewConn(); + assert(vc->fd == -1); + AZ(vc->backend); + vc->fd = ber_conn_try(sp, bp, bs); + if (vc->fd < 0) { + VBE_ReleaseConn(vc); + VSL_stats->backend_fail++; + return (NULL); + } + vc->backend = bp; + vc->priv = bs; + VSL_stats->backend_conn++; + return (vc); +} + +static struct vbe_conn * +ber_GetFd(struct sess *sp) +{ + struct vbe_conn *vc; + unsigned n; + for (n = 1; n < 5; n++) { + vc = ber_nextfd(sp); + if (vc == NULL) { + usleep(100000 * n); + continue; + } + assert(vc->fd >= 0); + assert(vc->backend == sp->backend); + WSL(sp->wrk, SLT_BackendXID, vc->fd, "%u", sp->xid); + WSL(sp->wrk, SLT_Backend, sp->fd, "%d %s", vc->fd, + sp->backend->vcl_name); + return (vc); + } + return (NULL); +} + +/*--------------------------------------------------------------------*/ + +static void +ber_ClosedFd(struct worker *w, struct vbe_conn *vc) +{ + 
CHECK_OBJ_NOTNULL(vc, VBE_CONN_MAGIC); + CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC); + assert(vc->fd >= 0); + WSL(w, SLT_BackendClose, vc->fd, "%s", vc->backend->vcl_name); + AZ(close(vc->fd)); + vc->fd = -1; + VBE_DropRef(vc->backend); + vc->backend = NULL; + VBE_ReleaseConn(vc); +} + +/*--------------------------------------------------------------------*/ + +static void +ber_RecycleFd(struct worker *w, struct vbe_conn *vc) +{ + struct brspec *bs; + + CHECK_OBJ_NOTNULL(vc, VBE_CONN_MAGIC); + CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC); + CAST_OBJ_NOTNULL(bs, vc->priv, BRSPEC_MAGIC); + + assert(vc->fd >= 0); + WSL(w, SLT_BackendReuse, vc->fd, "%s", vc->backend->vcl_name); + LOCK(&vc->backend->mtx); + VSL_stats->backend_recycle++; + TAILQ_INSERT_HEAD(&bs->connlist, vc, list); + VBE_DropRefLocked(vc->backend); +} + +/*--------------------------------------------------------------------*/ + +static void +ber_Cleanup(struct backend *b) +{ + struct ber *ber; + struct vbe_conn *vbe; + struct brspec *bs, *bstmp; + + assert(b->refcount == 0); + CAST_OBJ_NOTNULL(ber, b->priv, BER_MAGIC); + + bs = ber->blist; + + do { + free(bs->portname); + free(bs->hostname); + freeaddrinfo(bs->addr); + while (1) { + vbe = TAILQ_FIRST(&bs->connlist); + if (vbe == NULL) + break; + TAILQ_REMOVE(&bs->connlist, vbe, list); + if (vbe->fd >= 0) + close(vbe->fd); + free(vbe); + } + bstmp = bs; + bs = bs->next; + free(bstmp); + } while (bs != ber->blist); + + free(ber); +} + +/*--------------------------------------------------------------------*/ + +/* Will return the hostname of the first backend in the list */ +static const char * +ber_GetHostname(struct backend *b) +{ + struct ber *ber; + + CHECK_OBJ_NOTNULL(b, BACKEND_MAGIC); + CAST_OBJ_NOTNULL(ber, b->priv, BER_MAGIC); + return (ber->blist->hostname); +} + +/*--------------------------------------------------------------------*/ + +static void +ber_Init(void) +{ + +} + 
+/*--------------------------------------------------------------------*/
+
+struct backend_method backend_method_random = {
+	.name =			"random",
+	.getfd =		ber_GetFd,
+	.close =		ber_ClosedFd,
+	.recycle =		ber_RecycleFd,
+	.gethostname =		ber_GetHostname,
+	.cleanup =		ber_Cleanup,
+	.init =			ber_Init
+};
+
+/*--------------------------------------------------------------------*/
+
+/*
+ * Build the runtime state for a "backend_random" declaration.
+ *
+ * bp	receives the newly created backend.
+ * t	compiled description: name, entry count, "weighted" flag and a
+ *	NULL-terminated chain of host/port/weight entries.  The weights of
+ *	the entries are overwritten with 1/count when t->weighted is zero.
+ *
+ * The entries are turned into a circular list of struct brspec hanging
+ * off ber->blist.  ber_nextfd() draws r in [0,1), starts at ber->blist
+ * and follows ->next while r > limit, so the limits MUST grow along the
+ * ring starting at blist, and one node must carry a limit of at least
+ * 1.0 or the walk would never stop.  The list is built by prepending,
+ * hence the limits are handed out from the top (1.0) downwards: the
+ * node created last, which becomes blist, gets the smallest limit.
+ * Limits are normalised by the real sum of the weights, so each node is
+ * picked with probability weight/total even if the total is not
+ * exactly 1.
+ */
+void
+VRT_init_random_backend(struct backend **bp, struct vrt_random_backend *t)
+{
+	struct backend *b;
+	struct ber *ber;
+	struct vrt_backend_entry *be;
+	struct brspec *bs = NULL;
+	struct brspec *bs_prev = NULL;
+	struct brspec *bs_first = NULL;
+	double total = 0;
+	double remaining;
+	double default_weight;
+
+	/* Without at least one entry there is no ring to close below */
+	AN(t->bentry);
+	assert(t->count > 0);
+
+	/*
+	 * Scan existing backends to see if we can recycle one of them.
+	 */
+
+	/*
+	 * XXX: Do this by comparing a hash generated from this new
+	 * XXX: backend with the earlier computed hashes from existing
+	 * XXX: backends ? Should the hash be a parameter to this function,
+	 * XXX: or computed here?
+	 */
+
+	b = VBE_NewBackend(&backend_method_random);
+
+	ber = calloc(sizeof *ber, 1);
+	XXXAN(ber);
+	ber->magic = BER_MAGIC;
+
+	b->priv = ber;
+
+	AN(t->name);
+	b->vcl_name = strdup(t->name);
+	XXXAN(b->vcl_name);
+
+	default_weight = 1.0 / (double)t->count;
+
+	/* Pass 1: settle the weight of every entry and sum them up */
+	for (be = t->bentry; be != NULL; be = be->next) {
+		if (!(t->weighted))
+			be->weight = default_weight;
+		total += be->weight;
+	}
+	assert(total > 0);
+
+	/* Pass 2: create the nodes, limits descending from 1.0 */
+	remaining = total;
+	for (be = t->bentry; be != NULL; be = be->next) {
+		bs = calloc(sizeof *bs, 1);
+		XXXAN(bs);
+		bs->magic = BRSPEC_MAGIC;
+		TAILQ_INIT(&bs->connlist);
+
+		AN(be->port);
+		bs->portname = strdup(be->port);
+		XXXAN(bs->portname);
+
+		AN(be->host);
+		bs->hostname = strdup(be->host);
+		XXXAN(bs->hostname);
+
+		/*
+		 * The first node gets exactly 1.0 rather than total/total
+		 * spelled out, so no rounding can leave a gap at the top.
+		 */
+		if (bs_first == NULL)
+			bs->limit = 1.0;
+		else
+			bs->limit = remaining / total;
+		remaining -= be->weight;
+
+		bs->dnsttl = 300;
+
+		if (bs_first == NULL)
+			bs_first = bs;
+
+		bs->next = bs_prev;
+		bs_prev = bs;
+	}
+
+	/* Close the ring: oldest node (limit 1.0) points at the newest */
+	bs_first->next = bs;
+	ber->blist = bs;
+
+	*bp = b;
+}
+
diff --git a/varnish-cache/bin/varnishd/cache_backend_round_robin.c b/varnish-cache/bin/varnishd/cache_backend_round_robin.c new file mode 100644 index 00000000..3536f47e --- /dev/null +++
b/varnish-cache/bin/varnishd/cache_backend_round_robin.c @@ -0,0 +1,454 @@ +/*- + * Copyright (c) 2006 Verdens Gang AS + * Copyright (c) 2006-2007 Linpro AS + * All rights reserved. + * + * Author: Cecilie Fritzvold + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "shmlog.h" +#include "cache.h" +#include "vrt.h" + + +struct brr { + unsigned magic; +#define BRR_MAGIC 0x66f05894 + struct bspec *blist; +#if 0 + /* Store a hash of the backend info given in + * vcl for comparison when a new vcl file is + * uploaded. Not in use yet. 
+ */ + unsigned hash; +#endif +}; + +struct bspec { + unsigned magic; +#define BSPEC_MAGIC 0x761d69c2 + struct bspec *next; + char *hostname; + char *portname; + struct addrinfo *addr; + struct addrinfo *last_addr; + double dnsttl; + double dnstime; + unsigned dnsseq; + TAILQ_HEAD(, vbe_conn) connlist; +}; + +/*--------------------------------------------------------------------*/ + +static int +brr_conn_try_list(struct sess *sp, struct bspec *bs) +{ + struct addrinfo *ai, *from; + struct sockaddr_storage ss; + int fam, sockt, proto; + socklen_t alen; + int s, loops; + char abuf1[TCP_ADDRBUFSIZE], abuf2[TCP_ADDRBUFSIZE]; + char pbuf1[TCP_PORTBUFSIZE], pbuf2[TCP_PORTBUFSIZE]; + unsigned myseq; + + /* Called with lock held */ + myseq = bs->dnsseq; + loops = 0; + from = bs->last_addr; + for (ai = from; ai != NULL && (loops != 1 || ai != from);) { + fam = ai->ai_family; + sockt = ai->ai_socktype; + proto = ai->ai_protocol; + alen = ai->ai_addrlen; + assert(alen <= sizeof ss); + memcpy(&ss, ai->ai_addr, alen); + UNLOCK(&sp->backend->mtx); + s = socket(fam, sockt, proto); + if (s >= 0 && connect(s, (void *)&ss, alen)) { + AZ(close(s)); + s = -1; + } + if (s >= 0) { + TCP_myname(s, abuf1, sizeof abuf1, pbuf1, sizeof pbuf1); + TCP_name((void*)&ss, alen, + abuf2, sizeof abuf2, pbuf2, sizeof pbuf2); + WSL(sp->wrk, SLT_BackendOpen, s, "%s %s %s %s %s", + sp->backend->vcl_name, abuf1, pbuf1, abuf2, pbuf2); + } + LOCK(&sp->backend->mtx); + if (s >= 0) { + if (myseq == bs->dnsseq) + bs->last_addr = ai; + return (s); + } + if (myseq != bs->dnsseq) { + loops = 0; + from = bs->last_addr; + ai = from; + } else { + ai = ai->ai_next; + if (ai == NULL) { + loops++; + ai = bs->addr; + } + } + } + return (-1); +} + +/*--------------------------------------------------------------------*/ + +static int +brr_conn_try(struct sess *sp, struct backend *bp, struct bspec *bs) +{ + int s; + struct addrinfo *res, hint, *old; + int error; + + LOCK(&bp->mtx); + + s = brr_conn_try_list(sp, bs); + if 
(s >= 0) { + bp->refcount++; + UNLOCK(&bp->mtx); + return (s); + } + + if (bs->dnstime + bs->dnsttl >= TIM_mono()) { + UNLOCK(&bp->mtx); + return (-1); + } + + /* Then do another lookup to catch DNS changes */ + bs->dnstime = TIM_mono(); + UNLOCK(&bp->mtx); + + memset(&hint, 0, sizeof hint); + hint.ai_family = PF_UNSPEC; + hint.ai_socktype = SOCK_STREAM; + res = NULL; + error = getaddrinfo(bs->hostname, + bs->portname == NULL ? "http" : bs->portname, + &hint, &res); + if (error) { + if (res != NULL) + freeaddrinfo(res); + printf("getaddrinfo: %s\n", gai_strerror(error)); /* XXX */ + LOCK(&bp->mtx); + } else { + LOCK(&bp->mtx); + bs->dnsseq++; + old = bs->addr; + bs->last_addr = res; + bs->addr = res; + if (old != NULL) + freeaddrinfo(old); + } + + /* And try the entire list */ + s = brr_conn_try_list(sp, bs); + if (s >= 0) { + bp->refcount++; + UNLOCK(&bp->mtx); + return (s); + } + + UNLOCK(&bp->mtx); + return (-1); +} + + +/* Get a backend connection ------------------------------------------ + * + * Get the next backend in the round-robin list, and connect to this. + * + * Try all cached backend connections for this backend, and use the + * first one that is looks like it is still connected. + * If that fails to get us a connection, create a new one, reusing a + * connection from the freelist, if possible. + * + * This function is slightly complicated by optimizations on brrmtx. 
+ */ + +static struct vbe_conn * +brr_nextfd(struct sess *sp) +{ + struct vbe_conn *vc; + struct pollfd pfd; + struct backend *bp; + int reuse = 0; + struct brr *brr; + struct bspec *bs; + + CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); + CHECK_OBJ_NOTNULL(sp->backend, BACKEND_MAGIC); + bp = sp->backend; + CAST_OBJ_NOTNULL(brr, bp->priv, BRR_MAGIC); + + bs = brr->blist = brr->blist->next; + + while (1) { + LOCK(&bp->mtx); + vc = TAILQ_FIRST(&bs->connlist); + if (vc != NULL) { + bp->refcount++; + assert(vc->backend == bp); + assert(vc->fd >= 0); + TAILQ_REMOVE(&bs->connlist, vc, list); + vc->priv = bs; + } + UNLOCK(&bp->mtx); + if (vc == NULL) + break; + + /* Test the connection for remote close before we use it */ + pfd.fd = vc->fd; + pfd.events = POLLIN; + pfd.revents = 0; + if (!poll(&pfd, 1, 0)) { + /* XXX locking of stats */ + VSL_stats->backend_reuse += reuse; + VSL_stats->backend_conn++; + return (vc); + } + VBE_ClosedFd(sp->wrk, vc); + } + + vc = VBE_NewConn(); + assert(vc->fd == -1); + AZ(vc->backend); + vc->fd = brr_conn_try(sp, bp, bs); + if (vc->fd < 0) { + VBE_ReleaseConn(vc); + VSL_stats->backend_fail++; + return (NULL); + } + vc->backend = bp; + vc->priv = bs; + VSL_stats->backend_conn++; + return (vc); +} + +static struct vbe_conn * +brr_GetFd(struct sess *sp) +{ + struct vbe_conn *vc; + unsigned n; + for (n = 1; n < 5; n++) { + vc = brr_nextfd(sp); + if (vc == NULL) { + usleep(100000 * n); + continue; + } + assert(vc->fd >= 0); + assert(vc->backend == sp->backend); + WSL(sp->wrk, SLT_BackendXID, vc->fd, "%u", sp->xid); + WSL(sp->wrk, SLT_Backend, sp->fd, "%d %s", vc->fd, + sp->backend->vcl_name); + return (vc); + } + return (NULL); +} + +/*--------------------------------------------------------------------*/ + +static void +brr_ClosedFd(struct worker *w, struct vbe_conn *vc) +{ + CHECK_OBJ_NOTNULL(vc, VBE_CONN_MAGIC); + CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC); + assert(vc->fd >= 0); + WSL(w, SLT_BackendClose, vc->fd, "%s", vc->backend->vcl_name); + 
AZ(close(vc->fd)); + vc->fd = -1; + VBE_DropRef(vc->backend); + vc->backend = NULL; + VBE_ReleaseConn(vc); +} + +/*--------------------------------------------------------------------*/ + +static void +brr_RecycleFd(struct worker *w, struct vbe_conn *vc) +{ + struct bspec *bs; + + CHECK_OBJ_NOTNULL(vc, VBE_CONN_MAGIC); + CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC); + CAST_OBJ_NOTNULL(bs, vc->priv, BSPEC_MAGIC); + + assert(vc->fd >= 0); + WSL(w, SLT_BackendReuse, vc->fd, "%s", vc->backend->vcl_name); + LOCK(&vc->backend->mtx); + VSL_stats->backend_recycle++; + TAILQ_INSERT_HEAD(&bs->connlist, vc, list); + VBE_DropRefLocked(vc->backend); +} + +/*--------------------------------------------------------------------*/ + +static void +brr_Cleanup(struct backend *b) +{ + struct brr *brr; + struct vbe_conn *vbe; + struct bspec *bs, *bstmp; + + assert(b->refcount == 0); + CAST_OBJ_NOTNULL(brr, b->priv, BRR_MAGIC); + + bs = brr->blist; + + do { + free(bs->portname); + free(bs->hostname); + freeaddrinfo(bs->addr); + while (1) { + vbe = TAILQ_FIRST(&bs->connlist); + if (vbe == NULL) + break; + TAILQ_REMOVE(&bs->connlist, vbe, list); + if (vbe->fd >= 0) + close(vbe->fd); + free(vbe); + } + bstmp = bs; + bs = bs->next; + free(bstmp); + } while (bs != brr->blist); + + free(brr); +} + +/*--------------------------------------------------------------------*/ + +/* Will return the hostname of the first backend in the list */ +static const char * +brr_GetHostname(struct backend *b) +{ + struct brr *brr; + + CHECK_OBJ_NOTNULL(b, BACKEND_MAGIC); + CAST_OBJ_NOTNULL(brr, b->priv, BRR_MAGIC); + return (brr->blist->hostname); +} + +/*--------------------------------------------------------------------*/ + +static void +brr_Init(void) +{ + +} + +/*--------------------------------------------------------------------*/ + +struct backend_method backend_method_round_robin = { + .name = "round_robin", + .getfd = brr_GetFd, + .close = brr_ClosedFd, + .recycle = brr_RecycleFd, + .gethostname = 
brr_GetHostname,
+	.cleanup =		brr_Cleanup,
+	.init =			brr_Init
+};
+
+/*--------------------------------------------------------------------*/
+
+/*
+ * Build the runtime state for a "backend_round_robin" declaration.
+ *
+ * bp	receives the newly created backend.
+ * t	compiled description: name and a NULL-terminated chain of
+ *	host/port entries (at least one).
+ *
+ * Every entry becomes a struct bspec; the nodes are linked into a
+ * circular list hanging off brr->blist, which brr_nextfd() advances by
+ * one node per call.
+ */
+void
+VRT_init_round_robin_backend(struct backend **bp, struct vrt_round_robin_backend *t)
+{
+	struct backend *b;
+	struct brr *brr;
+	struct vrt_backend_entry *be;
+	struct bspec *bs = NULL;
+	struct bspec *bs_prev = NULL;
+	struct bspec *bs_first = NULL;
+
+	/* Without at least one entry there is no ring to close below */
+	AN(t->bentry);
+
+	/*
+	 * Scan existing backends to see if we can recycle one of them.
+	 */
+	/*
+	 * XXX: Do this by comparing a hash generated from this new
+	 * XXX: backend with the earlier computed hashes from existing
+	 * XXX: backends ? Should the hash be a parameter to this function,
+	 * XXX: or computed here?
+	 */
+
+
+	b = VBE_NewBackend(&backend_method_round_robin);
+
+	brr = calloc(sizeof *brr, 1);
+	XXXAN(brr);
+	brr->magic = BRR_MAGIC;
+
+	b->priv = brr;
+
+	AN(t->name);
+	b->vcl_name = strdup(t->name);
+	XXXAN(b->vcl_name);
+
+	for (be = t->bentry; be != NULL; be = be->next) {
+		bs = calloc(sizeof *bs, 1);
+		XXXAN(bs);
+		bs->magic = BSPEC_MAGIC;
+		TAILQ_INIT(&bs->connlist);
+
+		AN(be->port);
+		bs->portname = strdup(be->port);
+		XXXAN(bs->portname);
+
+		AN(be->host);
+		bs->hostname = strdup(be->host);
+		XXXAN(bs->hostname);
+
+		bs->dnsttl = 300;
+
+		if (bs_first == NULL)
+			bs_first = bs;
+
+		/* Prepend; the ring is closed once all nodes exist */
+		bs->next = bs_prev;
+		bs_prev = bs;
+	}
+
+	/* Close the ring: oldest node points back at the newest */
+	bs_first->next = bs;
+	brr->blist = bs;
+
+	*bp = b;
+}
+
diff --git a/varnish-cache/include/vrt.h b/varnish-cache/include/vrt.h index 2e3f8769..a54ed181 100644 --- a/varnish-cache/include/vrt.h +++ b/varnish-cache/include/vrt.h @@ -46,6 +46,26 @@ struct vrt_simple_backend { const char *host; }; +struct vrt_backend_entry { + const char *port; + const char *host; + double weight; + struct vrt_backend_entry *next; +}; + +struct vrt_round_robin_backend { + const char *name; + struct vrt_backend_entry *bentry; +}; + +struct vrt_random_backend { + const char *name; + unsigned weighted; + unsigned count; + struct vrt_backend_entry *bentry; +}; + + struct vrt_ref { unsigned source;
unsigned offset; @@ -94,6 +114,8 @@ int VRT_strcmp(const char *s1, const char *s2); /* Backend related */ void VRT_init_simple_backend(struct backend **, struct vrt_simple_backend *); +void VRT_init_round_robin_backend(struct backend **, struct vrt_round_robin_backend *); +void VRT_init_random_backend(struct backend **, struct vrt_random_backend *); void VRT_fini_backend(struct backend *); char *VRT_IP_string(struct sess *sp, struct sockaddr *sa); diff --git a/varnish-cache/include/vrt_obj.h b/varnish-cache/include/vrt_obj.h index 9560ce7a..2bf6da68 100644 --- a/varnish-cache/include/vrt_obj.h +++ b/varnish-cache/include/vrt_obj.h @@ -9,6 +9,7 @@ void VRT_l_backend_host(struct backend *, const char *); void VRT_l_backend_port(struct backend *, const char *); void VRT_l_backend_dnsttl(struct backend *, double); +void VRT_l_backend_set(struct backend *, struct vrt_backend_entry *); struct sockaddr * VRT_r_client_ip(struct sess *); struct sockaddr * VRT_r_server_ip(struct sess *); const char * VRT_r_req_request(struct sess *); diff --git a/varnish-cache/lib/libvcl/vcc_backend.c b/varnish-cache/lib/libvcl/vcc_backend.c index 3e47024d..7fc73372 100644 --- a/varnish-cache/lib/libvcl/vcc_backend.c +++ b/varnish-cache/lib/libvcl/vcc_backend.c @@ -59,7 +59,7 @@ CheckHostPort(const char *host, const char *port) } void -vcc_ParseBackend(struct tokenlist *tl) +vcc_ParseSimpleBackend(struct tokenlist *tl) { struct var *vp; struct token *t_be = NULL; @@ -85,6 +85,7 @@ vcc_ParseBackend(struct tokenlist *tl) vcc_NextToken(tl); ExpectErr(tl, '{'); vcc_NextToken(tl); + while (1) { if (tl->t->tok == '}') break; @@ -160,3 +161,177 @@ vcc_ParseBackend(struct tokenlist *tl) Ff(tl, 0, "\tVRT_fini_backend(VGC_backend_%.*s);\n", PF(t_be)); tl->nbackend++; }
+
+/*
+ * Parse a "backend_round_robin" or "backend_random" declaration:
+ *
+ *	backend_xxx NAME {
+ *		set backend.set = {
+ *			{ "host" [, "port" [, weight]] }
+ *			...
+ *		};
+ *	}
+ *
+ * The current token on entry is the backend_xxx keyword; its token
+ * number decides which runtime initialiser is emitted.  For every
+ * { ... } entry a static struct vrt_backend_entry is written to the
+ * generated C source, each one pointing at the entry emitted before
+ * it, and the backend description refers to the last one.
+ *
+ * An entry that gives no port is emitted with port "80", the same
+ * value the host is validated against.  If any entry carries a weight,
+ * the weights of the whole set must add up to 1.
+ *
+ * Errors are reported through tl->sb / vcc_ErrWhere() and make the
+ * function return early.
+ */
+void
+vcc_ParseBalancedBackend(struct tokenlist *tl)
+{
+	/* Slack allowed when comparing the summed weights against 1 */
+	const double weight_slack = 1e-6;
+	struct var *vp;
+	struct token *t_be = NULL;
+	struct token *t_host = NULL;
+	struct token *t_port = NULL;
+	double t_weight = 0;
+	const char *ep;
+	int cnt = 0;
+	int weighted = 0;
+	double weight = 0;
+	unsigned backend_type = tl->t->tok;
+
+	vcc_NextToken(tl);
+	ExpectErr(tl, ID);
+	t_be = tl->t;
+	vcc_AddDef(tl, tl->t, R_BACKEND);
+	/*
+	 * The first backend is always referenced because that is the default
+	 * at the beginning of vcl_recv
+	 */
+	if (tl->nbackend == 0)
+		vcc_AddRef(tl, tl->t, R_BACKEND);
+
+	/* In the compiled vcl we use these macros to refer to backends */
+	Fh(tl, 1, "#define VGC_backend_%.*s (VCL_conf.backend[%d])\n",
+	    PF(tl->t), tl->nbackend);
+
+	vcc_NextToken(tl);
+	ExpectErr(tl, '{');
+	vcc_NextToken(tl);
+
+	while (1) {
+		if (tl->t->tok == '}')
+			break;
+		ExpectErr(tl, ID);
+		if (!vcc_IdIs(tl->t, "set")) {
+			vsb_printf(tl->sb,
+			    "Expected 'set', found ");
+			vcc_ErrToken(tl, tl->t);
+			vsb_printf(tl->sb, " at\n");
+			vcc_ErrWhere(tl, tl->t);
+			return;
+		}
+		vcc_NextToken(tl);
+		ExpectErr(tl, VAR);
+		vp = vcc_FindVar(tl, tl->t, vcc_be_vars);
+		ERRCHK(tl);
+		assert(vp != NULL);
+		vcc_NextToken(tl);
+		ExpectErr(tl, '=');
+		vcc_NextToken(tl);
+		if (vp->fmt != SET) {
+			vsb_printf(tl->sb,
+			    "Assignments not possible for '%s'\n", vp->name);
+			vcc_ErrWhere(tl, tl->t);
+			return;
+		}
+
+		ExpectErr(tl, '{');
+		vcc_NextToken(tl);
+
+		while (1) {
+			if (tl->t->tok == '}')
+				break;
+
+			ExpectErr(tl, '{');
+			vcc_NextToken(tl);
+
+			/*
+			 * Forget the port of the previous entry, so an
+			 * entry without one neither inherits it nor
+			 * leaves us with a NULL token to print.
+			 */
+			t_port = NULL;
+
+			/* Host */
+			ExpectErr(tl, CSTR);
+			t_host = tl->t;
+			vcc_NextToken(tl);
+
+			ep = CheckHostPort(t_host->dec, "80");
+			if (ep != NULL) {
+				vsb_printf(tl->sb, "Backend '%.*s': %s\n", PF(t_be), ep);
+				vcc_ErrWhere(tl, t_host);
+				return;
+			}
+
+			if (tl->t->tok == ',') {
+				vcc_NextToken(tl);
+
+				/* Port */
+				ExpectErr(tl, CSTR);
+				t_port = tl->t;
+				vcc_NextToken(tl);
+
+				ep = CheckHostPort(t_host->dec, t_port->dec);
+				if (ep != NULL) {
+					vsb_printf(tl->sb,
+					    "Backend '%.*s': %s\n", PF(t_be), ep);
+					vcc_ErrWhere(tl, t_port);
+					return;
+				}
+
+				if (tl->t->tok == ',') {
+
+					vcc_NextToken(tl);
+
+					/* Weight */
+					t_weight = vcc_DoubleVal(tl);
+					weighted = 1;
+					weight += t_weight;
+				}
+			}
+
+			ExpectErr(tl, '}');
+			vcc_NextToken(tl);
+
+			Fc(tl, 0, "\nstatic struct vrt_backend_entry bentry_%.*s_%d = {\n",
+			    PF(t_be), cnt);
+			if (t_port != NULL)
+				Fc(tl, 0, "\t.port = %.*s,\n", PF(t_port));
+			else
+				Fc(tl, 0, "\t.port = \"80\",\n");
+			Fc(tl, 0, "\t.host = %.*s,\n", PF(t_host));
+			Fc(tl, 0, "\t.weight = %f,\n", t_weight);
+			/* The first entry ends the chain: .next stays zero */
+			if (cnt > 0) {
+				Fc(tl, 0, "\t.next = &bentry_%.*s_%d\n", PF(t_be), cnt-1);
+			}
+			Fc(tl, 0, "};\n");
+			t_weight = 0;
+			cnt++;
+		}
+		ExpectErr(tl, '}');
+		vcc_NextToken(tl);
+		ExpectErr(tl, ';');
+		vcc_NextToken(tl);
+
+		if (t_host == NULL) {
+			vsb_printf(tl->sb, "Backend '%.*s' has no hostname\n",
+			    PF(t_be));
+			vcc_ErrWhere(tl, tl->t);
+			return;
+		}
+
+		/*
+		 * Compare with a tolerance: decimal weights such as
+		 * 0.3 + 0.6 + 0.1 need not add up to exactly 1.0 in binary
+		 * floating point, and truncating to int would both reject
+		 * such sums and accept totals like 1.9.
+		 */
+		if (weighted &&
+		    (weight < 1.0 - weight_slack ||
+		    weight > 1.0 + weight_slack)) {
+			vsb_printf(tl->sb, "Total weight must be 1\n");
+			vcc_ErrWhere(tl, tl->t);
+			return;
+		}
+
+		if (backend_type == T_BACKEND_ROUND_ROBIN) {
+			Fc(tl, 0, "\nstatic struct vrt_round_robin_backend sbe_%.*s = {\n",
+			    PF(t_be));
+			Fc(tl, 0, "\t.name = \"%.*s\",\n", PF(t_be));
+			Fc(tl, 0, "\t.bentry = &bentry_%.*s_%d\n", PF(t_be), cnt-1);
+			Fc(tl, 0, "};\n");
+			Fi(tl, 0, "\tVRT_init_round_robin_backend(&VGC_backend_%.*s , &sbe_%.*s);\n",
+			    PF(t_be), PF(t_be));
+		} else if (backend_type == T_BACKEND_RANDOM) {
+			Fc(tl, 0, "\nstatic struct vrt_random_backend sbe_%.*s = {\n",
+			    PF(t_be));
+			Fc(tl, 0, "\t.name = \"%.*s\",\n", PF(t_be));
+			Fc(tl, 0, "\t.weighted = %d,\n", weighted);
+			Fc(tl, 0, "\t.count = %d,\n", cnt);
+			Fc(tl, 0, "\t.bentry = &bentry_%.*s_%d\n", PF(t_be), cnt-1);
+			Fc(tl, 0, "};\n");
+			Fi(tl, 0, "\tVRT_init_random_backend(&VGC_backend_%.*s , &sbe_%.*s);\n",
+			    PF(t_be), PF(t_be));
+		}
+		Ff(tl, 0, "\tVRT_fini_backend(VGC_backend_%.*s);\n", PF(t_be));
+
+	}
+	ExpectErr(tl, '}');
+
+	vcc_NextToken(tl);
+	tl->nbackend++;
+}
+
diff --git a/varnish-cache/lib/libvcl/vcc_compile.h b/varnish-cache/lib/libvcl/vcc_compile.h index 150a5824..7f8bf99a 100644 --- a/varnish-cache/lib/libvcl/vcc_compile.h +++ b/varnish-cache/lib/libvcl/vcc_compile.h @@ -99,7 +99,8 @@ enum var_type { HOSTNAME,
PORTNAME, HASH, - HEADER + HEADER, + SET }; enum ref_type { @@ -144,7 +145,8 @@ void vcc_Cond_Ip(const struct var *vp, struct tokenlist *tl); void vcc_ParseAction(struct tokenlist *tl); /* vcc_backend.c */ -void vcc_ParseBackend(struct tokenlist *tl); +void vcc_ParseSimpleBackend(struct tokenlist *tl); +void vcc_ParseBalancedBackend(struct tokenlist *tl); /* vcc_compile.c */ extern struct method method_tab[]; diff --git a/varnish-cache/lib/libvcl/vcc_fixed_token.c b/varnish-cache/lib/libvcl/vcc_fixed_token.c index 992116d1..f9400e5c 100644 --- a/varnish-cache/lib/libvcl/vcc_fixed_token.c +++ b/varnish-cache/lib/libvcl/vcc_fixed_token.c @@ -166,6 +166,24 @@ vcl_fixed_token(const char *p, const char **q) } return (0); case 'b': + if (p[0] == 'b' && p[1] == 'a' && p[2] == 'c' && + p[3] == 'k' && p[4] == 'e' && p[5] == 'n' && + p[6] == 'd' && p[7] == '_' && p[8] == 'r' && + p[9] == 'o' && p[10] == 'u' && p[11] == 'n' && + p[12] == 'd' && p[13] == '_' && p[14] == 'r' && + p[15] == 'o' && p[16] == 'b' && p[17] == 'i' && + p[18] == 'n' && !isvar(p[19])) { + *q = p + 19; + return (T_BACKEND_ROUND_ROBIN); + } + if (p[0] == 'b' && p[1] == 'a' && p[2] == 'c' && + p[3] == 'k' && p[4] == 'e' && p[5] == 'n' && + p[6] == 'd' && p[7] == '_' && p[8] == 'r' && + p[9] == 'a' && p[10] == 'n' && p[11] == 'd' && + p[12] == 'o' && p[13] == 'm' && !isvar(p[14])) { + *q = p + 14; + return (T_BACKEND_RANDOM); + } if (p[0] == 'b' && p[1] == 'a' && p[2] == 'c' && p[3] == 'k' && p[4] == 'e' && p[5] == 'n' && p[6] == 'd' && !isvar(p[7])) { @@ -274,6 +292,8 @@ vcl_init_tnames(void) vcl_tnames[ID] = "ID"; vcl_tnames[T_ACL] = "acl"; vcl_tnames[T_BACKEND] = "backend"; + vcl_tnames[T_BACKEND_RANDOM] = "backend_random"; + vcl_tnames[T_BACKEND_ROUND_ROBIN] = "backend_round_robin"; vcl_tnames[T_CAND] = "&&"; vcl_tnames[T_COR] = "||"; vcl_tnames[T_DEC] = "--"; @@ -333,7 +353,7 @@ vcl_output_lang_h(struct vsb *sb) vsb_cat(sb, " struct vrt_ref *ref;\n"); vsb_cat(sb, " unsigned nref;\n"); vsb_cat(sb, " 
unsigned busy;\n"); - vsb_cat(sb, "\n"); + vsb_cat(sb, " \n"); vsb_cat(sb, " unsigned nsrc;\n"); vsb_cat(sb, " const char **srcname;\n"); vsb_cat(sb, " const char **srcbody;\n"); @@ -404,6 +424,26 @@ vcl_output_lang_h(struct vsb *sb) vsb_cat(sb, " const char *host;\n"); vsb_cat(sb, "};\n"); vsb_cat(sb, "\n"); + vsb_cat(sb, "struct vrt_backend_entry {\n"); + vsb_cat(sb, " const char *port;\n"); + vsb_cat(sb, " const char *host;\n"); + vsb_cat(sb, " double weight;\n"); + vsb_cat(sb, " struct vrt_backend_entry *next;\n"); + vsb_cat(sb, "};\n"); + vsb_cat(sb, "\n"); + vsb_cat(sb, "struct vrt_round_robin_backend {\n"); + vsb_cat(sb, " const char *name;\n"); + vsb_cat(sb, " struct vrt_backend_entry *bentry;\n"); + vsb_cat(sb, "};\n"); + vsb_cat(sb, "\n"); + vsb_cat(sb, "struct vrt_random_backend {\n"); + vsb_cat(sb, " const char *name;\n"); + vsb_cat(sb, " unsigned weighted;\n"); + vsb_cat(sb, " unsigned count;\n"); + vsb_cat(sb, " struct vrt_backend_entry *bentry;\n"); + vsb_cat(sb, "};\n"); + vsb_cat(sb, "\n"); + vsb_cat(sb, "\n"); vsb_cat(sb, "struct vrt_ref {\n"); vsb_cat(sb, " unsigned source;\n"); vsb_cat(sb, " unsigned offset;\n"); @@ -452,6 +492,8 @@ vcl_output_lang_h(struct vsb *sb) vsb_cat(sb, "\n"); vsb_cat(sb, "/* Backend related */\n"); vsb_cat(sb, "void VRT_init_simple_backend(struct backend **, struct vrt_simple_backend *);\n"); + vsb_cat(sb, "void VRT_init_round_robin_backend(struct backend **, struct vrt_round_robin_backend *);\n"); + vsb_cat(sb, "void VRT_init_random_backend(struct backend **, struct vrt_random_backend *);\n"); vsb_cat(sb, "void VRT_fini_backend(struct backend *);\n"); vsb_cat(sb, "\n"); vsb_cat(sb, "char *VRT_IP_string(struct sess *sp, struct sockaddr *sa);\n"); @@ -473,6 +515,7 @@ vcl_output_lang_h(struct vsb *sb) vsb_cat(sb, "void VRT_l_backend_host(struct backend *, const char *);\n"); vsb_cat(sb, "void VRT_l_backend_port(struct backend *, const char *);\n"); vsb_cat(sb, "void VRT_l_backend_dnsttl(struct backend *, double);\n"); + 
vsb_cat(sb, "void VRT_l_backend_set(struct backend *, struct vrt_backend_entry *);\n"); vsb_cat(sb, "struct sockaddr * VRT_r_client_ip(struct sess *);\n"); vsb_cat(sb, "struct sockaddr * VRT_r_server_ip(struct sess *);\n"); vsb_cat(sb, "const char * VRT_r_req_request(struct sess *);\n"); diff --git a/varnish-cache/lib/libvcl/vcc_gen_fixed_token.tcl b/varnish-cache/lib/libvcl/vcc_gen_fixed_token.tcl index 4ee4caa4..8b04deb3 100755 --- a/varnish-cache/lib/libvcl/vcc_gen_fixed_token.tcl +++ b/varnish-cache/lib/libvcl/vcc_gen_fixed_token.tcl @@ -73,6 +73,10 @@ set keywords { acl backend + + backend_round_robin + + backend_random } # Non-word tokens @@ -137,7 +141,7 @@ puts $fo { unsigned magic; struct vrt_ref *ref; unsigned nref; unsigned busy; - + unsigned nsrc; const char **srcname; const char **srcbody; diff --git a/varnish-cache/lib/libvcl/vcc_gen_obj.tcl b/varnish-cache/lib/libvcl/vcc_gen_obj.tcl index 315ab05e..8070afa9 100755 --- a/varnish-cache/lib/libvcl/vcc_gen_obj.tcl +++ b/varnish-cache/lib/libvcl/vcc_gen_obj.tcl @@ -35,6 +35,7 @@ set beobj { { backend.host WO HOSTNAME {} } { backend.port WO PORTNAME {} } { backend.dnsttl WO TIME {} } + { backend.set WO SET {} } } # Variables available in sessions @@ -180,6 +181,7 @@ set tt(HEADER) "const char *" set tt(HOSTNAME) "const char *" set tt(PORTNAME) "const char *" set tt(HASH) "const char *" +set tt(SET) "struct vrt_backend_entry *" #---------------------------------------------------------------------- # Boilerplate warning for all generated files. 
diff --git a/varnish-cache/lib/libvcl/vcc_obj.c b/varnish-cache/lib/libvcl/vcc_obj.c index e016aad6..21bb8168 100644 --- a/varnish-cache/lib/libvcl/vcc_obj.c +++ b/varnish-cache/lib/libvcl/vcc_obj.c @@ -31,6 +31,13 @@ struct var vcc_be_vars[] = { 0, 0 }, + { "backend.set", SET, 11, + NULL, + "VRT_l_backend_set(backend, ", + V_WO, + 0, + 0 + }, { NULL } }; diff --git a/varnish-cache/lib/libvcl/vcc_parse.c b/varnish-cache/lib/libvcl/vcc_parse.c index bf5d7269..aefc2ff3 100644 --- a/varnish-cache/lib/libvcl/vcc_parse.c +++ b/varnish-cache/lib/libvcl/vcc_parse.c @@ -545,7 +545,11 @@ vcc_Parse(struct tokenlist *tl) Function(tl); break; case T_BACKEND: - vcc_ParseBackend(tl); + vcc_ParseSimpleBackend(tl); + break; + case T_BACKEND_RANDOM: + case T_BACKEND_ROUND_ROBIN: + vcc_ParseBalancedBackend(tl); break; case EOI: break; diff --git a/varnish-cache/lib/libvcl/vcc_token_defs.h b/varnish-cache/lib/libvcl/vcc_token_defs.h index 9a5d81ee..ccbe6a5c 100644 --- a/varnish-cache/lib/libvcl/vcc_token_defs.h +++ b/varnish-cache/lib/libvcl/vcc_token_defs.h @@ -15,23 +15,25 @@ #define T_SUB 133 #define T_ACL 134 #define T_BACKEND 135 -#define T_INC 136 -#define T_DEC 137 -#define T_CAND 138 -#define T_COR 139 -#define T_LEQ 140 -#define T_EQ 141 -#define T_NEQ 142 -#define T_GEQ 143 -#define T_SHR 144 -#define T_SHL 145 -#define T_INCR 146 -#define T_DECR 147 -#define T_MUL 148 -#define T_DIV 149 -#define ID 150 -#define VAR 151 -#define CNUM 152 -#define CSTR 153 -#define EOI 154 -#define CSRC 155 +#define T_BACKEND_ROUND_ROBIN 136 +#define T_BACKEND_RANDOM 137 +#define T_INC 138 +#define T_DEC 139 +#define T_CAND 140 +#define T_COR 141 +#define T_LEQ 142 +#define T_EQ 143 +#define T_NEQ 144 +#define T_GEQ 145 +#define T_SHR 146 +#define T_SHL 147 +#define T_INCR 148 +#define T_DECR 149 +#define T_MUL 150 +#define T_DIV 151 +#define ID 152 +#define VAR 153 +#define CNUM 154 +#define CSTR 155 +#define EOI 156 +#define CSRC 157