From 991b32d4905a599c42f792e098b762a53f6cc598 Mon Sep 17 00:00:00 2001 From: phk Date: Fri, 11 Jul 2008 19:49:20 +0000 Subject: [PATCH] Drop pseudo-automatic support for multihomed backends and require clear expression of intent in VCL. We now fail backend hostnames that resolve to multiple IPv4 or multiple IPv6 addresses; in other words, you cannot use "cnn.com" as a backend hostname specification without the compiler yelling at you: % ./varnishd -d -d -b cnn.com -a :8080 Backend host "cnn.com": resolves to multiple IPv4 addresses. Only one address is allowed. Please specify which exact address you want to use, we found these: 64.236.16.20 64.236.16.52 64.236.24.12 64.236.29.120 [...] VCL compilation failed However, you _can_ use a hostname that resolves to both an IPv4 and an IPv6 address, and the new parameter "prefer_ipv6" will determine which one we try first in such cases. The other part of this change is that we now do the DNS lookup at VCL compile time, and only then. If your backend's DNS record (or /etc/hosts entry) changes IP#, you must reload your VCL code to notify varnish. Finer technical points: We build a bytestring representation of the sockaddrs in VCC and include them in the concept of backend identity: for an existing backend (+ connections) to be reused for a new VCL, the backend must now be defined exactly the same way AND have the same resolved IPv4/IPv6 addresses. Since we never muck about with the address in the backend struct anymore, it's static for the life of the struct backend instance, so we can simplify and eliminate the locking dance around our connection attempts. Also eliminate the struct vrt_backend inclusion in struct backend, and instead make the relevant fields full-blown members of struct backend. This eliminates a number of TRUST_ME() calls. This is the companion commit to #2934 which prepared the VCL compiler. 
git-svn-id: svn+ssh://projects.linpro.no/svn/varnish/trunk@2936 d4fa192b-c00b-0410-8231-f00ffab90ce4 --- varnish-cache/bin/varnishd/cache_backend.c | 112 +++++------------- varnish-cache/bin/varnishd/cache_backend.h | 12 +- .../bin/varnishd/cache_backend_cfg.c | 106 ++++++++--------- varnish-cache/bin/varnishd/heritage.h | 3 + varnish-cache/bin/varnishd/mgt_param.c | 5 + 5 files changed, 97 insertions(+), 141 deletions(-) diff --git a/varnish-cache/bin/varnishd/cache_backend.c b/varnish-cache/bin/varnishd/cache_backend.c index 5cc1fe16..fffa9e95 100644 --- a/varnish-cache/bin/varnishd/cache_backend.c +++ b/varnish-cache/bin/varnishd/cache_backend.c @@ -41,11 +41,9 @@ #include #include -#include #include "shmlog.h" #include "cache.h" -#include "vrt.h" #include "cache_backend.h" /* @@ -70,7 +68,7 @@ VBE_AddHostHeader(const struct sess *sp) CHECK_OBJ_NOTNULL(sp->bereq->http, HTTP_MAGIC); CHECK_OBJ_NOTNULL(sp->backend, BACKEND_MAGIC); http_PrintfHeader(sp->wrk, sp->fd, sp->bereq->http, - "Host: %s", sp->backend->vrt->hostname); + "Host: %s", sp->backend->hosthdr); } /*-------------------------------------------------------------------- @@ -83,11 +81,8 @@ VBE_AddHostHeader(const struct sess *sp) */ static int -VBE_TryConnect(const struct sess *sp, const struct addrinfo *ai) +VBE_TryConnect(const struct sess *sp, int pf, const struct sockaddr *sa, socklen_t salen) { - struct sockaddr_storage ss; - int fam, sockt, proto; - socklen_t alen; int s, i, tmo; char abuf1[TCP_ADDRBUFSIZE], abuf2[TCP_ADDRBUFSIZE]; char pbuf1[TCP_PORTBUFSIZE], pbuf2[TCP_PORTBUFSIZE]; @@ -95,47 +90,31 @@ VBE_TryConnect(const struct sess *sp, const struct addrinfo *ai) CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); CHECK_OBJ_NOTNULL(sp->backend, BACKEND_MAGIC); - /* - * ai is only valid with the lock held, so copy out the bits - * we need to make the connection - */ - fam = ai->ai_family; - sockt = ai->ai_socktype; - proto = ai->ai_protocol; - alen = ai->ai_addrlen; - assert(alen <= sizeof ss); - memcpy(&ss, 
ai->ai_addr, alen); - - /* release lock during stuff that can take a long time */ - UNLOCK(&sp->backend->mtx); - - s = socket(fam, sockt, proto); + s = socket(pf, SOCK_STREAM, 0); if (s < 0) { LOCK(&sp->backend->mtx); return (s); } tmo = params->connect_timeout; - if (sp->backend->vrt->connect_timeout > 10e-3) - tmo = sp->backend->vrt->connect_timeout * 1000; + if (sp->backend->connect_timeout > 10e-3) + tmo = sp->backend->connect_timeout * 1000; if (tmo > 0) - i = TCP_connect(s, (void *)&ss, alen, tmo); + i = TCP_connect(s, sa, salen, tmo); else - i = connect(s, (void *)&ss, alen); + i = connect(s, sa, salen); if (i != 0) { AZ(close(s)); - LOCK(&sp->backend->mtx); return (-1); } TCP_myname(s, abuf1, sizeof abuf1, pbuf1, sizeof pbuf1); - TCP_name((void*)&ss, alen, abuf2, sizeof abuf2, pbuf2, sizeof pbuf2); + TCP_name(sa, salen, abuf2, sizeof abuf2, pbuf2, sizeof pbuf2); WSL(sp->wrk, SLT_BackendOpen, s, "%s %s %s %s %s", - sp->backend->vrt->vcl_name, abuf1, pbuf1, abuf2, pbuf2); + sp->backend->vcl_name, abuf1, pbuf1, abuf2, pbuf2); - LOCK(&sp->backend->mtx); return (s); } @@ -254,54 +233,6 @@ VBE_ReleaseConn(struct vbe_conn *vc) } } -/*-------------------------------------------------------------------- - * Try to get a socket connected to one of the addresses on the list. - * We start from the cached "last good" address and try all items on - * the list exactly once. - * - * Called with backend mutex held, but will release/acquire it. 
- * - * XXX: Not ready for DNS re-lookups - */ - -static int -bes_conn_try_list(const struct sess *sp, struct backend *bp) -{ - struct addrinfo *ai, *from; - int s, loops; - - CHECK_OBJ_NOTNULL(bp, BACKEND_MAGIC); - - /* No addrinfo, no connection */ - if (bp->ai == NULL) - return (-1); - AN(bp->last_ai); - - /* Called with lock held */ - loops = 0; - ai = from = bp->last_ai; - while (loops == 0 || ai != from) { - - /* NB: releases/acquires backend lock */ - s = VBE_TryConnect(sp, ai); - - if (s >= 0) { - bp->last_ai = ai; - return (s); - } - - /* Try next one */ - ai = ai->ai_next; - if (ai == NULL) { - loops++; - ai = bp->ai; - } - } - /* We have tried them all, fail */ - return (-1); -} - - /*--------------------------------------------------------------------*/ static int @@ -311,10 +242,25 @@ bes_conn_try(const struct sess *sp, struct backend *bp) LOCK(&bp->mtx); bp->refcount++; - s = bes_conn_try_list(sp, bp); /* releases/acquires backend lock */ - if (s < 0) + UNLOCK(&sp->backend->mtx); + + s = -1; + assert(bp->ipv6 != NULL || bp->ipv4 != NULL); + + /* release lock during stuff that can take a long time */ + + if (params->prefer_ipv6 && bp->ipv6 != NULL) + s = VBE_TryConnect(sp, PF_INET6, bp->ipv6, bp->ipv6len); + if (s == -1 && bp->ipv4 != NULL) + s = VBE_TryConnect(sp, PF_INET, bp->ipv4, bp->ipv4len); + if (s == -1 && !params->prefer_ipv6 && bp->ipv6 != NULL) + s = VBE_TryConnect(sp, PF_INET6, bp->ipv6, bp->ipv6len); + + if (s < 0) { + LOCK(&sp->backend->mtx); bp->refcount--; /* Only keep ref on success */ - UNLOCK(&bp->mtx); + UNLOCK(&bp->mtx); + } return (s); } @@ -377,7 +323,7 @@ VBE_ClosedFd(struct worker *w, struct vbe_conn *vc) CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC); b = vc->backend; assert(vc->fd >= 0); - WSL(w, SLT_BackendClose, vc->fd, "%s", vc->backend->vrt->vcl_name); + WSL(w, SLT_BackendClose, vc->fd, "%s", vc->backend->vcl_name); i = close(vc->fd); assert(i == 0 || errno == ECONNRESET || errno == ENOTCONN); vc->fd = -1; @@ -398,7 +344,7 
@@ VBE_RecycleFd(struct worker *w, struct vbe_conn *vc) CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC); assert(vc->fd >= 0); bp = vc->backend; - WSL(w, SLT_BackendReuse, vc->fd, "%s", vc->backend->vrt->vcl_name); + WSL(w, SLT_BackendReuse, vc->fd, "%s", vc->backend->vcl_name); LOCK(&vc->backend->mtx); VSL_stats->backend_recycle++; VTAILQ_INSERT_HEAD(&bp->connlist, vc, list); diff --git a/varnish-cache/bin/varnishd/cache_backend.h b/varnish-cache/bin/varnishd/cache_backend.h index 588bb8a2..54836ed6 100644 --- a/varnish-cache/bin/varnishd/cache_backend.h +++ b/varnish-cache/bin/varnishd/cache_backend.h @@ -73,15 +73,21 @@ struct backend { unsigned magic; #define BACKEND_MAGIC 0x64c4c7c6 - struct vrt_backend vrt[1]; + char *hosthdr; + char *ident; + char *vcl_name; + double connect_timeout; + uint32_t hash; VTAILQ_ENTRY(backend) list; int refcount; pthread_mutex_t mtx; - struct addrinfo *ai; - struct addrinfo *last_ai; + struct sockaddr *ipv4; + socklen_t ipv4len; + struct sockaddr *ipv6; + socklen_t ipv6len; VTAILQ_HEAD(, vbe_conn) connlist; diff --git a/varnish-cache/bin/varnishd/cache_backend_cfg.c b/varnish-cache/bin/varnishd/cache_backend_cfg.c index d4399b2c..00dae87f 100644 --- a/varnish-cache/bin/varnishd/cache_backend_cfg.c +++ b/varnish-cache/bin/varnishd/cache_backend_cfg.c @@ -41,7 +41,6 @@ #include #include -#include #include "shmlog.h" #include "cache.h" @@ -99,9 +98,10 @@ VBE_DropRefLocked(struct backend *b) AZ(close(vbe->fd)); VBE_ReleaseConn(vbe); } - free(TRUST_ME(b->vrt->ident)); - free(TRUST_ME(b->vrt->hostname)); - free(TRUST_ME(b->vrt->portname)); + free(b->ident); + free(b->hosthdr); + free(b->ipv4); + free(b->ipv6); b->magic = 0; free(b); VSL_stats->n_backend--; @@ -117,44 +117,17 @@ VBE_DropRef(struct backend *b) VBE_DropRefLocked(b); } -/*-------------------------------------------------------------------- - * DNS lookup of backend host/port - */ +/*--------------------------------------------------------------------*/ static void 
-vbe_dns_lookup(const struct cli *cli, struct backend *bp) +copy_sockaddr(struct sockaddr **sa, socklen_t *len, const unsigned char *src) { - int error; - struct addrinfo *res, hint, *old; - - CHECK_OBJ_NOTNULL(bp, BACKEND_MAGIC); - memset(&hint, 0, sizeof hint); - hint.ai_family = PF_UNSPEC; - hint.ai_socktype = SOCK_STREAM; - res = NULL; - error = getaddrinfo(bp->vrt->hostname, bp->vrt->portname, - &hint, &res); - if (error) { - if (res != NULL) - freeaddrinfo(res); - /* - * We cannot point to the source code any more, it may - * be long gone from memory. We already checked over in - * the VCL compiler, so this is only relevant for refreshes. - * XXX: which we do when exactly ? - */ - cli_out(cli, "DNS(/hosts) lookup failed for (%s/%s): %s", - bp->vrt->hostname, bp->vrt->portname, gai_strerror(error)); - return; - } - LOCK(&bp->mtx); - old = bp->ai; - bp->ai = res; - bp->last_ai = res; - UNLOCK(&bp->mtx); - if (old != NULL) - freeaddrinfo(old); + assert(*src > 0); + *sa = malloc(*src); + AN(*sa); + memcpy(*sa, src + 1, *src); + *len = *src; } /*-------------------------------------------------------------------- @@ -169,25 +142,46 @@ VBE_AddBackend(struct cli *cli, const struct vrt_backend *vb) struct backend *b; uint32_t u; - AN(vb->hostname); - AN(vb->portname); AN(vb->ident); + assert(vb->ipv4_sockaddr != NULL || vb->ipv6_sockaddr != NULL); (void)cli; ASSERT_CLI(); - u = crc32_l(vb->ident, strlen(vb->ident)); + + /* calculate a hash of (ident + ipv4_sockaddr + ipv6_sockaddr) */ + u = crc32(~0U, vb->ident, strlen(vb->ident)); + if (vb->ipv4_sockaddr != NULL) + u = crc32(u, vb->ipv4_sockaddr + 1, vb->ipv4_sockaddr[0]); + if (vb->ipv6_sockaddr != NULL) + u = crc32(u, vb->ipv6_sockaddr + 1, vb->ipv6_sockaddr[0]); + + /* Run through the list and see if we already have this backend */ VTAILQ_FOREACH(b, &backends, list) { CHECK_OBJ_NOTNULL(b, BACKEND_MAGIC); if (u != b->hash) continue; - if (strcmp(b->vrt->ident, vb->ident)) + if (strcmp(b->ident, vb->ident)) + 
continue; + if (vb->ipv4_sockaddr != NULL && + b->ipv4len != vb->ipv4_sockaddr[0]) + continue; + if (vb->ipv6_sockaddr != NULL && + b->ipv6len != vb->ipv6_sockaddr[0]) + continue; + if (b->ipv4len != 0 && + memcmp(b->ipv4, vb->ipv4_sockaddr + 1, b->ipv4len)) + continue; + if (b->ipv6len != 0 && + memcmp(b->ipv6, vb->ipv6_sockaddr + 1, b->ipv6len)) continue; b->refcount++; return (b); } + /* Create new backend */ ALLOC_OBJ(b, BACKEND_MAGIC); XXXAN(b); - b->magic = BACKEND_MAGIC; + MTX_INIT(&b->mtx); + b->refcount = 1; VTAILQ_INIT(&b->connlist); b->hash = u; @@ -196,17 +190,21 @@ VBE_AddBackend(struct cli *cli, const struct vrt_backend *vb) * This backend may live longer than the VCL that instantiated it * so we cannot simply reference the VCL's copy of things. */ - REPLACE(b->vrt->ident, vb->ident); - REPLACE(b->vrt->hostname, vb->hostname); - REPLACE(b->vrt->portname, vb->portname); - REPLACE(b->vrt->vcl_name, vb->vcl_name); + REPLACE(b->ident, vb->ident); + REPLACE(b->vcl_name, vb->vcl_name); + REPLACE(b->hosthdr, vb->hostname); - b->vrt->connect_timeout = vb->connect_timeout; + b->connect_timeout = vb->connect_timeout; - MTX_INIT(&b->mtx); - b->refcount = 1; + /* + * Copy over the sockaddrs + */ + if (vb->ipv4_sockaddr != NULL) + copy_sockaddr(&b->ipv4, &b->ipv4len, vb->ipv4_sockaddr); + if (vb->ipv6_sockaddr != NULL) + copy_sockaddr(&b->ipv6, &b->ipv6len, vb->ipv6_sockaddr); - vbe_dns_lookup(cli, b); + assert(b->ipv4 != NULL || b->ipv6 != NULL); VTAILQ_INSERT_TAIL(&backends, b, list); VSL_stats->n_backend++; @@ -238,11 +236,9 @@ cli_debug_backend(struct cli *cli, const char * const *av, void *priv) ASSERT_CLI(); VTAILQ_FOREACH(b, &backends, list) { CHECK_OBJ_NOTNULL(b, BACKEND_MAGIC); - cli_out(cli, "%p %s/%s/%s %d\n", + cli_out(cli, "%p %s %d\n", b, - b->vrt->vcl_name, - b->vrt->hostname, - b->vrt->portname, + b->vcl_name, b->refcount); } } diff --git a/varnish-cache/bin/varnishd/heritage.h b/varnish-cache/bin/varnishd/heritage.h index 6eb16a16..85d5456d 100644 
--- a/varnish-cache/bin/varnishd/heritage.h +++ b/varnish-cache/bin/varnishd/heritage.h @@ -166,6 +166,9 @@ struct params { /* Log local socket address to shm */ unsigned log_local_addr; + + /* Prefer IPv6 connections to backend*/ + unsigned prefer_ipv6; }; extern volatile struct params *params; diff --git a/varnish-cache/bin/varnishd/mgt_param.c b/varnish-cache/bin/varnishd/mgt_param.c index 270c19a2..936733f0 100644 --- a/varnish-cache/bin/varnishd/mgt_param.c +++ b/varnish-cache/bin/varnishd/mgt_param.c @@ -735,6 +735,11 @@ static const struct parspec parspec[] = { "VCL can override this default value for each backend.", 0, "400", "ms" }, + { "prefer_ipv6", tweak_bool, &master.prefer_ipv6, 0, 0, + "Prefer IPv6 address when connecting to backends which " + "have both IPv4 and IPv6 addresses.", + 0, + "off", "bool" }, { "session_linger", tweak_uint, &master.session_linger,0, UINT_MAX, "How long time the workerthread lingers on the session " -- 2.39.5