From c75d8eb788acc0d52e1f750feb7eda78fdad1b6f Mon Sep 17 00:00:00 2001 From: phk Date: Tue, 16 Oct 2007 22:23:45 +0000 Subject: [PATCH] Start pulling ESI:include stuff over from my private tree. This is the meat of the XML-dissecting code. In theory the task is pretty simple, but it gets complicated by the fact that the XML input can be split over multiple pieces of storage, with an element we care about straddling a boundary. I have not decided what to do about ESI violations or XML syntax errors, SHMlogging them, possibly controlled by a parameter might make sense. For now mark them in the source. git-svn-id: svn+ssh://projects.linpro.no/svn/varnish/trunk@2108 d4fa192b-c00b-0410-8231-f00ffab90ce4 --- varnish-cache/bin/varnishd/cache_vrt_esi.c | 236 ++++++++++++++++++++- 1 file changed, 234 insertions(+), 2 deletions(-) diff --git a/varnish-cache/bin/varnishd/cache_vrt_esi.c b/varnish-cache/bin/varnishd/cache_vrt_esi.c index 8d8b4aaa..f71b6cfe 100644 --- a/varnish-cache/bin/varnishd/cache_vrt_esi.c +++ b/varnish-cache/bin/varnishd/cache_vrt_esi.c @@ -28,7 +28,16 @@ * * $Id: cache_vrt.c 2067 2007-09-30 20:57:30Z phk $ * - * Runtime support for compiled VCL programs + * Runtime support for compiled VCL programs ESI processing. + * + * The basic ESI 1.0 is a very simple specification: + * http://www.w3.org/TR/esi-lang + * But it seems that Oracle and Akamai have embroidered it to be almost a new + * layer of scripting language in HTTP transmission chains. 
+ * + * It is not obvious how much help the "advanced" features of ESI really + * are to users, so our aim is to pick the fruit starting with the lowest + * hanging, esi:include */ #include @@ -41,12 +50,235 @@ #include "vcl.h" #include "cache.h" +/*-------------------------------------------------------------------- + * Add one piece to the output, either verbatim (kind 0) or include (kind 1); for now the piece is only printed. NOTE(review): the precision passed for %.*s is a pointer difference, not an int -- confirm and cast. + */ + +static void +add_piece(txt t, int kind) +{ + + printf("K%d \"%.*s\"\n", kind, t.e - t.b, t.b); +} + +/*-------------------------------------------------------------------- + * Zoom over a piece of object and dike out all relevant esi: pieces. + * The entire txt may not be processed because an interesting part + * could possibly span into the next chunk of storage. + * Return value: number of bytes processed. + */ + +static int +vxml(txt t) +{ + char *p, *q, *r; + txt o; + int celem; /* closing element */ + int remflg; /* inside an esi:remove element */ + int incmt; /* inside a comment whose closing --> is still pending */ + int i; + + o.b = t.b; + remflg = 0; + incmt = 0; + for (p = t.b; p < t.e; ) { + if (incmt && *p == '-') { + /* + * We are inside a comment and drop the closing --> marker from the output when we see it. + */ + if (p + 2 >= t.e) { + /* XXX: need to return pending incmt */ + return (p - t.b); + } + if (!memcmp(p, "-->", 3)) { + incmt = 0; + o.e = p; + add_piece(o, 0); + p += 3; + o.b = p; + } else + p++; + continue; + } + + if (*p != '<') { + /* nothing happens until next element or comment */ + p++; + continue; + } + + i = t.e - p; + + if (i < 2) + return (p - t.b); + + if (remflg == 0 && !memcmp(p, " + * at least 10 char, but we only test on the + * first seven because the tail is handled + * by the incmt flag. + */ + if (i < 7) + return (p - t.b); + + o.e = p; + add_piece(o, 0); + + p += 7; + o.b = p; + incmt = 1; + continue; + } + + if (!memcmp(p, " at least 7 char + */ + if (i < 7) + return (p - t.b); + for (q = p + 4; ; q++) { + if (q + 2 >= t.e) + return (p - t.b); + if (!memcmp(q, "-->", 3)) + break; + } + p = q + 3; + continue; + } + + if (!memcmp(p, " 9 ? 
9 : i)) { + /* + * cdata at least 12 char + */ + if (i < 12) + return (p - t.b); + for (q = p + 9; ; q++) { + if (q + 2 >= t.e) + return (p - t.b); + if (!memcmp(q, "]]>", 3)) + break; + } + p = q + 3; + continue; + } + + if (p[1] == '!') { + /* Ignore unrecognized '; q++) + continue; + if (*q != '>') + return (p - t.b); + + /* Opening/empty or closing element ? */ + if (p[1] == '/') { + celem = 1; + r = p + 2; + if (q[-1] == '/') { + /* XML violation, ignore this ? */ + } + } else { + celem = 0; + r = p + 1; + } + + if (r + 9 < q && !memcmp(r, "esi:remove", 10)) { + + if (celem != remflg) { + /* ESI 1.0 violation, ignore this element */ + if (!remflg) { + o.e = p; + add_piece(o, 0); + } + } else if (!celem && q[-1] == '/') { + /* empty element */ + o.e = p; + add_piece(o, 0); + } else if (!celem) { + /* open element */ + o.e = p; + add_piece(o, 0); + remflg = !celem; + } else { + /* close element */ + remflg = !celem; + } + p = q + 1; + o.b = p; + continue; + } + + if (remflg && r + 3 < q && !memcmp(r, "esi:", 4)) { + /* ESI 1.0 violation, no nesting in esi:remove */ + p = q + 1; + continue; + } + + if (r + 10 < q && !memcmp(r, "esi:include", 11)) { + + o.e = p; + add_piece(o, 0); + + if (celem == 0) { + o.b = r + 11; + o.e = q; + add_piece(o, 1); + if (q[-1] != '/') { + /* ESI 1.0 violation */ + } + } else { + /* ESI 1.0 violation */ + } + p = q + 1; + o.b = p; + continue; + } + + if (r + 3 < q && !memcmp(r, "esi:", 4)) { + /* + * Unimplemented ESI element, ignore + */ + o.e = p; + add_piece(o, 0); + p = q + 1; + o.b = p; + continue; + } + + /* Not an element we care about */ + p = q + 1; + } + o.e = p; + add_piece(o, 0); + return (p - t.b); +} + /*--------------------------------------------------------------------*/ void VRT_ESI(struct sess *sp) { + struct storage *st; + txt t; + int i; CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); - INCOMPL(); + CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC); + VTAILQ_FOREACH(st, &sp->obj->store, list) { + t.b = (void*)st->ptr; + t.e = t.b + 
st->len; + i = vxml(t); + printf("VXML(%p+%d) = %d", st->ptr, st->len, i); + if (i < st->len) + printf(" \"%.*s\"", st->len - i, st->ptr + i); + printf("\n"); + } } + +/*--------------------------------------------------------------------*/ -- 2.39.5