]> err.no Git - varnish/commitdiff
Start pulling ESI:include stuff over from my private tree.
authorphk <phk@d4fa192b-c00b-0410-8231-f00ffab90ce4>
Tue, 16 Oct 2007 22:23:45 +0000 (22:23 +0000)
committerphk <phk@d4fa192b-c00b-0410-8231-f00ffab90ce4>
Tue, 16 Oct 2007 22:23:45 +0000 (22:23 +0000)
This is the meat of the XML-dissecting code.

In theory the task is pretty simple, but it gets complicated by the fact
that the XML input can be split over multiple pieces of storage, with
an element we care about straddling a boundary.

I have not decided what to do about ESI violations or XML syntax errors,
SHMlogging them, possibly controlled by a parameter might make sense.
For now mark them in the source.

git-svn-id: svn+ssh://projects.linpro.no/svn/varnish/trunk@2108 d4fa192b-c00b-0410-8231-f00ffab90ce4

varnish-cache/bin/varnishd/cache_vrt_esi.c

index 8d8b4aaa3f5fb89945bef662d99f899225bb1d15..f71b6cfec02d4c4dfb97e586c1ce0c8c658eee34 100644 (file)
  *
  * $Id: cache_vrt.c 2067 2007-09-30 20:57:30Z phk $
  *
- * Runtime support for compiled VCL programs
+ * Runtime support for compiled VCL programs ESI processing.
+ *
+ * The basic ESI 1.0 is a very simple specification:
+ *     http://www.w3.org/TR/esi-lang
+ * But it seems that Oracle and Akamai has embrodiered it to be almost a new
+ * layer of scripting language in HTTP transmission chains.
+ *
+ * It is not obvious how much help the "advanced" features of ESI really
+ * are to users, so our aim is to pick the fruit starting with the lowest
+ * hanging, esi:include
  */
 
 #include <stdio.h>
 #include "vcl.h"
 #include "cache.h"
 
+/*--------------------------------------------------------------------
+ * Add one piece to the output, either verbatim or include
+ */
+
+static void
+add_piece(txt t, int kind)
+{
+
+       printf("K%d \"%.*s\"\n", kind, t.e - t.b, t.b);
+}
+
+/*--------------------------------------------------------------------
+ * Zoom over a piece of object and dike out all releveant esi: pieces.
+ * The entire txt may not be processed because an interesting part 
+ * could possibly span into the next chunk of storage.
+ * Return value: number of bytes processed.
+ */
+
+static int
+vxml(txt t)
+{
+       char *p, *q, *r;
+       txt o;
+       int celem;              /* closing element */
+       int remflg;             /* inside <esi:remove> </esi:remove> */
+       int incmt;              /* inside <!--esi ... --> comment */
+       int i;
+
+       o.b = t.b;
+       remflg = 0;
+       incmt = 0;
+       for (p = t.b; p < t.e; ) {
+               if (incmt && *p == '-') {
+                       /*
+                        * We are inside an <!--esi comment and need to zap
+                        * the end comment marker --> when we see it.
+                        */
+                       if (p + 2 >= t.e) {
+                               /* XXX: need to return pending incmt  */
+                               return (p - t.b);
+                       }
+                       if (!memcmp(p, "-->", 3)) {
+                               incmt = 0;
+                               o.e = p;
+                               add_piece(o, 0);
+                               p += 3;
+                               o.b = p;
+                       } else
+                               p++;
+                       continue;
+               }
+
+               if (*p != '<') {
+                       /* nothing happens until next element or comment */
+                       p++;
+                       continue;
+               }
+
+               i = t.e - p;
+
+               if (i < 2)
+                       return (p - t.b);
+
+               if (remflg == 0 && !memcmp(p, "<!--esi", i > 7 ? 7 : i)) {
+                       /*
+                        * ESI comment. <!--esi...-->
+                        * at least 10 char, but we only test on the
+                        * first seven because the tail is handled
+                        * by the incmt flag.
+                        */
+                       if (i < 7)
+                               return (p - t.b);
+
+                       o.e = p;
+                       add_piece(o, 0);
+
+                       p += 7;
+                       o.b = p;
+                       incmt = 1;
+                       continue;
+               }
+
+               if (!memcmp(p, "<!--", i > 4 ? 4 : i)) {
+                       /*
+                        * plain comment <!--...--> at least 7 char
+                        */
+                       if (i < 7)
+                               return (p - t.b);
+                       for (q = p + 4; ; q++) {
+                               if (q + 2 >= t.e)
+                                       return (p - t.b);
+                               if (!memcmp(q, "-->", 3))
+                                       break;
+                       }
+                       p = q + 3;
+                       continue;
+               }
+
+               if (!memcmp(p, "<![CDATA[", i > 9 ? 9 : i)) {
+                       /*
+                        * cdata <![CDATA[...]]> at least 12 char
+                        */
+                       if (i < 12)
+                               return (p - t.b);
+                       for (q = p + 9; ; q++) {
+                               if (q + 2 >= t.e)
+                                       return (p - t.b);
+                               if (!memcmp(q, "]]>", 3))
+                                       break;
+                       }
+                       p = q + 3;
+                       continue;
+               } 
+
+               if (p[1] == '!') {
+                       /* Ignore unrecognized <! sequence */
+                       p += 2;
+                       continue;
+               }
+
+               /* Find end of this element */
+               for (q = p + 1; q < t.e && *q != '>'; q++)
+                       continue;
+               if (*q != '>')
+                       return (p - t.b);
+
+               /* Opening/empty or closing element ? */
+               if (p[1] == '/') {
+                       celem = 1;
+                       r = p + 2;
+                       if (q[-1] == '/') {
+                               /* XML violation, ignore this ? */
+                       }
+               } else {
+                       celem = 0;
+                       r = p + 1;
+               }
+
+               if (r + 9 < q && !memcmp(r, "esi:remove", 10)) {
+
+                       if (celem != remflg) {
+                               /* ESI 1.0 violation, ignore this element */
+                               if (!remflg) {
+                                       o.e = p;
+                                       add_piece(o, 0);
+                               }
+                       } else if (!celem && q[-1] == '/') {
+                               /* empty element */
+                               o.e = p;
+                               add_piece(o, 0);
+                       } else if (!celem) {
+                               /* open element */
+                               o.e = p;
+                               add_piece(o, 0);
+                               remflg = !celem;
+                       } else {
+                               /* close element */
+                               remflg = !celem;
+                       }
+                       p = q + 1;
+                       o.b = p;
+                       continue;
+               }
+
+               if (remflg && r + 3 < q && !memcmp(r, "esi:", 4)) {
+                       /* ESI 1.0 violation, no nesting in esi:remove */
+                       p = q + 1;
+                       continue;
+               }
+
+               if (r + 10 < q && !memcmp(r, "esi:include", 11)) {
+                       
+                       o.e = p;
+                       add_piece(o, 0);
+
+                       if (celem == 0) {
+                               o.b = r + 11;
+                               o.e = q;
+                               add_piece(o, 1);
+                               if (q[-1] != '/') {
+                                       /* ESI 1.0 violation */
+                               }
+                       } else {
+                               /* ESI 1.0 violation */
+                       }
+                       p = q + 1;
+                       o.b = p;
+                       continue;
+               }
+
+               if (r + 3 < q && !memcmp(r, "esi:", 4)) {
+                       /*
+                        * Unimplemented ESI element, ignore
+                        */
+                       o.e = p;
+                       add_piece(o, 0);
+                       p = q + 1;
+                       o.b = p;
+                       continue;
+               }
+
+               /* Not an element we care about */
+               p = q + 1;
+       }
+       o.e = p;
+       add_piece(o, 0);
+       return (p - t.b);
+}
+
 /*--------------------------------------------------------------------*/
 
 void
 VRT_ESI(struct sess *sp)
 {
+       struct storage *st;
+       txt t;
+       int i;
 
        CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
-       INCOMPL();
+       CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+       VTAILQ_FOREACH(st, &sp->obj->store, list) {
+               t.b = (void*)st->ptr;
+               t.e = t.b + st->len;
+               i = vxml(t);
+               printf("VXML(%p+%d) = %d", st->ptr, st->len, i);
+               if (i < st->len) 
+                       printf(" \"%.*s\"", st->len - i, st->ptr + i);
+               printf("\n");
+       }
 }
+
+/*--------------------------------------------------------------------*/