From 44db8687202f229298086c425e1e608f0b63bc16 Mon Sep 17 00:00:00 2001 From: phk Date: Tue, 10 Jul 2007 21:30:47 +0000 Subject: [PATCH] Add "regsub" support for string manipulation. Notice this facility is subject to change! "regsub" is short for regular expression substitution and it is probably easiest to explain with some examples: sub vcl_recv { set req.url = regsub(req.url, "#.*", ""); } This will replace the request's URL with the output of the regsub() function. regsub() takes three arguments: the string to be examined, a regular expression and a replacement string. In this case, everything from the first '#' onward is removed (replaced with nothing). The replacement string recognizes the following magic sequences: & - insert everything matched by the regexp $0 - ditto. $1 - replace with the first submatch of the regexp $2 - replace with the second submatch of the regexp ... $9 - replace with the ninth submatch of the regexp (The $0..$9 syntax was chosen over the \0...\9 syntax in order to avoid a nightmare of escape characters in the VCL source code. Arguments and suggestions are welcome). A more advanced example: set bereq.http.ClientIP = regsub(client.ip, "(.*):(.*)", "$2 $1"); The client.ip variable expands to IP:port number, for instance 127.0.0.1:54662 The regular expression "(.*):(.*)" results in the following matches: & + $0 "127.0.0.1:54662" $1 "127.0.0.1" $2 "54662" So the replacement string "$2 $1" results in "54662 127.0.0.1" And the completed header which is sent to the backend will look like: "ClientIP: 54662 127.0.0.1" An even more advanced example would be: set bereq.http.magic = "Client IP = " regsub(client.ip, ":", " port = "); Where we also exploit the string concatenation ability of the "set" statement. The result string is built in the request workspace, so you may need to increase the workspace size if you do a lot of regsub()'s. Currently there is no decent error handling for running out of workspace. 
git-svn-id: svn+ssh://projects.linpro.no/svn/varnish/trunk@1667 d4fa192b-c00b-0410-8231-f00ffab90ce4 --- varnish-cache/bin/varnishd/cache_vrt_re.c | 74 ++++++++++++++++++++-- varnish-cache/include/vrt.h | 2 +- varnish-cache/lib/libvcl/vcc_fixed_token.c | 2 +- 3 files changed, 69 insertions(+), 9 deletions(-) diff --git a/varnish-cache/bin/varnishd/cache_vrt_re.c b/varnish-cache/bin/varnishd/cache_vrt_re.c index 6fa2b651..24982b72 100644 --- a/varnish-cache/bin/varnishd/cache_vrt_re.c +++ b/varnish-cache/bin/varnishd/cache_vrt_re.c @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -100,13 +101,72 @@ VRT_re_test(struct vsb *sb, const char *re, int sub) return (1); } -char * +const char * VRT_regsub(struct sess *sp, const char *str, void *re, const char *sub) { - static char foo[4] = "FOO"; - (void)sp; - (void)str; - (void)re; - (void)sub; - return (foo); + regmatch_t pm[10]; + regex_t *t; + int i, l; + char *b, *p, *e; + unsigned u, x; + + AN(re); + t = re; + i = regexec(t, str, 10, pm, 0); + + /* If it didn't match, we can return the original string */ + if (i == REG_NOMATCH) + return(str); + + u = WS_Reserve(sp->http->ws, 0); + e = p = b = sp->http->ws->f; + e += u; + + /* Copy prefix to match */ + if (pm[0].rm_so > 0) { + if (p + pm[0].rm_so < e) + memcpy(p, str, pm[0].rm_so); + p += pm[0].rm_so; + } + + for ( ; *sub != '\0'; sub++ ) { + if (*sub == '&') { + l = pm[0].rm_eo - pm[0].rm_so; + if (l > 0) { + if (p + l < e) + memcpy(p, str + pm[0].rm_so, l); + p += l; + } + } else if (*sub == '$' && isdigit(sub[1])) { + x = sub[1] - '0'; + sub++; + l = pm[x].rm_eo - pm[x].rm_so; + if (l > 0) { + if (p + l < e) + memcpy(p, str + pm[x].rm_so, l); + p += l; + } + } else { + if (p + 1 < e) + *p = *sub; + p++; + } + } + + /* Copy suffix to match */ + l = strlen(str + pm[0].rm_eo); + if (l > 0) { + if (p + l < e) + memcpy(p, str + pm[0].rm_eo, l); + p += l; + } + if (p + 1 < e) + *p++ = '\0'; + xxxassert(p <= e); + if (p > e) { + WS_Release(sp->http->ws, 
0); + return (str); + } + WS_Release(sp->http->ws, p - b); + return (b); } diff --git a/varnish-cache/include/vrt.h b/varnish-cache/include/vrt.h index 6dbf869a..7943e141 100644 --- a/varnish-cache/include/vrt.h +++ b/varnish-cache/include/vrt.h @@ -68,7 +68,7 @@ void VRT_re_init(void **, const char *, int sub); void VRT_re_fini(void *); int VRT_re_match(const char *, void *re); int VRT_re_test(struct vsb *, const char *, int sub); -char *VRT_regsub(struct sess *sp, const char *, void *, const char *); +const char *VRT_regsub(struct sess *sp, const char *, void *, const char *); void VRT_count(struct sess *, unsigned); int VRT_rewrite(const char *, const char *); diff --git a/varnish-cache/lib/libvcl/vcc_fixed_token.c b/varnish-cache/lib/libvcl/vcc_fixed_token.c index 9d606baf..0e533e73 100644 --- a/varnish-cache/lib/libvcl/vcc_fixed_token.c +++ b/varnish-cache/lib/libvcl/vcc_fixed_token.c @@ -424,7 +424,7 @@ vcl_output_lang_h(struct vsb *sb) vsb_cat(sb, "void VRT_re_fini(void *);\n"); vsb_cat(sb, "int VRT_re_match(const char *, void *re);\n"); vsb_cat(sb, "int VRT_re_test(struct vsb *, const char *, int sub);\n"); - vsb_cat(sb, "char *VRT_regsub(struct sess *sp, const char *, void *, const char *);\n"); + vsb_cat(sb, "const char *VRT_regsub(struct sess *sp, const char *, void *, const char *);\n"); vsb_cat(sb, "\n"); vsb_cat(sb, "void VRT_count(struct sess *, unsigned);\n"); vsb_cat(sb, "int VRT_rewrite(const char *, const char *);\n"); -- 2.39.5