From: phk Date: Fri, 15 Sep 2006 07:14:37 +0000 (+0000) Subject: Overhaul NCSA logging. X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a31d7087be195fa3e3b2107427817d1996cca7c8;p=varnish Overhaul NCSA logging. Reduce memory footprint by only allocating space for a fd first time we see it. Few if any servers will ever see 64k fd's in use. Move the sbuf into the logline structure for simplicity. Access the logline structure through a pointer instead of indexing the array all the time. Reduce amount of malloc/free traffic for performance. Use the "cooked" SHMlog api where the length, fd, etc are broken out as arguments for us. Pass the FILE* where we want the stuff to go as private data (stdout for now). Add FlexeLint files. Now over to Anders again... git-svn-id: svn+ssh://projects.linpro.no/svn/varnish/trunk@981 d4fa192b-c00b-0410-8231-f00ffab90ce4 --- diff --git a/varnish-cache/bin/varnishncsa/flint.lnt b/varnish-cache/bin/varnishncsa/flint.lnt new file mode 100644 index 00000000..1a7630b8 --- /dev/null +++ b/varnish-cache/bin/varnishncsa/flint.lnt @@ -0,0 +1,60 @@ +-passes=3 + +-header(../../config.h) + ++d__attribute__()= + +// Fix strchr() semtics, it can only return NULL if arg2 != 0 +-sem(strchr, 1p, type(1), 2n == 0 ? (@p < 1p) : (@p < 1p || @p == 0 )) + +-ffc // No automatic custody + +-e763 // Redundant declaration for symbol '...' previously declared +-e726 // Extraneous comma ignored +-e728 // Symbol ... not explicitly initialized +-e716 // while(1) ... +-e785 // Too few initializers for aggregate +-e786 // String concatenation within initializer + +-emacro(740, TAILQ_PREV) // Unusual pointer cast (incompatible indirect types) +-emacro((826), TAILQ_PREV) // Suspicious pointer-to-pointer conversion (area too small) + + +-esym(534, printf) // Ignoring return value of function +-esym(534, fprintf) // Ignoring return value of function +-esym(534, memset) // Ignoring return value of function +-esym(534, memcpy) // Ignoring return value of function +-esym(534, memmove) // Ignoring return value of function +-esym(534, strcpy) // Ignoring return value of function +-esym(534, vsb_printf) // Ignoring return value of function +-esym(534, vsb_cat) // Ignoring return value of function + +// cache.h +-emacro(506, INCOMPL) // Constant value Boolean + +// cache_center.c +-efunc(525, CNT_Session) // Negative indentation from line +-efunc(525, http_FilterHeader) // Negative indentation from line +-efunc(539, http_FilterHeader) // Positive indentation from line + +// cache_vcl.c +-efunc(525, vcl_handlingname) // Negative indentation from line +-esym(528, vcl_handlingname) // Not referenced +-e641 // Converting enum 'cli_status_e' to int + +// Review all below this line /////////////////////////////////////////////// + +-e767 // Macro redef (system queue.h vs ours ) + +-e574 // Signed-unsigned mix with relational +-e712 // Loss of precision (assignment) (long long to +-e747 // Significant prototype coercion (arg. no. 2) long +-e713 // Loss of precision (assignment) (unsigned long long to long long) +-e506 // Constant value boolean +-e818 // Pointer parameter '...' could be declared as pointing to const +-e774 // Boolean within 'if' always evaluates to False +-e534 // Ignoring return value of function +-e557 // unrecog format +-e732 // Loss of sign (arg. no. 2) (int to unsigned +-e737 // [45] Loss of sign in promotion from int to unsigned + diff --git a/varnish-cache/bin/varnishncsa/flint.sh b/varnish-cache/bin/varnishncsa/flint.sh new file mode 100644 index 00000000..e96821db --- /dev/null +++ b/varnish-cache/bin/varnishncsa/flint.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +T=/tmp/_$$ +flexelint \ + -I/usr/include \ + -I. \ + -I../../include \ + flint.lnt \ + *.c > $T 2>&1 + +for t in Error Warning Info +do + sed -n "/$t [0-9][0-9][0-9]:/s/.*\($t [0-9][0-9][0-9]\).*/\1/p" $T +done | awk ' +$2 == 830 { next } +$2 == 831 { next } + { + i=$2"_"$1 + h[i]++ + n++ + } +END { + printf "%5d %s\n", n, "Total" + for (i in h) + printf "%5d %s\n", h[i], i + } +' | sort -rn + +cat $T diff --git a/varnish-cache/bin/varnishncsa/varnishncsa.c b/varnish-cache/bin/varnishncsa/varnishncsa.c index 6787c32a..60c0fee1 100644 --- a/varnish-cache/bin/varnishncsa/varnishncsa.c +++ b/varnish-cache/bin/varnishncsa/varnishncsa.c @@ -27,385 +27,133 @@ #include "shmlog.h" #include "varnishapi.h" - -/* Ordering-----------------------------------------------------------*/ - - -/* Adding a struct to hold the data for the logline - * - */ - -struct logline { - char df_h[4 * (3 + 1)]; // Datafield for %h (IP adress) - int df_hfini; // Set to 1 when a SessionClose is seen. - unsigned char *df_r; // Datafield for %r (Request) - int df_rfini; // Set to 1 when a ReqServTime has come. - unsigned char *df_s; // Datafield for %s, Status - int df_sfini; - unsigned char *df_b; // Datafield for %b, Bytes - int df_bfini; - struct tm *logline_time; // Datafield for %t - unsigned char *df_R; // Datafield for %{Referer}i - int df_Rfini; - unsigned char *df_U; // Datafield for %{User-agent}i - int df_Ufini; - int bogus_req; // Set to 1 if we discover a bogus request -}; - -/* We make a array of pointers to vsb's. Sbuf is a string buffer. - * * The buffer can be made/extended/cleared etc. through a API. - * * The array is 65536 long because we will use sessionid as key. - * * - * */ - -static struct vsb *ob[65536]; -static struct logline ll[65536]; - - -/* -* Clean order is called once in a while. It clears all the sessions that -* where never finished (SLT_SessionClose). Because the data is not complete -* we disregard the data. -* -*/ - -static void -clean_order(void) +static struct logline { + char df_h[4 * (3 + 1)]; /* Datafield for %h (IP adress) */ + char df_s[4]; /* Datafield for %s, Status */ + char df_b[12]; /* Datafield for %b, Bytes */ + char *df_R; /* Datafield for %{Referer} */ + char *df_U; /* Datafield for %{User-agent} */ + int bogus_req; /* bogus request */ + struct vsb *sb; +} *ll[65536]; + +/* Check if string starts with pfx */ +static int +ispfx(const char *ptr, unsigned len, const char *pfx) { - unsigned u; - - for (u = 0; u < 65536; u++) { - if (ob[u] == NULL) - continue; - vsb_finish(ob[u]); - vsb_clear(ob[u]); - } -} - -static struct tm *make_timestruct(char *tmpPtra){ - char *tmpPtrb; - int timesec = 0; - time_t req_time; // Timeobject used for making the requesttime. - int i = 0; - char temp_time[27]; - struct tm *timestring; - - temp_time[0] = '\0'; - for ( tmpPtrb = strtok(tmpPtra," "); tmpPtrb != NULL; tmpPtrb = strtok(NULL, " ")){ - if (i == 2){ - // We have the right time - //printf("Time: %s\n", tmpPtrb); - tmpPtra = tmpPtrb; - strncpy(temp_time, tmpPtra, 10); - temp_time[11] = '\0'; - } - //printf("ReqServTime number %d: %s\n", i, tmpPtrb); - - i++; - } - - //strncpy(temp_time, tmpPtra, 20); - //printf("inten: %s\n",temp_time); - timesec = atoi(temp_time); - req_time = timesec; - timestring = localtime(&req_time); - return timestring; - + unsigned l; + + l = strlen(pfx); + if (l > len) + return (0); + if (strncasecmp(ptr, pfx, l)) + return (0); + return (1); } -static void -extended_log_format(unsigned char *p) +static int +extended_log_format(void *priv, unsigned tag, unsigned fd, unsigned len, unsigned spec, const char *ptr) { - unsigned u,v,w; - - // Used for getting IP. - unsigned char *tmpPtr; - int j; - - // Used for requesttime. - char *tmpPtra; - - u = (p[2] << 8) | p[3]; - if (ob[u] == NULL) { - ob[u] = vsb_new(NULL, NULL, 0, VSB_AUTOEXTEND); - assert(ob[u] != NULL); + const char *p; + char *q; + FILE *fo; + time_t t; + long l; + struct tm tm; + char tbuf[40]; + struct logline *lp; + + if (!(spec &VSL_S_CLIENT)) + return (0); + + if (ll[fd] == NULL) { + ll[fd] = calloc(sizeof *ll[fd], 1); + assert(ll[fd] != NULL); + ll[fd]->sb = vsb_new(NULL, NULL, 0, VSB_AUTOEXTEND); + assert(ll[fd]->sb != NULL); } - - v = 0; - w = 0; - j = 0; - //ll[u].df_rfini = 0; - //ll[u].df_hfini = 0; + lp = ll[fd]; - switch (p[0]) { + switch (tag) { case SLT_SessionOpen: - - // We catch the IP adress of the session. - // We also catch IP in SessionReuse because we can not always - // be sure we see a SessionOpen when we start logging. - - //printf("IP %s\n", p+4); - ll[u].df_h[0] = '\0'; - tmpPtr = strchr(p + 4, ' '); - if (tmpPtr != NULL){ - - j = strlen(p + 4) - strlen(tmpPtr); // length of IP - strncpy(ll[u].df_h, p + 4, j); - ll[u].df_h[j] = '\0'; // put on a NULL at end of buffer. - } - //printf("New session [%d]: %s \n",u, ll[u].df_h); - + for (p = ptr, q = lp->df_h; *p && *p != ' ';) + *q++ = *p++; + *q = '\0'; break; case SLT_ReqStart: - - // We use XID to catch that a new request is comming inn. - + vsb_clear(lp->sb); break; case SLT_RxRequest: - - vsb_clear(ob[u]); - - if (p[1] >= 4 && !strncasecmp((void *)&p[4], "HEAD",4)){ - vsb_bcat(ob[u], p + 4, strlen(p + 4)); - //printf("Got a HEAD\n"); + if (ispfx(ptr, len, "HEAD")) { + vsb_bcat(lp->sb, ptr, len); + } else if (ispfx(ptr, len, "POST")) { + vsb_bcat(lp->sb, ptr, len); + } else if (ispfx(ptr, len, "GET")) { + vsb_bcat(lp->sb, ptr, len); + } else { + lp->bogus_req = 1; } - - else if (p[1] >= 4 && !strncasecmp((void *)&p[4], "POST",4)){ - vsb_bcat(ob[u], p + 4, strlen(p + 4)); - //printf("Got a POST\n"); - } - - else if (p[1] >= 3 && !strncasecmp((void *)&p[4], "GET",3)){ - vsb_bcat(ob[u], p + 4, strlen(p + 4)); - //printf("Got a GET\n"); - } - - else { - //vsb_bcat(ob[u], p + 4, strlen(p + 4)); - //printf("Got something other than HEAD, POST, GET\n"); - ll[u].bogus_req = 1; - } - break; + break; case SLT_RxURL: - - vsb_cat(ob[u], " "); - vsb_bcat(ob[u], p + 4, strlen(p + 4)); - + vsb_cat(lp->sb, " "); + vsb_bcat(lp->sb, ptr, len); break; case SLT_RxProtocol: - - vsb_cat(ob[u], " "); - vsb_bcat(ob[u], p + 4, strlen(p + 4)); - + vsb_cat(lp->sb, " "); + vsb_bcat(lp->sb, ptr, len); break; case SLT_TxStatus: - - ll[u].df_s = strdup(p + 4); - ll[u].df_sfini = 1; - + strcpy(lp->df_s, ptr); break; case SLT_RxHeader: - if (p[1] >= 11 && !strncasecmp((void *)&p[4], "user-agent:",11)){ - ll[u].df_U = strdup(p + 4); - // Jump ahead past "User-Agent: " - ll[u].df_U = ll[u].df_U + 12; - ll[u].df_Ufini = 1; - } - if (p[1] >= 8 && !strncasecmp((void *)&p[4], "referer:",8)){ - ll[u].df_R = strdup(p + 4); - ll[u].df_R = ll[u].df_R + 9; - ll[u].df_Rfini = 1; - } - - break; - - case SLT_ReqEnd: - - // We use ReqServTime to find how the time the request was delivered - // also to define that a request is finished. - tmpPtra = strdup(p + 4); - ll[u].logline_time = make_timestruct(tmpPtra); - free(tmpPtra); - ll[u].df_rfini = 1; - //printf("ReqServTime %s\n", temp_time); - + if (ispfx(ptr, len, "user-agent:")) + lp->df_U = strdup(ptr + 12); + else if (ispfx(ptr, len, "referer:")) + lp->df_R = strdup(ptr + 9); break; case SLT_Length: - - // XXX ask DES or PHK about this one. Am I overflowing? - - ll[u].df_b = strdup(p + 4); - ll[u].df_bfini = 1; - if (!atoi(ll[u].df_b)){ - ll[u].df_b = malloc(2); - ll[u].df_b[0] = '-'; - ll[u].df_b[1] = '\0'; - } - - break; - - case SLT_SessionClose: - - // Session is closed, we clean up things. But do not write. - - //printf("Session close [%d]\n", u); - - ll[u].df_hfini = 1; - - break; - - case SLT_SessionReuse: - - // We use SessionReuse to catch the IP adress of a session that has already - // started with a SessionOpen that we did not catch. - // Other than that it is not used. - - // Catch IP if not already done. - - if (ll[u].df_h[0] == '\0'){ - // We don't have IP, fetch it. - // - - tmpPtr = strchr(p + 4, ' '); - if (tmpPtr != NULL){ - j = strlen(p + 4) - strlen(tmpPtr); // length of IP - strncpy(ll[u].df_h, p + 4, j); - ll[u].df_h[j] = '\0'; // put on a NULL at end of buffer. - } - //printf("Got IP from Reuse [%d] : %s\n", u, ll[u].df_h); - } - - //printf("Session reuse [%d]\n", u); - + if (strcmp(ptr, "0")) + strcpy(lp->df_b, ptr); + else + strcpy(lp->df_b, "-"); break; default: - - // printf("DEBUG: %s\n", p+4); - break; } - - - if (ll[u].df_rfini) { - // We have a ReqServTime. Lets print the logline - // and clear variables that are different for each request. - // - - char temp_time[27]; // Where we store the string we take from the log - - // make temp_time - strftime (temp_time, 29, "[%d/%b/%Y:%X %z] ", ll[u].logline_time); - - if (ll[u].df_h[0] == '\0' || ll[u].bogus_req){ - ll[u].bogus_req = 0; - //printf("Tom IP \n"); - } - else{ - printf("%s - - %s ", ll[u].df_h, temp_time ); - vsb_finish(ob[u]); - printf("\"%s\"", vsb_data(ob[u])); - printf(" %s %s ", ll[u].df_s, ll[u].df_b); - if (ll[u].df_Rfini){ - printf(" \"%s\" ", ll[u].df_R); - } - else { - printf(" \"-\" "); - } - - if (ll[u].df_Ufini){ - printf(" \"%s\" ", ll[u].df_U); - } - else { - printf(" \"-\" "); - } - printf("\n"); - } - fflush(stdout); - - //vsb_finish(ob[u]); - vsb_clear(ob[u]); - temp_time[0] = '\0'; - - - ll[u].df_rfini = 0; - - - // Clear the TxStaus - - if (ll[u].df_sfini){ - free(ll[u].df_s); - ll[u].df_sfini = 0; - //printf("Freed df_s [%d]\n", u); - } - - if (ll[u].df_bfini){ - free(ll[u].df_b); - ll[u].df_bfini = 0; - //printf("Freed df_b [%d]\n", u); - } - - // Clean User-Agent and Referer - if (ll[u].df_Ufini){ - ll[u].df_U = ll[u].df_U - 12; - free(ll[u].df_U); - ll[u].df_Ufini = 0; - ll[u].df_U[0] = '\0'; - //printf("Freed df_U [%d]\n", u); - } - - if (ll[u].df_Rfini){ - ll[u].df_R = ll[u].df_R - 9; - free(ll[u].df_R); - ll[u].df_R[0] = '\0'; - ll[u].df_Rfini = 0; - //printf("Freed df_R [%d]\n", u); - } - - // Clean up ReqEnd/Time variables - - //if (tmpPtra != NULL){ - // free(tmpPtra); - //printf("Freed tmpPtra [%d]\n", u); - //} - - /* - if (tmpPtrb != NULL){ - free(tmpPtrb); - //printf("Freed tmpPtrb [%d]\n", u); - } - if (tmpPtrc != NULL){ - free(tmpPtrc); - //printf("Freed tmpPtrc [%d]\n", u); - } - */ - temp_time[0] = '\0'; - - - if (ll[u].df_hfini) { - // We have a SessionClose. Lets clean data. - // - // Clean IP adress - ll[u].df_h[0] = '\0'; - //printf("Clearer [%d]\n", u); - ll[u].df_hfini = 0; - - } - - - - + if (tag != SLT_ReqEnd) + return (0); + + fo = priv; + assert(1 == sscanf(ptr, "%*u %*u.%*u %ld.", &l)); + t = l; + localtime_r(&t, &tm); + strftime(tbuf, sizeof tbuf, "%d/%b/%Y:%T %z", &tm); + fprintf(fo, "%s - - %s", lp->df_h, tbuf); + vsb_finish(lp->sb); + fprintf(fo, " \"%s\"", vsb_data(lp->sb)); + fprintf(fo, " %s", lp->df_b); + if (lp->df_R != NULL) { + fprintf(fo, " \"%s\"", lp->df_R); + free(lp->df_R); + lp->df_R = NULL; } - - + if (lp->df_U != NULL) { + fprintf(fo, " \"%s\"", lp->df_U); + free(lp->df_U); + lp->df_U = NULL; + } + fprintf(fo, "\n"); + + return (0); } /*--------------------------------------------------------------------*/ @@ -421,15 +169,11 @@ int main(int argc, char **argv) { int i, c; - unsigned u, v; - unsigned char *p; - char *w_opt = NULL; - FILE *wfile = NULL; struct VSL_data *vd; vd = VSL_New(); - while ((c = getopt(argc, argv, VSL_ARGS "w:")) != -1) { + while ((c = getopt(argc, argv, VSL_ARGS "")) != -1) { i = VSL_Arg(vd, c, optarg); if (i < 0) exit (1); @@ -439,9 +183,6 @@ main(int argc, char **argv) case 'V': varnish_version("varnishncsa"); exit(0); - case 'w': - w_opt = optarg; - break; default: usage(); } @@ -450,36 +191,11 @@ main(int argc, char **argv) if (VSL_OpenLog(vd)) exit (1); - if (w_opt != NULL) { - wfile = fopen(w_opt, "w"); - if (wfile == NULL) { - perror(w_opt); - exit (1); - } - } - u = 0; - v = 0; - while (1) { - i = VSL_NextLog(vd, &p); + i = VSL_Dispatch(vd, extended_log_format, stdout); if (i < 0) break; - if (i == 0) { - if (w_opt == NULL) { - if (++v == 100){ - clean_order(); - fflush(stdout); - } - } - usleep(50000); - continue; - } - v = 0; - - extended_log_format(p); } - - clean_order(); return (0); }