From: helge Date: Sun, 24 Apr 2005 14:51:41 +0000 (+0000) Subject: rewrote WOHTMLParser to use Unicode X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=320d77b4630a1a7e04619d154cc9765098dbec6a;p=sope rewrote WOHTMLParser to use Unicode fixed some gcc 4.0 warnings git-svn-id: http://svn.opengroupware.org/SOPE/trunk@766 e4a50df8-12e2-0310-a44c-efbce7f8a7e3 --- diff --git a/sope-appserver/NGObjWeb/ChangeLog b/sope-appserver/NGObjWeb/ChangeLog index 3b361bc5..d0eef499 100644 --- a/sope-appserver/NGObjWeb/ChangeLog +++ b/sope-appserver/NGObjWeb/ChangeLog @@ -1,5 +1,11 @@ 2005-04-24 Helge Hess + * v4.5.153 + + * Templates/WOHTMLParser.m: rewrote parser to use unichar + + * Templates: fixed gcc 4.0 warnings + * v4.5.152 * Templates/WODParser.m: rewrote parser to use unichar diff --git a/sope-appserver/NGObjWeb/DynamicElements/_WOStaticHTMLElement.h b/sope-appserver/NGObjWeb/DynamicElements/_WOStaticHTMLElement.h index ac871e5e..79ffe185 100644 --- a/sope-appserver/NGObjWeb/DynamicElements/_WOStaticHTMLElement.h +++ b/sope-appserver/NGObjWeb/DynamicElements/_WOStaticHTMLElement.h @@ -22,6 +22,7 @@ #ifndef __DynamicElements__WOStaticHTMLElement_H__ #define __DynamicElements__WOStaticHTMLElement_H__ +#import #include /* @@ -37,8 +38,8 @@ NSString *text; } -// TODO: use Unicode? - (id)initWithBuffer:(const char *)_buffer length:(unsigned)_len; +- (id)initWithCharacters:(const unichar *)_buffer length:(unsigned)_len; @end diff --git a/sope-appserver/NGObjWeb/DynamicElements/_WOStaticHTMLElement.m b/sope-appserver/NGObjWeb/DynamicElements/_WOStaticHTMLElement.m index f98ad6a2..e4a6e2dd 100644 --- a/sope-appserver/NGObjWeb/DynamicElements/_WOStaticHTMLElement.m +++ b/sope-appserver/NGObjWeb/DynamicElements/_WOStaticHTMLElement.m @@ -37,6 +37,12 @@ static Class StrClass = Nil; : nil; return self; } +- (id)initWithCharacters:(const unichar *)_buffer length:(unsigned)_len { + self->text = (_len > 0) + ? [[StrClass alloc] initWithCharacters:_buffer length:_len] + : nil; + return self; +} - (void)dealloc { [self->text release]; diff --git a/sope-appserver/NGObjWeb/Templates/WOHTMLParser.m b/sope-appserver/NGObjWeb/Templates/WOHTMLParser.m index 8ba83d52..356c4070 100644 --- a/sope-appserver/NGObjWeb/Templates/WOHTMLParser.m +++ b/sope-appserver/NGObjWeb/Templates/WOHTMLParser.m @@ -50,7 +50,7 @@ @implementation WOHTMLParser static WOElement *_parseElement(NSZone *_zone, - const char *_buffer, unsigned *_idx, + const unichar *_buffer, unsigned *_idx, unsigned _len, NSException **_exception, WOHTMLParser *self); @@ -100,32 +100,11 @@ static BOOL useUTF8 = NO; contentElements:_subElements]; } -- (id)_makeConstantStringElementWithBuffer:(const unsigned char *)_buf +- (id)_makeConstantStringElementWithBuffer:(const unichar *)_buf length:(unsigned)_len { - return [[WOStringClass allocWithZone:NULL] initWithBuffer:_buf length:_len]; -} - -- (NSString *)_makeStringForBuffer:(const unsigned char *)_buf - length:(unsigned)_len -{ - NSString *r; - NSData *data; - - if (_len == 0) - return @""; - - if (!useUTF8) - return [[StrClass alloc] initWithCString:_buf length:_len]; - - // Note: we cast the pointer because we are not going to modify _buf for the - // duration and we are never going to write the data - should work - // with any Foundation, but isn't strictly API compatible - data = [[NSData alloc] initWithBytesNoCopy:(void *)_buf length:_len - freeWhenDone:NO]; - r = [[StrClass alloc] initWithData:data encoding:NSUTF8StringEncoding]; - [data release]; - return r; + return [[WOStringClass allocWithZone:NULL] + initWithCharacters:_buf length:_len]; } /* accessors */ @@ -136,12 +115,20 @@ static BOOL useUTF8 = NO; /* parsing API */ +- (NSStringEncoding)stringEncodingForData:(NSData *)_data { + // TODO: we could check for UTF-16 marker in front of data + return useUTF8 ? NSUTF8StringEncoding : [NSString defaultCStringEncoding]; +} + - (NSArray *)parseHTMLData:(NSData *)_html { NSMutableArray *topLevel; - const char *html; + const unichar *html; unsigned idx, len; NSException *exception = nil; - + unichar *buf; + unsigned int bufLen; + NSString *s; + if (![self->callback parser:self willParseHTMLData:_html]) return nil; @@ -150,18 +137,37 @@ static BOOL useUTF8 = NO; if (_html == nil) return nil; + /* recode buffer using NSString */ + + s = [[NSString alloc] initWithData:_html + encoding:[self stringEncodingForData:_html]]; + bufLen = [s length]; + buf = calloc(bufLen + 2, sizeof(unichar)); + [s getCharacters:buf]; + [s release]; s = nil; + buf[bufLen] = 0; /* null-terminate buffer, parser might need that */ + + /* start parsing */ + topLevel = [NSMutableArray arrayWithCapacity:64]; idx = 0; - len = [_html length]; - html = [_html bytes]; + len = bufLen; + html = buf; while ((idx < len) && (exception == nil)) { WOElement *element; - if ((element = _parseElement(NULL, html, &idx, len, &exception, self))) { - [topLevel addObject:element]; - [element release]; element = nil; - } + element = _parseElement(NULL, html, &idx, len, &exception, self); + if (element == nil) + continue; + + [topLevel addObject:element]; + [element release]; element = nil; + } + + if (buf != NULL) { + free(buf); buf = NULL; + html = NULL; } ASSIGN(self->parsingException, exception); @@ -180,7 +186,7 @@ static BOOL useUTF8 = NO; /* internal parsing */ -static int _numberOfLines(const char *_buffer, unsigned _lastIdx) { +static int _numberOfLines(const unichar *_buffer, unsigned _lastIdx) { register int pos, lineCount = 1; for (pos = 0; (pos < (int)_lastIdx) && (_buffer[pos] != '\0'); pos++) { @@ -190,7 +196,7 @@ static int _numberOfLines(const char *_buffer, unsigned _lastIdx) { return lineCount; } -static inline BOOL _isHTMLSpace(char c) { +static inline BOOL _isHTMLSpace(const unichar c) { switch (c) { case ' ': case '\t': case '\r': case '\n': return YES; @@ -201,7 +207,7 @@ static inline BOOL _isHTMLSpace(char c) { } static NSException *_makeHtmlException(NSException *_exception, - const char *_buffer, unsigned _idx, + const unichar *_buffer, unsigned _idx, unsigned _len, NSString *_text, WOHTMLParser *self) { @@ -240,7 +246,7 @@ static NSException *_makeHtmlException(NSException *_exception, if (!atEof && (_idx > 0)) { register unsigned pos; - const unsigned char *startPos, *endPos; + const unichar *startPos, *endPos; for (pos = _idx; (pos >= 0) && (_buffer[pos] != '\n'); pos--) ; @@ -253,7 +259,8 @@ static NSException *_makeHtmlException(NSException *_exception, if (startPos < endPos) { NSString *ll; - ll = [self _makeStringForBuffer:startPos length:(endPos - startPos)]; + ll = [[StrClass alloc] initWithCharacters:startPos + length:(endPos - startPos)]; [ui setObject:ll forKey:@"lastLine"]; [ll release]; } @@ -278,7 +285,7 @@ static NSException *_makeHtmlException(NSException *_exception, } static inline BOOL -_isComment(const char *_buffer, unsigned _idx, unsigned _len) +_isComment(const unichar *_buffer, unsigned _idx, unsigned _len) { // - 7 chars if ((_idx + 7) >= _len) // check whether it is long enough @@ -293,13 +300,14 @@ _isComment(const char *_buffer, unsigned _idx, unsigned _len) return YES; } -static inline BOOL _isHashTag(const char *_buf, unsigned _idx, unsigned _len) { +static inline BOOL _isHashTag(const unichar *_buf, unsigned _idx, + unsigned _len) { /* check for "<#.>" (len 4) */ if ((_idx + 3) >= _len) // check whether it is long enough return NO; return (_buf[_idx] == '<' && _buf[_idx + 1] == '#') ? YES : NO; } -static inline BOOL _isHashCloseTag(const char *_buf, +static inline BOOL _isHashCloseTag(const unichar *_buf, unsigned _idx, unsigned _len) { /* check for "" (len 5) */ @@ -309,7 +317,27 @@ static inline BOOL _isHashCloseTag(const char *_buf, ? YES : NO; } -static inline BOOL _isWOTag(const char *_buf, unsigned _idx, unsigned _len) { +static BOOL _ucIsCaseEqual(const unichar *s, char *tok, unsigned len) { + register unsigned int i; + + for (i = 0; i < len; i++) { + register unsigned char c; + + if (s[i] == tok[i]) + continue; + + if (s[i] == 0) + return NO; + + c = isupper(tok[i]) ? tolower(tok[i]) : toupper(tok[i]); + if (s[i] != c) + return NO; + } + return YES; +} + +static inline BOOL _isWOTag(const unichar *_buf, unsigned _idx, + unsigned _len) { /* check for "" (len 19) (lowercase is allowed) */ if ((_idx + 18) >= _len) // check whether it is long enough return NO; @@ -317,11 +345,11 @@ static inline BOOL _isWOTag(const char *_buf, unsigned _idx, unsigned _len) { return NO; // now check for ' (len=12) */ if ((_idx + 12) > _len) // check whether it is long enough @@ -329,10 +357,10 @@ _isWOCloseTag(const char *_buf, unsigned _idx, unsigned _len) if (_buf[_idx] != '<') // check whether it is a tag return NO; - return (strncasecmp(&(_buf[_idx]), "", 12) == 0) ? YES : NO; + return _ucIsCaseEqual(&(_buf[_idx]), "", 12); } -static inline void _skipSpaces(register const char *_buffer, unsigned *_idx, +static inline void _skipSpaces(register const unichar *_buffer, unsigned *_idx, unsigned _len) { register unsigned pos = *_idx; @@ -346,7 +374,7 @@ static inline void _skipSpaces(register const char *_buffer, unsigned *_idx, } static NSString *_parseStringValue(NSZone *_zone, - register const char *_buffer, + register const unichar *_buffer, unsigned *_idx, unsigned _len, NSException **_exception, WOHTMLParser *self) @@ -387,9 +415,12 @@ static NSString *_parseStringValue(NSZone *_zone, if (len == 0) // empty string return @""; - return [self _makeStringForBuffer:&(_buffer[startPos]) length:len]; + return [[StrClass alloc] initWithCharacters:&(_buffer[startPos]) + length:len]; } - else { + + /* string without quotes */ + { unsigned startPos = pos; //NSLog(@"parsing id at '%c'[%i] ..", _buffer[pos], pos); @@ -407,12 +438,12 @@ static NSString *_parseStringValue(NSZone *_zone, if ((pos - startPos) == 0) // wasn't a string .. return nil; - return [self _makeStringForBuffer:&(_buffer[startPos]) - length:(pos - startPos)]; + return [[StrClass alloc] initWithCharacters:&(_buffer[startPos]) + length:(pos - startPos)]; } } -static WOElement *_parseHashElement(NSZone *_zone, const char *_buffer, +static WOElement *_parseHashElement(NSZone *_zone, const unichar *_buffer, unsigned *_idx, unsigned _len, NSException **_exc, WOHTMLParser *self) @@ -555,7 +586,7 @@ static WOElement *_parseHashElement(NSZone *_zone, const char *_buffer, } static NSMutableDictionary * -_parseTagAttributes(NSZone *_zone, const char *_buffer, +_parseTagAttributes(NSZone *_zone, const unichar *_buffer, unsigned *_idx, unsigned _len, NSException **_exception, WOHTMLParser *self) { @@ -629,7 +660,7 @@ _parseTagAttributes(NSZone *_zone, const char *_buffer, return dict; } -static WOElement *_parseWOElement(NSZone *_zone, const char *_buffer, +static WOElement *_parseWOElement(NSZone *_zone, const unichar *_buffer, unsigned *_idx, unsigned _len, NSException **_exception, WOHTMLParser *self) @@ -644,8 +675,8 @@ static WOElement *_parseWOElement(NSZone *_zone, const char *_buffer, if (!_isWOTag(_buffer, *_idx, _len)) return nil; // not a WO tag .. - NSCAssert(strncasecmp("