@implementation WOHTMLParser
static WOElement *_parseElement(NSZone *_zone,
- const char *_buffer, unsigned *_idx,
+ const unichar *_buffer, unsigned *_idx,
unsigned _len, NSException **_exception,
WOHTMLParser *self);
contentElements:_subElements];
}
/*
 * Returns a new WOStringClass instance initialised from the `_len` unichars
 * starting at `_buf`. The object comes straight from alloc/init and is not
 * autoreleased in this method.
 *
 * Review note: this hunk switches the buffer type from bytes to unichar and
 * removes -_makeStringForBuffer:length:, the helper that used to decode raw
 * bytes (UTF-8 or C-string encoding, depending on useUTF8) into a string.
 */
-- (id)_makeConstantStringElementWithBuffer:(const unsigned char *)_buf
+- (id)_makeConstantStringElementWithBuffer:(const unichar *)_buf
length:(unsigned)_len
{
- return [[WOStringClass allocWithZone:NULL] initWithBuffer:_buf length:_len];
-}
-
-- (NSString *)_makeStringForBuffer:(const unsigned char *)_buf
- length:(unsigned)_len
-{
- NSString *r;
- NSData *data;
-
- if (_len == 0)
- return @"";
-
- if (!useUTF8)
- return [[StrClass alloc] initWithCString:_buf length:_len];
-
- // Note: we cast the pointer because we are not going to modify _buf for the
- // duration and we are never going to write the data - should work
- // with any Foundation, but isn't strictly API compatible
- data = [[NSData alloc] initWithBytesNoCopy:(void *)_buf length:_len
- freeWhenDone:NO];
- r = [[StrClass alloc] initWithData:data encoding:NSUTF8StringEncoding];
- [data release];
- return r;
+ return [[WOStringClass allocWithZone:NULL]
+ initWithCharacters:_buf length:_len];
}
/* accessors */
/* parsing API */
+/*
+ * Chooses the encoding used to decode template data before parsing:
+ * UTF-8 when the useUTF8 flag is set, otherwise the default C-string
+ * encoding reported by NSString. `_data` is not inspected yet.
+ */
+- (NSStringEncoding)stringEncodingForData:(NSData *)_data {
+  // TODO: we could check for UTF-16 marker in front of data
+  if (useUTF8)
+    return NSUTF8StringEncoding;
+
+  return [NSString defaultCStringEncoding];
+}
+
- (NSArray *)parseHTMLData:(NSData *)_html {
NSMutableArray *topLevel;
- const char *html;
+ const unichar *html;
unsigned idx, len;
NSException *exception = nil;
-
+ unichar *buf;
+ unsigned int bufLen;
+ NSString *s;
+
if (![self->callback parser:self willParseHTMLData:_html])
return nil;
if (_html == nil)
return nil;
+ /* recode buffer using NSString */
+
+ s = [[NSString alloc] initWithData:_html
+ encoding:[self stringEncodingForData:_html]];
+ bufLen = [s length];
+ buf = calloc(bufLen + 2, sizeof(unichar));
+ [s getCharacters:buf];
+ [s release]; s = nil;
+ buf[bufLen] = 0; /* null-terminate buffer, parser might need that */
+
+ /* start parsing */
+
topLevel = [NSMutableArray arrayWithCapacity:64];
idx = 0;
- len = [_html length];
- html = [_html bytes];
+ len = bufLen;
+ html = buf;
while ((idx < len) && (exception == nil)) {
WOElement *element;
- if ((element = _parseElement(NULL, html, &idx, len, &exception, self))) {
- [topLevel addObject:element];
- [element release]; element = nil;
- }
+ element = _parseElement(NULL, html, &idx, len, &exception, self);
+ if (element == nil)
+ continue;
+
+ [topLevel addObject:element];
+ [element release]; element = nil;
+ }
+
+ if (buf != NULL) {
+ free(buf); buf = NULL;
+ html = NULL;
}
ASSIGN(self->parsingException, exception);
/* internal parsing */
-static int _numberOfLines(const char *_buffer, unsigned _lastIdx) {
+static int _numberOfLines(const unichar *_buffer, unsigned _lastIdx) {
register int pos, lineCount = 1;
for (pos = 0; (pos < (int)_lastIdx) && (_buffer[pos] != '\0'); pos++) {
return lineCount;
}
-static inline BOOL _isHTMLSpace(char c) {
+static inline BOOL _isHTMLSpace(const unichar c) {
switch (c) {
case ' ': case '\t': case '\r': case '\n':
return YES;
}
static NSException *_makeHtmlException(NSException *_exception,
- const char *_buffer, unsigned _idx,
+ const unichar *_buffer, unsigned _idx,
unsigned _len, NSString *_text,
WOHTMLParser *self)
{
if (!atEof && (_idx > 0)) {
register unsigned pos;
- const unsigned char *startPos, *endPos;
+ const unichar *startPos, *endPos;
for (pos = _idx; (pos >= 0) && (_buffer[pos] != '\n'); pos--)
;
if (startPos < endPos) {
NSString *ll;
- ll = [self _makeStringForBuffer:startPos length:(endPos - startPos)];
+ ll = [[StrClass alloc] initWithCharacters:startPos
+ length:(endPos - startPos)];
[ui setObject:ll forKey:@"lastLine"];
[ll release];
}
}
static inline BOOL
-_isComment(const char *_buffer, unsigned _idx, unsigned _len)
+_isComment(const unichar *_buffer, unsigned _idx, unsigned _len)
{
// <!----> - 7 chars
if ((_idx + 7) >= _len) // check whether it is long enough
return YES;
}
/*
 * Returns YES when the two characters at _idx are "<#" and at least four
 * characters remain in the buffer from _idx on; returns NO otherwise.
 * Review note: this hunk only changes the buffer type from char to unichar.
 */
-static inline BOOL _isHashTag(const char *_buf, unsigned _idx, unsigned _len) {
+static inline BOOL _isHashTag(const unichar *_buf, unsigned _idx,
+ unsigned _len) {
/* the shortest possible hash tag is "<#.>", i.e. 4 characters */
if ((_idx + 3) >= _len) // fewer than 4 characters left from _idx: no hash tag
return NO;
return (_buf[_idx] == '<' && _buf[_idx + 1] == '#') ? YES : NO;
}
-static inline BOOL _isHashCloseTag(const char *_buf,
+static inline BOOL _isHashCloseTag(const unichar *_buf,
unsigned _idx, unsigned _len)
{
/* check for "</#.>" (len 5) */
? YES : NO;
}
-static inline BOOL _isWOTag(const char *_buf, unsigned _idx, unsigned _len) {
+/*
+ * Case-insensitive comparison of a UTF-16 buffer against an ASCII token.
+ *
+ *   s   - unichar buffer; up to `len` entries are read
+ *   tok - token to match (e.g. "<WEBOBJECT"); expected to be plain ASCII and
+ *         at least `len` characters long. It is only read, so it is const
+ *         and string literals can be passed without a qualifier mismatch.
+ *   len - number of characters to compare
+ *
+ * Returns YES when every compared position is either identical or differs
+ * only in ASCII letter case; returns NO on the first real mismatch, which
+ * includes hitting a 0 unit in `s` where the token has a non-zero character.
+ * A `len` of 0 yields YES.
+ */
+static BOOL _ucIsCaseEqual(const unichar *s, const char *tok, unsigned len) {
+  register unsigned int i;
+
+  for (i = 0; i < len; i++) {
+    /*
+     * The <ctype.h> functions are only defined for values representable as
+     * unsigned char (or EOF), so never hand them a plain, possibly negative
+     * char.
+     */
+    register unsigned char c = (unsigned char)tok[i];
+
+    if (s[i] == c)
+      continue;
+
+    if (s[i] == 0) /* buffer terminator reached before the token matched */
+      return NO;
+
+    /* retry with the opposite letter case of the token character */
+    c = isupper(c) ? tolower(c) : toupper(c);
+    if (s[i] != c)
+      return NO;
+  }
+  return YES;
+}
+
+static inline BOOL _isWOTag(const unichar *_buf, unsigned _idx,
+ unsigned _len) {
/* check for "<WEBOBJECT .......>" (len 19) (lowercase is allowed) */
if ((_idx + 18) >= _len) // check whether it is long enough
return NO;
return NO;
// now check for '<WEBOBJECT'
- return (strncasecmp(&(_buf[_idx]), "<WEBOBJECT", 10) == 0) ? YES : NO;
+ return _ucIsCaseEqual(&(_buf[_idx]), "<WEBOBJECT", 10);
}
static inline BOOL
-_isWOCloseTag(const char *_buf, unsigned _idx, unsigned _len)
+_isWOCloseTag(const unichar *_buf, unsigned _idx, unsigned _len)
{
/* check for </WEBOBJECT> (len=12) */
if ((_idx + 12) > _len) // check whether it is long enough
if (_buf[_idx] != '<') // check whether it is a tag
return NO;
- return (strncasecmp(&(_buf[_idx]), "</WEBOBJECT>", 12) == 0) ? YES : NO;
+ return _ucIsCaseEqual(&(_buf[_idx]), "</WEBOBJECT>", 12);
}
-static inline void _skipSpaces(register const char *_buffer, unsigned *_idx,
+static inline void _skipSpaces(register const unichar *_buffer, unsigned *_idx,
unsigned _len)
{
register unsigned pos = *_idx;
}
static NSString *_parseStringValue(NSZone *_zone,
- register const char *_buffer,
+ register const unichar *_buffer,
unsigned *_idx, unsigned _len,
NSException **_exception,
WOHTMLParser *self)
if (len == 0) // empty string
return @"";
- return [self _makeStringForBuffer:&(_buffer[startPos]) length:len];
+ return [[StrClass alloc] initWithCharacters:&(_buffer[startPos])
+ length:len];
}
- else {
+
+ /* string without quotes */
+ {
unsigned startPos = pos;
//NSLog(@"parsing id at '%c'[%i] ..", _buffer[pos], pos);
if ((pos - startPos) == 0) // wasn't a string ..
return nil;
- return [self _makeStringForBuffer:&(_buffer[startPos])
- length:(pos - startPos)];
+ return [[StrClass alloc] initWithCharacters:&(_buffer[startPos])
+ length:(pos - startPos)];
}
}
-static WOElement *_parseHashElement(NSZone *_zone, const char *_buffer,
+static WOElement *_parseHashElement(NSZone *_zone, const unichar *_buffer,
unsigned *_idx, unsigned _len,
NSException **_exc,
WOHTMLParser *self)
}
static NSMutableDictionary *
-_parseTagAttributes(NSZone *_zone, const char *_buffer,
+_parseTagAttributes(NSZone *_zone, const unichar *_buffer,
unsigned *_idx, unsigned _len,
NSException **_exception, WOHTMLParser *self)
{
return dict;
}
-static WOElement *_parseWOElement(NSZone *_zone, const char *_buffer,
+static WOElement *_parseWOElement(NSZone *_zone, const unichar *_buffer,
unsigned *_idx, unsigned _len,
NSException **_exception,
WOHTMLParser *self)
if (!_isWOTag(_buffer, *_idx, _len))
return nil; // not a WO tag ..
- NSCAssert(strncasecmp("<WEBOBJECT", &(_buffer[*_idx]), 10) == 0,
- @"invalid parser state ..");
+ NSCAssert(_ucIsCaseEqual(&(_buffer[*_idx]), "<WEBOBJECT", 10),
+ @"Invalid parser state (expected <WEBOBJECT in buffer)!");
// skip '<WEBOBJECT'
*_idx += 10;
return element;
}
-static inline NSString *_makeTextString(NSZone *_zone, const char *_buffer,
+static inline NSString *_makeTextString(NSZone *_zone, const unichar *_buffer,
unsigned _len, WOHTMLParser *self)
{
NSString *result = nil;
- register unsigned char *buffer;
+ register unichar *buffer;
register unsigned pos, bufPos;
if (_len == 0) // empty string
if (!compressHTMLWhitespace)
/* deliver whitespace as in template */
- return [self _makeStringForBuffer:_buffer length:_len];
+ return [[StrClass alloc] initWithCharacters:_buffer length:_len];
+
+ buffer = calloc(_len + 3, sizeof(unichar));
- buffer = malloc(_len + 3);
-
for (pos = 0, bufPos = 0; pos < _len; ) {
buffer[bufPos] = _buffer[pos];
}
}
- result = [self _makeStringForBuffer:buffer length:bufPos];
- if (buffer) free(buffer);
+ result = [[StrClass alloc] initWithCharacters:buffer length:bufPos];
+ if (buffer != NULL) free(buffer);
return result;
}
static WOElement *_parseElement(NSZone *_zone,
- const char *_buffer, unsigned *_idx,
+ const unichar *_buffer, unsigned *_idx,
unsigned _len, NSException **_exception,
WOHTMLParser *self)
{