2 Copyright (C) 2003-2004 Max Berger
3 Copyright (C) 2004-2005 OpenGroupware.org
5 This file is part of versitSaxDriver, written for the OpenGroupware.org
8 SOPE is free software; you can redistribute it and/or modify it under
9 the terms of the GNU Lesser General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with SOPE; see the file COPYING. If not, write to the
20 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "VSSaxDriver.h"
25 #include "VSStringFormatter.h"
26 #include <SaxObjC/SaxException.h>
/*
  VSSaxTag

  Small record used by VSSaxDriver to queue SAX events until a complete
  top-level element has been parsed. A tag is one of three kinds, told
  apart by its 'type' ivar:

    VSBeginType - start of an element, carries tagName and attrs
    VSEndType   - end of an element, carries tagName
    VSDataType  - character data, carries data

  The ivars are public because the driver reads them directly
  (obj->type, obj->tagName, obj->data, obj->attrs).
*/
@interface VSSaxTag : NSObject
{
@public
  NSString      *type;    /* one of the VS...Type constants, not retained */
  NSString      *tagName; /* owned copy, set for begin and end tags       */
  NSString      *data;    /* retained, set for data tags                  */
  SaxAttributes *attrs;   /* retained, set for begin tags                 */
}

+ (id)beginTag:(NSString *)_tag attributes:(SaxAttributes *)_attrs;
- (id)initEndTag:(NSString *)_tag;
- (id)initWithData:(NSString *)_data;

- (NSString *)tagName;

@end

static NSString *VSBeginType = @"BEGIN";
static NSString *VSEndType   = @"END";
static NSString *VSDataType  = @"DATA";

@implementation VSSaxTag

/* Returns an autoreleased begin tag which copies _tag and retains _attrs. */
+ (id)beginTag:(NSString *)_tag attributes:(SaxAttributes *)_attrs {
  VSSaxTag *tag;

  /* do not write ivars through a nil pointer if allocation failed */
  if ((tag = [[[self alloc] init] autorelease]) == nil)
    return nil;

  tag->type    = VSBeginType;
  tag->tagName = [_tag copy];
  tag->attrs   = [_attrs retain];
  return tag;
}

/* Initializes an end tag for the element named _tag (the name is copied). */
- (id)initEndTag:(NSString *)_tag {
  /* run the superclass initializer before touching our own state */
  if ((self = [super init]) != nil) {
    self->type    = VSEndType;
    self->tagName = [_tag copy];
  }
  return self;
}

/* Initializes a character data tag which retains _data. */
- (id)initWithData:(NSString *)_data {
  if ((self = [super init]) != nil) {
    self->type = VSDataType;
    self->data = [_data retain];
  }
  return self;
}

- (void)dealloc {
  /* 'type' always points to one of the static constants and is not owned */
  [self->data    release]; /* balances the retain in -initWithData: */
  [self->tagName release];
  [self->attrs   release];
  [super dealloc];
}

/* Returns the element name, nil for data tags. */
- (NSString *)tagName {
  return self->tagName;
}

@end /* VSSaxTag */
85 @implementation VSSaxDriver
// Set from the VSSaxDriverDebugEnabled user default during class setup.
87 static BOOL debugOn = NO;
// Character sets shared by all driver instances, created and retained once
// during class setup.
// Used by -_mapTagName: to locate a dot in a tag name.
89 static NSCharacterSet *dotCharSet = nil;
// Used by the attribute parser to split a parameter into name and value.
90 static NSCharacterSet *equalSignCharSet = nil;
// Used by the attribute parser to find commas inside a parameter value.
91 static NSCharacterSet *commaCharSet = nil;
// Colon and semicolon: the characters that end the tag name of a line.
92 static NSCharacterSet *colonAndSemicolonCharSet = nil;
// Colon, semicolon and double quote: scanned for while splitting parameters.
93 static NSCharacterSet *colonSemicolonAndDquoteCharSet = nil;
// Used by -_parseString: to recognize folded (continuation) lines.
94 static NSCharacterSet *whitespaceCharSet = nil;
// Shared formatter used to unescape RFC 2445 text in values.
96 static VSStringFormatter *stringFormatter = nil;
99 static BOOL didInit = NO;
106 ud = [NSUserDefaults standardUserDefaults];
107 debugOn = [ud boolForKey:@"VSSaxDriverDebugEnabled"];
110 [[NSCharacterSet characterSetWithCharactersInString:@"."] retain];
112 [[NSCharacterSet characterSetWithCharactersInString:@"="] retain];
114 [[NSCharacterSet characterSetWithCharactersInString:@","] retain];
115 colonAndSemicolonCharSet =
116 [[NSCharacterSet characterSetWithCharactersInString:@":;"] retain];
117 colonSemicolonAndDquoteCharSet =
118 [[NSCharacterSet characterSetWithCharactersInString:@":;\""] retain];
120 [[NSCharacterSet whitespaceCharacterSet] retain];
122 stringFormatter = [VSStringFormatter sharedFormatter];
127 if ((self = [super init])) {
128 self->prefixURI = @"";
129 self->cardStack = [[NSMutableArray alloc] initWithCapacity:4];
130 self->elementList = [[NSMutableArray alloc] initWithCapacity:8];
131 self->attributeMapping = [[NSMutableDictionary alloc] initWithCapacity:8];
132 self->subItemMapping = [[NSMutableDictionary alloc] initWithCapacity:8];
138 [self->contentHandler release];
139 [self->errorHandler release];
140 [self->prefixURI release];
141 [self->cardStack release];
142 [self->elementList release];
143 [self->attributeElements release];
144 [self->elementMapping release];
145 [self->attributeMapping release];
146 [self->subItemMapping release];
152 - (void)setFeature:(NSString *)_name to:(BOOL)_value {
154 - (BOOL)feature:(NSString *)_name {
158 - (void)setProperty:(NSString *)_name to:(id)_value {
160 - (id)property:(NSString *)_name {
// Installs the receiver for the SAX content events produced while parsing.
// Ownership is handled by the ASSIGN macro, which is defined outside this file.
166 - (void)setContentHandler:(id<NSObject,SaxContentHandler>)_handler {
167 ASSIGN(self->contentHandler, _handler);
170 - (void)setDTDHandler:(id<NSObject,SaxDTDHandler>)_handler {
// Installs the receiver for the warnings and fatal errors reported by the
// driver. Ownership is handled by the ASSIGN macro (defined outside this file).
174 - (void)setErrorHandler:(id<NSObject,SaxErrorHandler>)_handler {
175 ASSIGN(self->errorHandler, _handler);
177 - (void)setEntityResolver:(id<NSObject,SaxEntityResolver>)_handler {
// Returns the handler set with -setContentHandler:, nil if none was set.
181 - (id<NSObject,SaxContentHandler>)contentHandler {
182 return self->contentHandler;
185 - (id<NSObject,SaxDTDHandler>)dtdHandler {
// Returns the handler set with -setErrorHandler:, nil if none was set.
190 - (id<NSObject,SaxErrorHandler>)errorHandler {
191 return self->errorHandler;
193 - (id<NSObject,SaxEntityResolver>)entityResolver {
// Sets the namespace URI that is passed along with every reported element
// and with the prefix mapping of the document. The value is stored through
// ASSIGNCOPY (defined outside this file, presumably keeps a copy).
198 - (void)setPrefixURI:(NSString *)_uri {
199 ASSIGNCOPY(self->prefixURI, _uri);
// Returns the namespace URI used for all reported elements.
201 - (NSString *)prefixURI {
202 return self->prefixURI;
// Sets the names of tags which are not reported as elements of their own:
// -_dataTag:withAttrs:andContent: attaches the value of such a tag as an
// attribute to the currently open element instead.
205 - (void)setAttributeElements:(NSSet *)_elements {
206 ASSIGNCOPY(self->attributeElements, _elements);
// Returns the set of tag names which are reported as attributes.
208 - (NSSet *)attributeElements {
209 return self->attributeElements;
// Sets the dictionary which -_mapTagName: consults to translate a raw tag
// name into the element name that gets reported.
212 - (void)setElementMapping:(NSDictionary *)_mapping {
213 ASSIGNCOPY(self->elementMapping, _mapping);
// Returns the tag name translation table.
215 - (NSDictionary *)elementMapping {
216 return self->elementMapping;
// Sets the attribute name mapping which applies to all elements. It is
// stored under the empty element name, the last table that
// -_mapAttrName:forTag: consults.
219 - (void)setAttributeMapping:(NSDictionary *)_mapping {
220 [self setAttributeMapping:_mapping forElement:@""];
// Stores the attribute name mapping for a single element name.
// The lines between the signature and the store are not visible here, they
// may normalize _element - confirm.
// NOTE(review): setObject:forKey: raises NSInvalidArgumentException for a nil
// object, so a nil _mapping cannot be used to clear an entry.
223 - (void)setAttributeMapping:(NSDictionary *)_mapping
224 forElement:(NSString *)_element
228 [attributeMapping setObject:_mapping forKey:_element];
// Registers the ordered list of sub element names for _element. When a value
// of that element is parsed, -_dataTag:withAttrs:andContent: hands the list
// to -_addSubItems:withData:.
// NOTE(review): setObject:forKey: raises NSInvalidArgumentException for a nil
// object or key, so neither argument may be nil.
231 - (void)setSubItemMapping:(NSArray *)_mapping forElement:(NSString *)_element {
232 [subItemMapping setObject:_mapping forKey:_element];
// Translates a raw tag name through elementMapping.
// If the name is unknown and contains a dot, the lookup is repeated with the
// part that follows the first dot (group prefix as written by Apple
// Addressbook). The fallback assignment and the return statement are not
// visible here, presumably the unmapped name is returned - confirm.
239 - (NSString *)_mapTagName:(NSString *)_tagName {
243 if ((ret = [self->elementMapping objectForKey:_tagName]) == nil) {
244 //NSLog(@"Unknown Key: %@ in %@",_tagName,self->elementMapping);
247 /* This is to allow parsing of vCards produced by Apple
248 Addressbook. AFAIK the .dot notation is a non-standard
250 r = [_tagName rangeOfCharacterFromSet:dotCharSet];
252 ret = [self _mapTagName:[_tagName substringFromIndex:(r.location + 1)]];
// Adds the attribute _attribute to the attribute list _attrs.
// The remaining keywords of the addAttribute message are on lines which are
// not visible here.
258 - (void)_addAttribute:(NSString *)_attribute
259 value:(NSString *)_value
260 toAttrs:(SaxAttributes *)_attrs
262 [_attrs addAttribute:_attribute
/*
  Adds an attribute to the element which is currently open, that is the
  begin tag on top of cardStack.

  If no element is open (an attribute element line was found before any
  BEGIN line) there is nothing to attach the attribute to. The line is
  reported as a warning and dropped; reading element->attrs through a nil
  pointer would crash the process.
*/
- (void)_addAttribute:(NSString *)_attribute value:(NSString *)_value {
  VSSaxTag *element;

  if ((element = [self->cardStack lastObject]) == nil) {
    [self warn:[NSString stringWithFormat:
                           @"found attribute '%@' outside of any open "
                           @"element, ignoring it!", _attribute]];
    return;
  }
  [self _addAttribute:_attribute value:_value toAttrs:element->attrs];
}
/*
  Resolves the name under which attribute _attrName of tag _tagName is
  reported. The tables in attributeMapping are consulted in this order:

    1. the table stored for the raw tag name
    2. the table stored for the mapped tag name (see -_mapTagName:)
    3. the table stored under the empty name (applies to all elements)

  The first hit wins. If no table knows the attribute, _attrName itself
  is returned.
*/
- (NSString *)_mapAttrName:(NSString *)_attrName forTag:(NSString *)_tagName {
  NSDictionary *table;
  NSString     *hit;

  table = [self->attributeMapping objectForKey:_tagName];
  if ((hit = [table objectForKey:_attrName]) != nil)
    return hit;

  /* the tag name is only mapped if the raw name had no match */
  table = [self->attributeMapping objectForKey:[self _mapTagName:_tagName]];
  if ((hit = [table objectForKey:_attrName]) != nil)
    return hit;

  table = [self->attributeMapping objectForKey:@""];
  if ((hit = [table objectForKey:_attrName]) != nil)
    return hit;

  return _attrName;
}
// Splits one raw parameter (name=value) of a content line.
// The name is the uppercased text before the first equal sign, mapped
// through -_mapAttrName:forTag:. The value is returned in value_ after
// RFC 2445 unescaping. The assignment to attr_ and the handling of a
// parameter without an equal sign are on lines which are not visible here.
296 - (void)_parseAttr:(NSString *)_attr
297 forTag:(NSString *)_tagName
298 intoAttr:(NSString **)attr_
299 intoValue:(NSString **)value_
302 NSString *attrName, *attrValue, *mappedName;
304 r = [_attr rangeOfCharacterFromSet:equalSignCharSet];
306 unsigned left, right;
308 attrName = [[_attr substringToIndex:r.location] uppercaseString];
// left is the index of the first value character, right the index of the
// last character of the parameter
309 left = NSMaxRange(r);
310 right = [_attr length] - 1;
// a value which is enclosed in double quotes gets the quotes removed
312 if (([_attr characterAtIndex:left] == '"') &&
313 ([_attr characterAtIndex:right] == '"'))
// NOTE(review): as visible, the range starts at left, which is the opening
// quote. An adjustment of left may sit on the missing lines - confirm.
316 r = NSMakeRange(left, right - left);
317 attrValue = [_attr substringWithRange:r];
320 attrValue = [_attr substringFromIndex:left];
323 else if (left == right) {
324 attrValue = [_attr substringFromIndex:left];
336 // ZNeK: what's this for?
// NOTE(review): attrValue is declared as NSString but is sent
// replaceCharactersInRange:withString:, an NSMutableString message. The
// lines around this loop are not visible, it may be disabled - confirm.
337 r = [attrValue rangeOfCharacterFromSet:commaCharSet];
338 while (r.length > 0) {
339 [attrValue replaceCharactersInRange:r withString:@" "];
340 r = [attrValue rangeOfCharacterFromSet:commaCharSet];
344 mappedName = [self _mapAttrName:attrName forTag:_tagName];
346 *value_ = [stringFormatter stringByUnescapingRFC2445Text:attrValue];
// Converts the raw parameters of a content line into a SaxAttributes object.
// Each parameter is parsed with -_parseAttr:forTag:intoAttr:intoValue:.
// Values of parameters which map to the same attribute name are joined with
// a single space, in the order in which they appear. The result for a nil or
// empty _attrs is decided on a line which is not visible here.
349 - (SaxAttributes *)_mapAttrs:(NSArray *)_attrs forTag:(NSString *)_tagName {
350 SaxAttributes *retAttrs;
351 NSEnumerator *attrEnum;
352 NSString *curAttr, *mappedAttr, *mappedValue, *oldValue;
353 NSMutableDictionary *attributes;
355 if (_attrs == nil || [_attrs count] == 0)
// temporary name to value table, released at the end of the method
358 attributes = [[NSMutableDictionary alloc] init];
359 retAttrs = [[[SaxAttributes alloc] init] autorelease];
// pass 1: parse all parameters and merge values of equal names
361 attrEnum = [_attrs objectEnumerator];
362 while ((curAttr = [attrEnum nextObject]) != nil) {
363 [self _parseAttr:curAttr
366 intoValue:&mappedValue];
367 if ((oldValue = [attributes objectForKey:mappedAttr])) {
370 val = [NSString stringWithFormat:@"%@ %@",oldValue, mappedValue];
371 [attributes setObject:val forKey:mappedAttr];
374 [attributes setObject:mappedValue forKey:mappedAttr];
// pass 2: copy the merged table into the attribute list
377 attrEnum = [attributes keyEnumerator];
378 while ((curAttr = [attrEnum nextObject]) != nil) {
379 [self _addAttribute:curAttr
380 value:[attributes objectForKey:curAttr]
384 [attributes release];
// Creates a begin tag for _tagName with _attrs and appends it to the queue
// of pending events (elementList). Callers use the result as the tag which
// was created, the return statement itself is not visible here.
389 - (VSSaxTag *)_beginTag:(NSString *)_tagName withAttrs:(SaxAttributes *)_attrs{
392 tag = [VSSaxTag beginTag:_tagName attributes:_attrs];
393 [self->elementList addObject:tag];
// Appends an end tag for _tagName to the queue of pending events.
397 - (void)_endTag:(NSString *)_tagName {
400 tag = [[VSSaxTag alloc] initEndTag:_tagName];
401 [self->elementList addObject:tag];
// the queue retains the tag, give up our own reference
402 [tag release]; tag = nil;
// Splits _content at every semicolon and queues one sub element per entry of
// _items, in order. An item without a matching, non-empty component yields an
// empty begin/end pair; components beyond the number of items are ignored.
405 - (void)_addSubItems:(NSArray *)_items withData:(NSString *)_content {
406 NSEnumerator *itemEnum, *contentEnum;
409 itemEnum = [_items objectEnumerator];
410 contentEnum = [[_content componentsSeparatedByString:@";"] objectEnumerator];
412 while ((subTag = [itemEnum nextObject]) != nil) {
413 NSString *subContent;
// nil once the components are used up
415 subContent = [contentEnum nextObject];
417 [self _beginTag:subTag withAttrs:nil];
418 if ([subContent length] > 0) {
421 if ((a = [(VSSaxTag*)[VSSaxTag alloc] initWithData:subContent]) != nil) {
422 [self->elementList addObject:a];
426 [self _endTag:subTag];
// Queues the events for one property line.
// The content is unescaped first. A tag listed in attributeElements is
// attached as attribute to the currently open element (the lines after that
// call are not visible, presumably the method returns there - confirm).
// Any other tag is queued as begin tag, content and end tag; the content of
// a tag with a sub item mapping is expanded by -_addSubItems:withData:.
// NOTE(review): sub items are split at semicolons after the content has been
// unescaped. If the formatter turns an escaped semicolon into a plain one,
// such values are split in the wrong place - confirm against
// VSStringFormatter.
430 - (void)_dataTag:(NSString *)_tagName
431 withAttrs:(SaxAttributes *)_attrs
432 andContent:(NSString *)_content
436 _content = [stringFormatter stringByUnescapingRFC2445Text:_content];
438 if ([self->attributeElements containsObject:_tagName]) {
439 [self _addAttribute:_tagName value:_content];
443 [self _beginTag:_tagName withAttrs:_attrs];
444 if ([_content length] > 0) {
445 if ((subItems = [self->subItemMapping objectForKey:_tagName]) != nil) {
446 [self _addSubItems:subItems withData:_content];
451 if ((a = [(VSSaxTag *)[VSSaxTag alloc] initWithData:_content]) != nil) {
452 [self->elementList addObject:a];
457 [self _endTag:_tagName];
460 /* report events for collected elements */
/*
  Replays the queued tags (elementList) as SAX events on the content
  handler, in the order in which they were queued, and empties the queue
  afterwards:

    begin tag -> startElement:namespace:rawName:attributes:
    end tag   -> endElement:namespace:rawName:
    data tag  -> characters:length:

  The raw name of an element always equals its (mapped) tag name.
*/
- (void)_eventsForElements {
  NSEnumerator *enu;
  VSSaxTag     *obj;

  enu = [self->elementList objectEnumerator];
  while ((obj = [enu nextObject]) != nil) {
    if ([obj->type isEqualToString:VSBeginType]) {
      [self->contentHandler startElement:obj->tagName
                            namespace:self->prefixURI
                            rawName:obj->tagName
                            attributes:obj->attrs];
    }
    else if ([obj->type isEqualToString:VSEndType]) {
      [self->contentHandler endElement:obj->tagName
                            namespace:self->prefixURI
                            rawName:obj->tagName];
    }
    else {
      unichar  *chardata;
      unsigned len;

      // TODO: better move to tag itself?
      len      = [obj->data length];
      chardata = calloc(len + 1, sizeof(unichar));
      if (chardata == NULL) {
        /* do not hand a NULL buffer to -getCharacters:range: */
        [self warn:@"could not allocate buffer for character data, "
                   @"content was dropped!"];
        continue;
      }
      [obj->data getCharacters:chardata range:NSMakeRange(0, len)];

      [self->contentHandler characters:chardata length:len];
      free(chardata); chardata = NULL;
    }
  }
  [self->elementList removeAllObjects];
}
// Reports a non-fatal problem: wraps _warn into a SaxParseException and
// passes it to the warning: method of the error handler. Nothing is raised.
// The reason and userInfo keywords of the message are not visible here.
497 - (void)warn:(NSString *)_warn {
498 SaxParseException *e;
500 e = (id)[SaxParseException exceptionWithName:@"SaxParseException"
503 [self->errorHandler warning:e];
506 /* parsing raw string */
// Parses one unfolded content line (name, optional parameters, colon, value)
// and queues the resulting tags:
//   - BEGIN opens an element and pushes its tag on cardStack
//   - END closes the element on top of cardStack; once the stack is empty
//     the queued events are reported through -_eventsForElements
//   - any other name goes to -_dataTag:withAttrs:andContent:
// Lines which are not well-formed are reported through -warn:.
// Several control flow lines of this method are not visible here.
508 - (void)_parseLine:(NSString *)_line {
509 NSString *tagName, *tagValue;
510 NSMutableArray *tagAttributes;
511 NSRange r, todoRange;
514 length = [_line length];
515 todoRange = NSMakeRange(0, length);
// the tag name ends at the first colon or semicolon
516 r = [_line rangeOfCharacterFromSet:colonAndSemicolonCharSet
519 /* is line well-formed? */
521 [self warn:[@"got an improper content line! ->\n"
522 stringByAppendingString:_line]];
526 tagName = [[_line substringToIndex:r.location] uppercaseString];
// raw parameter strings, released at the end of the method
527 tagAttributes = [[NSMutableArray alloc] initWithCapacity:16];
529 /* possible shortcut: if we spotted a ':', we don't have to do "expensive"
530 argument scanning/processing.
532 if ([_line characterAtIndex:r.location] != ':') {
533 BOOL isAtEnd = NO, isInDquote = NO;
534 unsigned start = NSMaxRange(r);
536 todoRange = NSMakeRange(start, length - start);
540 /* scan for parameters */
541 r = [_line rangeOfCharacterFromSet:colonSemicolonAndDquoteCharSet
544 /* is line well-formed? */
545 if (r.length == 0 || r.location == 0) {
546 [self warn:[@"got an improper content line! ->\n"
547 stringByAppendingString:_line]];
548 [tagAttributes release]; tagAttributes = nil;
552 /* first check if delimiter candidate is escaped */
// r.location is greater than zero here, see the check above
553 if ([_line characterAtIndex:(r.location - 1)] != '\\') {
557 delimiter = [_line characterAtIndex:r.location];
558 if (delimiter == '\"') {
559 /* not a real delimiter - toggle isInDquote for proper escaping */
560 isInDquote = !isInDquote;
564 /* is a delimiter, which one? */
566 if (delimiter == ':') {
// the text between start and the delimiter is one raw parameter
569 copyRange = NSMakeRange(start, r.location - start);
570 [tagAttributes addObject:[_line substringWithRange:copyRange]];
572 /* adjust start, todoRange */
573 start = NSMaxRange(r);
574 todoRange = NSMakeRange(start, length - start);
580 /* adjust todoRange */
581 unsigned offset = NSMaxRange(r);
582 todoRange = NSMakeRange(offset, length - offset);
// everything after the colon which ended the scan is the value
586 tagValue = [_line substringFromIndex:NSMaxRange(r)];
591 if ([tagName isEqualToString:@"BEGIN"]) {
593 tag = [self _beginTag:[self _mapTagName:tagValue]
594 withAttrs:[[[SaxAttributes alloc] init] autorelease]];
595 [self->cardStack addObject:tag];
597 else if ([tagName isEqualToString:@"END"]) {
600 mtName = [self _mapTagName:tagValue];
601 if ([self->cardStack count] > 0) {
602 NSString *expectedName;
604 expectedName = [(VSSaxTag *)[self->cardStack lastObject] tagName];
605 if (![expectedName isEqualToString:mtName]) {
608 // TODO: rather report an error?
609 // TODO: setup userinfo dict with details
610 s = [NSString stringWithFormat:
611 @"Found end tag '%@' which does not match expected "
613 @" Tag '%@' has not been closed properly. Given "
614 @"document contains errors!",
615 mtName, expectedName, expectedName];
618 /* probably futile attempt to parse anyways */
620 NSLog(@"%s trying to fix previous error by inserting bogus end "
622 __PRETTY_FUNCTION__);
// close the element which is actually open before handling the END line
624 [self _endTag:expectedName];
625 [self->cardStack removeLastObject];
629 // TODO: generate error?
630 [self warn:[@"found end tag without any open tags left: "
631 stringByAppendingString:mtName]];
// NOTE(review): the two statements below appear to run for every END line,
// also after the empty stack warning above and after the bogus end tag has
// already popped the last entry. removeLastObject on an empty NSMutableArray
// is documented to raise NSRangeException - confirm and guard.
633 [self _endTag:mtName];
634 [self->cardStack removeLastObject];
636 /* report parsed elements */
638 if ([self->cardStack count] == 0)
639 [self _eventsForElements];
// regular property: the element name is mapped, the parameters are looked
// up with the raw tag name
642 [self _dataTag:[self _mapTagName:tagName]
643 withAttrs:[self _mapAttrs:tagAttributes forTag:tagName]
644 andContent:tagValue];
646 [tagAttributes release];
650 /* top level parsing method */
// Opens the SAX document: startDocument, the mapping of the empty prefix to
// prefixURI and the start of the vCardSet root element (no attributes).
652 - (void)_reportDocStart {
653 [self->contentHandler startDocument];
654 [self->contentHandler startPrefixMapping:@"" uri:self->prefixURI];
656 [self->contentHandler startElement:@"vCardSet" namespace:self->prefixURI
657 rawName:@"vCardSet" attributes:nil];
// Closes the SAX document in reverse order of -_reportDocStart: end of the
// vCardSet root element, end of the prefix mapping, endDocument.
659 - (void)_reportDocEnd {
660 [self->contentHandler endElement:@"vCardSet" namespace:self->prefixURI
661 rawName:@"vCardSet"];
663 [self->contentHandler endPrefixMapping:@""];
664 [self->contentHandler endDocument];
// Top level parser: reports the document start, cuts _rawString into logical
// content lines, parses each one with -_parseLine: and reports the document
// end.
// A line break followed by a whitespace character is a fold: the physical
// lines on both sides are joined in the line buffer. The scan keeps a range r
// of the characters which were seen since the last break and were not yet
// copied into the buffer. The visible code handles a break which is followed
// by a newline character (the test on c which leads into that branch is not
// visible, presumably carriage return) as well as a bare newline.
// Several control flow lines of this method are not visible here.
667 - (void)_parseString:(NSString *)_rawString {
668 NSMutableString *line;
669 unsigned pos, length;
672 [self _reportDocStart];
676 length = [_rawString length];
678 contentline = name *(";" param ) ":" value CRLF
679 ; When parsing a content line, folded lines MUST first
682 r = NSMakeRange(0, 0);
683 /* probably too optimistic */
684 line = [[NSMutableString alloc] initWithCapacity:75 + 2];
686 for (pos = 0; pos < length; pos++) {
689 c = [_rawString characterAtIndex:pos];
// at least one more character must follow to complete the line break
692 if (((length - 1) - pos) >= 1) {
693 if ([_rawString characterAtIndex:pos + 1] == '\n') {
694 BOOL isAtEndOfLine = YES;
696 /* test for folding first */
697 if (((length - 1) - pos) >= 2) {
700 ws = [_rawString characterAtIndex:pos + 2];
701 isAtEndOfLine = [whitespaceCharSet characterIsMember:ws] ? NO :YES;
702 if (!isAtEndOfLine) {
703 /* assemble part of line up to pos */
705 [line appendString:[_rawString substringWithRange:r]];
709 r = NSMakeRange(pos + 1, 0); /* begin new range */
713 /* assemble part of line up to pos */
715 [line appendString:[_rawString substringWithRange:r]];
717 [self _parseLine:line];
// reuse the buffer for the next logical line
719 [line deleteCharactersInRange:NSMakeRange(0, [line length])];
721 r = NSMakeRange(pos + 1, 0); /* begin new range */
726 /* garbled last line! */
727 [self warn:@"last line is truncated, trying to parse anyways!"];
730 else if (c == '\n') { /* broken, non-standard */
731 BOOL isAtEndOfLine = YES;
733 /* test for folding first */
734 if (((length - 1) - pos) >= 1) {
737 ws = [_rawString characterAtIndex:(pos + 1)];
739 isAtEndOfLine = [whitespaceCharSet characterIsMember:ws] ? NO : YES;
740 if (!isAtEndOfLine) {
741 /* assemble part of line up to pos */
743 [line appendString:[_rawString substringWithRange:r]];
747 r = NSMakeRange(pos + 1, 0); /* begin new range */
751 /* assemble part of line up to pos */
753 [line appendString:[_rawString substringWithRange:r]];
755 [self _parseLine:line];
757 [line deleteCharactersInRange:NSMakeRange(0, [line length])];
758 r = NSMakeRange(pos + 1, 0); /* begin new range */
// input which does not end with a line break: parse what is left
766 [self warn:@"Last line of parse string is not properly terminated!"];
767 [line appendString:[_rawString substringWithRange:r]];
768 [self _parseLine:line];
// elements which were opened with BEGIN but never closed with END
771 if ([self->cardStack count] != 0) {
772 [self warn:@"found elements on cardStack. This indicates an improper "
773 @"nesting structure! Not all required events will have been "
774 @"generated, leading to unpredictable results!"];
775 [self->cardStack removeAllObjects]; // clean up
778 [line release]; line = nil;
780 [self _reportDocEnd];
783 /* main entry functions */
// Decodes _data into a string which can be parsed.
// Data which starts with a UTF-16 byte order mark (FF FE or FE FF) is decoded
// as NSUnicodeStringEncoding, everything else as UTF-8. Empty data, data
// which is too short and data which cannot be decoded are reported to the
// error handler as fatal SaxIOException. The return statements are not
// visible here; the caller treats a nil result as failure.
// NOTE(review): the reason text of the second error reads to short, it
// should read too short.
785 - (id)sourceForData:(NSData *)_data systemId:(NSString *)_sysId {
786 SaxParseException *e = nil;
787 NSStringEncoding encoding;
789 const unsigned char *bytes;
// NOTE(review): the format prints a pointer with an X conversion and the
// length with a d conversion. Both truncate on 64 bit targets, p and lu with
// a cast are the portable choice - confirm.
793 NSLog(@"%s: trying to decode data (0x%08X,len=%d) ...",
794 __PRETTY_FUNCTION__, _data, [_data length]);
797 if ((len = [_data length]) == 0) {
798 e = (id)[SaxParseException exceptionWithName:@"SaxIOException"
799 reason:@"Got no parsing data!"
801 [self->errorHandler fatalError:e];
805 e = (id)[SaxParseException exceptionWithName:@"SaxIOException"
806 reason:@"Input data to short for vCard!"
808 [self->errorHandler fatalError:e];
// the first two bytes are inspected for a byte order mark
812 bytes = [_data bytes];
813 if ((bytes[0] == 0xFF && bytes[1] == 0xFE) ||
814 (bytes[0] == 0xFE && bytes[1] == 0xFF)) {
815 encoding = NSUnicodeStringEncoding;
818 encoding = NSUTF8StringEncoding;
820 // FIXME: Data is not always utf-8.....
821 source = [[[NSString alloc] initWithData:_data encoding:encoding]
824 e = (id)[SaxParseException exceptionWithName:@"SaxIOException"
825 reason:@"Could not convert input to string!"
827 [self->errorHandler fatalError:e];
// Main entry point. Accepts an NSURL, NSData or NSString source and reduces
// it step by step to a string:
//   NSURL  -> loaded into NSData (without cache)
//   NSData -> decoded by -sourceForData:systemId:
// The resulting string is parsed by -_parseString:. Any other kind of source
// is reported to the error handler as fatal SaxIOException.
// A missing _sysId is replaced by the absolute URL string or a placeholder.
832 - (void)parseFromSource:(id)_source systemId:(NSString *)_sysId {
834 NSLog(@"%s: parse: %@ (sysid=%@)", __PRETTY_FUNCTION__, _source, _sysId);
836 if ([_source isKindOfClass:[NSURL class]]) {
837 if (_sysId == nil) _sysId = [_source absoluteString];
840 NSLog(@"%s: trying to load URL: %@ (sysid=%@)",__PRETTY_FUNCTION__,
844 // TODO: remember encoding of source
// yields nil if the resource could not be loaded
845 _source = [_source resourceDataUsingCache:NO];
848 if ([_source isKindOfClass:[NSData class]]) {
849 if (_sysId == nil) _sysId = @"<data>";
850 if ((_source = [self sourceForData:_source systemId:_sysId]) == nil)
854 if (![_source isKindOfClass:[NSString class]]) {
855 SaxParseException *e;
859 NSLog(@"%s: unrecognizable source: %@", __PRETTY_FUNCTION__,_source);
// NOTE(review): this branch is also taken for a nil source (nil argument or
// failed URL load). The description of nil is nil, and
// stringByAppendingString: raises NSInvalidArgumentException for a nil
// argument, so the error handler is never reached in that case - confirm and
// build the text with a format string instead.
861 s = [@"cannot handle data-source: " stringByAppendingString:
862 [_source description]];
863 e = (id)[SaxParseException exceptionWithName:@"SaxIOException"
867 [self->errorHandler fatalError:e];
874 NSLog(@"%s: trying to parse string (0x%08X,len=%d) ...",
875 __PRETTY_FUNCTION__, _source, [_source length]);
877 if (_sysId == nil) _sysId = @"<string>";
878 [self _parseString:_source];
// Convenience variant of -parseFromSource:systemId: without a system id.
881 - (void)parseFromSource:(id)_source {
882 [self parseFromSource:_source systemId:nil];
// Parses the resource named by _sysId. A system id without a scheme
// separator is treated as a file system path, anything else as URL string.
// The URL is handed to -parseFromSource:systemId:. A fatal SaxIOException is
// reported if the system id cannot be handled (the condition for that is on
// lines which are not visible here).
885 - (void)parseFromSystemId:(NSString *)_sysId {
888 if ([_sysId rangeOfString:@"://"].length == 0) {
889 /* seems to be a path, patch it to be a proper URL */
890 url = [NSURL fileURLWithPath:_sysId];
893 /* Note: Cocoa NSURL doesn't complain on "/abc/def" like input! */
894 url = [NSURL URLWithString:_sysId];
898 SaxParseException *e;
900 e = (id)[SaxParseException exceptionWithName:@"SaxIOException"
901 reason:@"cannot handle system-id"
903 [self->errorHandler fatalError:e];
907 [self parseFromSource:url systemId:_sysId];
912 - (BOOL)isDebuggingEnabled {
916 @end /* VersitSaxDriver */