/*
  Copyright (C) 2000-2005 SKYRIX Software AG

  This file is part of SOPE.

  SOPE is free software; you can redistribute it and/or modify it under
  the terms of the GNU Lesser General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
  WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with SOPE; see the file COPYING. If not, write to the
  Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/
22 #include "NSString+misc.h"
/*
  TODO: support the newer (Panther) Foundation API?
  - (NSString *)stringByAddingPercentEscapesUsingEncoding:(NSStringEncoding)e
  - (NSString *)stringByReplacingPercentEscapesUsingEncoding:(NSStringEncoding)e
*/
31 @implementation NSString(URLEscaping)
/*
  Cached value of the "NGUseUTF8AsURLEncoding" user default:
  -1 = not looked up yet, 0 = off, 1 = on.
*/
static int useUTF8Encoding = -1;

/*
  Returns YES when the boolean user default "NGUseUTF8AsURLEncoding" is
  set, NO otherwise.

  The default is read on the first call only; the result is cached in
  useUTF8Encoding and reused afterwards. The lookup is not synchronized.
*/
static inline BOOL doUseUTF8Encoding(void) {
  if (useUTF8Encoding == -1) {
    NSUserDefaults *ud = [NSUserDefaults standardUserDefaults];

    useUTF8Encoding = [ud boolForKey:@"NGUseUTF8AsURLEncoding"] ? 1 : 0;

    /*
      NOTE(review): the guard below was reconstructed - the line between
      the assignment and the NSLog() is missing from this copy of the file.
      The message only makes sense when the default is switched on.
    */
    if (useUTF8Encoding)
      NSLog(@"Note: Using UTF-8 as URL encoding in NGExtensions.");
  }
  return useUTF8Encoding ? YES : NO;
}
/* Returns YES if _c is an ASCII letter ('a'-'z' or 'A'-'Z'), NO otherwise. */
static inline BOOL isUrlAlpha(unsigned char _c) {
  if ((_c >= 'a') && (_c <= 'z')) return YES;
  if ((_c >= 'A') && (_c <= 'Z')) return YES;
  return NO;
}
/* Returns YES if _c is an ASCII decimal digit ('0'-'9'), NO otherwise. */
static inline BOOL isUrlDigit(unsigned char _c) {
  if (_c < '0') return NO;
  if (_c > '9') return NO;
  return YES;
}
/*
  Returns YES for the punctuation characters that are considered safe,
  i.e. '$', '-', '_', '.' and '@'; NO for every other byte.
*/
static inline BOOL isUrlSafeChar(unsigned char _c) {
  switch (_c) {
    case '$': case '-': case '_': case '.':
    /*
      One more label used to follow here but is compiled out with "#if 0"
      (see OGo bug #1260, required for forms).
      NOTE(review): the disabled label itself is not visible in this copy
      of the file - presumably '+'; confirm before re-enabling anything.
    */
    case '@': // TODO: not a safe char?!
      return YES;

    default:
      return NO;
  }
}
/*
  Returns YES if _c is one of the "extra" punctuation characters listed
  in the switch below, NO otherwise.
*/
static inline BOOL isUrlExtraChar(unsigned char _c) {
  switch (_c) {
    case '!': case '*': case '"': case '\'':
      /*
        NOTE(review): this copy of the file appears to have lost one line
        right after the labels above. If it carried further "extra"
        characters, restore them here from the upstream source.
      */
      return YES;
  }
  return NO;
}
/* Returns YES if _c is '%', the character that introduces a URL escape. */
static inline BOOL isUrlEscapeChar(unsigned char _c) {
  if (_c == '%')
    return YES;
  return NO;
}
/*
  Returns YES if _c is one of the reserved URL characters listed in the
  switch below, NO otherwise.
*/
static inline BOOL isUrlReservedChar(unsigned char _c) {
  switch (_c) {
    case '=': case ';': case '/':
    case '#': case '?': case ':':
      /*
        NOTE(review): this copy of the file appears to have lost one line
        right after the labels above. If it carried further reserved
        characters, restore them here from the upstream source.
      */
      return YES;
  }
  return NO;
}
/*
  Returns YES if _c is accepted by any of isUrlAlpha(), isUrlDigit(),
  isUrlSafeChar(), isUrlExtraChar() or isUrlEscapeChar(); NO otherwise.
*/
static inline BOOL isUrlXalpha(unsigned char _c) {
  return (isUrlAlpha(_c)     ||
          isUrlDigit(_c)     ||
          isUrlSafeChar(_c)  ||
          isUrlExtraChar(_c) ||
          isUrlEscapeChar(_c)) ? YES : NO;
}
/*
  Returns YES if _c is an ASCII hexadecimal digit
  ('0'-'9', 'a'-'f' or 'A'-'F'), NO otherwise.
*/
static inline BOOL isUrlHexChar(unsigned char _c) {
  /*
    NOTE(review): the decimal-digit test was reconstructed; the two lines
    preceding the 'a'-'f' check are missing from this copy of the file.
  */
  if ((_c >= '0') && (_c <= '9'))
    return YES;
  if ((_c >= 'a') && (_c <= 'f'))
    return YES;
  if ((_c >= 'A') && (_c <= 'F'))
    return YES;
  return NO;
}
/* Returns YES if _c is an ASCII letter or an ASCII decimal digit. */
static inline BOOL isUrlAlphaNum(unsigned char _c) {
  if (isUrlAlpha(_c)) return YES;
  if (isUrlDigit(_c)) return YES;
  return NO;
}
/*
  Decides whether a byte must be percent-escaped.

  Returns NO for ASCII letters and digits, for '_' and for every character
  isUrlSafeChar() accepts; returns YES for everything else.
*/
static inline BOOL isToBeEscaped(unsigned char _c) {
  if (isUrlAlphaNum(_c)) return NO;
  if (_c == '_')         return NO;
  if (isUrlSafeChar(_c)) return NO;
  return YES;
}
/*
  Percent-escapes srclen bytes from _source into _dest and NUL-terminates
  the result.

  Bytes for which isToBeEscaped() returns NO are copied unchanged; every
  other byte is written as '%' followed by two uppercase hex digits. A byte
  therefore expands to at most three output bytes, so _dest must provide
  room for 3 * srclen + 1 bytes.

  A space is escaped like any other unsafe byte; the form-style
  ' ' -> '+' substitution is disabled.

  NOTE(review): "static void", the type of srclen and the "#if 0" around
  the ' ' -> '+' substitution were reconstructed - those lines are missing
  from this copy of the file. Confirm against the upstream source.
*/
static void
NGEscapeUrlBuffer(const unsigned char *_source, unsigned char *_dest,
                  unsigned srclen)
{
  static const char hexDigits[] = "0123456789ABCDEF";
  register const unsigned char *src = _source;
  register unsigned i;

  if (_dest == NULL)
    return;
  if (_source == NULL)
    srclen = 0; /* nothing to read, still emit the terminator */

  for (i = 0; i < srclen; i++, src++) {
    if (!isToBeEscaped(*src)) {
      *_dest++ = *src;
    }
    else { // any other char is escaped ..
      *_dest++ = '%';
      *_dest++ = (unsigned char)hexDigits[(*src >> 4) & 0x0F];
      *_dest++ = (unsigned char)hexDigits[*src & 0x0F];
    }
  }
  *_dest = '\0';
}
/*
  Returns the numeric value (0..15) of the hexadecimal digit _c; both
  'A'-'F' and 'a'-'f' are accepted for the values 10..15.

  NOTE(review): the result for a character that is not a hex digit was
  reconstructed as -1 - the default branch is missing from this copy of
  the file. Callers should test with _isHexDigit() first.
*/
static inline int _valueOfHexChar(register unichar _c) {
  if ((_c >= '0') && (_c <= '9'))
    return (_c - '0');      // '0'..'9' => 0..9
  if ((_c >= 'A') && (_c <= 'F'))
    return (_c - 'A' + 10); // 'A'..'F' => 10..15
  if ((_c >= 'a') && (_c <= 'f'))
    return (_c - 'a' + 10); // 'a'..'f' => 10..15
  return -1;
}
/*
  Returns YES if _c is a hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f'),
  NO otherwise.
*/
static inline BOOL _isHexDigit(register unichar _c) {
  if ((_c >= '0') && (_c <= '9')) return YES;
  if ((_c >= 'A') && (_c <= 'F')) return YES;
  if ((_c >= 'a') && (_c <= 'f')) return YES;
  return NO;
}
/*
  Decodes the NUL-terminated, percent-escaped bytes in _source into _dest
  and NUL-terminates the result.

    "%XY" (two hex digits)      -> the byte with value 0xXY
    "%c"  (c not a hex digit)   -> c, e.g. "%%" -> '%'
    "%"   at the end of input   -> '%'
    "%X"  not followed by a second hex digit (malformed or truncated)
                                -> '%' is kept literally and the following
                                   characters are copied through as usual
    anything else               -> copied unchanged ('+' is NOT turned into
                                   a space)

  Every input byte yields at most one output byte, so _dest needs room for
  strlen(_source) + 1 bytes.

  The malformed-escape case matters: decoding the second digit without
  checking it used to step past the terminating NUL for inputs ending in
  "%X", after which the loop kept reading and writing out of bounds.

  NOTE(review): "static void" was reconstructed - the line carrying the
  return type is missing from this copy of the file.
*/
static void
NGUnescapeUrlBuffer(const unsigned char *_source, unsigned char *_dest)
{
  if (_dest == NULL)
    return;
  if (_source == NULL) {
    *_dest = '\0';
    return;
  }

  while (*_source != '\0') {
    unsigned char c = *_source++;

    //if (c == '+') // '+' stands for a space
    //  would become ' ' - intentionally disabled
    if (c != '%') { // char passed through
      *_dest++ = c;
      continue;
    }

    c = *_source; /* first character after the '%' */

    if (c == '\0') { /* lone '%' at the very end */
      *_dest++ = '%';
      break;
    }

    if (_isHexDigit(c)) {
      /* c != '\0', so _source[1] is at worst the terminator */
      if (_isHexDigit(_source[1])) { // hex-escaped char, like '%F3'
        int decChar = _valueOfHexChar(c) * 16 + _valueOfHexChar(_source[1]);

        *_dest++ = (unsigned char)decChar;
        _source += 2;
      }
      else {
        /* malformed: keep the '%'; c is handled by the next iteration */
        *_dest++ = '%';
      }
    }
    else { // escaped char, like '%%' -> '%'
      *_dest++ = c;
      _source++;
    }
  }
  *_dest = '\0';
}
/*
  Returns YES if the receiver contains at least one '%' character, NO
  otherwise (including for the empty string).

  A literal search is used so that a '%' followed by a combining mark is
  still found, exactly as a per-character scan would find it. This also
  avoids calling -characterAtIndex: through a hand-cast function pointer
  whose index parameter was declared as 'unsigned'.
*/
- (BOOL)containsURLEscapeCharacters {
  NSRange r;

  r = [self rangeOfString:@"%" options:NSLiteralSearch];
  return (r.length > 0) ? YES : NO;
}
/*
  Returns YES if the receiver contains at least one character that has to
  be percent-escaped, NO otherwise (including for the empty string).

  Every character above U+007F counts as invalid. For the remaining
  characters the decision is delegated to isToBeEscaped().

  The explicit range check is required: isToBeEscaped() takes an
  'unsigned char', so passing a unichar straight in truncates it and e.g.
  U+0141 would be judged as 'A' (0x41).
*/
- (BOOL)containsURLInvalidCharacters {
  unsigned i, len;

  len = [self length];
  for (i = 0; i < len; i++) {
    unichar c = [self characterAtIndex:i];

    if (c > 0x7F)
      return YES;
    if (isToBeEscaped((unsigned char)c))
      return YES;
  }
  return NO;
}
/*
  Returns the receiver with its URL percent-escapes decoded.

  Input is a URL string - per definition ASCII(?!), like "hello%98%88.txt";
  the output is never longer than the input.

  Note that the input itself is in some encoding! The escapes are decoded
  into a byte buffer by NGUnescapeUrlBuffer(), and that buffer is then
  turned into a string: as UTF-8 when doUseUTF8Encoding() returns YES,
  otherwise as a C string.

  Returns a copy of the receiver if it contains no '%', @"" if its C string
  representation is empty, and nil if memory cannot be allocated. In UTF-8
  mode the result is whatever -initWithUTF8String: produces for the decoded
  bytes.

  NOTE(review): the local declarations, "cstr[len] = '\0'" and
  "freeWhenDone:YES" were reconstructed - those lines are missing from this
  copy of the file.
*/
- (NSString *)stringByUnescapingURL {
  unsigned len;
  char     *cstr;
  char     *buffer;
  NSString *s;

  if (![self containsURLEscapeCharacters]) /* scan for '%' */
    return [[self copy] autorelease];

  if ((len = [self cStringLength]) == 0) return @"";

  /*
    Zero-filled on purpose: the slack bytes after the C string must read
    as NUL so the decoder stops even if it steps one byte too far.
  */
  if ((cstr = calloc(len + 10, 1)) == NULL)
    return nil;
  [self getCString:cstr]; /* this is OK, a URL is always in ASCII! */
  cstr[len] = '\0';

  if ((buffer = calloc(len + 4, 1)) == NULL) {
    free(cstr);
    return nil;
  }
  NGUnescapeUrlBuffer((unsigned char *)cstr, (unsigned char *)buffer);
  free(cstr); cstr = NULL;

  if (doUseUTF8Encoding()) {
    /* OK, the input is considered UTF-8 encoded in a string */
    s = [[NSString alloc] initWithUTF8String:buffer];
    free(buffer); buffer = NULL;
  }
  else {
    /* ownership of buffer moves to the string */
    s = [[NSString alloc]
          initWithCStringNoCopy:buffer
          length:strlen(buffer)
          freeWhenDone:YES];
  }
  return [s autorelease];
}
/*
  Returns the receiver with every character that needs it percent-escaped.

  Steps:
    a) encode the string into a byte buffer (UTF-8 when doUseUTF8Encoding()
       returns YES, otherwise the C string representation)
    b) encode that buffer into URL encoding with NGEscapeUrlBuffer()
    c) create a string from the resulting ASCII bytes

  Returns @"" for an empty receiver, a copy of the receiver if
  -containsURLInvalidCharacters reports nothing to escape, and nil if the
  UTF-8 conversion fails or memory cannot be allocated.

  NOTE(review): the local declarations, the two early returns in the UTF-8
  branch ("return nil" / "return @\"\""), "cstr[len] = '\0'" and
  "freeWhenDone:YES" were reconstructed - those lines are missing from this
  copy of the file.
*/
- (NSString *)stringByEscapingURL {
  unsigned len;
  NSString *s;
  char     *buffer = NULL;

  if ((len = [self length]) == 0) return @"";

  if (![self containsURLInvalidCharacters]) // needs to be escaped ?
    return [[self copy] autorelease];

  if (doUseUTF8Encoding()) {
    NSData *data;

    if ((data = [self dataUsingEncoding:NSUTF8StringEncoding]) == nil)
      return nil;
    if ((len = [data length]) == 0)
      return @"";

    /* worst case: every byte becomes "%XX", plus the terminator */
    if ((buffer = malloc(len * 3 + 2)) == NULL)
      return nil;
    NGEscapeUrlBuffer([data bytes], (unsigned char *)buffer, len);
  }
  else {
    unsigned char *cstr;

    len = [self cStringLength];
    if ((cstr = malloc(len + 4)) == NULL)
      return nil;
    [self getCString:(char *)cstr]; // Unicode!
    cstr[len] = '\0';

    /* worst case: every byte becomes "%XX", plus the terminator */
    if ((buffer = malloc(len * 3 + 2)) == NULL) {
      free(cstr);
      return nil;
    }
    NGEscapeUrlBuffer(cstr, (unsigned char *)buffer, len);
    free(cstr);
  }

  /* the following assumes that the default-encoding is ASCII compatible */
  s = [[NSString alloc]
        initWithCStringNoCopy:buffer
        length:strlen(buffer)
        freeWhenDone:YES];
  return [s autorelease];
}
324 @end /* NSString(URLEscaping) */