2 Copyright (C) 2000-2004 SKYRIX Software AG
4 This file is part of OpenGroupware.org.
6 OGo is free software; you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any later version.
11 OGo is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with OGo; see the file COPYING. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 #include "NSString+misc.h"
26 TODO: support new Panther API?:
27 - (NSString *)stringByAddingPercentEscapesUsingEncoding:(NSStringEncoding)e
28 - (NSString *)stringByReplacingPercentEscapesUsingEncoding:(NSStringEncoding)e
31 @implementation NSString(URLEscaping)
/*
  Cached state of the "NGUseUTF8AsURLEncoding" user default:
  -1 = not looked up yet, 0 = disabled, 1 = enabled.
*/
static int useUTF8Encoding = -1;

/*
  Returns YES if URL escaping/unescaping should treat the byte buffer as
  UTF-8. The user default is read on the first call only; later calls are
  answered from the cached value. A note is logged once when the option is
  found to be enabled.
*/
static inline BOOL doUseUTF8Encoding(void) {
  if (useUTF8Encoding != -1) /* already looked up */
    return (useUTF8Encoding != 0) ? YES : NO;
  
  if ([[NSUserDefaults standardUserDefaults]
        boolForKey:@"NGUseUTF8AsURLEncoding"]) {
    useUTF8Encoding = 1;
    NSLog(@"Note: Using UTF-8 as URL encoding in NGExtensions.");
    return YES;
  }
  
  useUTF8Encoding = 0;
  return NO;
}
/* Returns YES for the ASCII letters 'a'-'z' and 'A'-'Z', NO otherwise. */
static inline BOOL isUrlAlpha(unsigned char _c) {
  if ((_c >= 'A') && (_c <= 'Z')) return YES;
  if ((_c >= 'a') && (_c <= 'z')) return YES;
  return NO;
}
/* Returns YES for the ASCII digits '0'-'9', NO otherwise. */
static inline BOOL isUrlDigit(unsigned char _c) {
  if (_c < '0') return NO;
  if (_c > '9') return NO;
  return YES;
}
/*
  Character-class predicate for URL "safe" characters. The case labels
  visible here are '$', '-', '_', '.', '+' and '@'; isToBeEscaped() consults
  this function to decide which characters may stay unescaped.
  NOTE(review): the switch header and the return/default statements are not
  visible in this excerpt - presumably the listed characters yield YES and
  everything else NO; confirm against the complete source.
*/
55 static inline BOOL isUrlSafeChar(unsigned char _c) {
57 case '$': case '-': case '_': case '.': case '+':
58 case '@': // TODO: not a safe char?!
/*
  Character-class predicate for URL "extra" characters. The case labels
  visible here are '!', '*', '"' and '\''.
  NOTE(review): the switch header, the return statements and possibly further
  case labels are not visible in this excerpt - confirm the full character
  set against the complete source before relying on this list.
*/
65 static inline BOOL isUrlExtraChar(unsigned char _c) {
67 case '!': case '*': case '"': case '\'':
/* Returns YES only for '%', the character that introduces a URL escape. */
static inline BOOL isUrlEscapeChar(unsigned char _c) {
  if (_c != '%')
    return NO;
  return YES;
}
/*
  Character-class predicate for URL "reserved" characters. The case labels
  visible here are '=', ';', '/', '#', '?' and ':'.
  NOTE(review): the switch header, the return statements and possibly further
  case labels are not visible in this excerpt - confirm the full character
  set against the complete source before relying on this list.
*/
76 static inline BOOL isUrlReservedChar(unsigned char _c) {
78 case '=': case ';': case '/':
79 case '#': case '?': case ':':
/*
  Returns YES if _c belongs to any of the classes alpha, digit, safe, extra
  or escape ('%'), NO otherwise. The classes are tested in that order and
  the first match wins.
*/
static inline BOOL isUrlXalpha(unsigned char _c) {
  return (isUrlAlpha(_c)     ||
          isUrlDigit(_c)     ||
          isUrlSafeChar(_c)  ||
          isUrlExtraChar(_c) ||
          isUrlEscapeChar(_c))
    ? YES : NO;
}
/*
  Returns YES if _c is an ASCII hexadecimal digit ('0'-'9', 'a'-'f',
  'A'-'F'), NO otherwise.
*/
static inline BOOL isUrlHexChar(unsigned char _c) {
  switch (_c) {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'a': case 'b': case 'c':
    case 'd': case 'e': case 'f':
    case 'A': case 'B': case 'C':
    case 'D': case 'E': case 'F':
      return YES;
  }
  return NO;
}
/* Returns YES for ASCII letters and ASCII digits, NO for everything else. */
static inline BOOL isUrlAlphaNum(unsigned char _c) {
  if (isUrlAlpha(_c)) return YES;
  if (isUrlDigit(_c)) return YES;
  return NO;
}
/*
  Decides whether a byte must be percent-escaped when a URL is generated.
  Letters, digits, the underscore and the characters accepted by
  isUrlSafeChar() are passed through (NO); every other byte is escaped (YES).
*/
static inline BOOL isToBeEscaped(unsigned char _c) {
  if (isUrlAlphaNum(_c)) return NO;
  if (_c == '_')         return NO;
  if (isUrlSafeChar(_c)) return NO;
  return YES;
}
/*
  URL-escapes the bytes of _source into _dest.

  What the visible lines show:
   - the loop visits 'srclen' bytes of _source (srclen's declaration is not
     visible in this excerpt),
   - a space is written as '+',
   - a byte for which isToBeEscaped() returns NO takes the first branch
     (its body is not visible, presumably a plain copy - confirm),
   - any other byte is written as '%' followed by two uppercase hex digits
     produced by sprintf(), which also stores a terminating NUL after them.

  The callers visible in this file size _dest as (len * 3 + 2) bytes.

  NOTE(review): how the ' ' branch is chained to the isToBeEscaped() test
  (else / preprocessor guard) and how _dest is advanced and terminated after
  the loop is not visible here - confirm against the complete source.
  NOTE(review): _dest is 'unsigned char *' but sprintf() expects 'char *';
  an explicit cast would avoid a pointer-sign warning.
*/
114 NGEscapeUrlBuffer(const unsigned char *_source, unsigned char *_dest,
117 register const unsigned char *src = (void*)_source;
119 for (i = 0; i < srclen; i++, src++) {
121 if (*src == ' ') { // a ' ' becomes a '+'
122 *_dest = '+'; _dest++;
125 if (!isToBeEscaped(*src)) {
129 else { // any other char is escaped ..
130 *_dest = '%'; _dest++;
131 sprintf(_dest, "%02X", (unsigned)*src);
/*
  Converts an ASCII hexadecimal digit to its numeric value:
  '0'-'9' => 0-9, 'A'-'F' => 10-15, 'a'-'f' => 10-15.
  NOTE(review): the value returned for a character that is not a hex digit
  is not visible in this excerpt - callers should check _isHexDigit() first.
*/
138 static inline int _valueOfHexChar(register unichar _c) {
140 case '0': case '1': case '2': case '3': case '4':
141 case '5': case '6': case '7': case '8': case '9':
142 return (_c - 48); // 0-9 (ascii-char)'0' - 48 => (int)0
144 case 'A': case 'B': case 'C':
145 case 'D': case 'E': case 'F':
146 return (_c - 55); // A-F, A=10..F=15, 'A'=65..'F'=70
148 case 'a': case 'b': case 'c':
149 case 'd': case 'e': case 'f':
150 return (_c - 87); // a-f, a=10..f=15, 'a'=97..'f'=102
/*
  Returns YES if _c is one of the ASCII hexadecimal digits '0'-'9', 'A'-'F'
  or 'a'-'f', NO for any other character (including non-ASCII unichars).
*/
static inline BOOL _isHexDigit(register unichar _c) {
  if ((_c >= '0') && (_c <= '9')) return YES;
  if ((_c >= 'A') && (_c <= 'F')) return YES;
  if ((_c >= 'a') && (_c <= 'f')) return YES;
  return NO;
}
/*
  Decodes the NUL-terminated, URL-escaped byte string _source into _dest.

  What the visible lines show:
   - the loop runs until 'done' is set or the terminating NUL is reached,
   - turning '+' into a space is commented out, so '+' is not translated,
   - after an escape introducer the next byte is fetched; if it is a hex
     digit, two hex digits are combined (first * 16 + second) into one
     output byte,
   - otherwise the byte following the introducer is treated as an escaped
     literal (eg '%%' => '%'),
   - bytes outside an escape sequence are passed through.

  NOTE(review): no _isHexDigit() check is visible for the second digit
  before _valueOfHexChar() is applied to it - confirm that a truncated or
  malformed escape such as "%4" at the end of the input can neither produce
  a bogus byte nor advance _source past the terminating NUL.
  NOTE(review): most of the control flow of this function is not visible in
  this excerpt; the description above is limited to the lines shown.
*/
172 NGUnescapeUrlBuffer(const unsigned char *_source, unsigned char *_dest)
176 while (!done && (*_source != '\0')) {
179 //if (c == '+') // '+' stands for a space
182 _source++; c = *_source;
188 else if (_isHexDigit(c)) { // hex-escaped char, like '%F3'
189 int decChar = _valueOfHexChar(c);
192 decChar = decChar * 16 + _valueOfHexChar(c);
193 *_dest = (unsigned char)decChar;
195 else // escaped char, like '%%' -> '%'
198 else // char passed through
- (BOOL)containsURLEscapeCharacters {
  /*
    Returns YES as soon as a '%' is found in the receiver, NO if there is
    none (or if the receiver is empty). The -characterAtIndex: IMP is looked
    up once so that the scan does not pay for a message dispatch per
    character.
  */
  unichar  (*fetchChar)(id, SEL, unsigned);
  SEL      fetchSel = @selector(characterAtIndex:);
  unsigned pos, count;
  
  count = [self length];
  if (count == 0)
    return NO;
  
  fetchChar = (void *)[self methodForSelector:fetchSel];
  
  pos = 0;
  while (pos < count) {
    if (fetchChar(self, fetchSel, pos) == '%')
      return YES;
    pos++;
  }
  return NO;
}
- (BOOL)containsURLInvalidCharacters {
  /*
    Returns YES if the receiver contains at least one character that has to
    be percent-escaped, NO otherwise (including for the empty string).
    -stringByEscapingURL uses this as its "needs to be escaped?" check.
  */
  register unsigned i, len;
  register unichar (*charAtIdx)(id,SEL,unsigned);
  
  if ((len = [self length]) == 0) return NO;
  
  charAtIdx = (void*)[self methodForSelector:@selector(characterAtIndex:)];
  for (i = 0; i < len; i++) {
    register unichar c;
    
    c = charAtIdx(self, @selector(characterAtIndex:), i);
    
    /*
      isToBeEscaped() takes an 'unsigned char'. Handing it a unichar
      silently truncates the value to its low byte, so eg U+0141 was
      classified like 'A' (0x41) and a string containing it was reported as
      needing no escaping. A character outside ASCII is never URL-safe, so
      it is answered here before the narrowing call.
    */
    if (c > 127)
      return YES;
    
    if (isToBeEscaped((unsigned char)c))
      return YES;
  }
  return NO;
}
/*
  Returns the receiver with its URL percent-escapes decoded.

  What the visible lines show:
   - without any '%' in the receiver an autoreleased copy is returned,
   - a C-string length of 0 yields @"",
   - the receiver's C string is copied into a malloc'ed buffer (len + 10)
     and decoded by NGUnescapeUrlBuffer() into a second one (len + 4),
   - if doUseUTF8Encoding() is YES the decoded bytes are read as UTF-8 and
     the decode buffer is freed here; otherwise the decode buffer is handed
     to -initWithCStringNoCopy:length:... (its remaining arguments are not
     visible in this excerpt),
   - the C-string copy is freed and the new string is returned autoreleased.

  NOTE(review): no NULL check of the malloc() results is visible.
  NOTE(review): -initWithUTF8String: presumably yields nil when the decoded
  bytes are not valid UTF-8, in which case this method would return nil -
  confirm and decide whether callers expect that.
  NOTE(review): -cStringLength and -getCString: use the default C-string
  encoding and are deprecated in current Foundation - verify the target
  platform.
*/
234 - (NSString *)stringByUnescapingURL {
236 input is a URL string - per definition ASCII(?!), like "hello%98%88.txt"
237 output is a unicode string (never longer than the input)
239 Note that the input itself is in some encoding! That is, the input is
240 turned into a buffer eg containing UTF-8 and needs to be converted into
248 if (![self containsURLEscapeCharacters]) /* scan for '%' */
249 return [[self copy] autorelease];
251 if ((len = [self cStringLength]) == 0) return @"";
253 cstr = malloc(len + 10);
254 [self getCString:cstr]; /* this is OK, a URL is always in ASCII! */
257 buffer = malloc(len + 4);
258 NGUnescapeUrlBuffer(cstr, buffer);
260 if (doUseUTF8Encoding()) {
261 /* OK, the input is considered UTF-8 encoded in a string */
262 s = [[NSString alloc] initWithUTF8String:buffer];
263 if (buffer) free(buffer);
266 s = [[NSString alloc]
267 initWithCStringNoCopy:buffer
268 length:strlen(buffer)
271 if (cstr) free(cstr);
272 return [s autorelease];
/*
  Returns the receiver in URL-escaped form.

  What the visible lines show:
   - an empty receiver yields @"",
   - if -containsURLInvalidCharacters reports nothing to escape, an
     autoreleased copy of the receiver is returned,
   - if doUseUTF8Encoding() is YES the receiver is converted to UTF-8 data
     and those bytes are escaped by NGEscapeUrlBuffer(); otherwise the
     receiver's C string (default C-string encoding) is copied into a
     temporary buffer, escaped, and the temporary buffer is freed,
   - the output buffer is sized (len * 3 + 2) because one input byte can
     expand to three output bytes ("%XX"),
   - the escaped buffer is handed to -initWithCStringNoCopy:length:... (its
     remaining arguments are not visible in this excerpt) and the resulting
     string is returned autoreleased.

  NOTE(review): what is returned when -dataUsingEncoding: yields nil or
  empty data is not visible in this excerpt - confirm.
  NOTE(review): no NULL check of the malloc() results is visible.
  NOTE(review): strlen(buffer) relies on NGEscapeUrlBuffer() leaving the
  output NUL-terminated - confirm against the complete source.
*/
275 - (NSString *)stringByEscapingURL {
280 if ((len = [self length]) == 0) return @"";
282 if (![self containsURLInvalidCharacters]) // needs to be escaped ?
283 return [[self copy] autorelease];
285 if (doUseUTF8Encoding()) {
287 // a) encode into a data buffer! (eg UTF8 or ISO)
288 // b) encode that buffer into URL encoding
289 // c) create an ASCII string from that
292 if ((data = [self dataUsingEncoding:NSUTF8StringEncoding]) == nil)
294 if ((len = [data length]) == 0)
297 buffer = malloc(len * 3 + 2);
298 NGEscapeUrlBuffer([data bytes], buffer, len);
303 len = [self cStringLength];
304 cstr = malloc(len + 4);
305 [self getCString:cstr]; // Unicode!
308 buffer = malloc(len * 3 + 2);
309 NGEscapeUrlBuffer(cstr, buffer, len);
310 if (cstr) free(cstr);
313 /* the following assumes that the default-encoding is ASCII compatible */
314 s = [[NSString alloc]
315 initWithCStringNoCopy:buffer
316 length:strlen(buffer)
318 return [s autorelease];
321 @end /* NSString(URLEscaping) */