2 Copyright (C) 2000-2004 SKYRIX Software AG
4 This file is part of OpenGroupware.org.
6 OGo is free software; you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any later version.
11 OGo is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with OGo; see the file COPYING. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 // $Id: NSString+URLEscaping.m 1 2004-08-20 10:08:27Z znek $
23 #include "NSString+misc.h"
27 TODO: support new Panther API?:
28 - (NSString *)stringByAddingPercentEscapesUsingEncoding:(NSStringEncoding)e
29 - (NSString *)stringByReplacingPercentEscapesUsingEncoding:(NSStringEncoding)e
32 @implementation NSString(URLEscaping)
/* Cache for the NGUseUTF8AsURLEncoding default: -1 = not read yet, 0 = off, 1 = on. */
static int useUTF8Encoding = -1;

/*
  Returns YES when the NGUseUTF8AsURLEncoding user default is set.
  The default is looked up on the first call only; afterwards the cached
  value in useUTF8Encoding is returned.
*/
static inline BOOL doUseUTF8Encoding(void) {
  if (useUTF8Encoding != -1)
    return useUTF8Encoding ? YES : NO;

  if ([[NSUserDefaults standardUserDefaults]
        boolForKey:@"NGUseUTF8AsURLEncoding"]) {
    useUTF8Encoding = 1;
    /*
      NOTE(review): the condition guarding this log line is not visible in
      the extracted source; it is assumed the note is printed only when the
      default is enabled - confirm against the complete file.
    */
    NSLog(@"Note: Using UTF-8 as URL encoding in NGExtensions.");
  }
  else {
    useUTF8Encoding = 0;
  }
  return useUTF8Encoding ? YES : NO;
}
/* Returns YES if _c is an ASCII letter ('a'-'z' or 'A'-'Z'), NO otherwise. */
static inline BOOL isUrlAlpha(unsigned char _c) {
  if ((_c >= 'a') && (_c <= 'z'))
    return YES;
  if ((_c >= 'A') && (_c <= 'Z'))
    return YES;
  return NO;
}
/* Returns YES if _c is an ASCII decimal digit ('0'-'9'), NO otherwise. */
static inline BOOL isUrlDigit(unsigned char _c) {
  if (_c < '0')
    return NO;
  if (_c > '9')
    return NO;
  return YES;
}
/*
  isUrlSafeChar: classifies _c against a fixed set of punctuation
  characters. The labels visible here are '$', '-', '_', '@', '.', '&'
  and '+'. isToBeEscaped() consults this function to decide whether a
  byte may be copied to the output without escaping.

  NOTE(review): the switch header, the return statements and the closing
  braces are not visible in this extract, and a further case-label line
  may be missing as well - confirm the complete label list and the
  returned values against the full file before changing this function.
*/
56 static inline BOOL isUrlSafeChar(unsigned char _c) {
58 case '$': case '-': case '_': case '@':
59 case '.': case '&': case '+':
/*
  isUrlExtraChar: classifies _c against a fixed set of "extra" punctuation
  characters. The labels visible here are '!', '*', '"' and '\''.
  isUrlXalpha() treats a match as an acceptable character.

  NOTE(review): the switch header, the return statements and the closing
  braces are not visible in this extract; the gap in the original line
  numbering suggests at least one more case-label line exists - confirm
  the complete label list against the full file.
*/
66 static inline BOOL isUrlExtraChar(unsigned char _c) {
68 case '!': case '*': case '"': case '\'':
/* Returns YES only for '%', the character that introduces a URL escape. */
static inline BOOL isUrlEscapeChar(unsigned char _c) {
  if (_c != '%')
    return NO;
  return YES;
}
/*
  isUrlReservedChar: classifies _c against a fixed set of characters that
  are treated as reserved. The labels visible here are '=', ';', '/', '#',
  '?' and ':'.

  NOTE(review): the switch header, the return statements and the closing
  braces are not visible in this extract; the gap in the original line
  numbering suggests at least one more case-label line exists - confirm
  the complete label list against the full file. No caller of this
  function is visible in this extract.
*/
77 static inline BOOL isUrlReservedChar(unsigned char _c) {
79 case '=': case ';': case '/':
80 case '#': case '?': case ':':
/*
  Returns YES if _c is accepted by any of the alpha, digit, safe, extra or
  escape classifiers, NO if none of them accepts it.
*/
static inline BOOL isUrlXalpha(unsigned char _c) {
  return (isUrlAlpha(_c)      ||
          isUrlDigit(_c)      ||
          isUrlSafeChar(_c)   ||
          isUrlExtraChar(_c)  ||
          isUrlEscapeChar(_c)) ? YES : NO;
}
/*
  Returns YES if _c is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.

  NOTE(review): the decimal-digit test is reconstructed - that part of the
  function is not visible in the extracted source; confirm against the
  complete file.
*/
static inline BOOL isUrlHexChar(unsigned char _c) {
  BOOL decimal = ((_c >= '0') && (_c <= '9')) ? YES : NO;
  BOOL lowerAF = ((_c >= 'a') && (_c <= 'f')) ? YES : NO;
  BOOL upperAF = ((_c >= 'A') && (_c <= 'F')) ? YES : NO;

  return (decimal || lowerAF || upperAF) ? YES : NO;
}
/* Returns YES if _c is an ASCII letter or an ASCII decimal digit. */
static inline BOOL isUrlAlphaNum(unsigned char _c) {
  if (isUrlAlpha(_c))
    return YES;
  return isUrlDigit(_c) ? YES : NO;
}
/*
  Decides whether the byte _c must be written as a %XX escape.
  Letters, digits, '_' and everything accepted by isUrlSafeChar() are
  copied verbatim (NO); every other byte needs escaping (YES).
*/
static inline BOOL isToBeEscaped(unsigned char _c) {
  if (isUrlAlphaNum(_c))
    return NO;
  if (_c == '_')
    return NO;
  if (isUrlSafeChar(_c))
    return NO;
  return YES;
}
/*
  NGEscapeUrlBuffer: walks the bytes of _source (the loop runs srclen
  times) and writes a URL-escaped representation to _dest.

  Bytes for which isToBeEscaped() returns NO take the first branch (its
  body is not visible in this extract); every other byte is written as
  '%' followed by two uppercase hex digits.

  _dest is supplied by the caller; the callers in this file allocate
  len * 3 + 2 bytes, i.e. room for three output bytes per input byte.

  NOTE(review): the lines around the ' ' -> '+' block are missing from
  this extract, so it cannot be told from here whether that block is
  active, chained with an "else", or compiled out - confirm against the
  complete file. The return type, the third parameter's declaration, the
  advance of _dest after sprintf and the final termination are likewise
  not visible here.
*/
115 NGEscapeUrlBuffer(const unsigned char *_source, unsigned char *_dest,
118 register const unsigned char *src = (void*)_source;
120 for (i = 0; i < srclen; i++, src++) {
122 if (*src == ' ') { // a ' ' becomes a '+'
123 *_dest = '+'; _dest++;
126 if (!isToBeEscaped(*src)) {
130 else { // any other char is escaped ..
131 *_dest = '%'; _dest++;
/* sprintf writes the two hex digits plus a trailing NUL at _dest.
   NOTE(review): _dest is "unsigned char *" while sprintf expects "char *". */
132 sprintf(_dest, "%02X", (unsigned)*src);
/*
  _valueOfHexChar: converts one hexadecimal digit character to its numeric
  value: '0'-'9' give 0-9, 'A'-'F' and 'a'-'f' give 10-15.

  NOTE(review): the result for a character that is not a hex digit is not
  visible in this extract - confirm before relying on it.
  NGUnescapeUrlBuffer() calls this function for the second digit of an
  escape with no validation visible in this extract.
*/
139 static inline int _valueOfHexChar(register unichar _c) {
141 case '0': case '1': case '2': case '3': case '4':
142 case '5': case '6': case '7': case '8': case '9':
143 return (_c - 48); // 0-9 (ascii-char)'0' - 48 => (int)0
145 case 'A': case 'B': case 'C':
146 case 'D': case 'E': case 'F':
147 return (_c - 55); // A-F, A=10..F=15, 'A'=65..'F'=70
149 case 'a': case 'b': case 'c':
150 case 'd': case 'e': case 'f':
151 return (_c - 87); // a-f, a=10..F=15, 'a'=97..'f'=102
/*
  Returns YES if _c is one of the hexadecimal digit characters
  '0'-'9', 'A'-'F' or 'a'-'f'; NO for any other character.
*/
static inline BOOL _isHexDigit(register unichar _c) {
  if ((_c >= '0') && (_c <= '9'))
    return YES;
  if ((_c >= 'A') && (_c <= 'F'))
    return YES;
  if ((_c >= 'a') && (_c <= 'f'))
    return YES;
  return NO;
}
/*
  NGUnescapeUrlBuffer: decodes %XX escapes from the NUL-terminated buffer
  _source into _dest. The loop runs until the "done" flag is set or the
  terminating NUL of _source is reached.

  Visible behaviour:
    - the '+' -> ' ' translation is commented out;
    - when the character read after advancing _source is a hex digit, it
      and a following character are combined as (first * 16 + second) and
      stored as one byte in _dest;
    - otherwise the character is handled by the "escaped char" branch
      (the trailing comment gives '%%' -> '%' as the example);
    - characters outside an escape are passed through.

  NOTE(review): most statements of this function (declarations, the test
  for '%', end-of-input handling inside an escape, pointer advances and
  the termination of _dest) are not visible in this extract. In
  particular no validation of the second hex digit is visible here -
  confirm against the complete file.
*/
173 NGUnescapeUrlBuffer(const unsigned char *_source, unsigned char *_dest)
177 while (!done && (*_source != '\0')) {
180 //if (c == '+') // '+' stands for a space
183 _source++; c = *_source;
189 else if (_isHexDigit(c)) { // hex-escaped char, like '%F3'
190 int decChar = _valueOfHexChar(c);
193 decChar = decChar * 16 + _valueOfHexChar(c);
194 *_dest = (unsigned char)decChar;
196 else // escaped char, like '%%' -> '%'
199 else // char passed through
/*
  Returns YES if the receiver contains at least one '%' character,
  NO otherwise (including for the empty string).
*/
- (BOOL)containsURLEscapeCharacters {
  register unichar (*fetchChar)(id,SEL,unsigned);
  register unsigned pos, count;

  count = [self length];
  if (count == 0)
    return NO;

  /* look up the method implementation once instead of once per character */
  fetchChar = (void*)[self methodForSelector:@selector(characterAtIndex:)];

  pos = 0;
  while (pos < count) {
    if (fetchChar(self, @selector(characterAtIndex:), pos) == '%')
      return YES;
    pos++;
  }
  return NO;
}
/*
  Returns YES if the receiver contains at least one character that has to
  be escaped before the string can be used in a URL, NO otherwise
  (including for the empty string).

  Every character outside the 7-bit ASCII range counts as invalid. The
  check is done on the full 16-bit unichar *before* narrowing to
  "unsigned char": isToBeEscaped() only looks at 8 bits, so passing the
  unichar straight through would truncate e.g. U+0161 to 0x61 ('a') and
  wrongly report the string as clean.
*/
- (BOOL)containsURLInvalidCharacters {
  register unsigned i, len;
  register unichar (*charAtIdx)(id,SEL,unsigned);

  if ((len = [self length]) == 0) return NO;

  /* look up the method implementation once instead of once per character */
  charAtIdx = (void*)[self methodForSelector:@selector(characterAtIndex:)];
  for (i = 0; i < len; i++) {
    register unichar c = charAtIdx(self, @selector(characterAtIndex:), i);

    if (c > 127) /* non-ASCII never is a valid URL character */
      return YES;
    if (isToBeEscaped((unsigned char)c))
      return YES;
  }
  return NO;
}
/*
  stringByUnescapingURL: returns a string in which the %XX escapes of the
  receiver are decoded.

  Visible steps:
    1. If the receiver contains no '%', an autoreleased copy is returned.
    2. An empty C string representation yields @"".
    3. The receiver's C string is copied into a malloc'ed buffer (cstr)
       and decoded with NGUnescapeUrlBuffer() into a second buffer.
    4. When doUseUTF8Encoding() is YES, the decoded bytes are interpreted
       as UTF-8 and the buffer is freed afterwards; otherwise the buffer
       is handed to initWithCStringNoCopy:length:... .
    5. cstr is freed and the new string is returned autoreleased.

  NOTE(review): local declarations, the last argument of the
  initWithCStringNoCopy: message (buffer ownership) and several other
  lines are not visible in this extract; the results of malloc() are not
  checked in the visible lines. initWithUTF8String: presumably yields nil
  when the decoded bytes are not valid UTF-8 - confirm how callers cope.
*/
235 - (NSString *)stringByUnescapingURL {
237 input is a URL string - per definition ASCII(?!), like "hello%98%88.txt"
238 output is a unicode string (never longer than the input)
240 Note that the input itself is in some encoding! That is, the input is
241 turned into a buffer eg containing UTF-8 and needs to be converted into
249 if (![self containsURLEscapeCharacters]) /* scan for '%' */
250 return [[self copy] autorelease];
252 if ((len = [self cStringLength]) == 0) return @"";
254 cstr = malloc(len + 10);
255 [self getCString:cstr]; /* this is OK, a URL is always in ASCII! */
/* decoding never produces more bytes than it consumes, so len + 4 is enough */
258 buffer = malloc(len + 4);
259 NGUnescapeUrlBuffer(cstr, buffer);
261 if (doUseUTF8Encoding()) {
262 /* OK, the input is considered UTF-8 encoded in a string */
263 s = [[NSString alloc] initWithUTF8String:buffer];
/* in this branch the decoded buffer is released right after the string is created */
264 if (buffer) free(buffer);
267 s = [[NSString alloc]
268 initWithCStringNoCopy:buffer
269 length:strlen(buffer)
272 if (cstr) free(cstr);
273 return [s autorelease];
/*
  stringByEscapingURL: returns a URL-escaped version of the receiver.

  Visible steps:
    1. The empty string yields @"".
    2. If containsURLInvalidCharacters reports NO, an autoreleased copy of
       the receiver is returned unchanged.
    3. When doUseUTF8Encoding() is YES, the receiver is converted to UTF-8
       bytes and those bytes are escaped with NGEscapeUrlBuffer();
       otherwise the receiver's C string representation is escaped.
    4. The escaped buffer is handed to initWithCStringNoCopy:length:...
       and the resulting string is returned autoreleased.

  The output buffer is sized len * 3 + 2 because NGEscapeUrlBuffer() emits
  at most three bytes ('%' plus two hex digits) per input byte.

  NOTE(review): local declarations, the values returned when the UTF-8
  conversion yields nil or zero bytes, and the last argument of the
  initWithCStringNoCopy: message (buffer ownership) are not visible in
  this extract; the results of malloc() are not checked in the visible
  lines - confirm against the complete file.
*/
276 - (NSString *)stringByEscapingURL {
281 if ((len = [self length]) == 0) return @"";
283 if (![self containsURLInvalidCharacters]) // needs to be escaped ?
284 return [[self copy] autorelease];
286 if (doUseUTF8Encoding()) {
288 // a) encode into a data buffer! (eg UTF8 or ISO)
289 // b) encode that buffer into URL encoding
290 // c) create an ASCII string from that
293 if ((data = [self dataUsingEncoding:NSUTF8StringEncoding]) == nil)
295 if ((len = [data length]) == 0)
298 buffer = malloc(len * 3 + 2);
299 NGEscapeUrlBuffer([data bytes], buffer, len);
/* from here on len is a byte count of the C string, not a character count */
304 len = [self cStringLength];
305 cstr = malloc(len + 4);
306 [self getCString:cstr]; // Unicode!
309 buffer = malloc(len * 3 + 2);
310 NGEscapeUrlBuffer(cstr, buffer, len);
311 if (cstr) free(cstr);
314 /* the following assumes that the default-encoding is ASCII compatible */
315 s = [[NSString alloc]
316 initWithCStringNoCopy:buffer
317 length:strlen(buffer)
319 return [s autorelease];
322 @end /* NSString(URLEscaping) */