2 Copyright (C) 2000-2003 SKYRIX Software AG
4 This file is part of OGo
6 OGo is free software; you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 OGo is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with OGo; see the file COPYING. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "NSString+misc.h"
26 @implementation NSString(URLEscaping)
/*
  Cached state of the NGUseUTF8AsURLEncoding user default:
  -1 = not looked up yet, 0 = off, 1 = on.
*/
static int useUTF8Encoding = -1;

/*
  Returns YES if the NGUseUTF8AsURLEncoding user default is set.

  The default is read on the first call only; the answer is cached in
  useUTF8Encoding. When the default turns out to be set, a note is logged
  once.
*/
static inline BOOL doUseUTF8Encoding(void) {
  NSUserDefaults *defaults;

  if (useUTF8Encoding != -1) /* already resolved by an earlier call */
    return (useUTF8Encoding != 0) ? YES : NO;

  defaults = [NSUserDefaults standardUserDefaults];
  if ([defaults boolForKey:@"NGUseUTF8AsURLEncoding"]) {
    useUTF8Encoding = 1;
    NSLog(@"Note: Using UTF-8 as URL encoding in NGExtensions.");
  }
  else
    useUTF8Encoding = 0;

  return (useUTF8Encoding != 0) ? YES : NO;
}
/* Returns YES for the ASCII letters 'a'-'z' and 'A'-'Z', NO for anything else. */
static inline BOOL isUrlAlpha(unsigned char _c) {
  if ((_c >= 'A') && (_c <= 'Z')) return YES;
  if ((_c >= 'a') && (_c <= 'z')) return YES;
  return NO;
}
/* Returns YES for the ASCII digits '0'-'9', NO for anything else. */
static inline BOOL isUrlDigit(unsigned char _c) {
  if (_c < '0') return NO;
  if (_c > '9') return NO;
  return YES;
}
/*
  Returns YES if _c is one of the "safe" URL punctuation characters
  $ - _ @ . & +  and NO otherwise.
*/
static inline BOOL isUrlSafeChar(unsigned char _c) {
  return ((_c == '$') || (_c == '-') || (_c == '_') || (_c == '@') ||
          (_c == '.') || (_c == '&') || (_c == '+'))
    ? YES : NO;
}
/*
  Returns YES if _c is one of the "extra" URL characters  ! * " '
  and NO otherwise.

  NOTE(review): the character list is taken from the case labels that are
  visible for this function; confirm that no further labels belong here.
*/
static inline BOOL isUrlExtraChar(unsigned char _c) {
  return ((_c == '!') || (_c == '*') || (_c == '"') || (_c == '\''))
    ? YES : NO;
}
/* Returns YES only for '%', the character that introduces a URL escape. */
static inline BOOL isUrlEscapeChar(unsigned char _c) {
  if (_c != '%')
    return NO;
  return YES;
}
/*
  Returns YES if _c is one of the reserved URL characters  = ; / # ? :
  and NO otherwise.

  NOTE(review): the character list is taken from the case labels that are
  visible for this function; confirm that no further labels belong here.
*/
static inline BOOL isUrlReservedChar(unsigned char _c) {
  return ((_c == '=') || (_c == ';') || (_c == '/') ||
          (_c == '#') || (_c == '?') || (_c == ':'))
    ? YES : NO;
}
/*
  Returns YES if _c is a letter, a digit, a "safe" character, an "extra"
  character or the escape character '%'; NO otherwise. The individual
  classes are defined by the isUrl...() helpers of this file.
*/
static inline BOOL isUrlXalpha(unsigned char _c) {
  return (isUrlAlpha(_c)     ||
          isUrlDigit(_c)     ||
          isUrlSafeChar(_c)  ||
          isUrlExtraChar(_c) ||
          isUrlEscapeChar(_c))
    ? YES : NO;
}
/* Returns YES if _c is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'. */
static inline BOOL isUrlHexChar(unsigned char _c) {
  switch (_c) {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
      return YES;

    default:
      return NO;
  }
}
/* Returns YES if _c is an ASCII letter or an ASCII digit, NO otherwise. */
static inline BOOL isUrlAlphaNum(unsigned char _c) {
  if (isUrlAlpha(_c))
    return YES;
  return isUrlDigit(_c) ? YES : NO;
}
/*
  Decides whether a byte has to be written as a %XX escape.
  Only ASCII letters, ASCII digits and '_' are left alone; every other
  byte value yields YES.
*/
static inline BOOL isToBeEscaped(unsigned char _c) {
  if (_c == '_')         return NO;
  if (isUrlAlphaNum(_c)) return NO;
  return YES;
}
/*
  URL-escapes srclen bytes from _source into _dest and NUL-terminates the
  result.

  Bytes for which isToBeEscaped() returns NO are copied unchanged, every
  other byte is written as '%' followed by two uppercase hex digits.
  _dest must therefore have room for srclen * 3 + 1 bytes.

  NOTE(review): the listing also shows a "' ' becomes '+'" branch. It is
  treated as disabled here (a space is written as %20), which matches the
  unescape routine not decoding '+' - confirm against the original file.
*/
static void
NGEscapeUrlBuffer(const unsigned char *_source, unsigned char *_dest,
                  unsigned srclen)
{
  static const char hexDigits[] = "0123456789ABCDEF";
  const unsigned char *cursor = _source;
  const unsigned char *end    = _source + srclen;

  while (cursor < end) {
    unsigned char byte = *cursor++;

    if (isToBeEscaped(byte)) {
      /* same output as the "%02X" format: high nibble first */
      *_dest++ = '%';
      *_dest++ = (unsigned char)hexDigits[byte >> 4];
      *_dest++ = (unsigned char)hexDigits[byte & 0x0F];
    }
    else
      *_dest++ = byte;
  }
  *_dest = '\0';
}
/*
  Maps a hexadecimal digit character to its numeric value:
  '0'-'9' => 0-9, 'A'-'F' => 10-15, 'a'-'f' => 10-15.

  NOTE(review): the value returned for a character that is not a hex digit
  is restored as -1 - confirm against the original file. Callers should
  check _isHexDigit() first.
*/
static inline int _valueOfHexChar(register unichar _c) {
  if ((_c >= '0') && (_c <= '9'))
    return (int)(_c - '0');      /* '0' is 48 */
  if ((_c >= 'A') && (_c <= 'F'))
    return (int)(_c - 'A') + 10; /* 'A' is 65, i.e. _c - 55 */
  if ((_c >= 'a') && (_c <= 'f'))
    return (int)(_c - 'a') + 10; /* 'a' is 97, i.e. _c - 87 */
  return -1;
}
/*
  Returns YES if _c is one of the characters '0'-'9', 'A'-'F' or 'a'-'f',
  NO for every other character value.
*/
static inline BOOL _isHexDigit(register unichar _c) {
  if ((_c >= '0') && (_c <= '9')) return YES;
  if ((_c >= 'A') && (_c <= 'F')) return YES;
  if ((_c >= 'a') && (_c <= 'f')) return YES;
  return NO;
}
/*
  Decodes the NUL-terminated, URL-escaped byte string _source into _dest
  and NUL-terminates the result.

    "%XY"  (X, Y hex digits)      => the single byte 0xXY
    "%c"   (c not a hex digit)    => c, e.g. "%%" => "%"
    "%"    at the end of input    => "%"
    "%X"   not followed by a hex  => the '%' is kept literally and X is
           digit (malformed)         copied as an ordinary character
    anything else                 => copied unchanged

  A '+' is NOT decoded to a space.

  Every step writes at most as many bytes as it consumes, so _dest needs
  room for strlen(_source) + 1 bytes. The terminating NUL of _source is
  never stepped over: the second hex digit is validated before the escape
  is consumed, so a truncated escape at the end of the input ("abc%F")
  cannot make the scan run past the end of the buffer.
*/
static void
NGUnescapeUrlBuffer(const unsigned char *_source, unsigned char *_dest)
{
  while (*_source != '\0') {
    unsigned char c = *_source;

    if (c != '%') { // char passed through
      *_dest++ = c;
      _source++;
      continue;
    }

    c = _source[1]; // the char following the '%'

    if (c == '\0') { // lone '%' at the very end of the input
      *_dest++ = '%';
      break;
    }

    if (!_isHexDigit(c)) { // escaped char, like '%%' -> '%'
      *_dest++ = c;
      _source += 2;
      continue;
    }

    /* _source[1] is not NUL, so looking at _source[2] is in bounds */
    if (_isHexDigit(_source[2])) { // hex-escaped char, like '%F3'
      int decChar = _valueOfHexChar(c) * 16 + _valueOfHexChar(_source[2]);

      *_dest++ = (unsigned char)decChar;
      _source += 3;
    }
    else {
      /* malformed escape: keep the '%', the next round copies the digit */
      *_dest++ = '%';
      _source++;
    }
  }
  *_dest = '\0';
}
/*
  Returns YES if the receiver contains at least one '%' character, that
  is, if it may contain URL escape sequences. Returns NO for the empty
  string.

  The characters are fetched with a regular -characterAtIndex: message
  rather than through a hand-cast IMP: the cast hard-coded an 'unsigned'
  index argument, which does not match the method's declared signature on
  platforms where the index type is wider than 'unsigned'.
*/
- (BOOL)containsURLEscapeCharacters {
  unsigned idx, count;

  count = [self length];
  for (idx = 0; idx < count; idx++) {
    if ([self characterAtIndex:idx] == '%')
      return YES;
  }
  return NO;
}
/*
  Returns YES if the receiver contains at least one character that has to
  be escaped in a URL, i.e. anything but an ASCII letter, an ASCII digit
  or '_'. Returns NO for the empty string.

  isToBeEscaped() classifies a single byte. A unichar above 0x7F must not
  be handed to it directly: the implicit narrowing to 'unsigned char'
  keeps only the low eight bits, so e.g. U+0141 would be looked at as 'A'
  and reported as safe. Such characters are never URL-safe and are
  reported right away.

  The characters are fetched with a regular -characterAtIndex: message
  rather than through a hand-cast IMP whose 'unsigned' index argument does
  not match the method's declared signature on every platform.
*/
- (BOOL)containsURLInvalidCharacters {
  unsigned idx, count;

  count = [self length];
  for (idx = 0; idx < count; idx++) {
    unichar c = [self characterAtIndex:idx];

    if (c > 0x7F) /* non-ASCII, always needs escaping */
      return YES;
    if (isToBeEscaped((unsigned char)c))
      return YES;
  }
  return NO;
}
- (NSString *)stringByUnescapingURL {
  /*
    input is a URL string - per definition ASCII(?!), like "hello%98%88.txt"
    output is a unicode string (never longer than the input)

    Note that the input itself is in some encoding! That is, the input is
    turned into a byte buffer, eg containing UTF-8, and that buffer needs
    to be converted into a string afterwards: as UTF-8 if
    doUseUTF8Encoding() says so, using the C-string initializer otherwise.

    Returns a copy of the receiver if it contains no '%', @"" if its
    C-string representation is empty, and nil if a work buffer cannot be
    allocated. The result may also be nil if the string initializer
    rejects the decoded bytes.
  */
  unsigned len;
  char     *cstr;
  char     *buffer;
  NSString *s;

  if (![self containsURLEscapeCharacters]) /* scan for '%' */
    return [[self copy] autorelease];

  if ((len = [self cStringLength]) == 0) return @"";

  if ((cstr = malloc(len + 10)) == NULL)
    return nil;
  [self getCString:cstr]; /* this is OK, a URL is always in ASCII! */
  cstr[len] = '\0';

  if ((buffer = malloc(len + 4)) == NULL) {
    free(cstr);
    return nil;
  }
  NGUnescapeUrlBuffer((const unsigned char *)cstr, (unsigned char *)buffer);
  free(cstr); /* the escaped form is not needed beyond this point */

  if (doUseUTF8Encoding()) {
    /* OK, the input is considered UTF-8 encoded in a string */
    s = [[NSString alloc] initWithUTF8String:buffer];
    free(buffer); /* the initializer does not take over the buffer */
  }
  else {
    /* ownership of buffer passes to the string (freeWhenDone) */
    s = [[NSString alloc]
          initWithCStringNoCopy:buffer
          length:strlen(buffer)
          freeWhenDone:YES];
  }
  return [s autorelease];
}
/*
  Returns the receiver with every character except ASCII letters, ASCII
  digits and '_' replaced by %XX escapes.

  Returns @"" for the empty string and a copy of the receiver if nothing
  needs escaping. Returns nil if the receiver cannot be converted to
  UTF-8 data (UTF-8 mode) or if a work buffer cannot be allocated.
*/
- (NSString *)stringByEscapingURL {
  unsigned len;
  NSString *s;
  char     *buffer = NULL;

  if ((len = [self length]) == 0) return @"";

  if (![self containsURLInvalidCharacters]) // needs to be escaped ?
    return [[self copy] autorelease];

  if (doUseUTF8Encoding()) {
    // steps:
    // a) encode into a data buffer! (eg UTF8 or ISO)
    // b) encode that buffer into URL encoding
    // c) create an ASCII string from that
    NSData *data;

    if ((data = [self dataUsingEncoding:NSUTF8StringEncoding]) == nil)
      return nil;
    if ((len = [data length]) == 0)
      return @"";

    /* worst case: every byte becomes "%XX", plus the terminating NUL */
    if ((buffer = malloc(len * 3 + 2)) == NULL)
      return nil;
    NGEscapeUrlBuffer([data bytes], (unsigned char *)buffer, len);
  }
  else {
    char *cstr;

    len = [self cStringLength];
    if ((cstr = malloc(len + 4)) == NULL)
      return nil;
    [self getCString:cstr]; // Unicode!
    cstr[len] = '\0';

    if ((buffer = malloc(len * 3 + 2)) == NULL) {
      free(cstr);
      return nil;
    }
    NGEscapeUrlBuffer((const unsigned char *)cstr,
                      (unsigned char *)buffer, len);
    free(cstr);
  }

  /* the following assumes that the default-encoding is ASCII compatible */
  /* ownership of buffer passes to the string (freeWhenDone) */
  s = [[NSString alloc]
        initWithCStringNoCopy:buffer
        length:strlen(buffer)
        freeWhenDone:YES];
  return [s autorelease];
}
316 @end /* NSString(URLEscaping) */