err.no Git - sope/blob - sope-core/NGExtensions/FdExt.subproj/NSString+URLEscaping.m

   1 /*
   2   Copyright (C) 2000-2004 SKYRIX Software AG
   3
   4   This file is part of OpenGroupware.org.
   5
   6   OGo is free software; you can redistribute it and/or modify it under
   7   the terms of the GNU Lesser General Public License as published by the
   8   Free Software Foundation; either version 2, or (at your option) any
   9   later version.
  10
  11   OGo is distributed in the hope that it will be useful, but WITHOUT ANY
  12   WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  14   License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public
  17   License along with OGo; see the file COPYING.  If not, write to the
  18   Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19   02111-1307, USA.
  20 */
  21
  22 #include "NSString+misc.h"
  23 #include "common.h"
  24
  25 /*
  26   TODO: support new Panther API?:
  27 - (NSString *)stringByAddingPercentEscapesUsingEncoding:(NSStringEncoding)e
  28 - (NSString *)stringByReplacingPercentEscapesUsingEncoding:(NSStringEncoding)e
  29 */
  30
  31 @implementation NSString(URLEscaping)
  32
  33 static int useUTF8Encoding = -1;
  34
  35 static inline BOOL doUseUTF8Encoding(void) {
  36   if (useUTF8Encoding == -1) {
  37     NSUserDefaults *ud = [NSUserDefaults standardUserDefaults];
  38
  39     useUTF8Encoding = [ud boolForKey:@"NGUseUTF8AsURLEncoding"] ? 1 : 0;
  40     if (useUTF8Encoding)
  41       NSLog(@"Note: Using UTF-8 as URL encoding in NGExtensions.");
  42   }
  43   return useUTF8Encoding ? YES : NO;
  44 }
  45
  46 static inline BOOL isUrlAlpha(unsigned char _c) {
  47   return
  48     (((_c >= 'a') && (_c <= 'z')) ||
  49      ((_c >= 'A') && (_c <= 'Z')))
  50     ? YES : NO;
  51 }
  52 static inline BOOL isUrlDigit(unsigned char _c) {
  53   return ((_c >= '0') && (_c <= '9')) ? YES : NO;
  54 }
  55 static inline BOOL isUrlSafeChar(unsigned char _c) {
  56   switch (_c) {
  57     case '$': case '-': case '_': case '.': case '+':
  58     case '@': // TODO: not a safe char?!
  59       return YES;
  60
  61     default:
  62       return NO;
  63   }
  64 }
  65 static inline BOOL isUrlExtraChar(unsigned char _c) {
  66   switch (_c) {
  67     case '!': case '*': case '"': case '\'':
  68     case '|': case ',':
  69       return YES;
  70   }
  71   return NO;
  72 }
  73 static inline BOOL isUrlEscapeChar(unsigned char _c) {
  74   return (_c == '%') ? YES : NO;
  75 }
  76 static inline BOOL isUrlReservedChar(unsigned char _c) {
  77   switch (_c) {
  78     case '=': case ';': case '/':
  79     case '#': case '?': case ':':
  80     case ' ':
  81       return YES;
  82   }
  83   return NO;
  84 }
  85
  86 static inline BOOL isUrlXalpha(unsigned char _c) {
  87   if (isUrlAlpha(_c))      return YES;
  88   if (isUrlDigit(_c))      return YES;
  89   if (isUrlSafeChar(_c))   return YES;
  90   if (isUrlExtraChar(_c))  return YES;
  91   if (isUrlEscapeChar(_c)) return YES;
  92   return NO;
  93 }
  94
  95 static inline BOOL isUrlHexChar(unsigned char _c) {
  96   if (isUrlDigit(_c))
  97     return YES;
  98   if ((_c >= 'a') && (_c <= 'f'))
  99     return YES;
 100   if ((_c >= 'A') && (_c <= 'F'))
 101     return YES;
 102   return NO;
 103 }
 104
 105 static inline BOOL isUrlAlphaNum(unsigned char _c) {
 106   return (isUrlAlpha(_c) || isUrlDigit(_c)) ? YES : NO;
 107 }
 108
 109 static inline BOOL isToBeEscaped(unsigned char _c) {
 110   return (isUrlAlphaNum(_c) || (_c == '_') || isUrlSafeChar(_c)) ? NO : YES;
 111 }
 112
 113 static void
 114 NGEscapeUrlBuffer(const unsigned char *_source, unsigned char *_dest,
 115                   unsigned srclen)
 116 {
 117   register const unsigned char *src = (void*)_source;
 118   register unsigned i;
 119   for (i = 0; i < srclen; i++, src++) {
 120 #if 0 // explain!
 121     if (*src == ' ') { // a ' ' becomes a '+'
 122       *_dest = '+'; _dest++;
 123     }
 124 #endif
 125     if (!isToBeEscaped(*src)) {
 126       *_dest = *src;
 127       _dest++;
 128     }
 129     else { // any other char is escaped ..
 130       *_dest = '%'; _dest++;
 131       sprintf(_dest, "%02X", (unsigned)*src);
 132       _dest += 2;
 133     }
 134   }
 135   *_dest = '\0';
 136 }
 137
 138 static inline int _valueOfHexChar(register unichar _c) {
 139   switch (_c) {
 140     case '0': case '1': case '2': case '3': case '4':
 141     case '5': case '6': case '7': case '8': case '9':
 142       return (_c - 48); // 0-9 (ascii-char)'0' - 48 => (int)0
 143
 144     case 'A': case 'B': case 'C':
 145     case 'D': case 'E': case 'F':
 146       return (_c - 55); // A-F, A=10..F=15, 'A'=65..'F'=70
 147
 148     case 'a': case 'b': case 'c':
 149     case 'd': case 'e': case 'f':
 150       return (_c - 87); // a-f, a=10..F=15, 'a'=97..'f'=102
 151
 152     default:
 153       return -1;
 154   }
 155 }
 156 static inline BOOL _isHexDigit(register unichar _c) {
 157   switch (_c) {
 158     case '0': case '1': case '2': case '3': case '4':
 159     case '5': case '6': case '7': case '8': case '9':
 160     case 'A': case 'B': case 'C':
 161     case 'D': case 'E': case 'F':
 162     case 'a': case 'b': case 'c':
 163     case 'd': case 'e': case 'f':
 164       return YES;
 165
 166     default:
 167       return NO;
 168   }
 169 }
 170
 171 static void
 172 NGUnescapeUrlBuffer(const unsigned char *_source, unsigned char *_dest)
 173 {
 174   BOOL done = NO;
 175
 176   while (!done && (*_source != '\0')) {
 177     char c = *_source;
 178
 179     //if (c == '+') // '+' stands for a space
 180     //  *_dest = ' ';
 181     if (c == '%') {
 182       _source++; c = *_source;
 183
 184       if (c == '\0') {
 185         *_dest = '%';
 186         done = YES;
 187       }
 188       else if (_isHexDigit(c)) { // hex-escaped char, like '%F3'
 189         int decChar = _valueOfHexChar(c);
 190         _source++;
 191         c = *_source;
 192         decChar = decChar * 16 + _valueOfHexChar(c);
 193         *_dest = (unsigned char)decChar;
 194       }
 195       else // escaped char, like '%%' -> '%'
 196         *_dest = c;
 197     }
 198     else // char passed through
 199       *_dest = c;
 200
 201     _dest++;
 202     _source++;
 203   }
 204   *_dest = '\0';
 205 }
 206
 207 - (BOOL)containsURLEscapeCharacters {
 208   register unsigned i, len;
 209   register unichar (*charAtIdx)(id,SEL,unsigned);
 210
 211   if ((len = [self length]) == 0) return NO;
 212
 213   charAtIdx = (void*)[self methodForSelector:@selector(characterAtIndex:)];
 214   for (i = 0; i < len; i++) {
 215     if (charAtIdx(self, @selector(characterAtIndex:), i) == '%')
 216       return YES;
 217   }
 218   return NO;
 219 }
 220 - (BOOL)containsURLInvalidCharacters {
 221   register unsigned i, len;
 222   register unichar (*charAtIdx)(id,SEL,unsigned);
 223
 224   if ((len = [self length]) == 0) return NO;
 225
 226   charAtIdx = (void*)[self methodForSelector:@selector(characterAtIndex:)];
 227   for (i = 0; i < len; i++) {
 228     if (isToBeEscaped(charAtIdx(self, @selector(characterAtIndex:), i)))
 229       return YES;
 230   }
 231   return NO;
 232 }
 233
 234 - (NSString *)stringByUnescapingURL {
 235   /*
 236      input is a URL string - per definition ASCII(?!), like "hello%98%88.txt"
 237      output is a unicode string (never longer than the input)
 238
 239      Note that the input itself is in some encoding! That is, the input is
 240      turned into a buffer eg containing UTF-8 and needs to be converted into
 241      a unicode string.
 242   */
 243   unsigned len;
 244   char     *cstr;
 245   char     *buffer = NULL;
 246   NSString *s;
 247
 248   if (![self containsURLEscapeCharacters]) /* scan for '%' */
 249     return [[self copy] autorelease];
 250
 251   if ((len = [self cStringLength]) == 0) return @"";
 252
 253   cstr = malloc(len + 10);
 254   [self getCString:cstr]; /* this is OK, a URL is always in ASCII! */
 255   cstr[len] = '\0';
 256
 257   buffer = malloc(len + 4);
 258   NGUnescapeUrlBuffer(cstr, buffer);
 259
 260   if (doUseUTF8Encoding()) {
 261     /* OK, the input is considered UTF-8 encoded in a string */
 262     s = [[NSString alloc] initWithUTF8String:buffer];
 263     if (buffer) free(buffer);
 264   }
 265   else {
 266     s = [[NSString alloc]
 267           initWithCStringNoCopy:buffer
 268           length:strlen(buffer)
 269           freeWhenDone:YES];
 270   }
 271   if (cstr) free(cstr);
 272   return [s autorelease];
 273 }
 274
 275 - (NSString *)stringByEscapingURL {
 276   unsigned len;
 277   NSString *s;
 278   char     *buffer = NULL;
 279
 280   if ((len = [self length]) == 0) return @"";
 281
 282   if (![self containsURLInvalidCharacters]) // needs to be escaped ?
 283     return [[self copy] autorelease];
 284
 285   if (doUseUTF8Encoding()) {
 286     // steps:
 287     // a) encode into a data buffer! (eg UTF8 or ISO)
 288     // b) encode that buffer into URL encoding
 289     // c) create an ASCII string from that
 290     NSData *data;
 291
 292     if ((data = [self dataUsingEncoding:NSUTF8StringEncoding]) == nil)
 293       return nil;
 294     if ((len = [data length]) == 0)
 295       return @"";
 296
 297     buffer = malloc(len * 3 + 2);
 298     NGEscapeUrlBuffer([data bytes], buffer, len);
 299   }
 300   else {
 301     unsigned char *cstr;
 302
 303     len  = [self cStringLength];
 304     cstr = malloc(len + 4);
 305     [self getCString:cstr]; // Unicode!
 306     cstr[len] = '\0';
 307
 308     buffer = malloc(len * 3 + 2);
 309     NGEscapeUrlBuffer(cstr, buffer, len);
 310     if (cstr) free(cstr);
 311
 312   }
 313   /* the following assumes that the default-encoding is ASCII compatible */
 314   s = [[NSString alloc]
 315                  initWithCStringNoCopy:buffer
 316                  length:strlen(buffer)
 317                  freeWhenDone:YES];
 318   return [s autorelease];
 319 }
 320
 321 @end /* NSString(URLEscaping) */