err.no Git - sope/blob - sope-core/NGExtensions/FdExt.subproj/NSString+URLEscaping.m

   1 /*
   2   Copyright (C) 2000-2005 SKYRIX Software AG
   3
   4   This file is part of SOPE.
   5
   6   SOPE is free software; you can redistribute it and/or modify it under
   7   the terms of the GNU Lesser General Public License as published by the
   8   Free Software Foundation; either version 2, or (at your option) any
   9   later version.
  10
  11   SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
  12   WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  14   License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public
  17   License along with SOPE; see the file COPYING.  If not, write to the
  18   Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19   02111-1307, USA.
  20 */
  21
  22 #include "NSString+misc.h"
  23 #include "common.h"
  24
  25 /*
  26   TODO: support new Panther API?:
  27 - (NSString *)stringByAddingPercentEscapesUsingEncoding:(NSStringEncoding)e
  28 - (NSString *)stringByReplacingPercentEscapesUsingEncoding:(NSStringEncoding)e
  29 */
  30
  31 @implementation NSString(URLEscaping)
  32
  /* Cached answer of doUseUTF8Encoding(): -1 = not looked up yet, 0 = NO, 1 = YES. */
  static int useUTF8Encoding = -1;

  /*
    Tells whether URL (un)escaping should treat the byte buffer as UTF-8.

    The boolean user default "NGUseUTF8AsURLEncoding" is read on the first
    call only; the result is cached in useUTF8Encoding. When the default is
    enabled a note is logged once, at lookup time.
  */
  static inline BOOL doUseUTF8Encoding(void) {
    /* already resolved by an earlier call */
    if (useUTF8Encoding != -1)
      return (useUTF8Encoding != 0) ? YES : NO;

    if ([[NSUserDefaults standardUserDefaults]
          boolForKey:@"NGUseUTF8AsURLEncoding"]) {
      useUTF8Encoding = 1;
      NSLog(@"Note: Using UTF-8 as URL encoding in NGExtensions.");
      return YES;
    }

    useUTF8Encoding = 0;
    return NO;
  }
  45
  /* YES for the ASCII letters 'a'..'z' and 'A'..'Z', NO for anything else. */
  static inline BOOL isUrlAlpha(unsigned char _c) {
    if (_c >= 'a' && _c <= 'z') return YES;
    if (_c >= 'A' && _c <= 'Z') return YES;
    return NO;
  }
  /* YES for the ASCII digits '0'..'9', NO for anything else. */
  static inline BOOL isUrlDigit(unsigned char _c) {
    if (_c < '0' || _c > '9')
      return NO;
    return YES;
  }
  /*
    YES for the punctuation characters that are passed through unescaped:
    '$', '-', '_', '.' and '@'.

    '+' is intentionally NOT part of the set; it used to be a candidate but
    is disabled (see OGo bug #1260, required for forms).
  */
  static inline BOOL isUrlSafeChar(unsigned char _c) {
    if (_c == '$' || _c == '-' || _c == '_' || _c == '.')
      return YES;
    if (_c == '@') // TODO: not a safe char?!
      return YES;
    return NO;
  }
  /* YES for the "extra" characters: '!', '*', '"', '\'', '|' and ','. */
  static inline BOOL isUrlExtraChar(unsigned char _c) {
    BOOL isExtra;

    isExtra = (_c == '!' || _c == '*' || _c == '"' ||
               _c == '\'' || _c == '|' || _c == ',') ? YES : NO;
    return isExtra;
  }
  /* YES only for '%', the character that introduces a URL escape. */
  static inline BOOL isUrlEscapeChar(unsigned char _c) {
    if (_c == '%')
      return YES;
    return NO;
  }
  /* YES for the reserved characters: '=', ';', '/', '#', '?', ':' and ' '. */
  static inline BOOL isUrlReservedChar(unsigned char _c) {
    if (_c == '=' || _c == ';' || _c == '/')
      return YES;
    if (_c == '#' || _c == '?' || _c == ':')
      return YES;
    return (_c == ' ') ? YES : NO;
  }
  88
  /*
    YES if the character is a letter, a digit, a safe character, an extra
    character or the escape character '%'; NO otherwise.
  */
  static inline BOOL isUrlXalpha(unsigned char _c) {
    return (isUrlAlpha(_c)     ||
            isUrlDigit(_c)     ||
            isUrlSafeChar(_c)  ||
            isUrlExtraChar(_c) ||
            isUrlEscapeChar(_c)) ? YES : NO;
  }
  97
  /* YES for a hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F'. */
  static inline BOOL isUrlHexChar(unsigned char _c) {
    return (isUrlDigit(_c) ||
            (_c >= 'a' && _c <= 'f') ||
            (_c >= 'A' && _c <= 'F')) ? YES : NO;
  }
 107
 /* YES for an ASCII letter or an ASCII digit, NO otherwise. */
 static inline BOOL isUrlAlphaNum(unsigned char _c) {
   if (isUrlAlpha(_c)) return YES;
   if (isUrlDigit(_c)) return YES;
   return NO;
 }
 111
 /*
   Decides whether a byte must be percent-escaped: letters, digits, '_' and
   the characters accepted by isUrlSafeChar() pass through (NO), every
   other byte has to be escaped (YES).
 */
 static inline BOOL isToBeEscaped(unsigned char _c) {
   if (isUrlAlphaNum(_c)) return NO;
   if (_c == '_')         return NO;
   if (isUrlSafeChar(_c)) return NO;
   return YES;
 }
 115
 /*
   Percent-escapes a byte buffer.

   Copies srclen bytes from _source to _dest. Bytes for which
   isToBeEscaped() answers NO are copied verbatim, all others are written
   as '%' followed by two uppercase hex digits. The result is always
   NUL-terminated.

   _dest must provide room for the worst case of three output bytes per
   input byte plus the terminator (3 * srclen + 1).

   Note: a space is escaped like any other byte ("%20"); it is not turned
   into '+'.
 */
 static void
 NGEscapeUrlBuffer(const unsigned char *_source, unsigned char *_dest,
                   unsigned srclen)
 {
   static const char hexDigits[] = "0123456789ABCDEF";
   unsigned char *out = _dest;
   unsigned      idx;

   for (idx = 0; idx < srclen; idx++) {
     unsigned char byte = _source[idx];

     if (isToBeEscaped(byte)) {
       /* '%' + high nibble + low nibble */
       *out++ = '%';
       *out++ = (unsigned char)hexDigits[byte >> 4];
       *out++ = (unsigned char)hexDigits[byte & 0x0F];
     }
     else {
       *out++ = byte;
     }
   }
   *out = '\0';
 }
 140
 /*
   Numeric value (0..15) of a hexadecimal digit character, accepting both
   'A'..'F' and 'a'..'f'. Returns -1 if _c is not a hex digit.
 */
 static inline int _valueOfHexChar(register unichar _c) {
   if (_c >= '0' && _c <= '9')
     return _c - '0';
   if (_c >= 'A' && _c <= 'F')
     return (_c - 'A') + 10;
   if (_c >= 'a' && _c <= 'f')
     return (_c - 'a') + 10;
   return -1;
 }
 /* YES if _c is one of '0'..'9', 'A'..'F' or 'a'..'f'; NO otherwise. */
 static inline BOOL _isHexDigit(register unichar _c) {
   if (_c >= '0' && _c <= '9') return YES;
   if (_c >= 'A' && _c <= 'F') return YES;
   if (_c >= 'a' && _c <= 'f') return YES;
   return NO;
 }
 173
 /*
   Decodes the percent escapes of a NUL-terminated buffer.

   _source is read up to (not beyond) its terminating NUL, the decoded
   bytes are written to _dest, which is NUL-terminated afterwards. The
   output is never longer than the input, so _dest needs strlen(_source)+1
   bytes.

   Rules:
     "%XY" (two hex digits)  -> the byte with that value
     "%c"  (c not a hex digit, eg "%%") -> c
     "%"   at the very end   -> '%'
     "%X"  followed by a non-hex character or by the end of the string
                             -> kept literally as "%X"; the following
                                character is then processed normally
     anything else           -> copied verbatim ('+' is NOT turned into ' ')

   The "%X" rule guards against malformed input: the second digit is
   validated before it is consumed, so a truncated escape can neither
   produce a garbage byte nor move the read pointer past the terminator.
 */
 static void
 NGUnescapeUrlBuffer(const unsigned char *_source, unsigned char *_dest)
 {
   while (*_source != '\0') {
     unsigned char c = *_source++;

     if (c != '%') { // char passed through
       *_dest++ = c;
       continue;
     }

     c = *_source; /* character following the '%' */

     if (c == '\0') { /* lone '%' at the end of the input */
       *_dest++ = '%';
       break;
     }

     if (!_isHexDigit(c)) { // escaped char, like '%%' -> '%'
       *_dest++ = c;
       _source++;
       continue;
     }

     if (_isHexDigit(_source[1])) { // hex-escaped char, like '%F3'
       int decChar = _valueOfHexChar(c) * 16 + _valueOfHexChar(_source[1]);

       *_dest++ = (unsigned char)decChar;
       _source += 2;
     }
     else {
       /*
         Malformed escape (only one hex digit): emit it unchanged and
         leave the following character - possibly the terminator - to the
         next loop iteration.
       */
       *_dest++ = '%';
       *_dest++ = c;
       _source++;
     }
   }
   *_dest = '\0';
 }
 209
 /*
   Returns YES if the receiver contains at least one '%' character, NO
   otherwise (including for the empty string).

   The characters are fetched with a regular message send. Calling a
   cached IMP through a hand-written function pointer type only works if
   that type matches the real signature of -characterAtIndex: exactly,
   which an 'unsigned' index does not where the method takes a wider
   integer type.
 */
 - (BOOL)containsURLEscapeCharacters {
   unsigned i, len;

   len = [self length];
   for (i = 0; i < len; i++) {
     if ([self characterAtIndex:i] == '%')
       return YES;
   }
   return NO;
 }
 /*
   Returns YES if the receiver contains at least one character that has to
   be percent-escaped, NO otherwise (including for the empty string).

   Every character outside of the 7-bit ASCII range needs escaping. Those
   are checked before isToBeEscaped() is consulted, because that function
   takes an 'unsigned char': handing it a wider unichar would silently
   drop the upper byte, eg U+0141 would be tested as 0x41 ('A') and be
   reported as valid.
 */
 - (BOOL)containsURLInvalidCharacters {
   unsigned i, len;

   len = [self length];
   for (i = 0; i < len; i++) {
     unichar c = [self characterAtIndex:i];

     if (c > 0x7F) /* non-ASCII, always needs escaping */
       return YES;
     if (isToBeEscaped((unsigned char)c))
       return YES;
   }
   return NO;
 }
 236
 - (NSString *)stringByUnescapingURL {
   /*
      Returns the receiver with its percent escapes decoded.

      The receiver is expected to be a URL string, ie plain ASCII such as
      "hello%98%88.txt". The decoded result is a byte buffer in some
      encoding which is never longer than the input; that buffer is turned
      back into a string either as UTF-8 (if doUseUTF8Encoding() says so)
      or as a regular C string.

      A receiver without any '%' is returned as an autoreleased copy.
   */
   unsigned cLength;
   char     *escaped;
   char     *decoded;
   NSString *result;

   /* nothing to decode if there is no '%' */
   if (![self containsURLEscapeCharacters])
     return [[self copy] autorelease];

   cLength = [self cStringLength];
   if (cLength == 0)
     return @"";

   /* fetch the receiver as a C string; fine as long as URLs are ASCII */
   escaped = malloc(cLength + 10);
   [self getCString:escaped];
   escaped[cLength] = '\0';

   decoded = malloc(cLength + 4);
   NGUnescapeUrlBuffer((unsigned char *)escaped, (unsigned char *)decoded);
   free(escaped);

   if (doUseUTF8Encoding()) {
     /* the decoded bytes are interpreted as UTF-8, buffer stays ours */
     result = [[NSString alloc] initWithUTF8String:decoded];
     free(decoded);
   }
   else {
     /* the string takes over the buffer and frees it when done */
     result = [[NSString alloc] initWithCStringNoCopy:decoded
                                length:strlen(decoded)
                                freeWhenDone:YES];
   }
   return [result autorelease];
 }
 277
 /*
   Returns the receiver with all characters percent-escaped that
   isToBeEscaped() rejects.

   The empty string yields @"", a receiver that needs no escaping is
   returned as an autoreleased copy. Otherwise the receiver is first
   converted to bytes - UTF-8 if doUseUTF8Encoding() says so, its C string
   representation if not - and those bytes are escaped. Returns nil if the
   UTF-8 conversion fails.
 */
 - (NSString *)stringByEscapingURL {
   unsigned byteCount;
   char     *escaped;
   NSString *result;

   if ([self length] == 0)
     return @"";

   /* nothing to do if every character is URL safe */
   if (![self containsURLInvalidCharacters])
     return [[self copy] autorelease];

   if (doUseUTF8Encoding()) {
     /*
       a) encode the string into a byte buffer (UTF-8)
       b) percent-escape that buffer
       c) create an ASCII string from the result (below)
     */
     NSData *utf8 = [self dataUsingEncoding:NSUTF8StringEncoding];

     if (utf8 == nil)
       return nil;

     byteCount = [utf8 length];
     if (byteCount == 0)
       return @"";

     escaped = malloc(byteCount * 3 + 2);
     NGEscapeUrlBuffer([utf8 bytes], (unsigned char *)escaped, byteCount);
   }
   else {
     unsigned char *raw;

     /* same steps, but based on the C string form of the receiver */
     byteCount = [self cStringLength];
     raw = malloc(byteCount + 4);
     [self getCString:(char *)raw];
     raw[byteCount] = '\0';

     escaped = malloc(byteCount * 3 + 2);
     NGEscapeUrlBuffer(raw, (unsigned char *)escaped, byteCount);
     free(raw);
   }

   /*
     The escaped buffer is handed over to the string, which frees it when
     done. This assumes that the default encoding is ASCII compatible.
   */
   result = [[NSString alloc] initWithCStringNoCopy:escaped
                              length:strlen(escaped)
                              freeWhenDone:YES];
   return [result autorelease];
 }
 323
 324 @end /* NSString(URLEscaping) */