err.no Git - sope/blob - sope-core/NGExtensions/NGQuotedPrintableCoding.m

   1 /*
   2   Copyright (C) 2000-2008 SKYRIX Software AG
   3   Copyright (C) 2006-2008 Helge Hess
   4
   5   This file is part of SOPE.
   6
   7   SOPE is free software; you can redistribute it and/or modify it under
   8   the terms of the GNU Lesser General Public License as published by the
   9   Free Software Foundation; either version 2, or (at your option) any
  10   later version.
  11
  12   SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
  13   WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  15   License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public
  18   License along with SOPE; see the file COPYING.  If not, write to the
  19   Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20   02111-1307, USA.
  21 */
  22
  23 #include "NGQuotedPrintableCoding.h"
  24 #include "common.h"
  25 #include "NGMemoryAllocation.h"
  26
  27
  @implementation NSString(QuotedPrintableCoding)

  /*
    Returns a new string in which the quoted-printable escapes of the
    receiver are decoded.

    The receiver is converted to ASCII bytes, run through
    -dataByDecodingQuotedPrintable (the variant which also maps '_' to 0x20)
    and the decoded bytes are turned back into a string using
    +stringWithCString:length:.

    Returns nil if the receiver could not be converted to ASCII or if the
    decoder rejected the input. Previously a nil result of either step was
    passed on unchecked, handing a NULL byte pointer to
    +stringWithCString:length:.
  */
  - (NSString *)stringByDecodingQuotedPrintable {
    NSData *data;

    data = ([self length] > 0)
      ? [self dataUsingEncoding:NSASCIIStringEncoding]
      : [NSData data];
    if (data == nil) // receiver is not representable in ASCII
      return nil;

    data = [data dataByDecodingQuotedPrintable];
    if (data == nil) // decoder reported an error
      return nil;

    if ([data length] == 0) // nothing to convert, avoid touching -bytes
      return [NSString string];

    // TODO: should we default to some specific charset instead (either
    //       Latin1 or UTF-8), or use the charset of the receiver?
    return [NSString stringWithCString:[data bytes] length:[data length]];
  }

  /*
    Returns the quoted-printable representation of the receiver.

    The receiver is converted to bytes using the default C string encoding,
    encoded with -dataByEncodingQuotedPrintable and the (pure ASCII) result
    is wrapped in an autoreleased string.

    Returns nil if the receiver could not be converted to bytes or if the
    encoder failed.
  */
  - (NSString *)stringByEncodingQuotedPrintable {
    NSData *data;

    // TBD: which encoding to use?
    data = ([self length] > 0)
      ? [self dataUsingEncoding:[NSString defaultCStringEncoding]]
      : [NSData data];
    if (data == nil) // receiver is not representable in the C string encoding
      return nil;

    data = [data dataByEncodingQuotedPrintable];
    if (data == nil) // encoder reported an error
      return nil;

    return [[[NSString alloc] initWithData:data encoding:NSASCIIStringEncoding]
                       autorelease];
  }

  @end /* NSString(QuotedPrintableCoding) */
  60
  61
  @implementation NSData(QuotedPrintableCoding)

  /*
    Shared worker of the two decoding methods below.

    Allocates a buffer as large as the input (plus two spare bytes, as the
    original code did), runs NGDecodeQuotedPrintableX() on the bytes of _data
    and wraps the result in an NSData which takes over the buffer.

    _replaceUnderline is handed through to the decoder unchanged.

    Returns nil if the input is too long for the decoder's 'unsigned' length
    parameters, if no memory could be allocated or if the decoder returned an
    error. On every failure path the buffer is released (it used to leak when
    the decoder failed).
  */
  static NSData *NGQPDecodeData(NSData *_data, BOOL _replaceUnderline) {
    size_t srcLen;
    char   *dest;
    int    resLen;

    srcLen = [_data length];
    if ((size_t)(unsigned)srcLen != srcLen) // would be truncated by the C API
      return nil;

    dest = malloc(srcLen * sizeof(char) + 2);
    if (dest == NULL)
      return nil;

    resLen = NGDecodeQuotedPrintableX([_data bytes], (unsigned)srcLen,
                                      dest, (unsigned)srcLen,
                                      _replaceUnderline);
    if (resLen < 0) {
      free(dest);
      return nil;
    }

    // NSData owns 'dest' from here on
    return [NSData dataWithBytesNoCopy:dest length:resLen];
  }

  /*
    Decodes quoted-printable content, additionally replacing '_' with 0x20.
    Returns nil on error.
  */
  - (NSData *)dataByDecodingQuotedPrintable {
    return NGQPDecodeData(self, YES);
  }

  /*
    Decodes quoted-printable content, leaving '_' characters untouched.
    Returns nil on error.
  */
  - (NSData *)dataByDecodingQuotedPrintableTransferEncoding {
    return NGQPDecodeData(self, NO);
  }

  /*
    Returns the quoted-printable encoding of the receiver, or nil on error.

    The output buffer is sized for the worst case in which every input byte
    is written as a three character "=XY" escape.
  */
  - (NSData *)dataByEncodingQuotedPrintable {
    size_t   srcLen;
    unsigned capacity;
    char     *dest;
    int      resLen;

    srcLen = [self length];
    if (srcLen == 0) // nothing to encode, do not depend on a 0-byte allocation
      return [NSData data];

    if (srcLen > (size_t)(~0U / 3)) // srcLen * 3 would overflow 'unsigned'
      return nil;

    capacity = (unsigned)srcLen * 3;
    dest     = NGMallocAtomic(sizeof(char) * capacity);
    if (dest == NULL)
      return nil;

    resLen = NGEncodeQuotedPrintable([self bytes], (unsigned)srcLen,
                                     dest, capacity);
    if (resLen < 0) {
      // NOTE(review): uses free() because the success path hands the buffer
      // to +dataWithBytesNoCopy:length:, which already treats it as malloc'ed
      // memory - confirm against NGMemoryAllocation.h
      free(dest);
      return nil;
    }

    return [NSData dataWithBytesNoCopy:dest length:resLen];
  }

  @end /* NSData(QuotedPrintableCoding) */
 112
 113
 114 // implementation
 115
 /*
   Maps a single hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f') to its
   numeric value 0..15. Any other character yields -1.
 */
 static inline signed char __hexToChar(char c) {
   if (c >= '0' && c <= '9')
     return (signed char)(c - '0');
   if (c >= 'A' && c <= 'F')
     return (signed char)(c - 'A' + 10);
   if (c >= 'a' && c <= 'f')
     return (signed char)(c - 'a' + 10);
   return -1; // not a hex digit
 }
 125
 int NGDecodeQuotedPrintableX(const char *_src, unsigned _srcLen,
                              char *_dest, unsigned _destLen,
                              BOOL _replaceUnderline)
 {
   /*
     Decodes _srcLen bytes of quoted-printable data from _src into _dest.

     Eg: "Hello=20World" => "Hello World"

     =XY where XY is a hex encoded byte. If _replaceUnderline is set, '_' is
     in addition decoded as 0x20 (not as space!, this depends on the charset,
     see RFC 2047 4.2).

     An '=' directly followed by CR, LF or CRLF is a soft line break and
     produces no output. If the two characters following an '=' are not both
     hex digits, those two characters are copied literally (the '=' itself
     is dropped).

     Returns the number of bytes written to _dest, or -1 if
       - the input ends in the middle of an escape sequence ("=" or "=X"),
       - _dest is too small to hold the decoded data,
       - a NULL buffer is passed together with a non-zero source length.

     The decoded data is never longer than the source, so a destination of
     _srcLen bytes is always sufficient. A destination larger than the source
     is accepted as well (an inverted size check used to reject it).
   */
   unsigned cnt     = 0;
   unsigned destCnt = 0;

   if ((_srcLen > 0) && ((_src == NULL) || (_dest == NULL)))
     return -1;

   for (cnt = 0; ((cnt < _srcLen) && (destCnt < _destLen)); cnt++) {
     signed char c1, c2;

     if (_src[cnt] != '=') {
       _dest[destCnt] =
         (_replaceUnderline && _src[cnt] == '_') ? 0x20 : _src[cnt];
       destCnt++;
       continue;
     }

     if ((_srcLen - cnt) < 2) // '=' is the very last byte
       break;

     cnt++; // skip '=', cnt is now the index of the byte following it

     if (_src[cnt] == '\r' || _src[cnt] == '\n') {
       // soft line break; only the LF of a CRLF pair belongs to it, and only
       // if it is still inside the source buffer
       if (_src[cnt] == '\r' && (cnt + 1) < _srcLen && _src[cnt + 1] == '\n')
         cnt++;
       continue;
     }

     if ((_srcLen - cnt) < 2) // second hex digit would be past the input
       break;

     c1 = __hexToChar(_src[cnt]);
     cnt++; // skip first hex digit
     c2 = __hexToChar(_src[cnt]);

     if ((c1 == -1) || (c2 == -1)) {
       // not a valid escape, pass the two characters through unchanged
       if ((_destLen - destCnt) < 2)
         break;
       _dest[destCnt] = _src[cnt - 1]; destCnt++;
       _dest[destCnt] = _src[cnt];     destCnt++;
     }
     else {
       _dest[destCnt] = (char)((c1 << 4) | c2);
       destCnt++;
     }
   }

   // leaving the loop early means: truncated escape or destination exhausted
   if (cnt < _srcLen)
     return -1;
   return destCnt;
 }
 /*
   Convenience variant of NGDecodeQuotedPrintableX() which always asks for
   '_' to be replaced; the result of that call is returned as is.
 */
 int NGDecodeQuotedPrintable(const char *_src, unsigned _srcLen,
                             char *_dest, unsigned _destLen)
 {
   // should we deprecate this function?
   const BOOL replaceUnderline = YES;

   return NGDecodeQuotedPrintableX(_src, _srcLen, _dest, _destLen,
                                   replaceUnderline);
 }
 193
 194 /*
 195   From RFC 2045 Multipurpose Internet Mail Extensions
 196
 197   6.7. Quoted-Printable Content-Transfer-Encoding
 198
 199   ...
 200
 201   In this encoding, octets are to be represented as determined by the
 202   following rules:
 203
 204
 205     (1)   (General 8bit representation) Any octet, except a CR or
 206           LF that is part of a CRLF line break of the canonical
 207           (standard) form of the data being encoded, may be
 208           represented by an "=" followed by a two digit
 209           hexadecimal representation of the octet's value.  The
 210           digits of the hexadecimal alphabet, for this purpose,
 211           are "0123456789ABCDEF".  Uppercase letters must be
 212           used; lowercase letters are not allowed.  Thus, for
 213           example, the decimal value 12 (US-ASCII form feed) can
 214           be represented by "=0C", and the decimal value 61 (US-
 215           ASCII EQUAL SIGN) can be represented by "=3D".  This
 216           rule must be followed except when the following rules
 217           allow an alternative encoding.
 218
 219     (2)   (Literal representation) Octets with decimal values of
 220           33 through 60 inclusive, and 62 through 126, inclusive,
 221           MAY be represented as the US-ASCII characters which
 222           correspond to those octets (EXCLAMATION POINT through
 223           LESS THAN, and GREATER THAN through TILDE,
 224           respectively).
 225
 226     (3)   (White Space) Octets with values of 9 and 32 MAY be
 227           represented as US-ASCII TAB (HT) and SPACE characters,
 228           respectively, but MUST NOT be so represented at the end
 229           of an encoded line. Any TAB (HT) or SPACE characters on an
 230           encoded line MUST thus be followed on that line by a printable
 231           character. In particular, an "=" at the end of an encoded line,
 232           indicating a soft line break (see rule #5) may follow one or
 233           more TAB (HT) or SPACE characters. It follows that an octet
 234           with decimal value 9 or 32 appearing at the end of an encoded line
 235           must be represented according to Rule #1. This rule is necessary
 236           because some MTAs (Message Transport Agents, programs which transport
 237           messages from one user to another, or perform a portion of such
 238           transfers) are known to pad lines of text with SPACEs, and others
 239           are known to remove "white space" characters from the end of a line.
 240           Therefore, when decoding a Quoted-Printable body, any trailing white
 241           space on a line must be deleted, as it will necessarily have been
 242           added by intermediate transport agents.
 243
 244
 245     (4)   (Line Breaks) A line break in a text body, represented
 246           as a CRLF sequence in the text canonical form, must be
 247           represented by a (RFC 822) line break, which is also a
 248           CRLF sequence, in the Quoted-Printable encoding.  Since
 249           the canonical representation of media types other than
 250           text do not generally include the representation of
 251           line breaks as CRLF sequences, no hard line breaks
 252           (i.e. line breaks that are intended to be meaningful
 253           and to be displayed to the user) can occur in the
 254           quoted-printable encoding of such types.  Sequences
 255           like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
 256           appear in non-text data represented in quoted-
 257           printable, of course.
 258
 259     (5)   (Soft Line Breaks) The Quoted-Printable encoding
 260           REQUIRES that encoded lines be no more than 76
 261           characters long.  If longer lines are to be encoded
 262           with the Quoted-Printable encoding, "soft" line breaks
 263           must be used.  An equal sign as the last character on a
 264           encoded line indicates such a non-significant ("soft")
 265           line break in the encoded text.
 266
 267 */
 268
 /*
   Writes the quoted-printable form of the _srcLen bytes at _src to _dest.

   TAB, LF, CR and the printable ASCII range (32..126) except '=' are copied
   unchanged, every other byte becomes "=XY" with XY being its value in
   upper-case hex. No soft line breaks are inserted.

   Returns the number of bytes written, or -1 if _destLen is smaller than
   _srcLen or the output does not fit into _dest.
 */
 int NGEncodeQuotedPrintable(const char *_src, unsigned _srcLen,
                             char *_dest, unsigned _destLen) {
   static const char hexDigits[] = "0123456789ABCDEF";
   unsigned in  = 0;
   unsigned out = 0;

   if (_destLen < _srcLen)
     return -1;

   while ((in < _srcLen) && (out < _destLen)) {
     const unsigned char octet = (unsigned char)_src[in];
     const int isLiteral =
       (octet == '\t') || (octet == '\n') || (octet == '\r') ||
       ((octet >= ' ') && (octet <= '~') && (octet != '='));

     if (isLiteral) {
       _dest[out++] = (char)octet;
     }
     else {
       if ((_destLen - out) < 3) // no room left for "=XY"
         break;
       _dest[out++] = '=';
       _dest[out++] = hexDigits[(octet >> 4) & 0x0F];
       _dest[out++] = hexDigits[octet & 0x0F];
     }
     in++;
   }

   // input left over means the destination ran out of space
   return (in < _srcLen) ? -1 : (int)out;
 }
 302
 303 // static linking
 304
 /*
   Link anchor for static linking: presumably referenced from elsewhere so
   that this object file is pulled into statically linked binaries (TODO
   confirm against the callers).

   The body is intentionally empty. It used to call itself unconditionally,
   which recursed without bound if the function was ever actually invoked.
 */
 void __link_NGQuotedPrintableCoding(void) {
 }