err.no Git - sope/blob - sope-core/NGExtensions/NGQuotedPrintableCoding.m

   1 /*
   2   Copyright (C) 2000-2006 SKYRIX Software AG
   3   Copyright (C) 2006      Helge Hess
   4
   5   This file is part of SOPE.
   6
   7   SOPE is free software; you can redistribute it and/or modify it under
   8   the terms of the GNU Lesser General Public License as published by the
   9   Free Software Foundation; either version 2, or (at your option) any
  10   later version.
  11
  12   SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
  13   WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  15   License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public
  18   License along with SOPE; see the file COPYING.  If not, write to the
  19   Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20   02111-1307, USA.
  21 */
  22
  23 #include "NGQuotedPrintableCoding.h"
  24 #include "common.h"
  25 #include "NGMemoryAllocation.h"
  26
  27
  /*
    Returns the C-string bytes of _string wrapped in an NSData object.

    An empty string yields an empty NSData. nil is returned if the temporary
    buffer cannot be allocated, so that callers never hand a NULL buffer to
    -getCString:.
  */
  static NSData *NGQPCStringDataFromString(NSString *_string) {
    NSData   *data;
    unsigned len;
    char     *buf;

    if ((len = [_string cStringLength]) == 0)
      return [NSData data];

    /* -getCString: appends a terminating NUL, so allocate some slack */
    if ((buf = malloc(len + 10)) == NULL)
      return nil;

    [_string getCString:buf];
    data = [NSData dataWithBytes:buf length:len];
    free(buf);
    return data;
  }

  @implementation NSString(QuotedPrintableCoding)

  /*
    Decodes the receiver as quoted-printable text ('_' is decoded as 0x20,
    see -[NSData dataByDecodingQuotedPrintable]) and returns the result as a
    string built from the decoded C-string bytes.

    Returns nil if the temporary buffer cannot be allocated.
  */
  - (NSString *)stringByDecodingQuotedPrintable {
    NSData *data;

    if ((data = NGQPCStringDataFromString(self)) == nil)
      return nil;

    data = [data dataByDecodingQuotedPrintable];

    // NOTE(review): -dataByDecodingQuotedPrintable returns nil on malformed
    //       input; messaging nil then passes NULL/0 to the call below. What
    //       that returns depends on the Foundation in use - confirm.
    // TODO: should we default to some specific charset instead? (either
    //       Latin1 or UTF-8)
    return [NSString stringWithCString:[data bytes] length:[data length]];
  }

  /*
    Encodes the C-string bytes of the receiver as quoted-printable and returns
    the encoded text as a string.

    Returns nil if the temporary buffer cannot be allocated.
  */
  - (NSString *)stringByEncodingQuotedPrintable {
    NSData *data;

    if ((data = NGQPCStringDataFromString(self)) == nil)
      return nil;

    data = [data dataByEncodingQuotedPrintable];

    return [NSString stringWithCString:[data bytes] length:[data length]];
  }

  @end /* NSString(QuotedPrintableCoding) */
  71
  72
  /*
    Shared implementation of the two decoding methods below.

    Decodes the quoted-printable bytes of _data into a freshly malloc'ed
    buffer. On success the buffer is handed over to the returned NSData
    (no copy). If the buffer cannot be allocated, or the decoder reports an
    error (-1), the buffer is released and nil is returned.
  */
  static NSData *NGQPDecodeData(NSData *_data, BOOL _replaceUnderline) {
    size_t srcLen;
    char   *dest;
    int    resLen;

    srcLen = [_data length];

    /* decoded output never gets longer than the encoded input */
    if ((dest = malloc(srcLen * sizeof(char) + 2)) == NULL)
      return nil;

    resLen = NGDecodeQuotedPrintableX([_data bytes], srcLen, dest, srcLen,
                                      _replaceUnderline);
    if (resLen == -1) {
      /* nobody took ownership of the buffer, so release it here */
      free(dest);
      return nil;
    }
    return [NSData dataWithBytesNoCopy:dest length:resLen];
  }

  @implementation NSData(QuotedPrintableCoding)

  /*
    Decodes the receiver as quoted-printable, additionally replacing '_'
    with 0x20. Returns nil if the input cannot be decoded.
  */
  - (NSData *)dataByDecodingQuotedPrintable {
    return NGQPDecodeData(self, YES);
  }

  /*
    Decodes the receiver as quoted-printable, leaving '_' untouched.
    Returns nil if the input cannot be decoded.
  */
  - (NSData *)dataByDecodingQuotedPrintableTransferEncoding {
    return NGQPDecodeData(self, NO);
  }

  /*
    Encodes the receiver as quoted-printable. The output buffer is sized at
    three bytes per input byte, which covers the case that every byte needs
    to be quoted as "=XY". Returns nil if the buffer cannot be allocated or
    the encoder reports an error (-1).
  */
  - (NSData *)dataByEncodingQuotedPrintable {
    const char   *bytes = [self bytes];
    unsigned int length = [self length];
    unsigned int desLen = length * 3;
    char         *des;
    int          resLen;

    des = NGMallocAtomic(sizeof(char) * desLen);
    /* a NULL result is only an error if we actually asked for memory */
    if ((des == NULL) && (desLen > 0))
      return nil;

    resLen = NGEncodeQuotedPrintable(bytes, length, des, desLen);
    if (resLen == -1) {
      // NOTE(review): 'des' is not released on this path; the deallocator
      //       matching NGMallocAtomic is not visible in this file - confirm
      //       and free the buffer here.
      return nil;
    }
    return [NSData dataWithBytesNoCopy:des length:resLen];
  }

  @end /* NSData(QuotedPrintableCoding) */
 123
 124
 125 // implementation
 126
 /*
   Maps a single hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f') to its
   numeric value 0..15. Any other character yields -1.
 */
 static inline signed char __hexToChar(char c) {
   if (c >= '0' && c <= '9')
     return c - '0';
   if (c >= 'A' && c <= 'F')
     return (c - 'A') + 10;
   if (c >= 'a' && c <= 'f')
     return (c - 'a') + 10;
   return -1; // not a hex digit
 }
 136
 /*
   Decodes _srcLen bytes of quoted-printable text from _src into _dest.

   Eg: "Hello=20World" => "Hello World"

   =XY where XY is a hex encoded byte. If _replaceUnderline is set, '_' is
   in addition decoded as 0x20 (not as space!, this depends on the charset,
   see RFC 2047 4.2). '=' directly followed by CR and/or LF is a soft line
   break and produces no output.

   Malformed escapes are handled leniently: if the two characters after '='
   are not both hex digits they are copied literally (the '=' itself is
   dropped). An escape which is cut off after its first character at the end
   of the input is treated the same way.

   Returns the number of bytes written to _dest, or -1 if _destLen is larger
   than _srcLen, if the input ends in a lone '=', or if _dest is too small.
 */
 int NGDecodeQuotedPrintableX(const char *_src, unsigned _srcLen,
                              char *_dest, unsigned _destLen,
                              BOOL _replaceUnderline)
 {
   unsigned cnt     = 0;
   unsigned destCnt = 0;

   // NOTE(review): this rejects destination buffers which are *larger* than
   //       the source, which looks inverted. Kept as is because the callers
   //       in this file always pass equal lengths - confirm before changing.
   if (_srcLen < _destLen)
     return -1;

   for (cnt = 0; ((cnt < _srcLen) && (destCnt < _destLen)); cnt++) {
     signed char c1, c2;

     if (_src[cnt] != '=') {
       _dest[destCnt] =
         (_replaceUnderline && _src[cnt] == '_') ? 0x20 : _src[cnt];
       destCnt++;
       continue;
     }

     if ((_srcLen - cnt) <= 1)
       break; // lone '=' at the very end, reported as an error below

     cnt++;          // skip '='
     c1 = _src[cnt]; // first hex digit (or start of a soft line break)

     if (c1 == '\r' || c1 == '\n') {
       /* soft line break; only peek at the next byte if there is one */
       if (((cnt + 1) < _srcLen) &&
           (_src[cnt + 1] == '\r' || _src[cnt + 1] == '\n'))
         cnt++;
       continue;
     }

     if ((_srcLen - cnt) <= 1) {
       /*
         Escape is cut off after its first character ("...=X"). There is no
         second digit to read, so copy the character literally instead of
         reading past the end of _src. The loop condition guarantees room
         for one byte in _dest.
       */
       _dest[destCnt] = _src[cnt];
       destCnt++;
       continue;
     }

     c1 = __hexToChar(c1);

     cnt++; // skip first hex digit
     c2 = __hexToChar(_src[cnt]);

     if ((c1 == -1) || (c2 == -1)) {
       /* not a valid escape, pass both characters through */
       if ((_destLen - destCnt) > 1) {
         _dest[destCnt] = _src[cnt - 1]; destCnt++;
         _dest[destCnt] = _src[cnt];     destCnt++;
       }
       else
         break;
     }
     else {
       register unsigned char c = ((c1 << 4) | c2);
       _dest[destCnt] = c;
       destCnt++;
     }
   }
   if (cnt < _srcLen) // stopped early: incomplete input or _dest exhausted
     return -1;
   return destCnt;
 }
 /*
   Convenience variant of NGDecodeQuotedPrintableX() which always decodes
   '_' as 0x20. Returns whatever NGDecodeQuotedPrintableX() returns.
 */
 int NGDecodeQuotedPrintable(const char *_src, unsigned _srcLen,
                             char *_dest, unsigned _destLen)
 {
   // TODO: should we deprecate this function?
   const BOOL replaceUnderline = YES;

   return NGDecodeQuotedPrintableX(_src, _srcLen, _dest, _destLen,
                                   replaceUnderline);
 }
 204
 205 /*
 206   From RFC 2045 Multipurpose Internet Mail Extensions
 207
 208   6.7. Quoted-Printable Content-Transfer-Encoding
 209
 210   ...
 211
 212   In this encoding, octets are to be represented as determined by the
 213   following rules:
 214
 215
 216     (1)   (General 8bit representation) Any octet, except a CR or
 217           LF that is part of a CRLF line break of the canonical
 218           (standard) form of the data being encoded, may be
 219           represented by an "=" followed by a two digit
 220           hexadecimal representation of the octet's value.  The
 221           digits of the hexadecimal alphabet, for this purpose,
 222           are "0123456789ABCDEF".  Uppercase letters must be
 223           used; lowercase letters are not allowed.  Thus, for
 224           example, the decimal value 12 (US-ASCII form feed) can
 225           be represented by "=0C", and the decimal value 61 (US-
 226           ASCII EQUAL SIGN) can be represented by "=3D".  This
 227           rule must be followed except when the following rules
 228           allow an alternative encoding.
 229
 230     (2)   (Literal representation) Octets with decimal values of
 231           33 through 60 inclusive, and 62 through 126, inclusive,
 232           MAY be represented as the US-ASCII characters which
 233           correspond to those octets (EXCLAMATION POINT through
 234           LESS THAN, and GREATER THAN through TILDE,
 235           respectively).
 236
 237     (3)   (White Space) Octets with values of 9 and 32 MAY be
 238           represented as US-ASCII TAB (HT) and SPACE characters,
 239           respectively, but MUST NOT be so represented at the end
 240           of an encoded line. Any TAB (HT) or SPACE characters on an
 241           encoded line MUST thus be followed on that line by a printable
 242           character. In particular, an "=" at the end of an encoded line,
 243           indicating a soft line break (see rule #5) may follow one or
 244           more TAB (HT) or SPACE characters. It follows that an octet
 245           with decimal value 9 or 32 appearing at the end of an encoded line
 246           must be represented according to Rule #1. This rule is necessary
 247           because some MTAs (Message Transport Agents, programs which transport
 248           messages from one user to another, or perform a portion of such
 249           transfers) are known to pad lines of text with SPACEs, and others
 250           are known to remove "white space" characters from the end of a line.
 251           Therefore, when decoding a Quoted-Printable body, any trailing white
 252           space on a line must be deleted, as it will necessarily have been
 253           added by intermediate transport agents.
 254
 255
 256     (4)   (Line Breaks) A line break in a text body, represented
 257           as a CRLF sequence in the text canonical form, must be
 258           represented by a (RFC 822) line break, which is also a
 259           CRLF sequence, in the Quoted-Printable encoding.  Since
 260           the canonical representation of media types other than
 261           text do not generally include the representation of
 262           line breaks as CRLF sequences, no hard line breaks
 263           (i.e. line breaks that are intended to be meaningful
 264           and to be displayed to the user) can occur in the
 265           quoted-printable encoding of such types.  Sequences
 266           like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
 267           appear in non-text data represented in quoted-
 268           printable, of course.
 269
 270     (5)   (Soft Line Breaks) The Quoted-Printable encoding
 271           REQUIRES that encoded lines be no more than 76
 272           characters long.  If longer lines are to be encoded
 273           with the Quoted-Printable encoding, "soft" line breaks
 274           must be used.  An equal sign as the last character on a
 275           encoded line indicates such a non-significant ("soft")
 276           line break in the encoded text.
 277
 278 */
 279
 /*
   Encodes _srcLen bytes from _src as quoted-printable into _dest.

   TAB, LF, CR and the characters 32..126 except '=' are copied unchanged,
   every other byte is written as '=' followed by two uppercase hex digits.
   No soft line breaks are inserted.

   Returns the number of bytes written, or -1 if _dest is smaller than _src
   or runs out of space before all input has been encoded.
 */
 int NGEncodeQuotedPrintable(const char *_src, unsigned _srcLen,
                             char *_dest, unsigned _destLen) {
   static const char hexDigits[] = "0123456789ABCDEF";
   unsigned in  = 0;
   unsigned out = 0;

   if (_destLen < _srcLen)
     return -1;

   while ((in < _srcLen) && (out < _destLen)) {
     const char ch = _src[in];
     const int isLiteral =
       (ch == '\t') || (ch == '\n') || (ch == '\r') ||
       ((ch >= ' ') && (ch <= '~') && (ch != '='));

     if (isLiteral) {
       _dest[out++] = ch;
     }
     else {
       /* a quoted byte needs three characters of output */
       if ((_destLen - out) < 3)
         break;
       _dest[out++] = '=';
       _dest[out++] = hexDigits[(ch >> 4) & 0x0F];
       _dest[out++] = hexDigits[ch & 0x0F];
     }
     in++;
   }

   /* input left over means the destination buffer was too small */
   return (in < _srcLen) ? -1 : (int)out;
 }
 313
 314 // static linking
 315
 /*
   Symbol for static linking: presumably other code references this function
   so that the linker pulls in this object file (and with it the categories
   defined above) - TODO confirm against the referencing code.

   NOTE(review): the body only calls itself, so actually invoking this
   function would recurse without end. It looks like it is never meant to be
   called - verify that no caller does.
 */
 void __link_NGQuotedPrintableCoding(void) {
   __link_NGQuotedPrintableCoding();
 }