2 Copyright (C) 2000-2006 SKYRIX Software AG
3 Copyright (C) 2006 Helge Hess
5 This file is part of SOPE.
7 SOPE is free software; you can redistribute it and/or modify it under
8 the terms of the GNU Lesser General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with SOPE; see the file COPYING. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "NGQuotedPrintableCoding.h"
25 #include "NGMemoryAllocation.h"
// Category on NSString with quoted-printable helpers. Both methods copy the
// C-string bytes of the receiver into an NSData, run the matching
// NSData(QuotedPrintableCoding) method on it and build a new NSString from
// the bytes of the result.
28 @implementation NSString(QuotedPrintableCoding)
// Returns a string built from the quoted-printable-decoded C-string bytes of
// the receiver.
// NOTE(review): the handling of a zero-length receiver is not visible here;
// confirm that data is always assigned before it is used.
30 - (NSString *)stringByDecodingQuotedPrintable {
34 if ((len = [self cStringLength]) > 0) {
// Scratch buffer: the C-string length plus 10 spare bytes.
36 buf = malloc(len + 10);
// NOTE(review): buf is written to here, before the NULL check below, so a
// failed malloc would already have been dereferenced.
37 [self getCString:buf];
// Only the first len bytes of the scratch buffer are handed to NSData.
38 data = [NSData dataWithBytes:buf length:len];
// The scratch buffer is released immediately, so data is expected to hold
// its own copy of the bytes.
39 if (buf != NULL) free(buf);
44 data = [data dataByDecodingQuotedPrintable];
46 // TODO: should we default to some specific charset instead? (either
// The decoded bytes are turned back into a string through the C-string
// constructor, that is without naming an explicit charset (see TODO above).
48 return [NSString stringWithCString:[data bytes] length:[data length]];
// Returns a string built from the quoted-printable-encoded C-string bytes of
// the receiver.
// NOTE(review): the handling of a zero-length receiver and the release of
// buf are not visible here; confirm both.
51 - (NSString *)stringByEncodingQuotedPrintable {
55 if ((len = [self cStringLength])) {
// Scratch buffer: the C-string length plus 10 spare bytes.
// NOTE(review): no NULL check on the malloc result is visible before use.
57 buf = malloc(len + 10);
58 [self getCString:buf];
59 data = [NSData dataWithBytes:buf length:len];
65 data = [data dataByEncodingQuotedPrintable];
// The encoded bytes are turned back into a string through the C-string
// constructor.
67 return [NSString stringWithCString:[data bytes] length:[data length]];
70 @end /* NSString(QuotedPrintableCoding) */
// Category on NSData wrapping the C functions NGDecodeQuotedPrintableX and
// NGEncodeQuotedPrintable. Each method allocates an output buffer, runs the
// C function on the bytes of the receiver and, when the result is not -1,
// wraps the output buffer in an NSData without copying it.
73 @implementation NSData(QuotedPrintableCoding)
// Decodes the receiver as quoted-printable with underscore replacement
// enabled (last argument YES, so an underscore becomes the byte 0x20).
75 - (NSData *)dataByDecodingQuotedPrintable {
// The output buffer is sized from the input length, with two spare bytes in
// the allocation; the decoder itself is only told about destSize bytes.
80 destSize = [self length];
// NOTE(review): no NULL check on the malloc result is visible.
81 dest = malloc(destSize * sizeof(char) + 2);
84 NGDecodeQuotedPrintableX([self bytes], [self length], dest, destSize, YES);
// A result of -1 is treated as failure; otherwise dest is wrapped without
// being copied.
// NOTE(review): the failure arm of this conditional is not visible; confirm
// that dest is released when decoding fails.
86 return ((int)resSize != -1)
87 ? [NSData dataWithBytesNoCopy:dest length:resSize]
// Same as dataByDecodingQuotedPrintable, but with underscore replacement
// disabled (last argument NO), so underscores are copied through unchanged.
90 - (NSData *)dataByDecodingQuotedPrintableTransferEncoding {
95 destSize = [self length];
// NOTE(review): no NULL check on the malloc result is visible.
96 dest = malloc(destSize * sizeof(char) + 2);
99 NGDecodeQuotedPrintableX([self bytes], [self length], dest, destSize, NO);
// NOTE(review): the failure arm is not visible; confirm that dest is released
// when decoding fails.
101 return ((int)resSize != -1)
102 ? [NSData dataWithBytesNoCopy:dest length:resSize]
// Encodes the bytes of the receiver as quoted-printable.
106 - (NSData *)dataByEncodingQuotedPrintable {
107 const char *bytes = [self bytes];
108 unsigned int length = [self length];
110 unsigned int desLen = 0;
// NOTE(review): desLen is initialised to 0 above; presumably it is set to a
// worst-case output size before this allocation (the encoder emits up to
// three output bytes per input byte) - confirm.
113 des = NGMallocAtomic(sizeof(char) * desLen);
// desLen is reused: it goes in as the buffer capacity and comes back as the
// return value of the encoder.
115 desLen = NGEncodeQuotedPrintable(bytes, length, des, desLen);
// A result of -1 is treated as failure; otherwise des is wrapped without
// being copied.
// NOTE(review): the failure arm is not visible; confirm that des is released
// when encoding fails.
117 return (int)desLen != -1
118 ? [NSData dataWithBytesNoCopy:des length:desLen]
122 @end /* NSData(QuotedPrintableCoding) */
// Classifies c as an ASCII hexadecimal digit by range: decimal digits,
// uppercase A to F and lowercase a to f are each tested separately.
// The decoder in this file compares the result against -1 to detect a
// character that is not a hex digit.
// NOTE(review): the value returned for each range is not visible here;
// presumably it is the numeric value of the digit (0 to 15) - confirm.
127 static inline signed char __hexToChar(char c) {
128 if ((c > 47) && (c < 58)) // '0' .. '9'
130 if ((c > 64) && (c < 71)) // 'A' .. 'F'
132 if ((c > 96) && (c < 103)) // 'a' .. 'f'
// Decodes _srcLen bytes of quoted-printable text from _src into _dest, which
// has room for _destLen bytes.
//   _replaceUnderline  when true, an underscore in the input is written as
//                      the byte 0x20 instead of being copied literally
// Callers in this file treat a return value of -1 as failure and any other
// value as the number of bytes written to _dest.
137 int NGDecodeQuotedPrintableX(const char *_src, unsigned _srcLen,
138 char *_dest, unsigned _destLen,
139 BOOL _replaceUnderline)
142 Eg: "Hello=20World" => "Hello World"
144 =XY where XY is a hex encoded byte. In addition '_' is decoded as 0x20
145 (not as space!, this depends on the charset, see RFC 2047 4.2).
148 unsigned destCnt = 0;
// NOTE(review): this guard triggers when the destination is LARGER than the
// source. The callers in this file always pass equal sizes, so it never
// fires for them; verify that the comparison direction is intended.
150 if (_srcLen < _destLen)
// cnt indexes the input, destCnt the output; the loop stops when either the
// input is consumed or the output is full.
153 for (cnt = 0; ((cnt < _srcLen) && (destCnt < _destLen)); cnt++) {
// Any byte other than the equals sign is copied through, subject to the
// optional underscore replacement.
154 if (_src[cnt] != '=') {
156 (_replaceUnderline && _src[cnt] == '_') ? 0x20 : _src[cnt];
// An escape needs at least one more input byte after the equals sign.
160 if ((_srcLen - cnt) > 1) {
164 c1 = _src[cnt]; // first hex digit
// An equals sign followed by CR or LF is a soft line break; a second CR or LF
// directly after it is checked as well, so that two-byte line endings are
// recognised.
166 if (c1 == '\r' || c1 == '\n') {
// NOTE(review): _src[cnt + 1] is read without checking cnt + 1 against
// _srcLen; this looks like a one-byte over-read when the input ends with the
// soft line break - confirm.
167 if (_src[cnt + 1] == '\r' || _src[cnt + 1] == '\n' )
171 c1 = __hexToChar(c1);
173 cnt++; // skip first hex digit
// NOTE(review): the length check above only guarantees one byte after the
// equals sign, so for an input ending in an equals sign plus a single
// character this read appears to be one past the end of _src - confirm.
174 c2 = __hexToChar(_src[cnt]);
// If either character is not a hex digit, the two characters are copied to
// the output verbatim, provided at least two output bytes are free.
176 if ((c1 == -1) || (c2 == -1)) {
177 if ((_destLen - destCnt) > 1) {
178 _dest[destCnt] = _src[cnt - 1]; destCnt++;
179 _dest[destCnt] = _src[cnt]; destCnt++;
// Both digits valid: combine high and low nibble into the decoded byte.
185 register unsigned char c = ((c1 << 4) | c2);
// Convenience wrapper around NGDecodeQuotedPrintableX that always enables
// underscore replacement (last argument YES). Parameters and return value
// are passed through unchanged.
198 int NGDecodeQuotedPrintable(const char *_src, unsigned _srcLen,
199 char *_dest, unsigned _destLen)
201 // should we deprecate that?
202 return NGDecodeQuotedPrintableX(_src, _srcLen, _dest, _destLen, YES);
206 From RFC 2045 Multipurpose Internet Mail Extensions
208 6.7. Quoted-Printable Content-Transfer-Encoding
212 In this encoding, octets are to be represented as determined by the
216 (1) (General 8bit representation) Any octet, except a CR or
217 LF that is part of a CRLF line break of the canonical
218 (standard) form of the data being encoded, may be
219 represented by an "=" followed by a two digit
220 hexadecimal representation of the octet's value. The
221 digits of the hexadecimal alphabet, for this purpose,
222 are "0123456789ABCDEF". Uppercase letters must be
223 used; lowercase letters are not allowed. Thus, for
224 example, the decimal value 12 (US-ASCII form feed) can
225 be represented by "=0C", and the decimal value 61 (US-
226 ASCII EQUAL SIGN) can be represented by "=3D". This
227 rule must be followed except when the following rules
228 allow an alternative encoding.
230 (2) (Literal representation) Octets with decimal values of
231 33 through 60 inclusive, and 62 through 126, inclusive,
232 MAY be represented as the US-ASCII characters which
233 correspond to those octets (EXCLAMATION POINT through
234 LESS THAN, and GREATER THAN through TILDE,
237 (3) (White Space) Octets with values of 9 and 32 MAY be
238 represented as US-ASCII TAB (HT) and SPACE characters,
239 respectively, but MUST NOT be so represented at the end
240 of an encoded line. Any TAB (HT) or SPACE characters on an
241 encoded line MUST thus be followed on that line by a printable
242 character. In particular, an "=" at the end of an encoded line,
243 indicating a soft line break (see rule #5) may follow one or
244 more TAB (HT) or SPACE characters. It follows that an octet
245 with decimal value 9 or 32 appearing at the end of an encoded line
246 must be represented according to Rule #1. This rule is necessary
247 because some MTAs (Message Transport Agents, programs which transport
248 messages from one user to another, or perform a portion of such
249 transfers) are known to pad lines of text with SPACEs, and others
250 are known to remove "white space" characters from the end of a line.
251 Therefore, when decoding a Quoted-Printable body, any trailing white
252 space on a line must be deleted, as it will necessarily have been
253 added by intermediate transport agents.
256 (4) (Line Breaks) A line break in a text body, represented
257 as a CRLF sequence in the text canonical form, must be
258 represented by a (RFC 822) line break, which is also a
259 CRLF sequence, in the Quoted-Printable encoding. Since
260 the canonical representation of media types other than
261 text do not generally include the representation of
262 line breaks as CRLF sequences, no hard line breaks
263 (i.e. line breaks that are intended to be meaningful
264 and to be displayed to the user) can occur in the
265 quoted-printable encoding of such types. Sequences
266 like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
267 appear in non-text data represented in quoted-
268 printable, of course.
270 (5) (Soft Line Breaks) The Quoted-Printable encoding
271 REQUIRES that encoded lines be no more than 76
272 characters long. If longer lines are to be encoded
273 with the Quoted-Printable encoding, "soft" line breaks
274 must be used. An equal sign as the last character on a
275 encoded line indicates such a non-significant ("soft")
276 line break in the encoded text.
// Encodes _srcLen bytes from _src as quoted-printable into _dest, which has
// room for _destLen bytes. The Objective-C wrapper in this file treats a
// return value of -1 as failure and any other value as the output length.
280 int NGEncodeQuotedPrintable(const char *_src, unsigned _srcLen,
281 char *_dest, unsigned _destLen) {
283 unsigned destCnt = 0;
// Lookup table from a 4-bit value to its uppercase hexadecimal digit.
284 char hexT[16] = {'0','1','2','3','4','5','6','7','8',
285 '9','A','B','C','D','E','F'};
// The output can never be shorter than the input, so a destination smaller
// than the source is checked for up front.
287 if (_srcLen > _destLen)
// cnt indexes the input, destCnt the output; the loop stops when either the
// input is consumed or the output is full.
290 for (cnt = 0; (cnt < _srcLen) && (destCnt < _destLen); cnt++) {
// Bytes 32 to 60 and 62 to 126 are copied literally; 61 (the equals sign) is
// deliberately excluded from both ranges.
// NOTE(review): the first line of this condition is not visible, so further
// literal cases may exist - confirm.
295 ((c > 31) && (c < 61)) ||
296 ((c > 61) && (c < 127))) { // no quoting
297 _dest[destCnt++] = c;
299 else { // need to be quoted
// A quoted byte takes three output bytes: the equals sign, then the high and
// the low nibble as uppercase hex digits.
300 if (_destLen - destCnt > 2) {
301 _dest[destCnt++] = '=';
302 _dest[destCnt++] = hexT[(c >> 4) & 15];
303 _dest[destCnt++] = hexT[c & 15];
// Linker anchor symbol for this file.
// NOTE(review): the body calls the function itself unconditionally, so
// actually invoking it would recurse without end. Presumably the symbol only
// exists to be referenced, so that static linking keeps the categories of
// this file - confirm.
316 void __link_NGQuotedPrintableCoding(void) {
317 __link_NGQuotedPrintableCoding();