2 Copyright (C) 2000-2008 SKYRIX Software AG
3 Copyright (C) 2006-2008 Helge Hess
5 This file is part of SOPE.
7 SOPE is free software; you can redistribute it and/or modify it under
8 the terms of the GNU Lesser General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any later version.
12 SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with SOPE; see the file COPYING. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 #include "NGQuotedPrintableCoding.h"
25 #include "NGMemoryAllocation.h"
@implementation NSString(QuotedPrintableCoding)

/*
  Returns the receiver with its quoted-printable escapes decoded.

  The receiver is converted to ASCII bytes, passed through
  -dataByDecodingQuotedPrintable and the resulting bytes are wrapped in a
  string again. No explicit charset is applied to the decoded bytes.
*/
- (NSString *)stringByDecodingQuotedPrintable {
  NSData *encoded;
  NSData *decoded;

  if ([self length] > 0)
    encoded = [self dataUsingEncoding:NSASCIIStringEncoding];
  else
    encoded = [NSData data];

  decoded = [encoded dataByDecodingQuotedPrintable];

  // TODO: should we default to some specific charset instead, or use the
  //       charset of the receiver?
  return [NSString stringWithCString:[decoded bytes]
                   length:[decoded length]];
}

/*
  Returns the receiver encoded as quoted-printable text.

  The receiver is converted to bytes in the default C string encoding,
  passed through -dataByEncodingQuotedPrintable and the (pure ASCII) result
  is returned as an autoreleased string.
*/
- (NSString *)stringByEncodingQuotedPrintable {
  NSData   *plain;
  NSData   *quoted;
  NSString *result;

  // TBD: which encoding to use?
  if ([self length] > 0)
    plain = [self dataUsingEncoding:[NSString defaultCStringEncoding]];
  else
    plain = [NSData data];

  quoted = [plain dataByEncodingQuotedPrintable];

  result = [[NSString alloc] initWithData:quoted
                             encoding:NSASCIIStringEncoding];
  return [result autorelease];
}

@end /* NSString(QuotedPrintableCoding) */
/*
  Shared implementation of the two decoding methods of the category below.

  Decodes the quoted-printable bytes of _data into a freshly malloc'ed buffer
  and wraps that buffer in an NSData object. _replaceUnderline is handed
  through to NGDecodeQuotedPrintableX().

  Returns nil if the buffer cannot be allocated or if the decoder reports an
  error (-1). In the error case the buffer is released here, because no
  NSData object exists that could take ownership of it.
*/
static NSData *_NGDecodedQuotedPrintableData(NSData *_data,
                                             BOOL _replaceUnderline)
{
  unsigned srcLen;
  char     *dest;
  int      resSize;

  srcLen = [_data length];

  /* the decoder emits at most one byte per input byte */
  dest = malloc((size_t)srcLen + 2);
  if (dest == NULL)
    return nil;

  resSize = NGDecodeQuotedPrintableX([_data bytes], srcLen, dest, srcLen,
                                     _replaceUnderline);
  if (resSize == -1) {
    free(dest);
    return nil;
  }

  /* the NSData takes ownership of 'dest' and frees it on deallocation */
  return [NSData dataWithBytesNoCopy:dest length:resSize];
}

@implementation NSData(QuotedPrintableCoding)

/*
  Decodes the receiver as quoted-printable data; '_' is decoded as 0x20.
  Returns nil if decoding fails.
*/
- (NSData *)dataByDecodingQuotedPrintable {
  return _NGDecodedQuotedPrintableData(self, YES);
}

/*
  Decodes the receiver as quoted-printable data, leaving '_' untouched.
  Returns nil if decoding fails.
*/
- (NSData *)dataByDecodingQuotedPrintableTransferEncoding {
  return _NGDecodedQuotedPrintableData(self, NO);
}

/*
  Encodes the receiver as quoted-printable data.
  Returns nil if the output buffer cannot be allocated or encoding fails.
*/
- (NSData *)dataByEncodingQuotedPrintable {
  const char   *bytes  = [self bytes];
  unsigned int length  = [self length];
  char         *des    = NULL;
  unsigned int desLen  = 0;
  int          resLen;

  /* worst case: every byte is quoted as "=XY", i.e. three output bytes */
  if (length > ((unsigned int)-1) / 3)
    return nil; /* length * 3 would overflow */
  desLen = length * 3;

  /*
    Plain malloc (as in the decoding methods): the buffer is handed to
    +dataWithBytesNoCopy:length:, which releases it with free().
    Never request 0 bytes, so that NULL always means "out of memory".
  */
  des = malloc(desLen > 0 ? desLen : 1);
  if (des == NULL)
    return nil;

  resLen = NGEncodeQuotedPrintable(bytes, length, des, desLen);
  if (resLen == -1) {
    /* nobody took ownership of the buffer, release it ourselves */
    free(des);
    return nil;
  }

  return [NSData dataWithBytesNoCopy:des length:resLen];
}

@end /* NSData(QuotedPrintableCoding) */
/*
  Maps a single hexadecimal digit character to its numeric value (0..15).
  Uppercase and lowercase letters are both accepted; every other character
  yields -1.
*/
static inline signed char __hexToChar(char c) {
  signed char value = -1;

  if ((c >= '0') && (c <= '9'))
    value = c - '0';
  else if ((c >= 'A') && (c <= 'F'))
    value = c - 'A' + 10;
  else if ((c >= 'a') && (c <= 'f'))
    value = c - 'a' + 10;

  return value;
}
/*
  Decodes quoted-printable text from _src (_srcLen bytes) into _dest.

  Eg: "Hello=20World" => "Hello World"

  - "=XY", where XY is a hex encoded byte, is decoded to that byte.
  - '=' directly followed by a line break is a soft line break and produces
    no output; a second line break character right after it is skipped too.
  - If _replaceUnderline is YES, '_' is decoded as 0x20 (not as space!, this
    depends on the charset, see RFC 2047 4.2).
  - '=' followed by characters which are not a valid hex pair is not treated
    as an error: the '=' is dropped and the following characters are copied
    literally. The same is done for a truncated "=X" at the very end of the
    input.

  Returns the number of bytes written to _dest, or -1 if the input could not
  be consumed completely (destination too small, '=' as the last input byte).
  No byte beyond _src[_srcLen - 1] is read.
*/
int NGDecodeQuotedPrintableX(const char *_src, unsigned _srcLen,
                             char *_dest, unsigned _destLen,
                             BOOL _replaceUnderline)
{
  unsigned cnt     = 0;
  unsigned destCnt = 0;

  // NOTE(review): this rejects destination buffers which are *larger* than
  // the source. The comparison looks inverted, but it is kept as-is because
  // the callers in this file always pass _destLen == _srcLen - confirm.
  if (_srcLen < _destLen)
    return -1;

  for (cnt = 0; ((cnt < _srcLen) && (destCnt < _destLen)); cnt++) {
    signed char c1, c2;

    if (_src[cnt] != '=') {
      _dest[destCnt++] =
        (_replaceUnderline && _src[cnt] == '_') ? 0x20 : _src[cnt];
      continue;
    }

    if ((_srcLen - cnt) <= 1)
      break; // '=' is the last input byte => reported as error below

    cnt++;          // skip '='
    c1 = _src[cnt]; // first hex digit (or start of a line break)

    if (c1 == '\r' || c1 == '\n') {
      // soft line break; only peek at the next byte if there is one
      if (((cnt + 1) < _srcLen) &&
          (_src[cnt + 1] == '\r' || _src[cnt + 1] == '\n'))
        cnt++;
      continue;
    }

    if ((cnt + 1) >= _srcLen) {
      // truncated "=X" at the end of the input: there is no second digit to
      // read, so copy X literally like any other invalid escape
      _dest[destCnt++] = c1;
      continue;
    }

    c1 = __hexToChar(c1);

    cnt++; // skip first hex digit
    c2 = __hexToChar(_src[cnt]);

    if ((c1 == -1) || (c2 == -1)) {
      // not a hex pair: pass both characters through unchanged
      if ((_destLen - destCnt) > 1) {
        _dest[destCnt++] = _src[cnt - 1];
        _dest[destCnt++] = _src[cnt];
      }
      else
        break;
    }
    else {
      _dest[destCnt++] = (char)(unsigned char)((c1 << 4) | c2);
    }
  }

  if (cnt < _srcLen) // stopped early, input was not fully decoded
    return -1;
  return destCnt;
}
/*
  Convenience variant of NGDecodeQuotedPrintableX() which always decodes
  '_' as 0x20. Arguments and result are passed through unchanged.
*/
int NGDecodeQuotedPrintable(const char *_src, unsigned _srcLen,
                            char *_dest, unsigned _destLen)
{
  // should we deprecate that?
  const BOOL replaceUnderline = YES;

  return NGDecodeQuotedPrintableX(_src, _srcLen, _dest, _destLen,
                                  replaceUnderline);
}
195 From RFC 2045 Multipurpose Internet Mail Extensions
197 6.7. Quoted-Printable Content-Transfer-Encoding
201 In this encoding, octets are to be represented as determined by the following rules:
205 (1) (General 8bit representation) Any octet, except a CR or
206 LF that is part of a CRLF line break of the canonical
207 (standard) form of the data being encoded, may be
208 represented by an "=" followed by a two digit
209 hexadecimal representation of the octet's value. The
210 digits of the hexadecimal alphabet, for this purpose,
211 are "0123456789ABCDEF". Uppercase letters must be
212 used; lowercase letters are not allowed. Thus, for
213 example, the decimal value 12 (US-ASCII form feed) can
214 be represented by "=0C", and the decimal value 61 (US-
215 ASCII EQUAL SIGN) can be represented by "=3D". This
216 rule must be followed except when the following rules
217 allow an alternative encoding.
219 (2) (Literal representation) Octets with decimal values of
220 33 through 60 inclusive, and 62 through 126, inclusive,
221 MAY be represented as the US-ASCII characters which
222 correspond to those octets (EXCLAMATION POINT through
223 LESS THAN, and GREATER THAN through TILDE, respectively).
226 (3) (White Space) Octets with values of 9 and 32 MAY be
227 represented as US-ASCII TAB (HT) and SPACE characters,
228 respectively, but MUST NOT be so represented at the end
229 of an encoded line. Any TAB (HT) or SPACE characters on an
230 encoded line MUST thus be followed on that line by a printable
231 character. In particular, an "=" at the end of an encoded line,
232 indicating a soft line break (see rule #5) may follow one or
233 more TAB (HT) or SPACE characters. It follows that an octet
234 with decimal value 9 or 32 appearing at the end of an encoded line
235 must be represented according to Rule #1. This rule is necessary
236 because some MTAs (Message Transport Agents, programs which transport
237 messages from one user to another, or perform a portion of such
238 transfers) are known to pad lines of text with SPACEs, and others
239 are known to remove "white space" characters from the end of a line.
240 Therefore, when decoding a Quoted-Printable body, any trailing white
241 space on a line must be deleted, as it will necessarily have been
242 added by intermediate transport agents.
245 (4) (Line Breaks) A line break in a text body, represented
246 as a CRLF sequence in the text canonical form, must be
247 represented by a (RFC 822) line break, which is also a
248 CRLF sequence, in the Quoted-Printable encoding. Since
249 the canonical representation of media types other than
250 text do not generally include the representation of
251 line breaks as CRLF sequences, no hard line breaks
252 (i.e. line breaks that are intended to be meaningful
253 and to be displayed to the user) can occur in the
254 quoted-printable encoding of such types. Sequences
255 like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
256 appear in non-text data represented in quoted-
257 printable, of course.
259 (5) (Soft Line Breaks) The Quoted-Printable encoding
260 REQUIRES that encoded lines be no more than 76
261 characters long. If longer lines are to be encoded
262 with the Quoted-Printable encoding, "soft" line breaks
263 must be used. An equal sign as the last character on a
264 encoded line indicates such a non-significant ("soft")
265 line break in the encoded text.
// Quoted-printable encoder: walks the _srcLen bytes of _src and writes the
// encoded form to _dest (capacity _destLen). Bytes accepted by the
// "no quoting" test are copied unchanged; all others are written as '='
// followed by two uppercase hex digits.
// NOTE(review): the start of the "no quoting" condition and the end of the
// function (including its return values) are not visible in this excerpt -
// verify them against the full file before relying on this description.
269 int NGEncodeQuotedPrintable(const char *_src, unsigned _srcLen,
270 char *_dest, unsigned _destLen) {
// write position in _dest
272 unsigned destCnt = 0;
// lookup table: 4-bit value -> uppercase hex digit
273 char hexT[16] = {'0','1','2','3','4','5','6','7','8',
274 '9','A','B','C','D','E','F'};
// the encoded form is never shorter than the input, so a destination smaller
// than the source cannot work; presumably this is an early failure return -
// TODO confirm, the statement guarded by this test is not visible here
276 if (_srcLen > _destLen)
// stop as soon as either the input is consumed or the output buffer is full
279 for (cnt = 0; (cnt < _srcLen) && (destCnt < _destLen); cnt++) {
// values 32..60 and 62..126 pass through literally; 61 ('=') is excluded
// because it introduces an escape sequence
284 ((c > 31) && (c < 61)) ||
285 ((c > 61) && (c < 127))) { // no quoting
286 _dest[destCnt++] = c;
288 else { // need to be quoted
// an escape sequence needs three free bytes in _dest
289 if (_destLen - destCnt > 2) {
290 _dest[destCnt++] = '=';
// high nibble first, then low nibble; the '& 15' mask keeps the table index
// within 0..15
291 _dest[destCnt++] = hexT[(c >> 4) & 15];
292 _dest[destCnt++] = hexT[c & 15];
// Symbol named after this file; presumably it exists only so that other code
// can reference it and thereby force this object file (and the categories it
// contains) to be linked - TODO confirm against the callers.
// Not meant to be executed: the body calls itself unconditionally.
305 void __link_NGQuotedPrintableCoding(void) {
306 __link_NGQuotedPrintableCoding();