2 Copyright (C) 2000-2005 SKYRIX Software AG
4 This file is part of SOPE.
6 SOPE is free software; you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with SOPE; see the file COPYING. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 #include "NGQuotedPrintableCoding.h"
24 #include "NGMemoryAllocation.h"
// Category adding quoted-printable helpers to NSString. Both methods copy the
// receiver's C-string bytes into an NSData, run the matching NSData category
// method on it, and build an NSString from the resulting bytes.
26 @implementation NSString(QuotedPrintableCoding)
// Returns a string built from the quoted-printable-decoded bytes of the receiver.
// NOTE(review): the path taken for an empty receiver is not visible in this
// excerpt -- confirm `data` is initialised before it is used below.
28 - (NSString *)stringByDecodingQuotedPrintable {
// Only a receiver with a non-zero C-string length goes through the copy.
32 if ((len = [self cStringLength]) > 0) {
// Scratch buffer of len bytes plus 10 spare bytes.
// NOTE(review): the malloc result is not checked before getCString: writes into
// it, and no free(buf) is visible here -- confirm the buffer is released.
34 buf = malloc(len + 10);
35 [self getCString:buf];
// Only the first len bytes are handed to NSData; the spare bytes are left out.
36 data = [NSData dataWithBytes:buf length:len];
// Decode the bytes, then turn the decoded bytes back into a string.
// NOTE(review): presumably dataByDecodingQuotedPrintable can yield nil on a
// decode failure; -bytes and -length would then be sent to nil -- confirm the
// intended result for malformed input.
42 data = [data dataByDecodingQuotedPrintable];
43 return [NSString stringWithCString:[data bytes] length:[data length]];
// Returns a string built from the quoted-printable-encoded bytes of the receiver.
// Mirrors stringByDecodingQuotedPrintable step for step, calling
// dataByEncodingQuotedPrintable instead of the decoder.
// NOTE(review): the path taken for an empty receiver is not visible in this
// excerpt -- confirm `data` is initialised before it is used below.
45 - (NSString *)stringByEncodingQuotedPrintable {
// Non-zero C-string length: copy the characters into a scratch buffer.
49 if ((len = [self cStringLength])) {
// NOTE(review): same concerns as in the decoder -- unchecked malloc and no
// visible free(buf); confirm.
51 buf = malloc(len + 10);
52 [self getCString:buf];
// Wrap exactly len bytes of the scratch buffer.
53 data = [NSData dataWithBytes:buf length:len];
// Encode the bytes and rebuild a string from the encoded bytes.
59 data = [data dataByEncodingQuotedPrintable];
60 return [NSString stringWithCString:[data bytes] length:[data length]];
// Category adding quoted-printable helpers to NSData. Each method allocates an
// output buffer with NGMallocAtomic, fills it through the C-level
// NGDecodeQuotedPrintable / NGEncodeQuotedPrintable routine, and wraps the
// buffer in an NSData when the routine did not report -1.
65 @implementation NSData(QuotedPrintableCoding)
// Returns the receiver's bytes decoded from quoted-printable.
67 - (NSData *)dataByDecodingQuotedPrintable {
// The output buffer is given the same size as the input.
72 destSize = [self length];
73 dest = NGMallocAtomic(destSize * sizeof(char));
// resSize receives the decoder's return value: a byte count, or -1 on failure.
75 resSize = NGDecodeQuotedPrintable([self bytes],[self length],dest,destSize);
// The result is cast to int so the -1 failure value can be recognised.
// On success the buffer is handed to NSData without being copied.
// NOTE(review): the failure branch of this conditional is not visible here and
// nothing visible releases `dest` in that case -- confirm it is not leaked.
// NOTE(review): confirm NGMallocAtomic memory may be owned by
// dataWithBytesNoCopy:length: (allocator compatibility).
77 return ((int)resSize != -1)
78 ? [NSData dataWithBytesNoCopy:dest length:resSize]
// Returns the receiver's bytes encoded as quoted-printable.
82 - (NSData *)dataByEncodingQuotedPrintable {
83 const char *bytes = [self bytes];
84 unsigned int length = [self length];
// desLen is first the capacity of `des` and later the encoded length.
// NOTE(review): it is 0 here and the statement giving it a capacity before the
// allocation is not visible in this excerpt. The encoder can emit three output
// bytes per input byte, so presumably it becomes a multiple of `length` -- confirm.
86 unsigned int desLen = 0;
89 des = NGMallocAtomic(sizeof(char) * desLen);
// The capacity goes in as the last argument; the return value overwrites it.
91 desLen = NGEncodeQuotedPrintable(bytes, length, des, desLen);
// desLen is unsigned, hence the cast to int to detect the -1 failure value.
// NOTE(review): as in the decoder, the failure branch is not visible and `des`
// is not visibly released there -- confirm.
93 return (int)desLen != -1
94 ? [NSData dataWithBytesNoCopy:des length:desLen]
// Classifies one character as a hexadecimal digit by its ASCII code.
// The three tests below pick out '0'..'9', 'A'..'F' and 'a'..'f' respectively.
// NOTE(review): the return statements are not visible in this excerpt. The
// caller, NGDecodeQuotedPrintable, treats -1 as "not a hex digit" and otherwise
// combines two results as (c1 << 4) | c2, so presumably each branch returns the
// digit's value 0..15 -- confirm.
102 static inline signed char __hexToChar(char c) {
103 if ((c > 47) && (c < 58)) // '0' .. '9'
105 if ((c > 64) && (c < 71)) // 'A' .. 'F'
107 if ((c > 96) && (c < 103)) // 'a' .. 'f'
// Decodes _srcLen bytes of quoted-printable text from _src into _dest, which
// has room for _destLen bytes.
//
// Visible behaviour of the loop:
//   - any byte other than '=' is copied, except that '_' is written as 0x20;
//   - '=' followed by CR or LF is recognised as a soft line break; a second
//     CR/LF directly after it is also checked for;
//   - '=' followed by two hex digits is written as the single byte they encode;
//   - if either character after '=' is not a hex digit, the two characters are
//     copied literally (the '=' itself is not written) when two bytes still fit.
//
// Callers in this file treat a return value of -1 as failure and any other
// value as the number of bytes written to _dest.
// NOTE(review): the statements that produce the return value and that leave the
// loop early are not visible in this excerpt -- confirm the above against them.
112 int NGDecodeQuotedPrintable(const char *_src, unsigned _srcLen,
113 char *_dest, unsigned _destLen)
116 Eg: "Hello=20World" => "Hello World"
118 =XY where XY is a hex encoded byte. In addition '_' is decoded as 0x20
119 (not as space!, this depends on the charset, see RFC 2047 4.2).
// destCnt is the write position in _dest, and so the count of bytes produced.
122 unsigned destCnt = 0;
// NOTE(review): this guard fires when the destination is LARGER than the
// source, which looks inverted next to the encoder's `_srcLen > _destLen`
// check; its body is not visible here -- confirm the intended direction.
124 if (_srcLen < _destLen)
// Stop at the end of the input or when the destination is full.
127 for (cnt = 0; ((cnt < _srcLen) && (destCnt < _destLen)); cnt++) {
// Plain byte: copy it, mapping '_' to 0x20.
128 if (_src[cnt] != '=') {
129 _dest[destCnt] = _src[cnt] == '_' ? 0x20 : _src[cnt];
// Escape: requires at least one more byte after the '='.
// NOTE(review): this guarantees only ONE following byte, while the code below
// reads up to two (`_src[cnt + 1]` and the second hex digit). Input ending in
// "=\r" or "=X" may be read one byte past _srcLen -- confirm.
133 if ((_srcLen - cnt) > 1) {
137 c1 = _src[cnt]; // first hex digit
// '=' directly followed by a line terminator: soft line break.
139 if (c1 == '\r' || c1 == '\n') {
// Looks one byte further for the second half of a two-byte line ending.
140 if (_src[cnt + 1] == '\r' || _src[cnt + 1] == '\n' )
// Convert the first digit; -1 marks a non-hex character.
144 c1 = __hexToChar(c1);
146 cnt++; // skip first hex digit
147 c2 = __hexToChar(_src[cnt]);
// Not a valid escape: emit the two characters as they are, if both fit.
149 if ((c1 == -1) || (c2 == -1)) {
150 if ((_destLen - destCnt) > 1) {
151 _dest[destCnt] = _src[cnt - 1]; destCnt++;
152 _dest[destCnt] = _src[cnt]; destCnt++;
// Valid escape: high nibble from c1, low nibble from c2.
158 register unsigned char c = ((c1 << 4) | c2);
173 From RFC 2045 Multipurpose Internet Mail Extensions
175 6.7. Quoted-Printable Content-Transfer-Encoding
179 In this encoding, octets are to be represented as determined by the
183 (1) (General 8bit representation) Any octet, except a CR or
184 LF that is part of a CRLF line break of the canonical
185 (standard) form of the data being encoded, may be
186 represented by an "=" followed by a two digit
187 hexadecimal representation of the octet's value. The
188 digits of the hexadecimal alphabet, for this purpose,
189 are "0123456789ABCDEF". Uppercase letters must be
190 used; lowercase letters are not allowed. Thus, for
191 example, the decimal value 12 (US-ASCII form feed) can
192 be represented by "=0C", and the decimal value 61 (US-
193 ASCII EQUAL SIGN) can be represented by "=3D". This
194 rule must be followed except when the following rules
195 allow an alternative encoding.
197 (2) (Literal representation) Octets with decimal values of
198 33 through 60 inclusive, and 62 through 126, inclusive,
199 MAY be represented as the US-ASCII characters which
200 correspond to those octets (EXCLAMATION POINT through
201 LESS THAN, and GREATER THAN through TILDE,
204 (3) (White Space) Octets with values of 9 and 32 MAY be
205 represented as US-ASCII TAB (HT) and SPACE characters,
206 respectively, but MUST NOT be so represented at the end
207 of an encoded line. Any TAB (HT) or SPACE characters on an
208 encoded line MUST thus be followed on that line by a printable
209 character. In particular, an "=" at the end of an encoded line,
210 indicating a soft line break (see rule #5) may follow one or
211 more TAB (HT) or SPACE characters. It follows that an octet
212 with decimal value 9 or 32 appearing at the end of an encoded line
213 must be represented according to Rule #1. This rule is necessary
214 because some MTAs (Message Transport Agents, programs which transport
215 messages from one user to another, or perform a portion of such
216 transfers) are known to pad lines of text with SPACEs, and others
217 are known to remove "white space" characters from the end of a line.
218 Therefore, when decoding a Quoted-Printable body, any trailing white
219 space on a line must be deleted, as it will necessarily have been
220 added by intermediate transport agents.
223 (4) (Line Breaks) A line break in a text body, represented
224 as a CRLF sequence in the text canonical form, must be
225 represented by a (RFC 822) line break, which is also a
226 CRLF sequence, in the Quoted-Printable encoding. Since
227 the canonical representation of media types other than
228 text do not generally include the representation of
229 line breaks as CRLF sequences, no hard line breaks
230 (i.e. line breaks that are intended to be meaningful
231 and to be displayed to the user) can occur in the
232 quoted-printable encoding of such types. Sequences
233 like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
234 appear in non-text data represented in quoted-
235 printable, of course.
237 (5) (Soft Line Breaks) The Quoted-Printable encoding
238 REQUIRES that encoded lines be no more than 76
239 characters long. If longer lines are to be encoded
240 with the Quoted-Printable encoding, "soft" line breaks
241 must be used. An equal sign as the last character on a
242 encoded line indicates such a non-significant ("soft")
243 line break in the encoded text.
// Encodes _srcLen bytes from _src as quoted-printable into _dest, which has
// room for _destLen bytes.
//
// Bytes accepted by the pass-through test are copied unchanged; every other
// byte is written as '=' followed by two uppercase hex digits.
// Callers in this file treat a return value of -1 as failure and any other
// value as the number of bytes written to _dest.
// NOTE(review): the return statements are not visible in this excerpt -- confirm.
// NOTE(review): the visible code inserts no soft line breaks, so output lines
// can exceed the 76-character limit of RFC 2045 rule (5) -- confirm whether
// callers rely on that.
247 int NGEncodeQuotedPrintable(const char *_src, unsigned _srcLen,
248 char *_dest, unsigned _destLen) {
// destCnt is the write position in _dest.
250 unsigned destCnt = 0;
// Lookup table from a 4-bit value to its uppercase hex digit.
251 char hexT[16] = {'0','1','2','3','4','5','6','7','8',
252 '9','A','B','C','D','E','F'};
// Guard for a destination smaller than the source; its body is not visible here.
254 if (_srcLen > _destLen)
// Stop at the end of the input or when the destination is full.
257 for (cnt = 0; (cnt < _srcLen) && (destCnt < _destLen); cnt++) {
// Visible part of the pass-through test: codes 32..60 and 62..126, i.e.
// everything from space to '~' except '=' (61).
// NOTE(review): the start of this condition is not visible in this excerpt;
// further codes may be accepted there -- confirm.
262 ((c > 31) && (c < 61)) ||
263 ((c > 61) && (c < 127))) { // no quoting
264 _dest[destCnt++] = c;
266 else { // need to be quoted
// An escape takes three bytes, so at least three must still be free.
267 if (_destLen - destCnt > 2) {
268 _dest[destCnt++] = '=';
// Masking with 15 keeps each index inside the 16-entry table.
269 _dest[destCnt++] = hexT[(c >> 4) & 15];
270 _dest[destCnt++] = hexT[c & 15];
// Function whose only visible statement is a call to itself.
// NOTE(review): presumably this exists purely as a symbol that other code can
// reference to force this object file to be linked, and is never meant to be
// executed -- calling it would recurse without end. Confirm.
283 void __link_NGQuotedPrintableCoding(void) {
284 __link_NGQuotedPrintableCoding();