/*
  Copyright (C) 2000-2005 SKYRIX Software AG

  This file is part of SOPE.

  SOPE is free software; you can redistribute it and/or modify it under
  the terms of the GNU Lesser General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
  WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with SOPE; see the file COPYING.  If not, write to the
  Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/
22 #include "NGQuotedPrintableCoding.h"
24 #include "NGMemoryAllocation.h"
/*
  Returns the bytes of the C-string representation of _string (as produced
  by -cStringLength / -getCString:) wrapped in an NSData, without the
  terminating NUL byte.

  An empty string yields an empty NSData. nil is returned only if the
  temporary buffer cannot be allocated.
*/
static NSData *NGQPCStringDataOfString(NSString *_string) {
  NSData   *data;
  unsigned len;
  char     *buf;

  if ((len = [_string cStringLength]) == 0)
    return [NSData data];

  /* -getCString: also writes a terminating NUL, so leave some headroom */
  if ((buf = malloc(len + 10)) == NULL)
    return nil;

  [_string getCString:buf];
  data = [NSData dataWithBytes:buf length:len]; /* copies the bytes */
  free(buf);
  return data;
}

@implementation NSString(QuotedPrintableCoding)

/*
  Decodes the receiver, interpreted as quoted-printable text, and returns
  the result as a new string.

  The receiver is converted to its C-string bytes, decoded using
  -dataByDecodingQuotedPrintable and converted back with
  +stringWithCString:length:. Returns nil if the temporary buffer cannot be
  allocated.
*/
- (NSString *)stringByDecodingQuotedPrintable {
  NSData *data;

  if ((data = NGQPCStringDataOfString(self)) == nil)
    return nil;

  /*
    NOTE(review): if the decoder rejects the input, 'data' is nil here and
    the string is built from a NULL pointer with length 0 - confirm what
    callers expect in that case.
  */
  data = [data dataByDecodingQuotedPrintable];
  return [NSString stringWithCString:[data bytes] length:[data length]];
}

/*
  Encodes the C-string bytes of the receiver as quoted-printable and returns
  the encoded text as a new string.

  Returns nil if the temporary buffer cannot be allocated.
*/
- (NSString *)stringByEncodingQuotedPrintable {
  NSData *data;

  if ((data = NGQPCStringDataOfString(self)) == nil)
    return nil;

  data = [data dataByEncodingQuotedPrintable];
  return [NSString stringWithCString:[data bytes] length:[data length]];
}

@end /* NSString(QuotedPrintableCoding) */
@implementation NSData(QuotedPrintableCoding)

/*
  Returns the receiver's bytes decoded from quoted-printable, or nil if
  NGDecodeQuotedPrintable() reports an error (-1).

  Decoded output is never longer than its input, so the working buffer is
  sized to the receiver's length. The buffer is an autoreleased
  NSMutableData, which means nothing leaks on the error path; the returned
  object is that buffer trimmed to the decoded length.
*/
- (NSData *)dataByDecodingQuotedPrintable {
  NSMutableData *dest;
  unsigned int  srcLen;
  int           resSize;

  if ((srcLen = [self length]) == 0)
    return [NSData data];

  if ((dest = [NSMutableData dataWithLength:srcLen]) == nil)
    return nil;

  resSize = NGDecodeQuotedPrintable([self bytes], srcLen,
                                    [dest mutableBytes], srcLen);
  if (resSize == -1)
    return nil;

  [dest setLength:resSize];
  return dest;
}

/*
  Returns the receiver's bytes encoded as quoted-printable, or nil if
  NGEncodeQuotedPrintable() reports an error (-1) or the required buffer
  size cannot be represented.

  In the worst case every input byte becomes a three character "=XX"
  sequence, so the working buffer is three times the input length.
*/
- (NSData *)dataByEncodingQuotedPrintable {
  NSMutableData *des;
  unsigned int  length = [self length];
  unsigned int  desLen;
  int           resSize;

  if (length == 0)
    return [NSData data];

  /* length * 3 must not wrap around */
  if (length > (~0U / 3))
    return nil;
  desLen = length * 3;

  if ((des = [NSMutableData dataWithLength:desLen]) == nil)
    return nil;

  resSize = NGEncodeQuotedPrintable([self bytes], length,
                                    [des mutableBytes], desLen);
  if (resSize == -1)
    return nil;

  [des setLength:resSize];
  return des;
}

@end /* NSData(QuotedPrintableCoding) */
/*
  Maps one hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f') to its value 0..15.
  Returns -1 for any other character. The return type is explicitly signed
  so that the -1 marker is representable even where plain char is unsigned.
*/
static inline signed char __hexToChar(char c) {
  if ((c >= '0') && (c <= '9'))
    return c - '0';
  if ((c >= 'A') && (c <= 'F'))
    return c - 'A' + 10;
  if ((c >= 'a') && (c <= 'f'))
    return c - 'a' + 10;
  return -1;
}

/*
  Decodes quoted-printable data.

  _src/_srcLen   - encoded input (does not need to be NUL terminated)
  _dest/_destLen - output buffer and its capacity in bytes; a capacity of
                   _srcLen is always sufficient, larger buffers are fine

  Returns the number of bytes written to _dest, or -1 if the input could
  not be consumed completely: the output buffer filled up, the input ends
  with a lone '=', or the input ends in the middle of an "=XX" escape.

  Handling of the sequences following '=':
    - CR or LF (optionally followed by one more CR/LF) is a soft line break
      and produces no output
    - two hex digits produce the byte they describe
    - anything else is not a valid escape; the two characters after the
      '=' are copied to the output and the '=' itself is dropped
*/
int NGDecodeQuotedPrintable(const char *_src, unsigned _srcLen,
                            char *_dest, unsigned _destLen) {
  unsigned cnt     = 0;
  unsigned destCnt = 0;

  for (cnt = 0; (cnt < _srcLen) && (destCnt < _destLen); cnt++) {
    signed char c1, c2;

    if (_src[cnt] != '=') { // literal character
      _dest[destCnt++] = _src[cnt];
      continue;
    }

    if ((_srcLen - cnt) <= 1) // '=' is the last input character
      break;

    cnt++;
    c1 = _src[cnt]; // first character after '='

    if (c1 == '\r' || c1 == '\n') { // soft line break
      // only look at the second line break character if it exists
      if (((cnt + 1) < _srcLen) &&
          (_src[cnt + 1] == '\r' || _src[cnt + 1] == '\n'))
        cnt++;
      continue;
    }

    if ((cnt + 1) >= _srcLen) // second hex digit is missing
      break;

    c1 = __hexToChar(c1);
    c2 = __hexToChar(_src[++cnt]);

    if ((c1 == -1) || (c2 == -1)) { // not a hex pair, pass through
      if ((_destLen - destCnt) > 1) {
        _dest[destCnt++] = _src[cnt - 1];
        _dest[destCnt++] = _src[cnt];
      }
      else
        break;
    }
    else {
      _dest[destCnt++] = (char)((c1 << 4) | c2);
    }
  }

  // leaving the loop early means some input was left undecoded
  if (cnt < _srcLen)
    return -1;
  return destCnt;
}
/*
  From RFC 2045 Multipurpose Internet Mail Extensions

  6.7. Quoted-Printable Content-Transfer-Encoding

  In this encoding, octets are to be represented as determined by the
  following rules:

    (1)   (General 8bit representation) Any octet, except a CR or
          LF that is part of a CRLF line break of the canonical
          (standard) form of the data being encoded, may be
          represented by an "=" followed by a two digit
          hexadecimal representation of the octet's value.  The
          digits of the hexadecimal alphabet, for this purpose,
          are "0123456789ABCDEF".  Uppercase letters must be
          used; lowercase letters are not allowed.  Thus, for
          example, the decimal value 12 (US-ASCII form feed) can
          be represented by "=0C", and the decimal value 61 (US-
          ASCII EQUAL SIGN) can be represented by "=3D".  This
          rule must be followed except when the following rules
          allow an alternative encoding.

    (2)   (Literal representation) Octets with decimal values of
          33 through 60 inclusive, and 62 through 126, inclusive,
          MAY be represented as the US-ASCII characters which
          correspond to those octets (EXCLAMATION POINT through
          LESS THAN, and GREATER THAN through TILDE,
          respectively).

    (3)   (White Space) Octets with values of 9 and 32 MAY be
          represented as US-ASCII TAB (HT) and SPACE characters,
          respectively, but MUST NOT be so represented at the end
          of an encoded line.  Any TAB (HT) or SPACE characters on an
          encoded line MUST thus be followed on that line by a printable
          character.  In particular, an "=" at the end of an encoded line,
          indicating a soft line break (see rule #5) may follow one or
          more TAB (HT) or SPACE characters.  It follows that an octet
          with decimal value 9 or 32 appearing at the end of an encoded line
          must be represented according to Rule #1.  This rule is necessary
          because some MTAs (Message Transport Agents, programs which transport
          messages from one user to another, or perform a portion of such
          transfers) are known to pad lines of text with SPACEs, and others
          are known to remove "white space" characters from the end of a line.
          Therefore, when decoding a Quoted-Printable body, any trailing white
          space on a line must be deleted, as it will necessarily have been
          added by intermediate transport agents.

    (4)   (Line Breaks) A line break in a text body, represented
          as a CRLF sequence in the text canonical form, must be
          represented by a (RFC 822) line break, which is also a
          CRLF sequence, in the Quoted-Printable encoding.  Since
          the canonical representation of media types other than
          text do not generally include the representation of
          line breaks as CRLF sequences, no hard line breaks
          (i.e. line breaks that are intended to be meaningful
          and to be displayed to the user) can occur in the
          quoted-printable encoding of such types.  Sequences
          like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
          appear in non-text data represented in quoted-
          printable, of course.

    (5)   (Soft Line Breaks) The Quoted-Printable encoding
          REQUIRES that encoded lines be no more than 76
          characters long.  If longer lines are to be encoded
          with the Quoted-Printable encoding, "soft" line breaks
          must be used.  An equal sign as the last character on a
          encoded line indicates such a non-significant ("soft")
          line break in the encoded text.
*/
/*
  Encodes bytes as quoted-printable.

  _src/_srcLen   - raw input bytes
  _dest/_destLen - output buffer and its capacity in bytes; 3 * _srcLen is
                   always sufficient since a byte expands to at most "=XX"

  Returns the number of bytes written to _dest, or -1 if the output buffer
  is too small to hold the complete encoding.

  TAB, LF, CR and the printable US-ASCII characters except '=' are copied
  unchanged; every other byte is written as '=' followed by two uppercase
  hex digits. No soft line breaks are inserted, so the length of the
  encoded lines is not limited by this function.
*/
int NGEncodeQuotedPrintable(const char *_src, unsigned _srcLen,
                            char *_dest, unsigned _destLen) {
  static const char hexT[16] = {'0','1','2','3','4','5','6','7','8',
                                '9','A','B','C','D','E','F'};
  unsigned cnt     = 0;
  unsigned destCnt = 0;

  // the encoding is never shorter than the input
  if (_srcLen > _destLen)
    return -1;

  for (cnt = 0; (cnt < _srcLen) && (destCnt < _destLen); cnt++) {
    unsigned char c = (unsigned char)_src[cnt];

    if ((c == 9)  ||  // TAB
        (c == 10) ||  // LF
        (c == 13) ||  // CR
        ((c > 31) && (c < 61)) ||
        ((c > 61) && (c < 127))) { // no quoting
      _dest[destCnt++] = (char)c;
    }
    else { // need to be quoted
      if (_destLen - destCnt > 2) {
        _dest[destCnt++] = '=';
        _dest[destCnt++] = hexT[(c >> 4) & 15];
        _dest[destCnt++] = hexT[c & 15];
      }
      else
        break;
    }
  }

  // leaving the loop early means the output buffer was too small
  if (cnt < _srcLen)
    return -1;
  return destCnt;
}
/*
  Does nothing except call itself, so it must never be invoked at runtime
  (the recursion has no exit).

  NOTE(review): judging by the "__link_" prefix this presumably only exists
  as a symbol that other code can reference to force this object file to be
  linked in - confirm before changing or removing it.
*/
void __link_NGQuotedPrintableCoding(void) {
  __link_NGQuotedPrintableCoding();
}