2 Copyright (C) 2000-2003 SKYRIX Software AG
4 This file is part of OGo
6 OGo is free software; you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 OGo is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with OGo; see the file COPYING. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "NGQuotedPrintableCoding.h"
25 #include "NGMemoryAllocation.h"
/*
  NSString(QuotedPrintableCoding)

  String-level convenience wrappers around the NSData quoted-printable
  methods. Both methods work on the receiver's C-string representation:
  the bytes are copied into an NSData, transformed, and the result is
  turned back into an NSString.
*/
27 @implementation NSString(QuotedPrintableCoding)
/*
  Returns the receiver decoded from quoted-printable.
  The conversion is delegated to -[NSData dataByDecodingQuotedPrintable].
*/
29 - (NSString *)stringByDecodingQuotedPrintable {
// a zero C-string length skips the buffer copy below
33 if ((len = [self cStringLength])) {
// scratch buffer for the C string; the 10 spare bytes are presumably slack
// for the terminator. NOTE(review): the malloc() result is used unchecked.
35 buf = malloc(len + 10);
36 [self getCString:buf];
// NSData copies the bytes, so buf is no longer needed after this line.
// NOTE(review): confirm buf is released on every path.
37 data = [NSData dataWithBytes:buf length:len];
43 data = [data dataByDecodingQuotedPrintable];
// NOTE(review): if the decode step yields no data, this is called with
// whatever [data bytes] / [data length] return for it - confirm intent.
44 return [NSString stringWithCString:[data bytes] length:[data length]];
/*
  Returns the receiver encoded as quoted-printable.
  Mirror image of -stringByDecodingQuotedPrintable; the conversion is
  delegated to -[NSData dataByEncodingQuotedPrintable].
*/
46 - (NSString *)stringByEncodingQuotedPrintable {
// a zero C-string length skips the buffer copy below
50 if ((len = [self cStringLength])) {
// same scratch-buffer pattern as in the decoding method;
// NOTE(review): the malloc() result is used unchecked here as well.
52 buf = malloc(len + 10);
53 [self getCString:buf];
54 data = [NSData dataWithBytes:buf length:len];
60 data = [data dataByEncodingQuotedPrintable];
61 return [NSString stringWithCString:[data bytes] length:[data length]];
/*
  NSData(QuotedPrintableCoding)

  Object-level wrappers around the C functions NGDecodeQuotedPrintable()
  and NGEncodeQuotedPrintable(). Each method allocates an output buffer
  with NGMallocAtomic(), runs the C function and, when the function did
  not report -1, hands the buffer to a new NSData without copying it.
*/
66 @implementation NSData(QuotedPrintableCoding)
/*
  Returns the receiver's bytes decoded from quoted-printable.
  The output buffer gets the same size as the input.
*/
68 - (NSData *)dataByDecodingQuotedPrintable {
73 destSize = [self length];
74 dest = NGMallocAtomic(destSize * sizeof(char));
// source and destination are passed with identical lengths
76 resSize = NGDecodeQuotedPrintable([self bytes],[self length],dest,destSize);
// -1 is the decoder's failure value; any other result is the number of
// bytes written to dest.
// NOTE(review): confirm dest is released when the decoder reports -1.
78 return ((int)resSize != -1)
79 ? [NSData dataWithBytesNoCopy:dest length:resSize]
/*
  Returns the receiver's bytes encoded as quoted-printable.
*/
83 - (NSData *)dataByEncodingQuotedPrintable {
84 const char *bytes = [self bytes];
85 unsigned int length = [self length];
87 unsigned int desLen = 0;
// NOTE(review): desLen must hold the output capacity before this
// allocation; a quoted octet occupies three output bytes ('=' plus two
// hex digits), so confirm the capacity covers that worst case.
90 des = NGMallocAtomic(sizeof(char) * desLen);
// desLen is reused: capacity on the way in, encoded length (or -1) on the
// way out
92 desLen = NGEncodeQuotedPrintable(bytes, length, des, desLen);
// NOTE(review): confirm des is released when the encoder reports -1.
94 return (int)desLen != -1
95 ? [NSData dataWithBytesNoCopy:des length:desLen]
/*
  Maps one ASCII hex digit to its numeric value.

  Accepts '0'..'9', 'A'..'F' and 'a'..'f' and returns 0..15.
  Returns -1 for every other character.
*/
static inline char __hexToChar(char c) {
  if ((c > 47) && (c < 58)) // '0' .. '9'
    return c - 48;
  if ((c > 64) && (c < 71)) // 'A' .. 'F'
    return c - 55;
  if ((c > 96) && (c < 103)) // 'a' .. 'f'
    return c - 87;
  return -1;
}

/*
  Decodes quoted-printable data (RFC 2045, section 6.7).

  _src/_srcLen   - encoded input; it does not need to be NUL terminated and
                   is never read beyond _srcLen bytes
  _dest/_destLen - output buffer and its capacity in bytes

  Returns the number of bytes written to _dest, or -1 if the input could
  not be decoded completely (output buffer exhausted, or an escape sequence
  that is cut off by the end of the input).

  Decoding rules:
    - any byte other than '=' is copied unchanged
    - "=XY" with two hex digits (either case) yields the byte 0xXY
    - '=' followed by a line terminator is a soft line break and yields
      nothing; CR LF and LF CR are consumed as one terminator
    - "=XY" where X or Y is not a hex digit yields the two bytes X and Y
      (the '=' itself is dropped)
*/
int NGDecodeQuotedPrintable(const char *_src, unsigned _srcLen,
                            char *_dest, unsigned _destLen) {
  unsigned cnt     = 0;
  unsigned destCnt = 0;

  /*
    NOTE(review): this precondition rejects a destination that is LARGER
    than the source. Decoded output never exceeds the input, so the test
    looks inverted; it is kept because callers pass equal lengths. Confirm
    before relaxing it.
  */
  if (_srcLen < _destLen)
    return -1;

  for (cnt = 0; ((cnt < _srcLen) && (destCnt < _destLen)); cnt++) {
    signed char c1, c2;

    if (_src[cnt] != '=') {
      _dest[destCnt++] = _src[cnt];
      continue;
    }

    /* a lone '=' as the very last byte cannot be decoded */
    if ((_srcLen - cnt) <= 1)
      break;

    cnt++;          // skip '='
    c1 = _src[cnt]; // first hex digit or start of a soft line break

    if (c1 == '\r' || c1 == '\n') {
      /*
        Soft line break. Swallow the second half of a two-byte terminator,
        but only if it exists inside the input and differs from the first
        half - "=\n\n" is a soft break followed by a real line break.
      */
      if ((cnt + 1 < _srcLen) &&
          (_src[cnt + 1] == '\r' || _src[cnt + 1] == '\n') &&
          (_src[cnt + 1] != c1))
        cnt++;
      continue;
    }

    /* the second hex digit has to be inside the input as well */
    if ((_srcLen - cnt) <= 1)
      break;

    c1 = __hexToChar(c1);
    c2 = __hexToChar(_src[++cnt]);

    if ((c1 == -1) || (c2 == -1)) {
      /* not a valid escape: pass the two characters through literally */
      if ((_destLen - destCnt) > 1) {
        _dest[destCnt++] = _src[cnt - 1];
        _dest[destCnt++] = _src[cnt];
      }
      else
        break;
    }
    else {
      _dest[destCnt++] = (char)((c1 << 4) | c2);
    }
  }

  /* leaving the loop early means some of the input was not consumed */
  if (cnt < _srcLen)
    return -1;
  return destCnt;
}
162 From RFC 2045 Multipurpose Internet Mail Extensions
164 6.7. Quoted-Printable Content-Transfer-Encoding
168 In this encoding, octets are to be represented as determined by the following rules:
172 (1) (General 8bit representation) Any octet, except a CR or
173 LF that is part of a CRLF line break of the canonical
174 (standard) form of the data being encoded, may be
175 represented by an "=" followed by a two digit
176 hexadecimal representation of the octet's value. The
177 digits of the hexadecimal alphabet, for this purpose,
178 are "0123456789ABCDEF". Uppercase letters must be
179 used; lowercase letters are not allowed. Thus, for
180 example, the decimal value 12 (US-ASCII form feed) can
181 be represented by "=0C", and the decimal value 61 (US-
182 ASCII EQUAL SIGN) can be represented by "=3D". This
183 rule must be followed except when the following rules
184 allow an alternative encoding.
186 (2) (Literal representation) Octets with decimal values of
187 33 through 60 inclusive, and 62 through 126, inclusive,
188 MAY be represented as the US-ASCII characters which
189 correspond to those octets (EXCLAMATION POINT through
190 LESS THAN, and GREATER THAN through TILDE, respectively).
193 (3) (White Space) Octets with values of 9 and 32 MAY be
194 represented as US-ASCII TAB (HT) and SPACE characters,
195 respectively, but MUST NOT be so represented at the end
196 of an encoded line. Any TAB (HT) or SPACE characters on an
197 encoded line MUST thus be followed on that line by a printable
198 character. In particular, an "=" at the end of an encoded line,
199 indicating a soft line break (see rule #5) may follow one or
200 more TAB (HT) or SPACE characters. It follows that an octet
201 with decimal value 9 or 32 appearing at the end of an encoded line
202 must be represented according to Rule #1. This rule is necessary
203 because some MTAs (Message Transport Agents, programs which transport
204 messages from one user to another, or perform a portion of such
205 transfers) are known to pad lines of text with SPACEs, and others
206 are known to remove "white space" characters from the end of a line.
207 Therefore, when decoding a Quoted-Printable body, any trailing white
208 space on a line must be deleted, as it will necessarily have been
209 added by intermediate transport agents.
212 (4) (Line Breaks) A line break in a text body, represented
213 as a CRLF sequence in the text canonical form, must be
214 represented by a (RFC 822) line break, which is also a
215 CRLF sequence, in the Quoted-Printable encoding. Since
216 the canonical representation of media types other than
217 text do not generally include the representation of
218 line breaks as CRLF sequences, no hard line breaks
219 (i.e. line breaks that are intended to be meaningful
220 and to be displayed to the user) can occur in the
221 quoted-printable encoding of such types. Sequences
222 like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
223 appear in non-text data represented in quoted-
224 printable, of course.
226 (5) (Soft Line Breaks) The Quoted-Printable encoding
227 REQUIRES that encoded lines be no more than 76
228 characters long. If longer lines are to be encoded
229 with the Quoted-Printable encoding, "soft" line breaks
230 must be used. An equal sign as the last character on a
231 encoded line indicates such a non-significant ("soft")
232 line break in the encoded text.
/*
  Encodes _srcLen bytes from _src as quoted-printable into _dest.

  _src/_srcLen   - raw input bytes
  _dest/_destLen - output buffer and its capacity in bytes

  Octets in the ranges 32..60 and 62..126 are copied unchanged; an octet
  that needs quoting is written as '=' followed by two uppercase hex
  digits and therefore takes three output bytes.

  NOTE(review): callers compare the result against -1, so -1 is presumably
  the failure value and the output length the success value - confirm.
  NOTE(review): nothing visible here inserts soft line breaks or quotes
  whitespace at the end of a line (RFC 2045 6.7, rules 3 and 5) - confirm
  whether that is intended.
*/
236 int NGEncodeQuotedPrintable(const char *_src, unsigned _srcLen,
237 char *_dest, unsigned _destLen) {
239 unsigned destCnt = 0;
// lookup table: nibble value -> uppercase hex digit
240 char hexT[16] = {'0','1','2','3','4','5','6','7','8',
241 '9','A','B','C','D','E','F'};
// the output can never be shorter than the input, so a smaller
// destination is checked for up front
243 if (_srcLen > _destLen)
// stops at the end of the input or as soon as the destination is full
246 for (cnt = 0; (cnt < _srcLen) && (destCnt < _destLen); cnt++) {
// literal ranges: 32..60 and 62..126, i.e. printable ASCII and SPACE but
// not '=' (61). NOTE(review): further pass-through cases may precede these
// two range tests - confirm against the full condition.
251 ((c > 31) && (c < 61)) ||
252 ((c > 61) && (c < 127))) { // no quoting
253 _dest[destCnt++] = c;
255 else { // need to be quoted
// a quoted octet needs three free bytes in the destination
256 if (_destLen - destCnt > 2) {
257 _dest[destCnt++] = '=';
// high nibble first; the mask keeps the table index within 0..15
258 _dest[destCnt++] = hexT[(c >> 4) & 15];
259 _dest[destCnt++] = hexT[c & 15];
/*
  Symbol whose body only references itself.
  Going by the name, it presumably exists so that other code can reference
  it and thereby force this object file into a static link - confirm.
  NOTE(review): the body calls itself unconditionally, so actually invoking
  this function would recurse without end.
*/
272 void __link_NGQuotedPrintableCoding(void) {
273 __link_NGQuotedPrintableCoding();