/*
  Copyright (C) 2000-2003 SKYRIX Software AG

  This file is part of OGo

  OGo is free software; you can redistribute it and/or modify it under
  the terms of the GNU Lesser General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  OGo is distributed in the hope that it will be useful, but WITHOUT ANY
  WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with OGo; see the file COPYING.  If not, write to the
  Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/
/*
  Scalar types used by the UTF-8 -> UTF-16 conversion code.

  Note: 'unsigned long' is at least 32 bits wide but may be wider (e.g. 64
  bits on LP64 platforms), so UCS4 arithmetic must not rely on wrap-around
  at exactly 32 bits.
*/
typedef unsigned long  UCS4;  /* one decoded code point                */
typedef unsigned short UCS2;  /* one 16-bit code point                 */
typedef unsigned short UTF16; /* one UTF-16 code unit                  */
typedef unsigned char  UTF8;  /* one UTF-8 byte                        */

/*
  Surrogate pair arithmetic: a code point above kMaximumUCS2 is reduced by
  halfBase and the remaining 20 bits are split into two 10-bit halves
  (halfShift / halfMask) which are added to the start of the high and low
  surrogate ranges.
*/
static const int  halfShift           = 10;
static const UCS4 halfBase            = 0x0010000UL;
static const UCS4 halfMask            = 0x3FFUL;
static const UCS4 kSurrogateHighStart = 0xD800UL;
static const UCS4 kSurrogateHighEnd   = 0xDBFFUL;
static const UCS4 kSurrogateLowStart  = 0xDC00UL;
static const UCS4 kSurrogateLowEnd    = 0xDFFFUL;

/* U+FFFD, stored in place of values that UTF-16 cannot represent */
static const UCS4 kReplacementCharacter = 0x0000FFFDUL;
/* largest value that fits into a single 16-bit code unit */
static const UCS4 kMaximumUCS2          = 0x0000FFFFUL;
/* largest value that can be expressed as a UTF-16 surrogate pair */
static const UCS4 kMaximumUTF16         = 0x0010FFFFUL;
/* largest 31-bit value */
static const UCS4 kMaximumUCS4          = 0x7FFFFFFFUL;
42 static UCS4 offsetsFromUTF8[6] = {
43 0x00000000UL, 0x00003080UL, 0x000E2080UL,
44 0x03C82080UL, 0xFA082080UL, 0x82082080UL
/*
  Number of bytes that follow a given lead byte in a UTF-8 sequence,
  indexed by the value of the lead byte:

    0x00-0xBF -> 0   (0x80-0xBF are trail bytes but are treated like
                      single-byte values by this table)
    0xC0-0xDF -> 1
    0xE0-0xEF -> 2
    0xF0-0xF7 -> 3
    0xF8-0xFB -> 4
    0xFC-0xFF -> 5
*/
static const char bytesFromUTF8[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
58 _UTF8ToUTF16(unsigned char **sourceStart, unsigned char *sourceEnd,
59 unichar **targetStart, const unichar *targetEnd)
62 register UTF8 *source = *sourceStart;
63 register UTF16 *target = *targetStart;
65 while (source < sourceEnd) {
67 register unsigned short extraBytesToWrite = bytesFromUTF8[*source];
69 if (source + extraBytesToWrite > sourceEnd) {
72 switch(extraBytesToWrite) { /* note: code falls through cases! */
73 case 5: ch += *source++; ch <<= 6;
74 case 4: ch += *source++; ch <<= 6;
75 case 3: ch += *source++; ch <<= 6;
76 case 2: ch += *source++; ch <<= 6;
77 case 1: ch += *source++; ch <<= 6;
78 case 0: ch += *source++;
80 ch -= offsetsFromUTF8[extraBytesToWrite];
82 if (target >= targetEnd) {
85 if (ch <= kMaximumUCS2) {
87 } else if (ch > kMaximumUTF16) {
88 *target++ = kReplacementCharacter;
90 if (target + 1 >= targetEnd) {
94 *target++ = (ch >> halfShift) + kSurrogateHighStart;
95 *target++ = (ch & halfMask) + kSurrogateLowStart;
98 *sourceStart = source;
99 *targetStart = target;