2 Copyright (C) 2000-2003 SKYRIX Software AG
4 This file is part of OGo
6 OGo is free software; you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any later version.
11 OGo is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with OGo; see the file COPYING. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
/*
 * Code-unit types used by the UTF-8 -> UTF-16 conversion code in this file.
 * UCS4 holds a decoded code point, UTF16/UCS2 a 16-bit code unit and UTF8 a
 * single input byte.
 * NOTE(review): unsigned long is 64 bits wide on LP64 platforms and
 * unsigned short is only guaranteed to be at least 16 bits - confirm that
 * nothing depends on these being exactly 32 / 16 bits.
 */
23 typedef unsigned long UCS4;
24 typedef unsigned short UCS2;
25 typedef unsigned short UTF16;
26 typedef unsigned char UTF8;
/*
 * Constants for splitting a code point above kMaximumUCS2 into a UTF-16
 * surrogate pair: the high unit is (ch >> halfShift) + kSurrogateHighStart,
 * the low unit is (ch & halfMask) + kSurrogateLowStart.
 * NOTE(review): halfBase is not referenced in the lines visible here -
 * presumably it is subtracted from the code point before the split; verify.
 */
29 static const int halfShift = 10;
30 static const UCS4 halfBase = 0x0010000UL;
31 static const UCS4 halfMask = 0x3FFUL;
32 static const UCS4 kSurrogateHighStart = 0xD800UL;
33 static const UCS4 kSurrogateLowStart = 0xDC00UL;
/*
 * kReplacementCharacter is emitted for decoded values above kMaximumUTF16.
 * kMaximumUCS2 is the largest value that is handled as a single 16-bit unit;
 * values between the two limits take the surrogate-pair path.
 */
35 static const UCS4 kReplacementCharacter = 0x0000FFFDUL;
36 static const UCS4 kMaximumUCS2 = 0x0000FFFFUL;
37 static const UCS4 kMaximumUTF16 = 0x0010FFFFUL;
/*
 * Correction values indexed by the number of extra (trailing) bytes of a
 * UTF-8 sequence. The decoder adds up the raw bytes, shifting left by 6
 * between them, and then subtracts the entry selected here. Each entry is
 * the sum of the marker bits contributed by the lead byte and by the 0x80
 * prefix of every trailing byte, e.g. (0xC0 << 6) + 0x80 == 0x3080 for one
 * extra byte.
 * NOTE(review): the table is never written in the code visible here and
 * could probably be declared const - confirm against the rest of the file.
 */
39 static UCS4 offsetsFromUTF8[6] = {
40 0x00000000UL, 0x00003080UL, 0x000E2080UL,
41 0x03C82080UL, 0xFA082080UL, 0x82082080UL
/*
 * Lookup table indexed by the first byte of a UTF-8 sequence; yields the
 * number of additional bytes that belong to the sequence:
 *   0x00-0xBF -> 0,  0xC0-0xDF -> 1,  0xE0-0xEF -> 2,
 *   0xF0-0xF7 -> 3,  0xF8-0xFB -> 4,  0xFC-0xFF -> 5.
 * Note that bytes 0x80-0xBF (continuation bytes) map to 0, so a stray
 * continuation byte is treated like a one-byte sequence.
 * NOTE(review): the 4 and 5 entries describe the historical 5- and 6-byte
 * forms, which RFC 3629 no longer permits; decide whether they should still
 * be accepted.
 */
43 static char bytesFromUTF8[256] = {
44 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
45 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
46 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
47 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
48 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
49 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
50 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
51 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
/*
 * Converts UTF-8 bytes from [*sourceStart, sourceEnd) into UTF-16 code
 * units written to [*targetStart, targetEnd).
 *
 * sourceStart  in/out: first byte to read; updated to the read position
 *              reached when the loop ends.
 * sourceEnd    one past the last readable input byte.
 * targetStart  in/out: first code unit to write; updated to the write
 *              position reached when the loop ends.
 * targetEnd    one past the last writable output code unit.
 *
 * NOTE(review): the return type, the declaration of ch, the bodies of the
 * error branches and the return statement are not visible in this excerpt,
 * so the result codes cannot be documented from here.
 */
55 _UTF8ToUTF16(unsigned char **sourceStart, unsigned char *sourceEnd,
56 unichar **targetStart, const unichar *targetEnd)
/* Work on local cursors; the caller's pointers are written back at the end. */
59 register UTF8 *source = *sourceStart;
60 register UTF16 *target = *targetStart;
62 while (source < sourceEnd) {
/*
 * Despite its name this is the number of extra bytes to READ after the
 * lead byte, looked up from the lead byte itself.
 */
64 register unsigned short extraBytesToWrite = bytesFromUTF8[*source];
/*
 * Guard against a sequence that is cut off by the end of the input.
 * NOTE(review): the switch below consumes extraBytesToWrite + 1 bytes, so
 * when source + extraBytesToWrite == sourceEnd this test passes and the
 * last read happens at sourceEnd itself, one byte past the range. This
 * looks like an off-by-one (">=" expected) - confirm and fix.
 */
66 if (source + extraBytesToWrite > sourceEnd) {
/*
 * Accumulate the raw bytes of the sequence, shifting by 6 bits between
 * bytes; entry at case N reads N + 1 bytes in total.
 * NOTE(review): trailing bytes are not checked for the 10xxxxxx form, so
 * malformed input is decoded without complaint.
 */
69 switch(extraBytesToWrite) { /* note: code falls through cases! */
70 case 5: ch += *source++; ch <<= 6;
71 case 4: ch += *source++; ch <<= 6;
72 case 3: ch += *source++; ch <<= 6;
73 case 2: ch += *source++; ch <<= 6;
74 case 1: ch += *source++; ch <<= 6;
75 case 0: ch += *source++;
/* Strip the accumulated lead/continuation marker bits to get the code point. */
77 ch -= offsetsFromUTF8[extraBytesToWrite];
/* At least one output code unit is needed for every decoded value. */
79 if (target >= targetEnd) {
/* Values up to kMaximumUCS2 take the single-code-unit branch. */
82 if (ch <= kMaximumUCS2) {
/* Values beyond the UTF-16 range are replaced by kReplacementCharacter. */
84 } else if (ch > kMaximumUTF16) {
85 *target++ = kReplacementCharacter;
/* A surrogate pair needs room for two code units. */
87 if (target + 1 >= targetEnd) {
/*
 * Emit the high and then the low surrogate.
 * NOTE(review): a subtraction of halfBase from ch is expected before this
 * split but is not visible in this excerpt - verify it is present.
 */
91 *target++ = (ch >> halfShift) + kSurrogateHighStart;
92 *target++ = (ch & halfMask) + kSurrogateLowStart;
/* Report how far reading and writing progressed back to the caller. */
95 *sourceStart = source;
96 *targetStart = target;