]> err.no Git - sope/blob - sope-xml/libxmlSAXDriver/unicode.h
use Version file for install locations
[sope] / sope-xml / libxmlSAXDriver / unicode.h
1 /*
2   Copyright (C) 2000-2003 SKYRIX Software AG
3
4   This file is part of OGo
5
6   OGo is free software; you can redistribute it and/or modify it under
7   the terms of the GNU Lesser General Public License as published by the
8   Free Software Foundation; either version 2, or (at your option) any
9   later version.
10
11   OGo is distributed in the hope that it will be useful, but WITHOUT ANY
12   WARRANTY; without even the implied warranty of MERCHANTABILITY or
13   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14   License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with OGo; see the file COPYING.  If not, write to the
18   Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19   02111-1307, USA.
20 */
21 /* Unicode support */
22
/*
 * Code-unit / code-point types used by the converter below.
 *
 *   UCS4  - holds a full code point while it is being decoded
 *           (unsigned long: at least 32 bits, may be wider).
 *   UCS2  - 16-bit code unit.
 *   UTF16 - 16-bit UTF-16 code unit.
 *   UTF8  - single UTF-8 byte.
 *
 * 'unichar' is provided as a macro alias for UTF16, not as a typedef.
 */
typedef unsigned long   UCS4;
typedef unsigned short  UCS2;
typedef unsigned short  UTF16;
typedef unsigned char   UTF8;
#define unichar UTF16

/*
 * Surrogate-pair arithmetic: a code point above kMaximumUCS2 has halfBase
 * subtracted, then the bits above halfShift are added to
 * kSurrogateHighStart and the low halfMask bits to kSurrogateLowStart.
 */
static const int halfShift             = 10;
static const UCS4 halfBase             = 0x0010000UL;
static const UCS4 halfMask             = 0x3FFUL;
static const UCS4 kSurrogateHighStart  = 0xD800UL;
static const UCS4 kSurrogateLowStart   = 0xDC00UL;

/* Substitute written for values above kMaximumUTF16, and the range limits. */
static const UCS4 kReplacementCharacter = 0x0000FFFDUL;
static const UCS4 kMaximumUCS2          = 0x0000FFFFUL;
static const UCS4 kMaximumUTF16         = 0x0010FFFFUL;

/*
 * Indexed by the number of trailing bytes (0..5). Each entry is the sum of
 * the marker bits of a sequence of that length once the bytes have been
 * accumulated with 6-bit shifts, e.g. for one trailing byte:
 *   (0xC0 << 6) + 0x80 == 0x3080.
 * Subtracting the entry from the accumulated value leaves the payload bits.
 * The last entry is that sum reduced modulo 2^32.
 *
 * Read-only lookup data, hence const.
 */
static const UCS4 offsetsFromUTF8[6] = {
  0x00000000UL, 0x00003080UL, 0x000E2080UL,
  0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

/*
 * Indexed by the first byte of a sequence; gives the number of bytes that
 * follow it:
 *   0x00-0xBF -> 0   0xC0-0xDF -> 1   0xE0-0xEF -> 2
 *   0xF0-0xF7 -> 3   0xF8-0xFB -> 4   0xFC-0xFF -> 5
 *
 * Read-only lookup data, hence const.
 */
static const char bytesFromUTF8[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
53
54 static int
55 _UTF8ToUTF16(unsigned char **sourceStart, unsigned char *sourceEnd, 
56              unichar **targetStart, const unichar *targetEnd)
57 {
58   int            result = 0;
59   register UTF8  *source = *sourceStart;
60   register UTF16 *target = *targetStart;
61   
62   while (source < sourceEnd) {
63     register UCS4 ch = 0;
64     register unsigned short extraBytesToWrite = bytesFromUTF8[*source];
65     
66     if (source + extraBytesToWrite > sourceEnd) {
67       result = 1; break;
68     };
69     switch(extraBytesToWrite) { /* note: code falls through cases! */
70       case 5:   ch += *source++; ch <<= 6;
71       case 4:   ch += *source++; ch <<= 6;
72       case 3:   ch += *source++; ch <<= 6;
73       case 2:   ch += *source++; ch <<= 6;
74       case 1:   ch += *source++; ch <<= 6;
75       case 0:   ch += *source++;
76     };
77     ch -= offsetsFromUTF8[extraBytesToWrite];
78
79     if (target >= targetEnd) {
80       result = 2; break;
81     };
82     if (ch <= kMaximumUCS2) {
83       *target++ = ch;
84     } else if (ch > kMaximumUTF16) {
85       *target++ = kReplacementCharacter;
86     } else {
87       if (target + 1 >= targetEnd) {
88         result = 2; break;
89       };
90       ch -= halfBase;
91       *target++ = (ch >> halfShift) + kSurrogateHighStart;
92       *target++ = (ch & halfMask) + kSurrogateLowStart;
93     };
94   };
95   *sourceStart = source;
96   *targetStart = target;
97   return result;
98 }