/*
  Copyright (C) 2000-2005 SKYRIX Software AG

  This file is part of SOPE.

  SOPE is free software; you can redistribute it and/or modify it under
  the terms of the GNU Lesser General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
  WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with SOPE; see the file COPYING.  If not, write to the
  Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/
22 #include "NSString+German.h"
@implementation NSString(German)

/*
  Helpers for converting between German umlauts / sharp s and their
  two-character ASCII transcriptions ("ae", "oe", "ue", "ss", "sz").

  In the comments below the ^ notation (a^, o^, u^, s^, ...) stands for the
  single-character umlaut, so that the explanations stay pure ASCII.

  All methods compare against these UTF-16 code units:
    228 = a^, 196 = A^, 246 = o^, 214 = O^, 252 = u^, 220 = U^, 223 = s^
*/

- (BOOL)doesContainGermanUmlauts {
  /*
    Returns YES if the receiver contains at least one of the seven
    characters listed in the switch below, NO otherwise (including for the
    empty string).
  */
  register unsigned i, len;

  if ((len = [self length]) == 0)
    return NO;

  for (i = 0; i < len; i++) {
    switch ([self characterAtIndex:i]) {
    case 252: /* ü */
    case 220: /* Ü */
    case 228: /* ä */
    case 196: /* Ä */
    case 246: /* ö */
    case 214: /* Ö */
    case 223: /* ß */
      return YES;
    }
  }
  return NO;
}

- (NSString *)stringByReplacingGermanUmlautsWithTwoCharsAndSzWith:(unichar)_c {
  /*
    a^ => ae, o^ => oe, u^ => ue, A^ => Ae, O^ => Oe, U^ => Ue
    s^ => 's' followed by _c (pass 's' for "ss" or 'z' for "sz")

    All other characters are copied unchanged. Returns an autoreleased
    string; an empty receiver yields @"". Returns nil if the temporary
    buffer cannot be allocated.
  */
  unsigned i, len, rlen;
  unichar  *buf;
  NSString *s;

  if ((len = [self length]) == 0)
    return @"";

  /* worst case: every character expands to two characters */
  buf = calloc((len * 2) + 3, sizeof(unichar));
  if (buf == NULL)
    return nil; /* out of memory, do not write through a NULL pointer */

  for (i = 0, rlen = 0; i < len; i++) {
    register unichar c;
    unichar first, second;

    c      = [self characterAtIndex:i];
    second = 'e';

    switch (c) {
    case 252: first = 'u'; break;
    case 220: first = 'U'; break;
    case 228: first = 'a'; break;
    case 196: first = 'A'; break;
    case 246: first = 'o'; break;
    case 214: first = 'O'; break;

    case 223: /* ss or sz */
      first  = 's';
      second = _c;
      break;

    default: /* no mapping, plain copy below */
      first = 0;
      break;
    }

    if (first == 0) {
      /* copy char and continue */
      buf[rlen] = c; rlen++;
    }
    else {
      buf[rlen] = first;  rlen++;
      buf[rlen] = second; rlen++;
    }
  }

  /* the result only grew if at least one character was expanded */
  s = (rlen > len)
    ? [[NSString alloc] initWithCharacters:buf length:rlen]
    : [self copy];
  free(buf);
  return [s autorelease];
}

- (NSString *)stringByReplacingGermanUmlautsWithTwoChars {
  // default sz mapping is "ss" (like Hess ;-)
  return [self stringByReplacingGermanUmlautsWithTwoCharsAndSzWith:'s'];
}

- (NSString *)stringByReplacingTwoCharEncodingsOfGermanUmlauts {
  /*
    ae => a^, oe => o^, ue => u^, Ae => A^, Oe => O^, Ue => U^
    ss => s^, sz => s^

    The scan runs left to right and consumes both characters of a matched
    pair, so every "ae"/"oe"/"ue"/"ss"/"sz" pair is replaced, whether or not
    it was meant as an umlaut transcription. Only a lowercase 's' starts a
    sharp-s sequence.

    Returns an autoreleased string; an empty receiver yields @"". Returns
    nil if the temporary buffer cannot be allocated.
  */
  unsigned i, len, rlen;
  unichar  *buf;
  NSString *s;
  BOOL     didReplace;

  if ((len = [self length]) == 0)
    return @"";
  if (len == 1) /* a sequence needs at least two characters */
    return [[self copy] autorelease];

  buf = calloc(len + 3, sizeof(unichar));
  if (buf == NULL)
    return nil; /* out of memory, do not write through a NULL pointer */

  [self getCharacters:buf]; // Note: we can reuse that buffer!

  /* rlen (write index) never overtakes i (read index), so in-place is safe */
  for (i = 0, rlen = 0, didReplace = NO; i < len; i++) {
    register unichar c, cn;

    c = buf[i];

    if ((i + 1) >= len) {
      buf[rlen] = c; rlen++;
      break; // end, found last char (so can't be a sequence)
    }

    cn = buf[i + 1];

    if ((c=='a' || c=='A' || c=='u' || c=='U' || c=='o' || c=='O')&&cn=='e') {
      /* an umlaut sequence */
      switch (c) {
      case 'a': buf[rlen] = 228; break;
      case 'A': buf[rlen] = 196; break;
      case 'o': buf[rlen] = 246; break;
      case 'O': buf[rlen] = 214; break;
      case 'u': buf[rlen] = 252; break;
      case 'U': buf[rlen] = 220; break;
      }
      rlen++;
      i++; // skip sequence char
      didReplace = YES;
    }
    else if (c == 's' && (cn == 's' || cn == 'z')) {
      /* a sharp-s sequence */
      buf[rlen] = 223; rlen++;
      i++; // skip sequence char
      didReplace = YES;
    }
    else {
      /* regular char, copy */
      buf[rlen] = c; rlen++;
    }
  }

  s = didReplace
    ? [[NSString alloc] initWithCharacters:buf length:rlen]
    : [self copy];
  free(buf);
  return [s autorelease];
}

- (NSArray *)germanUmlautVariantsOfString {
  /*
    Returns the receiver plus its alternative spellings. The receiver is
    always the first element of the returned array.

    The ^ is used to signal the single character umlaut to avoid non-ASCII
    characters in this comment.

    Note: we can only do a limited set of transformations! Eg you can only
    mix umlauts *OR* the "ue", "oe" variants!

    Q: what about names which contain encoded umlauts *and* the same sequence
       as a regular part of the name! For example "Neuendoerf".

    string with umlauts (up to two variants, ss and sz):
      "Mu^ller"  => ( "Mu^ller", "Mueller" )
      "Hes^"     => ( "Hes^", "Hess", "Hesz" )

    string with umlaut workaround (one variant, sz and ss both map to s^):
      "Mueller"  => ( "Mueller", "Mu^ller" )
      "Hoess"    => ( "Hoess", "Ho^s^" )

    A string without umlauts and without two-char sequences yields an
    array containing just the receiver; the empty string yields ( "" ).
  */
  NSString *s1, *s2;
  unsigned len;

  if ((len = [self length]) == 0)
    return [NSArray arrayWithObjects:@"", nil];

  if ([self doesContainGermanUmlauts]) {
    s1 = [self stringByReplacingGermanUmlautsWithTwoCharsAndSzWith:'s'];
    s2 = [self stringByReplacingGermanUmlautsWithTwoCharsAndSzWith:'z'];

    /* drop duplicates; a nil entry terminates the argument list below */
    if ([s2 isEqualToString:s1] || [s2 isEqualToString:self])
      s2 = nil;
    if ([s1 isEqualToString:self])
      s1 = nil;

    return [NSArray arrayWithObjects:self, s1, s2, nil];
  }

  if (len < 2) // a sequence would have at least 2 chars
    return [NSArray arrayWithObjects:self, nil];

  s1 = [self stringByReplacingTwoCharEncodingsOfGermanUmlauts];

  if ([self isEqualToString:s1])
    /* nothing was replaced */
    return [NSArray arrayWithObjects:self, nil];

  return [NSArray arrayWithObjects:self, s1, nil];
}

@end /* NSString(German) */