err.no Git - mapper/commitdiff
Normalize utf8 names. Add code to transliterate names, but undef for now as it didn...
author: Kaj-Michael Lang <milang@tal.org>
Fri, 2 Nov 2007 22:40:50 +0000 (00:40 +0200)
committer: Kaj-Michael Lang <milang@tal.org>
Fri, 2 Nov 2007 22:40:50 +0000 (00:40 +0200)
src/osm.c

index d1f4ac6e5f53f5d0b1642db370fc166b81726aa3..dc0321a49dac1109f897b3ef32a1e6493c225f2a 100644 (file)
--- a/src/osm.c
+++ b/src/osm.c
 #endif
 /* #define VERBOSE_KEYS */
 
+
+/* Use g_convert to transliterate names. Left undefined for now: transliteration did not work with the author's iconv, which appears to be broken. */
+/* #define TRANSLIT_NAMES */
+
 #define FILE_BUFFER 65535
 
 #define OSM_DB_FILE "osm-planet.db"
@@ -461,7 +465,7 @@ sqlite3_prepare_v2(db, "insert into way_n2n (wid,f,t) values (?,?,?)", -1, &sql.
 sqlite3_prepare_v2(db, "delete from way_n2n where wid=?", -1, &sql.delete_way_n2n, NULL);
 
 /* Way names */
-sqlite3_prepare_v2(db, "insert or replace into way_names (wid,name) values (?, ?)",  -1, &sql.insert_way_name, NULL);
+sqlite3_prepare_v2(db, "insert or replace into way_names (wid,name,norm) values (?, ?, ?)",  -1, &sql.insert_way_name, NULL);
 sqlite3_prepare_v2(db, "delete from way_names", -1, &sql.delete_way_name, NULL);
 
 /* Way postal codes */
@@ -469,7 +473,7 @@ sqlite3_prepare_v2(db, "insert or replace into way_pc (wid,pc) values (?, ?)",
 sqlite3_prepare_v2(db, "delete from way_pc", -1, &sql.delete_way_pc, NULL);
 
 /* Other language names for ways */
-sqlite3_prepare_v2(db, "insert into way_names_nls (wid,lang,name) values (?, ?, ?)",  -1, &sql.insert_way_names_nls, NULL);
+sqlite3_prepare_v2(db, "insert into way_names_nls (wid,lang,name, norm) values (?, ?, ?, ?)",  -1, &sql.insert_way_names_nls, NULL);
 sqlite3_prepare_v2(db, "delete from way_names_nls where wid=?", -1, &sql.delete_way_names_nls, NULL);
 
 /* Way ref and int_ref */
@@ -652,6 +656,8 @@ db_exec(sql.insert_way_ref);
 static void 
 db_insert_way_name(way *w)
 {
+gchar *norm;
+
 if (!w->data)
        return;
 if (!w->data->name)
@@ -659,9 +665,19 @@ if (!w->data->name)
 
 way_names++;
 
+
 sqlite3_bind_int(sql.insert_way_name, 1, w->id);
 sqlite3_bind_text(sql.insert_way_name, 2, w->data->name, -1, SQLITE_TRANSIENT);
 
+#ifdef TRANSLIT_NAMES
+norm=g_convert(w->data->name, -1, "ASCII//TRANSLIT//IGNORE", "utf8", NULL, NULL, NULL);
+if (norm && strcmp(w->data->name, norm)!=0) {
+       sqlite3_bind_text(sql.insert_way_name, 3, norm, -1, SQLITE_TRANSIENT);
+}
+if (norm)
+       g_free(norm);
+#endif
+
 db_exec(sql.insert_way_name);
 }
 
@@ -696,12 +712,21 @@ db_exec(sql.delete_way_pc);
 static void
 db_insert_way_names_nls_cb(gpointer key, gpointer value, gpointer user_data)
 {
+gchar *norm;
+
 way *w=(way *)user_data;
 
 sqlite3_bind_int(sql.insert_way_names_nls, 1, w->id);
 sqlite3_bind_text(sql.insert_way_names_nls, 2, (gchar *)key, -1, SQLITE_TRANSIENT);
 sqlite3_bind_text(sql.insert_way_names_nls, 3, (gchar *)value, -1, SQLITE_TRANSIENT);
-
+#ifdef TRANSLIT_NAMES
+norm=g_convert((gchar *value), -1, "ASCII//TRANSLIT//IGNORE", "utf8", NULL, NULL, NULL);
+if (norm && strcmp((gchar *)value, norm)!=0) {
+       sqlite3_bind_text(sql.insert_way_names_nls, 4, norm, -1, SQLITE_TRANSIENT);
+}
+if (norm)
+       g_free(norm);
+#endif
 db_exec(sql.insert_way_names_nls);
 }
 
@@ -1417,7 +1442,7 @@ switch (t) {
                        cnode->data->name=NULL;
                        v=g_hash_table_lookup(osm_node_tags, "name");
                        if (v)
-                               cnode->data->name=g_strstrip(g_strdup(v));
+                               cnode->data->name=g_strstrip(g_utf8_normalize(v, -1, G_NORMALIZE_ALL));
                        v=g_hash_table_lookup(osm_node_tags, "note");
                        if (v)
                                cnode->data->desc=g_strstrip(g_strdup(v));
@@ -1494,7 +1519,7 @@ switch (t) {
 
                v=g_hash_table_lookup(osm_way_tags, "name");
                if (v) {
-                       cway->data->name=g_strdup(v);
+                       cway->data->name=g_utf8_normalize(v, -1, G_NORMALIZE_ALL);
                        /* Try to find other language names */
                        cway->data->names=g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
                        g_hash_table_foreach(osm_way_tags, find_nls_names, cway->data->names);