From 4285895034898735e3ff2d4e08db6dc4e2140996 Mon Sep 17 00:00:00 2001 From: Kaj-Michael Lang Date: Sat, 3 Nov 2007 00:40:50 +0200 Subject: [PATCH] Normalize utf8 names. Add code to transliterate names, but undef for now as it didn't work. --- src/osm.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/osm.c b/src/osm.c index d1f4ac6..dc0321a 100644 --- a/src/osm.c +++ b/src/osm.c @@ -58,6 +58,10 @@ #endif /* #define VERBOSE_KEYS */ + +/* Use g_convert to transliterate names to ASCII. Disabled for now: it did not work with the iconv on the author's system. */ +/* #define TRANSLIT_NAMES */ + #define FILE_BUFFER 65535 #define OSM_DB_FILE "osm-planet.db" @@ -461,7 +465,7 @@ sqlite3_prepare_v2(db, "insert into way_n2n (wid,f,t) values (?,?,?)", -1, &sql. sqlite3_prepare_v2(db, "delete from way_n2n where wid=?", -1, &sql.delete_way_n2n, NULL); /* Way names */ -sqlite3_prepare_v2(db, "insert or replace into way_names (wid,name) values (?, ?)", -1, &sql.insert_way_name, NULL); +sqlite3_prepare_v2(db, "insert or replace into way_names (wid,name,norm) values (?, ?, ?)", -1, &sql.insert_way_name, NULL); sqlite3_prepare_v2(db, "delete from way_names", -1, &sql.delete_way_name, NULL); /* Way postal codes */ @@ -469,7 +473,7 @@ sqlite3_prepare_v2(db, "insert or replace into way_pc (wid,pc) values (?, ?)", sqlite3_prepare_v2(db, "delete from way_pc", -1, &sql.delete_way_pc, NULL); /* Other language names for ways */ -sqlite3_prepare_v2(db, "insert into way_names_nls (wid,lang,name) values (?, ?, ?)", -1, &sql.insert_way_names_nls, NULL); +sqlite3_prepare_v2(db, "insert into way_names_nls (wid,lang,name, norm) values (?, ?, ?, ?)", -1, &sql.insert_way_names_nls, NULL); sqlite3_prepare_v2(db, "delete from way_names_nls where wid=?", -1, &sql.delete_way_names_nls, NULL); /* Way ref and int_ref */ @@ -652,6 +656,8 @@ db_exec(sql.insert_way_ref); static void db_insert_way_name(way *w) { +gchar *norm; + if (!w->data) return; if (!w->data->name) @@ -659,9 
+665,19 @@ if (!w->data->name) way_names++; + sqlite3_bind_int(sql.insert_way_name, 1, w->id); sqlite3_bind_text(sql.insert_way_name, 2, w->data->name, -1, SQLITE_TRANSIENT); +#ifdef TRANSLIT_NAMES +norm=g_convert(w->data->name, -1, "ASCII//TRANSLIT//IGNORE", "utf8", NULL, NULL, NULL); +if (norm && strcmp(w->data->name, norm)!=0) { + sqlite3_bind_text(sql.insert_way_name, 3, norm, -1, SQLITE_TRANSIENT); +} +if (norm) + g_free(norm); +#endif + db_exec(sql.insert_way_name); } @@ -696,12 +712,21 @@ db_exec(sql.delete_way_pc); static void db_insert_way_names_nls_cb(gpointer key, gpointer value, gpointer user_data) { +gchar *norm; + way *w=(way *)user_data; sqlite3_bind_int(sql.insert_way_names_nls, 1, w->id); sqlite3_bind_text(sql.insert_way_names_nls, 2, (gchar *)key, -1, SQLITE_TRANSIENT); sqlite3_bind_text(sql.insert_way_names_nls, 3, (gchar *)value, -1, SQLITE_TRANSIENT); - +#ifdef TRANSLIT_NAMES +norm=g_convert((gchar *)value, -1, "ASCII//TRANSLIT//IGNORE", "utf8", NULL, NULL, NULL); +if (norm && strcmp((gchar *)value, norm)!=0) { + sqlite3_bind_text(sql.insert_way_names_nls, 4, norm, -1, SQLITE_TRANSIENT); +} +if (norm) + g_free(norm); +#endif db_exec(sql.insert_way_names_nls); } @@ -1417,7 +1442,7 @@ switch (t) { cnode->data->name=NULL; v=g_hash_table_lookup(osm_node_tags, "name"); if (v) - cnode->data->name=g_strstrip(g_strdup(v)); + cnode->data->name=g_strstrip(g_utf8_normalize(v, -1, G_NORMALIZE_ALL)); v=g_hash_table_lookup(osm_node_tags, "note"); if (v) cnode->data->desc=g_strstrip(g_strdup(v)); @@ -1494,7 +1519,7 @@ switch (t) { v=g_hash_table_lookup(osm_way_tags, "name"); if (v) { - cway->data->name=g_strdup(v); + cway->data->name=g_utf8_normalize(v, -1, G_NORMALIZE_ALL); /* Try to find other language names */ cway->data->names=g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free); g_hash_table_foreach(osm_way_tags, find_nls_names, cway->data->names); -- 2.39.5