]> err.no Git - mapper/blob - src/osm-db-import.c
Small changes in importer, add comments and use more transactions
[mapper] / src / osm-db-import.c
1 /*
2  * This file is part of mapper
3  *
4  * Copyright (C) 2008 Kaj-Michael Lang
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20
21 /*
22  * Routines to read OSM planet XML file and store it in a sqlite3 database.
23  * Reads in all nodes (if used, skips nodes outside bounding box)
24  * Special POI nodes are stored in POI table.
25  * Place POI nodes are stored in place table.
26  *
27  * Ways are read in and their data (name, type, etc) are stored 
28  * in way, way_name and way_ref tables.
29  * 
30  * Nodes used by the ways are stored in way_n2n table.
31  *
32  */
33
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <string.h>
37 #include <strings.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <fcntl.h>
41 #include <math.h>
42 #include <glib.h>
43 #include <glib/gstdio.h>
44 #include <sqlite3.h>
45 #include <expat.h>
46 #include <bzlib.h>
47
48 #include "osm.h"
49 #include "latlon.h"
50 #include "db.h"
51 #include "osm-db-import.h"
52
/* Flip the "#if 0" to "#if 1" to enable the VERBOSE debug printouts
 * (used by print_way(), print_node() and find_nls_names() in this file). */
#if 0
#define VERBOSE
#endif
/* #define VERBOSE_KEYS */


/* Use g_convert to transliterate names. Disabled: iconv on the author's system appeared to be broken, so this did not work. */
/* #define TRANSLIT_NAMES */

/* Buffer size in bytes (128 KiB); presumably the read buffer for the input file - confirm at point of use. */
#define FILE_BUFFER (128*1024)
63
/* Import statistics counters */
static guint node_cnt=0;                /* Nodes */
static guint node_skip_cnt=0;   /* Skipped nodes */
static guint noded_cnt=0;               /* Nodes with (usable) data */
static guint way_cnt=0;                 /* Ways */
static guint way_names=0;               /* Ways with name */
static guint way_refs=0;                /* Ways with ref or int_ref */

/* Presumably counts of rows actually written to the database - confirm at point of use */
static guint dbnode_cnt=0;
static guint dbnoded_cnt=0;
static guint dbway_cnt=0;

/* For threaded importing */
static GThread* import_thread=NULL;
static GSourceFunc osm_import_progress_cb=NULL;
static osm_import_data_req osm_import_req;
static guint import_sid=0;

/* NOTE(review): presumably TRUE when importing on top of an existing database - confirm */
static gboolean is_update=FALSE;
/* The expat XML parser instance */
static XML_Parser xp;
83
/* XML tag IDs; check_tag() maps an element name to one of these */
typedef enum {
        START,
        IN_OSM_TAG,             /* <osm> */
        IN_NODE_TAG,            /* <node> */
        IN_WNODE_TAG,           /* <nd> */
        IN_WAY_TAG,             /* <way> */
        IN_KEY_TAG,             /* <tag> */
        IN_BOUND_TAG,           /* <bound> */
        IN_RELATION_TAG,        /* <relation> */
        IN_MEMBER_TAG,          /* <member> */
        END,
        ERROR                   /* Returned by check_tag() for any other element name */
} tag_state_t;
98
/* Parent tag type */
/* NOTE(review): presumably records which element encloses the <tag> currently being parsed - confirm */
typedef enum {
        IS_NONE,
        IS_NODE,
        IS_WAY,
        IS_RELATION
} tag_parent_t;
106
107 /* Node types table */
108 /* XXX: Add support for parent category */
109 struct _nodeinfo {
110         gchar *k, *v;
111         node_type_t type;
112 } nodeinfo[] = {
113         { "amenity", "fuel",            NODE_AMENITY_FUEL },
114         { "amenity", "parking",         NODE_AMENITY_PARKING },
115
116         { "amenity", "pub",                     NODE_AMENITY_PUB },
117         { "amenity", "nightclub",       NODE_AMENITY_NIGHTCLUB },
118         { "amenity", "biergarten",      NODE_AMENITY_PUB },
119         { "amenity", "cafe",            NODE_AMENITY_CAFE },
120         { "amenity", "fast_food",       NODE_AMENITY_FOOD },
121         { "amenity", "restaurant",      NODE_AMENITY_RESTAURANT },
122
123         { "amenity", "telephone",       NODE_AMENITY_TELEPHONE },
124         { "amenity", "toilets",         NODE_AMENITY_WC },
125
126         { "amenity", "hospital",        NODE_AMENITY_HOSPITAL },
127         { "amenity", "doctors",         NODE_AMENITY_HOSPITAL },
128         { "amenity", "pharmacy",        NODE_AMENITY_PHARMACY },
129
130         { "amenity", "post_office",     NODE_AMENITY_POST },
131         { "amenity", "post_box",        NODE_AMENITY_POST_BOX },
132
133         { "amenity", "cinema",          NODE_AMENITY_CINEMA },
134         { "amenity", "theatre",         NODE_AMENITY_THEATRE },
135
136         { "amenity", "atm",                     NODE_AMENITY_ATM },
137         { "amenity", "bank",            NODE_AMENITY_BANK },
138
139         { "amenity", "police",          NODE_AMENITY_POLICE },
140
141         { "amenity", "speed_trap",      NODE_AMENITY_SPEEDCAM },
142         { "amenity", "speed_camera",    NODE_AMENITY_SPEEDCAM },
143         { "amenity", "speed camera",    NODE_AMENITY_SPEEDCAM },
144
145         { "amenity", "place_of_worship",NODE_AMENITY_POW },
146
147         { "amenity", "school",          NODE_AMENITY_SCHOOL },
148         { "amenity", "college",         NODE_AMENITY_COLLEGE },
149         { "amenity", "university",      NODE_AMENITY_COLLEGE },
150
151         { "amenity", "library",         NODE_AMENITY_LIBRARY },
152         { "amenity", "townhall",        NODE_AMENITY_TOWNHALL },
153
154         { "amenity", "supermarket",     NODE_AMENITY_SHOP_SUPERMARKET },
155         { "amenity", "shopping_centre", NODE_AMENITY_SHOPPING_CENTER },
156         { "amenity", "shop",            NODE_AMENITY_SHOP },
157         { "amenity", "shops",           NODE_AMENITY_SHOP },
158         { "amenity", "shopping",        NODE_AMENITY_SHOPPING_CENTER },
159         { "amenity", "shopping_mall",NODE_AMENITY_SHOPPING_CENTER },
160         { "amenity", "cycle_shop",      NODE_AMENITY_SHOP },
161         { "amenity", "bike_shop",       NODE_AMENITY_SHOP },
162         { "amenity", "coffee_shop",     NODE_AMENITY_SHOP },
163         { "amenity", "indoor_shopping_centre",  NODE_AMENITY_SHOP },
164         { "amenity", "farm_shop",       NODE_AMENITY_SHOP },
165         { "amenity", "tea_shop",        NODE_AMENITY_SHOP },
166
167         /* Shops */
168         { "shop",        "supermarket", NODE_AMENITY_SHOP_SUPERMARKET },
169         { "shop",        "alcohol",             NODE_AMENITY_SHOP_ALCOHOL },
170         { "shop",        "flowers",             NODE_AMENITY_SHOP_FLOWERS },
171         { "shop",        "bakery",              NODE_AMENITY_SHOP },
172         { "shop",        "butcher",             NODE_AMENITY_SHOP },
173         { "shop",        "clothing",    NODE_AMENITY_SHOP },
174         { "shop",        "souvenir",    NODE_AMENITY_SHOP },
175         { "shop",        "bicycles",    NODE_AMENITY_SHOP },
176         { "shop",        "grocers",             NODE_AMENITY_SHOP },
177         { "shop",        "newsagents",  NODE_AMENITY_SHOP },
178         { "shop",        "convenience", NODE_AMENITY_SHOP },
179         { "shop",        "bakers",              NODE_AMENITY_SHOP },
180         { "shop",        "garden_centre",NODE_AMENITY_SHOP },
181         { "shop",        "photography", NODE_AMENITY_SHOP },
182         { "shop",        "general_store",NODE_AMENITY_SHOP },
183         { "shop",        "food",                NODE_AMENITY_SHOP },
184         { "shop",        "drinks",              NODE_AMENITY_SHOP },
185         { "shop",        "sex",                 NODE_AMENITY_SHOP_ADULT },
186
187         { "shop",        "pharmacy",    NODE_AMENITY_PHARMACY },
188
189         /* Sport */
190         { "sport"  , "swimming",        NODE_SPORT_SWIMMING },
191         { "sport"  , "golf",            NODE_SPORT_GOLF },
192         { "sport"  , "tennis",          NODE_SPORT_TENNIS },
193         { "sport"  , "football",        NODE_SPORT_FOOTBALL },
194         { "sport"  , "soccer",          NODE_SPORT_SOCCER },
195         { "sport"  , "baskteball",      NODE_SPORT_BASKETBALL },
196         { "sport"  , "rugby",           NODE_SPORT_RUGBY },
197         { "sport"  , "skating",         NODE_SPORT_SKATING },
198         { "sport"  , "hockey",          NODE_SPORT_HOCKEY },
199         { "sport"  , "skateboard",      NODE_SPORT_SKATEBOARD },
200         { "sport"  , "bowling",         NODE_SPORT_BOWLING },
201         { "sport"  , "10pin",           NODE_SPORT_BOWLING },
202         { "sport"  , "motor",           NODE_SPORT_MOTOR },
203         { "sport"  , "shooting_range",NODE_SPORT_SHOOTING },
204         { "sport"  , "paintball",       NODE_SPORT_PAINTBALL },
205         { "sport"  , "horse_racing",NODE_SPORT_HORSES },
206         { "sport"  , "horse",           NODE_SPORT_HORSES },
207         { "sport"  , "horses",          NODE_SPORT_HORSES },
208         { "sport"  , "dog_racing",      NODE_SPORT_DOG },
209         { "sport"  , "pelota",          NODE_SPORT_PELOTA },
210         { "sport"  , "racquet",         NODE_SPORT_RACQUET },
211         { "sport"  , "equestrian",      NODE_SPORT_HORSES },
212         { "sport"  , "baseball",        NODE_SPORT_BASEBALL },
213         { "sport"  , "cricket",         NODE_SPORT_CRICKET },
214         { "sport"  , "croquet",         NODE_SPORT_CROQUET },
215         { "sport"  , "cycling",         NODE_SPORT_CYCLING },
216         { "sport"  , "bowls",           NODE_SPORT_BOWLS },
217         { "sport"  , "athletics",       NODE_SPORT_ATHLETICS },
218         { "sport"  , "gymnastics",      NODE_SPORT_GYMNASTICS },
219         { "sport"  , "multi",           NODE_SPORT_OTHER },
220         { "leisure", "sport_centre",NODE_SPORT_CENTER },
221
222         /* Tourism */
223         { "tourism", "information",     NODE_TOURISM_INFO },
224         { "tourism", "camp_site",       NODE_TOURISM_CAMP_SITE },
225         { "tourism", "caravan_site",NODE_TOURISM_CARAVAN_SITE },
226         { "tourism", "picnic_site",     NODE_TOURISM_PICNIC_SITE },
227         { "tourism", "theme_park",      NODE_TOURISM_THEME_PARK },
228         { "tourism", "hotel",           NODE_TOURISM_HOTEL },
229         { "tourism", "motel",           NODE_TOURISM_MOTEL },
230         { "tourism", "hostel",          NODE_TOURISM_HOSTEL },
231         { "tourism", "attraction",      NODE_TOURISM_ATTRACTION },
232         { "tourism", "zoo",                     NODE_TOURISM_ATTRACTION },
233
234         { "historic", "ruins",          NODE_TOURISM_ATTRACTION },
235         { "historic", "monument",       NODE_TOURISM_ATTRACTION },
236         { "historic", "memorial",       NODE_TOURISM_ATTRACTION },
237         { "historic", "museum",         NODE_HISTORIC_MUSEUM },
238         { "historic", "castle",         NODE_HISTORIC_CASTLE },
239
240         { "railway", "station",         NODE_RAILWAY_STATION },
241         { "railway", "halt",            NODE_RAILWAY_HALT },
242
243         { "aeroway", "terminal",        NODE_AIRPORT_TERMINAL },
244         { "aeroway", "aerodrome",       NODE_AIRPORT_TERMINAL },
245
246         /* Places */    
247         { "place", "city",                      NODE_PLACE_CITY },
248         { "place", "town",                      NODE_PLACE_TOWN },
249         { "place", "village",           NODE_PLACE_VILLAGE },
250         { "place", "hamlet",            NODE_PLACE_HAMLET },
251         { "place", "locality",          NODE_PLACE_LOCALITY },
252         { "place", "suburb",            NODE_PLACE_SUBURB },
253         { "place", "island",            NODE_PLACE_ISLAND },
254
255         { "highway", "traffic_signals", NODE_TRAFFIC_SIGNALS },
256         { "highway", "motorway_junction",       NODE_JUNCTION },
257         { "highway", "services",        NODE_AMENITY_PARKING },
258         { "highway", "toll_booth",      NODE_TOLLBOOTH },
259         { "highway", "gate",            NODE_GATE },
260
261         { NULL, NULL, NODE_PLAIN }
262 };
263
/* Array to get id number and defaults for ways of different types */
/*
 * Each entry maps an OSM tag (key k, value v) to a way type plus defaults:
 * a default speed (presumably km/h - confirm) and the oneway, link, area,
 * car and foot flags, in that order. The table ends with a NULL/NULL entry.
 */
struct _wayinfo {
        gchar *k, *v;
        guint defspeed;
        way_type_t type;
        gboolean oneway, link, area, car, foot;
} wayinfo[] = {
        { "highway", "motorway",120,WAY_MOTORWAY,               TRUE, FALSE, FALSE, TRUE, FALSE },
        { "highway", "motorway_link",120,WAY_MOTORWAY,  TRUE, TRUE, FALSE, TRUE, FALSE },
        { "highway", "trunk",100,WAY_TRUNK,                             FALSE, FALSE, FALSE, TRUE, FALSE },
        { "highway", "trunk_link",100,WAY_TRUNK,                FALSE, TRUE, FALSE, TRUE, FALSE },
        { "highway", "primary",80,WAY_PRIMARY,                  FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "primary_link",60,WAY_PRIMARY,             FALSE, TRUE, FALSE, TRUE, TRUE },
        { "highway", "secondary",80,WAY_SECONDARY,              FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "secondary_link",60,WAY_SECONDARY, FALSE, TRUE, FALSE, TRUE, TRUE },
        { "highway", "tertiary",60,WAY_TERTIARY,                FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "unclassified",50,WAY_UNCLASSIFIED,        FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "byway",40,WAY_UNCLASSIFIED,       FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "residential",40,WAY_RESIDENTIAL,  FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "service",20,WAY_SERVICE,                  FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "track",20,WAY_TRACK,                              FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "unsurfaced",60,WAY_TRACK,                 FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "minor",60,WAY_TRACK,                      FALSE, FALSE, FALSE, TRUE, TRUE },
        { "highway", "pedestrian",20,WAY_FOOTWAY,               FALSE, FALSE, FALSE, FALSE, TRUE },
        { "highway", "footway",1,WAY_FOOTWAY,                   FALSE, FALSE, FALSE, FALSE, TRUE },
        { "highway", "steps",0,WAY_FOOTWAY,                             FALSE, FALSE, FALSE, FALSE, TRUE},
        { "highway", "bridleway",10,WAY_FOOTWAY,                FALSE, FALSE, FALSE, FALSE, TRUE },
        { "highway", "cycleway",10,WAY_CYCLEWAY,                FALSE, FALSE, FALSE, FALSE, TRUE },
        { "railway", "rail",0,WAY_RAIL,                                 FALSE, FALSE, FALSE, FALSE, FALSE },
        { "aeroway", "runway",0,WAY_RUNWAY,                             FALSE, FALSE, FALSE, FALSE, FALSE },
        { "aeroway", "taxiway",0,WAY_TAXIWAY,                   FALSE, FALSE, FALSE, FALSE, FALSE },
        { "natural", "water",0,WAY_WATER,                               FALSE, FALSE, TRUE, FALSE, FALSE },
        { "waterway", "river",0,WAY_WATER,                              FALSE, FALSE, FALSE, FALSE, FALSE },
        { "waterway", "canal",0,WAY_WATER,                              FALSE, FALSE, FALSE, FALSE, FALSE },
        { "waterway", "stream",0,WAY_WATER,                             FALSE, FALSE, FALSE, FALSE, FALSE },
        { "building", "*",0,WAY_UNWAYED,                                FALSE, FALSE, TRUE, FALSE, FALSE },
        /* Terminator */
        { NULL, NULL, 0, WAY_UNWAYED, FALSE, FALSE, FALSE, FALSE, FALSE }
};
302
/* Database handle used by all db_* helpers in this file */
static sqlite3 *db;
tag_parent_t tag_parent=IS_NONE;

/* Parsed data; osm_nodes is looked up by node id in osm_find_node() */
static GHashTable *osm_nodes;
static GHashTable *osm_node_tags;
static GHashTable *osm_way_tags;
static GSList *osm_ways;
static GSList *osm_poi;

/* NOTE(review): presumably lookup tables for resolving "is_in" place names - confirm at point of use */
static GHashTable *osm_place_country;
static GHashTable *osm_place_region;
static GHashTable *osm_place_city;
static GHashTable *osm_place_suburb;
static GHashTable *osm_place_village;
static GHashTable *osm_node_isin;
static GHashTable *osm_way_isin;

/* NOTE(review): presumably the node and way currently being parsed - confirm */
static node *cnode=NULL;
static way *cway=NULL;
322
/* Prepared statements used by the importer; filled in by osm_db_prepare() */
struct sql_stmt {
        /* POIs */
        sqlite3_stmt *insert_poi;
        sqlite3_stmt *delete_osm_poi;

        /* Places */
        sqlite3_stmt *insert_place;
        sqlite3_stmt *delete_places;

        /* Nodes */
        sqlite3_stmt *insert_node;
        sqlite3_stmt *delete_nodes;
        sqlite3_stmt *select_node;
        sqlite3_stmt *update_node;      /* Increments the node usage count (column l) */

        /* Ways */
        sqlite3_stmt *insert_way_data;
        sqlite3_stmt *insert_way_ref;
        sqlite3_stmt *insert_way_pc;
        sqlite3_stmt *insert_way_name;
        sqlite3_stmt *insert_way_names_nls;
        sqlite3_stmt *insert_way_n2n;
        sqlite3_stmt *delete_way;
        sqlite3_stmt *delete_way_n2n;
        sqlite3_stmt *delete_way_name;
        sqlite3_stmt *delete_way_names_nls;
        sqlite3_stmt *delete_way_ref;
        sqlite3_stmt *delete_way_pc;
};
/* The single instance shared by all db_* helpers */
static struct sql_stmt sql;
353
/* Optional bounding box; osm_node_check_box() only consults bbox when use_bbox is TRUE */
static struct map_bbox bbox;
static gboolean use_bbox;

/* Forward declarations */
static void osm_free_way_data(way *w);
static void print_way(way *w);

static gboolean osm_db_prepare(sqlite3 *db);
static gboolean db_insert_node(node *n);
static guint32 osm_find_way_place(way *w, node_type_t nt);
363
364 /****************************************************/
365 /* Functions */
366 /****************************************************/
367
368 static void
369 db_finalize(void)
370 {
371 sqlite3_finalize(sql.insert_poi);
372 sqlite3_finalize(sql.delete_osm_poi);
373
374 sqlite3_finalize(sql.insert_node);
375 sqlite3_finalize(sql.select_node);
376 sqlite3_finalize(sql.delete_nodes);
377 sqlite3_finalize(sql.update_node);
378
379 sqlite3_finalize(sql.insert_place);
380 sqlite3_finalize(sql.delete_places);
381
382 sqlite3_finalize(sql.delete_way);
383 sqlite3_finalize(sql.insert_way_data);
384
385 sqlite3_finalize(sql.delete_way_name);
386 sqlite3_finalize(sql.insert_way_name);
387
388 sqlite3_finalize(sql.delete_way_n2n);
389 sqlite3_finalize(sql.insert_way_n2n);
390
391 sqlite3_finalize(sql.delete_way_pc);
392 sqlite3_finalize(sql.insert_way_pc);
393
394 sqlite3_finalize(sql.delete_way_names_nls);
395 sqlite3_finalize(sql.insert_way_names_nls);
396 }
397
398 static gboolean
399 osm_db_prepare(sqlite3 *db)
400 {
401 /* Way nodes */
402 sqlite3_prepare_v2(db, "insert or replace into nodes (nid,ilat,ilon,rlat,rlon,l,f) values (?,?,?,?,?,0,?)", -1, &sql.insert_node, NULL);
403 sqlite3_prepare_v2(db, "select ilat,ilon,l from nodes where nid=?", -1, &sql.select_node, NULL);
404 sqlite3_prepare_v2(db, "delete from nodes", -1, &sql.delete_nodes, NULL);
405 sqlite3_prepare_v2(db, "update nodes set l=l+1 where nid=?", -1, &sql.update_node, NULL);
406
407 /* Places */
408 sqlite3_prepare_v2(db, "insert or replace into places (nid,type,name,isin_c,isin_p) values (?, ?, ?, ?, ?)", -1, &sql.insert_place, NULL);
409 sqlite3_prepare_v2(db, "delete from places", -1, &sql.delete_places, NULL);
410
411 /* POI nodes */
412 if (sqlite3_prepare_v2(db, "insert or replace into poi (osm_id, lat, lon, label, cat_id, public, source, priority, isin_c, isin_p, desc, url, postal_code) "
413                                            " values (?, ?, ?, ?, ?, 1, 1, ?, ?, ?, ?, ?, ?)", -1, &sql.insert_poi, NULL)!=SQLITE_OK)
414         g_printf("SQL: %s\n", sqlite3_errmsg(db));
415
416 sqlite3_prepare_v2(db, "delete from poi where osm_id>0 and source=1", -1, &sql.delete_osm_poi, NULL);
417
418 /* Ways */
419 sqlite3_prepare_v2(db, "insert or replace into way (wid,nodes,type,flags,speed,isin_c,isin_p,lat,lon) values (?, ?, ?, ?, ?, ?, ?, ?, ?)", -1, &sql.insert_way_data, NULL);
420 sqlite3_prepare_v2(db, "delete from way", -1, &sql.delete_way, NULL);
421
422 /* Way nodes */
423 sqlite3_prepare_v2(db, "insert into way_n2n (wid,f,t) values (?,?,?)", -1, &sql.insert_way_n2n, NULL);
424 sqlite3_prepare_v2(db, "delete from way_n2n where wid=?", -1, &sql.delete_way_n2n, NULL);
425
426 /* Way names */
427 sqlite3_prepare_v2(db, "insert or replace into way_names (wid,name,norm) values (?, ?, ?)",  -1, &sql.insert_way_name, NULL);
428 sqlite3_prepare_v2(db, "delete from way_names", -1, &sql.delete_way_name, NULL);
429
430 /* Way postal codes */
431 sqlite3_prepare_v2(db, "insert or replace into way_pc (wid,pc) values (?, ?)",  -1, &sql.insert_way_pc, NULL);
432 sqlite3_prepare_v2(db, "delete from way_pc", -1, &sql.delete_way_pc, NULL);
433
434 /* Other language names for ways */
435 sqlite3_prepare_v2(db, "insert into way_names_nls (wid,lang,name, norm) values (?, ?, ?, ?)",  -1, &sql.insert_way_names_nls, NULL);
436 sqlite3_prepare_v2(db, "delete from way_names_nls where wid=?", -1, &sql.delete_way_names_nls, NULL);
437
438 /* Way ref and int_ref */
439 sqlite3_prepare_v2(db, "insert or replace into way_ref (rid,ref,int_ref) values (?, ?, ?)", -1, &sql.insert_way_ref, NULL);
440 sqlite3_prepare_v2(db, "delete from way_ref", -1, &sql.delete_way_ref, NULL);
441
442 return TRUE;
443 }
444
445 /********************************************************************/
446
/**
 * Debug helper: print a one-line summary of a way (id, node count, type,
 * speed, is-in ids, name and the oneway/roundabout/link flags).
 *
 * Compiles to an empty function unless VERBOSE is defined.
 */
static void
print_way(way *w)
{
#ifdef VERBOSE
g_assert(w);
g_printf("Way #%d(N:%d T:%d S:%d IS: %d/%d): %s [%s:%s:%s]\n", 
                w->id,  
                g_slist_length(w->nodes), 
                w->type,
                w->data ? w->data->speed : 0,
                w->data ? w->data->isin_c : -1,
                w->data ? w->data->isin_p : -1,
                w->data ? w->data->name ? w->data->name : "" : "", 
                w->flags & W_ONEWAY ? "-" : "=", 
                w->flags & W_ROUNDABOUT ? "O" : "-", 
                w->flags & W_LINK ? "|" : " ");
#endif
}
465
466 static void
467 print_node(node *n)
468 {
469 #ifdef VERBOSE
470 g_assert(n);
471 g_printf("Node #%d: T:%d IS: %d/%d [%s]\n",
472         n->id,
473         n->type,
474         n->data ? n->data->isin_c : -1,
475         n->data ? n->data->isin_p : -1,
476         n->data ? n->data->name : "");
477 #endif
478 }
479
480 /********************************************************************/
481
482 static gboolean
483 db_insert_node(node *n)
484 {
485 gint32 lat, lon;
486
487 g_assert(n);
488
489 lat=lat2mp_int(n->lat);
490 lon=lon2mp_int(n->lon);
491
492 sqlite3_bind_int(sql.insert_node, 1, n->id);
493
494 /* Projected and integerized lat/lot */
495 sqlite3_bind_int(sql.insert_node, 2, lat);
496 sqlite3_bind_int(sql.insert_node, 3, lon);
497 /* Original */
498 sqlite3_bind_double(sql.insert_node, 4, n->lat);
499 sqlite3_bind_double(sql.insert_node, 5, n->lon);
500 sqlite3_bind_int(sql.insert_node, 6, n->type);
501
502 db_exec(db, sql.insert_node);
503
504 return TRUE;
505 }
506
507 static gboolean
508 db_insert_place(node *n)
509 {
510 g_assert(n);
511 if (!n->data)
512         return FALSE;
513 if (!n->data->name)
514         return FALSE;
515 sqlite3_bind_int(sql.insert_place, 1, n->id);
516 sqlite3_bind_int(sql.insert_place, 2, n->type);
517 sqlite3_bind_text(sql.insert_place, 3, n->data->name, -1, SQLITE_TRANSIENT);
518 sqlite3_bind_int(sql.insert_place, 4, n->data->isin_p);
519 sqlite3_bind_int(sql.insert_place, 5, n->data->isin_c);
520
521 return db_exec(db,sql.insert_place);
522 }
523
524 static gboolean
525 db_insert_poi(node *n)
526 {
527 g_assert(n);
528 sqlite3_bind_int(sql.insert_poi, 1, n->id);
529 sqlite3_bind_double(sql.insert_poi, 2, n->lat);
530 sqlite3_bind_double(sql.insert_poi, 3, n->lon);
531 if (n->data->name)
532         sqlite3_bind_text(sql.insert_poi, 4, n->data->name, -1, SQLITE_TRANSIENT);
533 else
534         sqlite3_bind_text(sql.insert_poi, 4, "", -1, SQLITE_TRANSIENT);
535 sqlite3_bind_int(sql.insert_poi, 5, n->type);
536 sqlite3_bind_int(sql.insert_poi, 6, n->type/100);
537 sqlite3_bind_int(sql.insert_poi, 7, n->data->isin_c);
538 sqlite3_bind_int(sql.insert_poi, 8, n->data->isin_p);
539
540 if (n->data->desc)
541         sqlite3_bind_text(sql.insert_poi, 9, n->data->desc, -1, SQLITE_TRANSIENT);
542 if (n->data->url)
543         sqlite3_bind_text(sql.insert_poi, 10, n->data->url, -1, SQLITE_TRANSIENT);
544 if (n->data->postal_code)
545         sqlite3_bind_text(sql.insert_poi, 11, n->data->postal_code, -1, SQLITE_TRANSIENT);
546
547 return db_exec(db,sql.insert_poi);
548 }
549
550 /**
551  * Update node usage count
552  */
553 static gboolean
554 db_update_node_links(node *n)
555 {
556 g_assert(n);
557 sqlite3_bind_int(sql.update_node, 1, n->id);
558
559 return db_exec(db,sql.update_node);
560 }
561
562 /**
563  * Insert way,node1,node2 triplet
564  */
565 static gboolean
566 db_insert_way_n2n(way *w, node *nf, node *nt)
567 {
568 g_return_val_if_fail(w, FALSE);
569 g_return_val_if_fail(nf, FALSE);
570 g_return_val_if_fail(nt, FALSE);
571
572 sqlite3_bind_int(sql.insert_way_n2n, 1, w->id);
573 sqlite3_bind_int(sql.insert_way_n2n, 2, nf->id);
574 sqlite3_bind_int(sql.insert_way_n2n, 3, nt->id);
575
576 #ifdef VERBOSE_N2N
577 g_printf("%d [%d - %d]\n", w->id, nf->id, nt->id);
578 #endif
579
580 db_exec(db,sql.insert_way_n2n);
581 db_update_node_links(nf);
582 db_update_node_links(nt);
583 return TRUE;
584 }
585
586 /**
587  * Insert way ref and int_ref
588  */
589 static gboolean 
590 db_insert_way_ref(way *w)
591 {
592 if (!w->data)
593         return TRUE;
594
595 if (!w->data->ref && !w->data->int_ref)
596         return TRUE;
597
598 way_refs++;
599
600 sqlite3_bind_int(sql.insert_way_ref, 1, w->id);
601 if (w->data->ref)
602         sqlite3_bind_text(sql.insert_way_ref, 2, w->data->ref, -1, SQLITE_TRANSIENT);
603 if (w->data->int_ref)
604         sqlite3_bind_text(sql.insert_way_ref, 3, w->data->int_ref, -1, SQLITE_TRANSIENT);
605
606 return db_exec(db,sql.insert_way_ref);
607 }
608
609 /**
610  * Insert way name
611  */
612 static gboolean
613 db_insert_way_name(way *w)
614 {
615 if (!w->data)
616         return TRUE;
617 if (!w->data->name)
618         return TRUE;
619
620 way_names++;
621
622 sqlite3_bind_int(sql.insert_way_name, 1, w->id);
623 sqlite3_bind_text(sql.insert_way_name, 2, w->data->name, -1, SQLITE_TRANSIENT);
624
625 #ifdef TRANSLIT_NAMES
626 {
627         gchar *norm;
628         norm=g_convert(w->data->name, -1, "ASCII//TRANSLIT//IGNORE", "utf8", NULL, NULL, NULL);
629         if (norm && strcmp(w->data->name, norm)!=0) {
630                 sqlite3_bind_text(sql.insert_way_name, 3, norm, -1, SQLITE_TRANSIENT);
631         }
632         if (norm)
633                 g_free(norm);
634 }
635 #endif
636
637 return db_exec(db,sql.insert_way_name);
638 }
639
640 static gboolean
641 db_delete_way_names_nls(way *w)
642 {
643 sqlite3_bind_int(sql.delete_way_names_nls, 1, w->id);
644 return db_exec(db,sql.delete_way_names_nls);
645 }
646
647 static gboolean 
648 db_insert_way_pc(way *w)
649 {
650 if (!w->data)
651         return TRUE;
652 if (!w->data->postal_code)
653         return TRUE;
654
655 sqlite3_bind_int(sql.insert_way_pc, 1, w->id);
656 sqlite3_bind_text(sql.insert_way_pc, 2, w->data->postal_code, -1, SQLITE_TRANSIENT);
657
658 return db_exec(db,sql.insert_way_pc);
659 }
660
661 static gboolean
662 db_delete_way_pc(way *w)
663 {
664 sqlite3_bind_int(sql.delete_way_pc, 1, w->id);
665 return db_exec(db,sql.delete_way_pc);
666 }
667
668 static void
669 db_insert_way_names_nls_cb(gpointer key, gpointer value, gpointer user_data)
670 {
671 way *w=(way *)user_data;
672
673 sqlite3_bind_int(sql.insert_way_names_nls, 1, w->id);
674 sqlite3_bind_text(sql.insert_way_names_nls, 2, (gchar *)key, -1, SQLITE_TRANSIENT);
675 sqlite3_bind_text(sql.insert_way_names_nls, 3, (gchar *)value, -1, SQLITE_TRANSIENT);
676 #ifdef TRANSLIT_NAMES
677 {
678         gchar *norm;
679         norm=g_convert((gchar *value), -1, "ASCII//TRANSLIT//IGNORE", "utf8", NULL, NULL, NULL);
680         if (norm && strcmp((gchar *)value, norm)!=0) {
681                 sqlite3_bind_text(sql.insert_way_names_nls, 4, norm, -1, SQLITE_TRANSIENT);
682         }
683         if (norm)
684                 g_free(norm);
685 }
686 #endif
687 db_exec(db,sql.insert_way_names_nls);
688 }
689
690 static void
691 db_insert_way_names_nls(way *w)
692 {
693 if (!w->data)
694         return;
695 if (!w->data->names)
696         return;
697
698 g_hash_table_foreach(w->data->names, db_insert_way_names_nls_cb, w);
699 }
700
701 /**
702  * Insert all data for the given way
703  * - name
704  * - ref
705  * - nodes
706  * 
707  */
708 static gboolean
709 db_insert_way(way *w)
710 {
711 GSList *iter;
712 guint ncnt;
713 node *wmn;
714
715 if (!w)
716         return FALSE;
717
718 /* Skip things we don't use (yet) */
719 if (w->type==WAY_UNWAYED || w->type>WAY_ROAD_END)
720         return TRUE;
721
722 /* Insert nodes */
723 for (iter=w->nodes; iter!=NULL; iter=iter->next) {
724         if (!iter->next)
725                 break;
726         db_insert_way_n2n(w, iter->data, iter->next->data);
727 }
728
729 if (w->id==0)
730         return FALSE;
731
732 if (w->data) {
733         w->data->isin_p=osm_find_way_place(w, NODE_PLACE_CITY);
734         w->data->isin_c=osm_find_way_place(w, NODE_PLACE_COUNTRY);
735 }
736
737 print_way(w);
738
739 sqlite3_bind_int(sql.insert_way_data, 1, w->id);
740 sqlite3_bind_int(sql.insert_way_data, 2, w->ncnt);
741 sqlite3_bind_int(sql.insert_way_data, 3, w->type);
742 sqlite3_bind_int(sql.insert_way_data, 4, w->flags);
743 if (w->data) {
744         sqlite3_bind_int(sql.insert_way_data, 5, w->data->speed);
745         sqlite3_bind_int(sql.insert_way_data, 6, w->data->isin_c);
746         sqlite3_bind_int(sql.insert_way_data, 7, w->data->isin_p);
747 }
748
749 /* Get middle node, use it as a rough way location */
750 ncnt=g_slist_length(w->nodes);
751 if (ncnt>1) {
752         wmn=g_slist_nth_data(w->nodes, ncnt/2);
753         if (wmn) {
754                 sqlite3_bind_double(sql.insert_way_data, 8, wmn->lat);
755                 sqlite3_bind_double(sql.insert_way_data, 9, wmn->lon);
756         } else {
757                 g_printerr("Failed to get way middlepoint node for location information!\n");
758         }
759 }
760
761 db_exec(db,sql.insert_way_data);
762
763 db_insert_way_ref(w);
764 db_insert_way_name(w);
765 db_insert_way_names_nls(w);
766 db_insert_way_pc(w);
767
768 osm_free_way_data(w);
769 return TRUE;
770 }
771
772 /********************************************************************/
773
774 static gchar *
775 get_attr_key_value(const gchar **p, gchar *key)
776 {
777 gchar **d;
778
779 d=p;
780 while (*d!=NULL) {
781         if (strncmp(*d, key, strlen(key))==0) {
782                 d++;
783                 return *d;
784         }
785         d++;
786         d++;
787 }
788 return NULL;
789 }
790
791 static tag_state_t 
792 check_tag(const gchar *tag)
793 {
794 if (strcmp(tag,"node")==0) return IN_NODE_TAG;
795 else if (strcmp(tag,"nd")==0) return IN_WNODE_TAG;
796 else if (strcmp(tag,"way")==0) return IN_WAY_TAG;
797 else if (strcmp(tag,"tag")==0) return IN_KEY_TAG;
798 else if (strcmp(tag,"osm")==0) return IN_OSM_TAG;
799 else if (strcmp(tag,"bound")==0) return IN_BOUND_TAG;
800 else if (strcmp(tag,"relation")==0) return IN_RELATION_TAG;
801 else if (strcmp(tag,"member")==0) return IN_MEMBER_TAG;
802 else return ERROR;
803 }
804
805 static void
806 find_nls_names(gpointer key, gpointer value, gpointer user_data)
807 {
808 gchar *k, *v;
809 gchar *tmp;
810 GHashTable *nls;
811
812 k=(gchar *)key;
813 v=(gchar *)value;
814 nls=(GHashTable *)user_data;
815
816 /* Check if it is a name key, return if not. */
817 if (g_str_has_prefix(k, "name:")==FALSE)
818         return;
819
820 tmp=g_strrstr(k, ":");
821 if (!tmp)
822         return;
823 tmp++; /* skip : */
824 if (*tmp==0)
825         return;
826 g_hash_table_insert(nls, g_strdup(tmp), g_strdup(v));
827 #ifdef VERBOSE
828 g_printf("NLS(%s): [%s]\n", tmp, v);
829 #endif
830 }
831
832 /********************************************************************/
833
834 static void
835 node_print (node *n)
836 {
837 g_assert(n);
838 if (n->data) {
839         g_printf("N: %d [%f:%f][%s](%d)\n", 
840                 n->id, n->lat, n->lon, 
841                 n->data->name ? n->data->name : "-", 
842                 n->type);
843 } else {
844         g_printf("N: %d [%f:%f]\n",
845                 n->id, n->lat, n->lon);
846 }
847 }
848
#ifdef DEBUG
/**
 * Debug helper: print every string of a NULL terminated string array
 * on one line, each one wrapped in brackets.
 */
static void
dump_array(const gchar **p)
{
const gchar **cur;

for (cur=p; *cur!=NULL; cur++)
	g_printf("[%s]", *cur);
g_print("\n");
}
#endif
863
864 static inline gboolean
865 osm_node_check_box(gdouble nlat, gdouble nlon)
866 {
867 if (use_bbox==FALSE)
868         return TRUE;
869 return (nlat > bbox.lat_min && nlat < bbox.lat_max && nlon > bbox.lon_min && nlon < bbox.lon_max) ? TRUE : FALSE;
870 }
871
872 static void
873 osm_new_node_data(node *n)
874 {
875 if (n==NULL) 
876         return;
877 if (n->data!=NULL) 
878         return;
879 n->data=g_slice_new0(node_data);
880 n->type=NODE_PLAIN;
881 noded_cnt++;
882 }
883
884 static void
885 osm_free_node_data(node *n)
886 {
887 g_assert(n);
888 g_assert(n->data);
889 if (n->data->name)
890         g_free(n->data->name);
891 if (n->data->url)
892         g_free(n->data->url);
893 if (n->data->desc)
894         g_free(n->data->desc);
895 if (n->data->postal_code)
896         g_free(n->data->postal_code);
897 g_slice_free(node_data, n->data);
898 n->data=NULL;
899 noded_cnt--;
900 }
901
902 static node *
903 osm_new_node(gint id, gdouble lat, gdouble lon)
904 {
905 node *n=NULL;
906
907 n=g_slice_new0(node);
908 g_assert(n);
909 n->id=id;
910 n->lat=lat;
911 n->lon=lon;
912 return n;
913 }
914
915 static void
916 osm_free_node(node *n)
917 {
918 g_assert(n);
919 g_slice_free(node, n);
920 }
921
922 static node *
923 osm_find_node(guint32 nid)
924 {
925 node *n;
926
927 g_assert(osm_nodes);
928 return g_hash_table_lookup(osm_nodes, GINT_TO_POINTER(nid));
929 }
930
931 static void
932 osm_new_way_data(way *w)
933 {
934 if (w==NULL) 
935         return;
936 if (w->data!=NULL) 
937         return;
938
939 w->data=g_slice_new0(way_data);
940 }
941
942 static void
943 osm_free_way_data(way *w)
944 {
945 g_assert(w);
946 if (!w->data)
947         return;
948 if (w->data->name)
949         g_free(w->data->name);
950 if (w->data->ref)
951         g_free(w->data->ref);
952 if (w->data->int_ref)
953         g_free(w->data->int_ref);
954 g_slice_free(way_data, w->data);
955 w->data=NULL;
956 }
957
958 static way *
959 osm_new_way(gint id)
960 {
961 way *w;
962
963 w=g_slice_new0(way);
964 g_assert(w);
965 w->id=id;
966 w->type=WAY_UNWAYED;
967
968 /* Add to list of ways */
969 return w;
970 }
971
972 static void
973 osm_free_way(way *w)
974 {
975 if (w->nodes)
976         g_slist_free(w->nodes);
977 g_slice_free(way, w);
978 }
979
980 static void
981 osm_way_add_to_list(way *w)
982 {
983 g_assert(w);
984 osm_ways=g_slist_prepend(osm_ways, w);
985 }
986
987 static void
988 osm_way_new_node(way *w, gint nid)
989 {
990 node *n;
991
992 g_assert(w);
993 n=osm_find_node(nid);
994 w->nodes=g_slist_prepend(w->nodes, n);
995 w->ncnt++;
996 }
997
998 /**
999  * Search the place hash table for the location of the node.
1000  *
1001  */
1002 static guint32 
1003 osm_find_node_place(node *n)
1004 {
1005 node *t;
1006 gchar **isin;
1007 gchar **place;
1008
1009 if (!n->data)
1010         return 0;
1011
1012 isin=g_hash_table_lookup(osm_node_isin, GINT_TO_POINTER(n->id));
1013
1014 if (!isin)
1015         return 0;
1016
1017 place=isin;
1018 while (*place!=NULL) {
1019         gchar *ps;
1020         ps=g_strstrip(*place);
1021 #ifdef VERBOSE
1022         g_printf("Checking (%d) [%s] in [%s]\n",n->type, n->data->name, ps);
1023 #endif
1024         switch (n->type) {
1025         case NODE_PLACE_CITY:
1026         case NODE_PLACE_TOWN:
1027         case NODE_PLACE_VILLAGE:
1028         case NODE_PLACE_HAMLET:
1029                 t=g_hash_table_lookup(osm_place_region, ps);
1030                 if (t)
1031                         return t->id;
1032                 t=g_hash_table_lookup(osm_place_country, ps);
1033                 if (t)
1034                         return t->id;
1035         break;
1036         case NODE_PLACE_SUBURB:
1037         case NODE_PLACE_LOCALITY:
1038                 t=g_hash_table_lookup(osm_place_city, ps);
1039                 if (t)
1040                         return t->id;
1041         break;
1042         case NODE_PLACE_ISLAND:
1043                 return 0;
1044         break;
1045         default:
1046                 t=g_hash_table_lookup(osm_place_city, ps);
1047                 if (t)
1048                         return t->id;
1049         break;
1050         }
1051         place++;
1052 }
1053
1054 return 0;
1055 }
1056
1057 static guint32
1058 osm_find_way_place(way *w, node_type_t nt)
1059 {
1060 gchar **isin;
1061 gchar **place;
1062
1063 isin=g_hash_table_lookup(osm_way_isin, GINT_TO_POINTER(w->id));
1064 if (!isin)
1065         return 0;
1066
1067 place=isin;
1068 while (*place!=NULL) {
1069         node *t;
1070         gchar *ps;
1071
1072         ps=g_strstrip(*place);
1073
1074 #ifdef VERBOSE
1075         g_printf("Checking (%d) in [%s]\n",w->id, ps);
1076 #endif
1077 switch (nt) {
1078         case NODE_PLACE_CITY:
1079         case NODE_PLACE_TOWN:
1080         case NODE_PLACE_VILLAGE:
1081         case NODE_PLACE_HAMLET:
1082         case NODE_PLACE_LOCALITY:
1083                 t=g_hash_table_lookup(osm_place_city, ps);
1084                 if (t)
1085                         return t->id;
1086         break;
1087         case NODE_PLACE_COUNTRY:
1088                 t=g_hash_table_lookup(osm_place_country, ps);
1089                 if (t)
1090                         return t->id;
1091         break;
1092         default:
1093                 g_assert_not_reached();
1094         break;
1095         }
1096         place++;
1097 }
1098
1099 return 0;
1100 }
1101
1102 /***********************************************************************/
1103
1104 static void
1105 osm_node_save_node(gint key, gpointer value, gpointer user_data)
1106 {
1107 node *n=(node *)value;
1108
1109 dbnode_cnt++;
1110 db_insert_node(n);
1111 if (dbnode_cnt % 20000==0)
1112         g_printf("\rNodes: %f%% (%u/%u)\n",
1113                 ((float)dbnode_cnt/(float)(node_cnt-node_skip_cnt))*100,
1114                 dbnode_cnt, node_cnt);
1115 }
1116
1117 /**
1118  * Check node type and insert as POI or Place
1119  * Discard extra data after insert.
1120  */
1121 static gboolean
1122 osm_node_save_poi(node *n, gpointer user_data)
1123 {
1124 if (!n) {
1125         g_printerr("ERROR: null poi\n");
1126         return FALSE;
1127 }
1128
1129 if (!n->data) {
1130         g_printerr("POI node with no data ?\n");
1131         return FALSE;
1132 }
1133
1134 n->data->isin_p=osm_find_node_place(n);
1135 n->data->isin_c=0;
1136
1137 if (n->type>NODE_POI_START && n->type<NODE_POI_END) {
1138         print_node(n);
1139         db_insert_poi(n);
1140         osm_free_node_data(n);
1141 } else if (n->type>NODE_PLACE_START && n->type<NODE_PLACE_END) {
1142         print_node(n);
1143         db_insert_place(n);
1144 } else {
1145         osm_free_node_data(n);
1146         return FALSE;
1147 }
1148
1149 return TRUE;
1150 }
1151
1152 static gboolean
1153 osm_planet_poi_clear_nodes(void)
1154 {
1155 g_print("Removing old OSM POIs...\n");
1156 db_transaction_begin(db);
1157 sqlite3_step(sql.delete_osm_poi);
1158 sqlite3_step(sql.delete_places);
1159 return db_transaction_commit(db);
1160 }
1161
1162 static gboolean
1163 osm_planet_poi_save_nodes(void)
1164 {
1165 g_print("Storing new POIs...\n");
1166 db_transaction_begin(db);
1167 g_slist_foreach(osm_poi, osm_node_save_poi, NULL);
1168 g_slist_free(osm_poi);
1169 return db_transaction_commit(db);
1170 }
1171
1172 /*********************************************************************/
1173
1174 static gboolean
1175 osm_planet_clear_nodes(void)
1176 {
1177 g_print("Clearing old nodes...\n");
1178 db_transaction_begin(db);
1179 sqlite3_step(sql.delete_nodes);
1180 return db_transaction_commit(db);
1181 }
1182
1183 static gboolean
1184 osm_planet_save_nodes(void)
1185 {
1186 g_print("Storing new nodes...\n");
1187 db_transaction_begin(db);
1188 g_hash_table_foreach(osm_nodes, osm_node_save_node, NULL);
1189 return db_transaction_commit(db);
1190 }
1191
1192 /*********************************************************************/
1193
1194 static void
1195 osm_way_save(way *value, gpointer user_data)
1196 {
1197 dbway_cnt++;
1198 db_transaction_begin(db);
1199 db_insert_way(value);
1200 db_transaction_commit(db);
1201 if (dbway_cnt % 15000==0 && dbway_cnt>0) {
1202                 g_printf("\rWays: %f%% (%u/%u)\n",
1203                         (((float)dbway_cnt/(float)way_cnt)*100),
1204                         dbway_cnt, way_cnt);
1205                 print_way(value);
1206 }
1207 }
1208
1209 static void
1210 osm_planet_clear_ways(void)
1211 {
1212 g_print("Clearing old way data...\n");
1213 db_transaction_begin(db);
1214 sqlite3_step(sql.delete_way);
1215 sqlite3_step(sql.delete_way_name);
1216 sqlite3_step(sql.delete_way_ref);
1217 sqlite3_step(sql.delete_way_n2n);
1218 db_transaction_commit(db);
1219 }
1220
1221 static gboolean
1222 osm_planet_save_ways(void)
1223 {
1224 g_print("Inserting new ways\n");
1225 g_slist_foreach(osm_ways, osm_way_save, NULL);
1226 }
1227
1228 /*********************************************************************/
1229
1230 static void
1231 osm_planet_save_all_nodes(void)
1232 {
1233 g_printf("Saving planet nodes to database:\n");
1234
1235 osm_planet_poi_clear_nodes();
1236 osm_planet_poi_save_nodes();
1237
1238 if (!is_update) {
1239         osm_planet_clear_nodes();
1240         osm_planet_clear_ways();
1241 }
1242 osm_planet_save_nodes();
1243 }
1244
/**
 * Write the way side of the import to the database.
 */
static void
osm_planet_save_all_ways(void)
{
g_printf("Saving planet way to database:\n");

(void)osm_planet_save_ways();
}
1252
1253 /***********************************************************************/
1254
1255 static void
1256 _osm_tag_start(void *userData, const char *name, const char **atts)
1257 {
1258 tag_state_t t;
1259 gchar *k, *v;
1260 guint32 id, ndref;
1261 gdouble nlat, nlon;
1262
1263 t=check_tag(name);
1264 switch (t) {
1265         case IN_OSM_TAG:
1266                 g_printf("Starting...\n");
1267         break;
1268         case IN_NODE_TAG:
1269                 tag_parent=IS_NODE;
1270                 node_cnt++;
1271
1272                 id=atoi(get_attr_key_value(atts, "id"));
1273                 nlat=atof(get_attr_key_value(atts, "lat"));
1274                 nlon=atof(get_attr_key_value(atts, "lon"));
1275
1276                 cnode=osm_new_node(id, nlat, nlon);
1277                 osm_node_tags=g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
1278         break;
1279         case IN_WAY_TAG:
1280                 tag_parent=IS_WAY;
1281                 way_cnt++;
1282                 id=atoi(get_attr_key_value(atts, "id"));
1283                 cway=osm_new_way(id);
1284                 osm_way_tags=g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
1285         break;
1286         case IN_WNODE_TAG:
1287                 ndref=atoi(get_attr_key_value(atts, "ref"));
1288                 if (use_bbox==TRUE) {
1289                         if (osm_find_node(ndref)==NULL) {
1290                                 cway->id=0;
1291                                 return;
1292                         }
1293                 }
1294                 osm_way_new_node(cway, ndref);
1295         break;
1296         case IN_KEY_TAG:
1297                 k=get_attr_key_value(atts, "k");
1298                 if (strcmp(k,"created_by")==0)
1299                         return;
1300                 if (strcmp(k,"source")==0)
1301                         return;
1302
1303                 v=get_attr_key_value(atts, "v");
1304 #ifdef VERBOSE_KEYS
1305                 g_printf("TAG: K=[%s] V=[%s]\n", k, v);
1306 #endif
1307
1308                 switch (tag_parent) {
1309                 case IS_NONE:
1310                         g_printf("Tag key/value pair but unknown owner\n");
1311                 break;
1312                 case IS_NODE:
1313                 {
1314                         if (!osm_node_tags)
1315                                 return;
1316
1317                         /* Insert key/value pairs into hash table */
1318                         if (cnode==NULL) {
1319                                 g_printerr("In node tags but node is NULL!\n");
1320                                 return;
1321                         }
1322                         g_hash_table_insert(osm_node_tags, g_strdup(k), g_strdup(v));
1323                 }
1324                 break;
1325                 case IS_WAY: 
1326                 {
1327                         if (cway==NULL) {
1328                                 g_printerr("In way tags but way is NULL!\n");
1329                                 return;
1330                         }
1331                         g_hash_table_insert(osm_way_tags, g_strdup(k), g_strdup(v));
1332                         osm_new_way_data(cway);
1333                 }
1334                 break;
1335                 case IS_RELATION:
1336
1337                 break;
1338                 }
1339         break;
1340         case IN_BOUND_TAG:
1341                 /* Ignore for now */
1342                 g_printf("Ignoring bound tag\n");
1343         break;
1344         case IN_RELATION_TAG:
1345                 tag_parent=IS_RELATION;
1346
1347         break;
1348         case IN_MEMBER_TAG:
1349
1350         break;
1351         default:
1352                 tag_parent=IS_NONE;
1353                 g_printf("Unknown tag: %s\n", name);
1354         break;
1355 }
1356 }
1357
/*
 * Copy the value of node tag "key" into the string field "nfield".
 *
 * The value is UTF-8 normalized and stripped. If the field is empty it
 * takes the new string, otherwise the new string is appended to the old
 * content, separated by a space, and the old string is freed.
 * Nothing happens if the tag is not set.
 */
#define GET_NODE_KEY(key, nfield) do { \
	gchar *_kv=g_hash_table_lookup(osm_node_tags, key); \
	if (_kv && !nfield) { \
		nfield=g_strstrip(g_utf8_normalize(_kv, -1, G_NORMALIZE_ALL_COMPOSE)); \
	} else if (_kv) { \
		gchar *_old=nfield; \
		gchar *_norm=g_utf8_normalize(_kv, -1, G_NORMALIZE_ALL_COMPOSE); \
		nfield=g_strdup_printf("%s %s", _old, g_strstrip(_norm)); \
		g_free(_old); \
		g_free(_norm); \
	} \
} while (0)
1373         
1374
1375 static void
1376 _osm_tag_end(void *userData, const char *name)
1377 {
1378 tag_state_t t;
1379 gchar *v;
1380 guint i;
1381 t=check_tag(name);
1382 switch (t) {
1383         case IN_NODE_TAG:
1384
1385                 if (node_cnt % 25000==0) {
1386                         g_printf("Nodes: %u of %u, POIs: %u, Outside box: %u\n", 
1387                                 node_cnt-node_skip_cnt, node_cnt, noded_cnt, node_skip_cnt);
1388                 }
1389
1390                 if (!osm_node_tags)
1391                         return;
1392
1393                 osm_new_node_data(cnode);
1394
1395                 for (i=0; nodeinfo[i].k; i++) {
1396                         v=g_hash_table_lookup(osm_node_tags, nodeinfo[i].k);
1397                         if (!v)
1398                                 continue;
1399                         if (strcasecmp (v, nodeinfo[i].v)==0) {
1400                                 cnode->type=nodeinfo[i].type;
1401                                 break;
1402                         }
1403                 }
1404
1405                 /* Check if node is inside bounding box, if not skip it. 
1406                  * But keep it if it's something we might need for other nodes:
1407                  * - Places (for is_in)
1408                  * - ...
1409                  */
1410                 if ((osm_node_check_box(cnode->lat, cnode->lon)==FALSE) && (cnode->type<NODE_PLACE_START)) {
1411                         osm_free_node_data(cnode);
1412                         osm_free_node(cnode);
1413                         g_hash_table_destroy(osm_node_tags);
1414                         node_skip_cnt++;
1415                         return;
1416                 }
1417
1418                 g_hash_table_insert(osm_nodes, GINT_TO_POINTER(cnode->id), cnode);
1419
1420                 if (cnode->type!=NODE_PLAIN) {
1421                         cnode->data->name=NULL;
1422                         v=g_hash_table_lookup(osm_node_tags, "name");
1423                         if (v)
1424                                 cnode->data->name=g_strstrip(g_utf8_normalize(v, -1, G_NORMALIZE_ALL_COMPOSE));
1425
1426                         GET_NODE_KEY("description", cnode->data->desc);
1427                         GET_NODE_KEY("note", cnode->data->desc);
1428                         GET_NODE_KEY("postal_code", cnode->data->postal_code);
1429                         GET_NODE_KEY("address", cnode->data->desc);
1430
1431                         /* Links */
1432                         v=g_hash_table_lookup(osm_node_tags, "url");
1433                         if (v) {
1434                                 cnode->data->url=g_strstrip(g_strdup(v));
1435                         } else {
1436                                 v=g_hash_table_lookup(osm_node_tags, "wikipedia");
1437                                 if (v && strncmp(v,"http:", 5)==0) 
1438                                         cnode->data->url=g_strstrip(g_strdup(v));
1439                         }
1440                 }
1441
1442                 cnode->data->isin_c=0;
1443                 cnode->data->isin_p=0;
1444                 v=g_hash_table_lookup(osm_node_tags, "is_in");
1445                 if (v) {
1446                         gchar **isin;                           
1447                         isin=g_strsplit(v, ",", 10);
1448                         g_hash_table_insert(osm_node_isin, GINT_TO_POINTER(cnode->id), isin);
1449                 }
1450
1451                 if (cnode->type==NODE_PLAIN) {
1452                         osm_free_node_data(cnode);
1453                 } else {
1454                         osm_poi=g_slist_prepend(osm_poi, cnode);
1455                         if (cnode->data->name) {
1456                                 switch (cnode->type) {
1457                                 case NODE_PLACE_COUNTRY:
1458                                         g_hash_table_insert(osm_place_country, cnode->data->name, cnode);
1459                                 break;
1460                                 case NODE_PLACE_CITY:
1461                                 case NODE_PLACE_TOWN:
1462                                         g_hash_table_insert(osm_place_city, cnode->data->name, cnode);
1463                                 break;
1464                                 case NODE_PLACE_SUBURB:
1465                                         g_hash_table_insert(osm_place_suburb, cnode->data->name, cnode);
1466                                 break;
1467                                 case NODE_PLACE_VILLAGE:
1468                                 case NODE_PLACE_HAMLET:
1469                                 case NODE_PLACE_LOCALITY:
1470                                         g_hash_table_insert(osm_place_village, cnode->data->name, cnode);
1471                                 break;
1472                                 case NODE_PLACE_ISLAND:
1473                                         /* Ignore for now */
1474                                 break;
1475                                 default:;
1476                                 }
1477                         }
1478                 }
1479                 g_hash_table_destroy(osm_node_tags);
1480                 cnode=NULL;
1481         break;
1482         case IN_WAY_TAG:
1483                 if (way_cnt % 1024==0) {
1484                         g_printf("\rWays: %d\n", way_cnt);
1485                 }
1486
1487                 cway->nodes=g_slist_reverse(cway->nodes);
1488
1489                 for (i=0; wayinfo[i].k; i++) {
1490                         v=g_hash_table_lookup(osm_way_tags, wayinfo[i].k);
1491                         if (!v)
1492                                 continue;
1493                         if (strcasecmp (v, wayinfo[i].v)==0) {
1494                                 if (wayinfo[i].link==TRUE)
1495                                         cway->flags|=W_LINK;
1496                                 if (wayinfo[i].area==TRUE)
1497                                         cway->flags|=W_AREA;
1498                                 if (wayinfo[i].oneway==TRUE)
1499                                         cway->flags|=W_ONEWAY;
1500                                 cway->type=wayinfo[i].type;
1501                                 if (cway->data->speed==0)
1502                                         cway->data->speed=wayinfo[i].defspeed;
1503                                 break;
1504                         }
1505                 }
1506
1507                 v=g_hash_table_lookup(osm_way_tags, "name");
1508                 if (v) {
1509                         cway->data->name=g_utf8_normalize(v, -1, G_NORMALIZE_ALL_COMPOSE);
1510                         /* Try to find other language names */
1511                         cway->data->names=g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
1512                         g_hash_table_foreach(osm_way_tags, find_nls_names, cway->data->names);
1513                         if (g_hash_table_size(cway->data->names)==0) {
1514                                 g_hash_table_destroy(cway->data->names);
1515                                 cway->data->names=NULL;
1516                         }
1517                 }
1518
1519                 v=g_hash_table_lookup(osm_way_tags, "ref");
1520                 if (v)
1521                         cway->data->ref=g_strdup(v);
1522                 v=g_hash_table_lookup(osm_way_tags, "int_ref");
1523                 if (v)
1524                         cway->data->int_ref=g_strdup(v);
1525
1526                 v=g_hash_table_lookup(osm_way_tags, "postal_code");
1527                 if (v)
1528                         cway->data->postal_code=g_strdup(v);
1529
1530                 v=g_hash_table_lookup(osm_way_tags, "oneway");
1531                 if (v) {
1532                         cway->flags|=W_ONEWAY;
1533                         if (strcmp(v, "-1")==0)
1534                                 cway->nodes=g_slist_reverse(cway->nodes);
1535                 }
1536
1537                 v=g_hash_table_lookup(osm_way_tags, "noexit");
1538                 if (v)
1539                         cway->flags|=W_NOEXIT;
1540                 
1541                 v=g_hash_table_lookup(osm_way_tags, "speedlimit");
1542                 if (v)
1543                         cway->data->speed=atoi(v);
1544                 v=g_hash_table_lookup(osm_way_tags, "maxspeed");
1545                 if (v)
1546                         cway->data->speed=atoi(v);
1547
1548                 v=g_hash_table_lookup(osm_way_tags, "layer");
1549                 if (v)
1550                         cway->data->layer=atoi(v);
1551
1552                 v=g_hash_table_lookup(osm_way_tags, "junction");
1553                 if (v && strcasecmp(v,"roundabout")==0) {
1554                         cway->flags|=W_ROUNDABOUT;
1555                         cway->flags|=W_ONEWAY;
1556                 } else if (v && strcasecmp(v,"mini_roundabout")==0) {
1557                         cway->flags|=W_ROUNDABOUT;
1558                         cway->flags|=W_ONEWAY;
1559                 }
1560
1561                 /* XXX: Should check keys */
1562                 v=g_hash_table_lookup(osm_way_tags, "access");
1563                 if (v && (strcasecmp(v, "private")==0)) {
1564                         cway->flags|=W_NOACCESS;
1565                 }
1566
1567                 print_way(cway);
1568
1569                 v=g_hash_table_lookup(osm_way_tags, "is_in");
1570                 if (v) {
1571                         gchar **isin;                           
1572                         isin=g_strsplit(v, ",", 10);
1573                         g_hash_table_insert(osm_way_isin, GINT_TO_POINTER(cway->id), isin);
1574                 }
1575
1576                 if (cway->data && cway->data->name==NULL && cway->data->ref==NULL &&
1577                         cway->data->int_ref==NULL && cway->data->layer==0 && cway->data->speed==0)
1578                         osm_free_way_data(cway);
1579
1580                 if (cway->id!=0)
1581                         osm_way_add_to_list(cway);
1582                 else 
1583                         osm_free_way(cway);
1584
1585                 cway=NULL;
1586                 g_hash_table_destroy(osm_way_tags);
1587         break;
1588         case IN_BOUND_TAG:
1589                 /* */
1590         break;
1591         case IN_OSM_TAG:
1592                 g_printf("\nPlanet loaded.\n");
1593         break;
1594         default:;
1595 }
1596 }
1597
1598 /************************************************************************/
1599
1600 static void
1601 storage_init(void)
1602 {
1603 osm_nodes=g_hash_table_new(g_direct_hash, g_direct_equal);
1604
1605 osm_place_country=g_hash_table_new(g_str_hash, g_str_equal);
1606 osm_place_city=g_hash_table_new(g_str_hash, g_str_equal);
1607 osm_place_suburb=g_hash_table_new(g_str_hash, g_str_equal);
1608 osm_place_village=g_hash_table_new(g_str_hash, g_str_equal);
1609 osm_place_region=g_hash_table_new(g_str_hash, g_str_equal);
1610 osm_node_isin=g_hash_table_new(g_direct_hash, g_direct_equal);
1611 osm_way_isin=g_hash_table_new(g_direct_hash, g_direct_equal);
1612 }
1613
1614 static void
1615 storage_free(void)
1616 {
1617 g_hash_table_destroy(osm_nodes);
1618
1619 g_hash_table_destroy(osm_place_country);
1620 g_hash_table_destroy(osm_place_city);
1621 g_hash_table_destroy(osm_place_suburb);
1622 g_hash_table_destroy(osm_place_village);
1623 g_hash_table_destroy(osm_place_region);
1624 g_hash_table_destroy(osm_node_isin);
1625 }
1626
1627 /************************************************************************/
1628
1629 static gint
1630 print_fail(const gchar *msg, gint ret)
1631 {
1632 g_printerr("ERROR: %s\n", msg);
1633 return ret;
1634 }
1635
1636 /************************************************************************/
1637
1638 static void
1639 print_memory_usage(void)
1640 {
1641 g_print("Memory usage per item:\n");
1642 g_printf("Node  size: %d\n", (gint)sizeof(node));
1643 g_printf("NodeD size: %d\n", (gint)sizeof(node_data));
1644 g_printf("Way   size: %d\n", (gint)sizeof(way));
1645 g_printf("WayD  size: %d\n", (gint)sizeof(way_data));
1646 }
1647
/************************************************************************
 * Public interface
 ************************************************************************/
1651
1652 void
1653 osm_planet_parser_init(void)
1654 {
1655 xp=XML_ParserCreate(NULL);
1656 XML_SetElementHandler(xp, _osm_tag_start, _osm_tag_end);
1657 storage_init();
1658 }
1659
1660 void
1661 osm_planet_parser_deinit(void)
1662 {
1663 XML_ParserFree(xp);
1664 storage_free();
1665 }
1666
1667 gboolean
1668 osm_planet_parse_buffer(const gchar *buffer, size_t r)
1669 {
1670 if (XML_Parse(xp, buffer, r, r>0 ? 0:1) == XML_STATUS_ERROR) {
1671         g_printerr("Parse error at line %d:\n%s\n",
1672                 (gint)XML_GetCurrentLineNumber(xp),
1673                 XML_ErrorString(XML_GetErrorCode(xp)));
1674         return FALSE;
1675 }
1676 return TRUE;
1677 }
1678
1679 gboolean 
1680 osm_planet_parse_file(gchar *pfile)
1681 {
1682 FILE *f;
1683 BZFILE *b;
1684 int bzerror;
1685 int r;
1686 gchar buffer[FILE_BUFFER];
1687 gboolean res=TRUE;
1688
1689 f=fopen(pfile, "r");
1690 if (!f) {
1691         perror("fopen failed\n");
1692         return FALSE;
1693 }
1694
1695 b=BZ2_bzReadOpen(&bzerror, f, 0, 0, NULL, 0);
1696 if (bzerror != BZ_OK) {
1697         g_printf("BZ2_bzReadOpen failed\n");
1698         BZ2_bzReadClose(&bzerror, b);
1699         return FALSE;
1700 }
1701
1702 do {
1703         r=BZ2_bzRead(&bzerror, b, buffer, FILE_BUFFER);
1704         if ((bzerror!=BZ_STREAM_END) && (bzerror!=BZ_OK)) {
1705                 res=FALSE;
1706                 break;
1707         }
1708         if (!osm_planet_parse_buffer(buffer, r)) {
1709                 res=FALSE;
1710                 break;
1711         }
1712 } while (bzerror==BZ_OK);
1713
1714 BZ2_bzReadClose(&bzerror, b);
1715 fclose(f);
1716 return res;
1717 }
1718
1719 /**
1720  * Set up bounding box for import.
1721  *
1722  */
1723 void
1724 osm_import_set_bbox(gboolean use_bb, gdouble latmin, gdouble lonmin, gdouble latmax, gdouble lonmax)
1725 {
1726 use_bbox=use_bb;
1727 bbox.lat_min=latmin;
1728 bbox.lon_min=lonmin;
1729 bbox.lat_max=latmax;
1730 bbox.lon_max=lonmax;
1731 g_printf("Skipping data outside of box: %f,%f - %f,%f\n",
1732         bbox.lat_min, bbox.lon_min,     bbox.lat_max, bbox.lon_max);
1733 }
1734
1735 static void
1736 osm_print_import_stats(void)
1737 {
1738 g_printf("Total nodes %d, POIs: %d and Ways %d.\n",     node_cnt, noded_cnt, way_cnt);
1739 g_printf("Cities/Towns: %d\n", g_hash_table_size(osm_place_city));
1740 g_printf("Villages/Hamlets: %d\n", g_hash_table_size(osm_place_village));
1741 g_printf("Suburbs: %d\n", g_hash_table_size(osm_place_suburb));
1742 g_printf("Nodes: %d\n", g_hash_table_size(osm_nodes));
1743 }
1744
1745 /**
1746  * Simple helper to do all preparations and importing from planet -> database
1747  *
1748  */
1749 gboolean
1750 osm_import(const gchar *planet, const gchar *database)
1751 {
1752 if (db_connect(&db, database)!=TRUE) {
1753         g_printerr("Database open failed: %s", database);
1754         return FALSE;
1755 }
1756
1757 if (!osm_db_create(db)) {
1758         g_printerr("Failed to create osm tables or indexes\n");
1759         return FALSE;
1760 }
1761
1762 if (!osm_db_prepare(db)) {
1763         g_printerr("Failed to prepare sql statements\n");
1764         return FALSE;
1765 }
1766
1767 osm_planet_parser_init();
1768
1769 if (osm_planet_parse_file(planet)==FALSE) {
1770         g_printerr("Failed to parse file: %s\n", planet);
1771         return FALSE;
1772 }
1773
1774 osm_print_import_stats();
1775
1776 osm_planet_save_all_nodes();
1777 osm_planet_save_all_ways();
1778 osm_planet_parser_deinit();
1779 db_finalize();
1780 db_close(&db);
1781 g_printf("All done.\n");
1782 return TRUE;
1783 }
1784
1785 static gpointer 
1786 osm_import_thread(gpointer user_data)
1787 {
1788 gboolean r;
1789 osm_import_data_req *req=(osm_import_data_req *)user_data;
1790
1791 g_assert(req);
1792 g_assert(req->planet);
1793 g_assert(req->db);
1794
1795 osm_import_progress_cb=req->progress_cb!=NULL ? req->progress_cb : NULL;
1796
1797 r=osm_import(req->planet, req->db);
1798 g_debug("OSM import result: %d", r);
1799
1800 g_free(req->planet);
1801 g_free(req->db);
1802
1803 if (req->done_cb!=NULL)
1804         g_idle_add(req->done_cb, GINT_TO_POINTER(r==TRUE ? 0 : 1));
1805
1806 return r==TRUE ? 0 : 1;
1807 }
1808
1809 /**
1810  * Helper to start an import in the background using a thread.
1811  *
1812  * Two callback can be given, one for progress feedback and one when the operation is done.
1813  * Done callback must call the join function.
1814  * Only one import thread can run at a time.
1815  *
1816  */
1817 gboolean 
1818 osm_import_bg(const gchar *planet, const gchar *database, GSourceFunc progress_cb, GSourceFunc done_cb)
1819 {
1820 GError *error=NULL;
1821
1822 g_return_val_if_fail(import_thread==NULL, FALSE);
1823
1824 osm_import_req.planet=g_strdup(planet);
1825 osm_import_req.db=g_strdup(database);
1826 osm_import_req.progress_cb=progress_cb;
1827 osm_import_req.done_cb=done_cb;
1828
1829 import_thread=g_thread_create(osm_import_thread, &osm_import_req, TRUE, &error);
1830 if (import_thread==NULL) {
1831         g_free(osm_import_req.planet);
1832         g_free(osm_import_req.db);
1833         g_printerr("Import thread creation failed.\n");
1834         return FALSE;
1835 }
1836 if (osm_import_progress_cb!=NULL)
1837         import_sid=g_timeout_add(1000, osm_import_progress_cb, NULL);
1838 return TRUE;
1839 }
1840
1841 gint
1842 osm_import_join_bg(void)
1843 {
1844 gint ret;
1845 g_assert(import_thread!=NULL);
1846
1847 if (import_sid!=0)
1848         g_source_remove(import_sid);
1849 ret=g_thread_join(import_thread);
1850 import_thread=NULL;
1851 return ret;
1852 }