The SwappyItems Key-Values Store
osmpbfreader.hpp
Go to the documentation of this file.
1 /*
2 Copyright (c) 2012, Canal TP
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the name of the Canal TP nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL CANAL TP BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #pragma once
28 
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>
#include <zlib.h>

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
35 
36 // this describes the low-level blob storage
37 #include <osmpbf/fileformat.pb.h>
38 // this describes the high-level OSM objects
39 #include <osmpbf/osmformat.pb.h>
// Largest blob header, in bytes, that the reader accepts (64 kB).
const int max_blob_header_size = 1 << 16;
// Largest uncompressed blob, in bytes, that the reader accepts (32 MB).
const int max_uncompressed_blob_size = 1 << 25;
// Resolution used when converting longitude/latitude between their
// integer and double representations.
const int lonlat_resolution = 1000000000;
47 
48 namespace CanalTP {
49 
// Key/value pairs attached to an object, ordered by key.
using Tags = std::map<std::string, std::string>;
52 
53 // References of a relation
54 struct Reference {
55  OSMPBF::Relation::MemberType member_type; // type de la relation
56  uint64_t member_id; // OSMID
57  std::string role; // le role
58 
59  Reference() {}
60  Reference(OSMPBF::Relation::MemberType member_type, uint64_t member_id, std::string role) :
62  {}
63 };
64 
65 typedef std::vector<Reference> References;
66 
67 // Main function
68 template<typename Visitor>
69 void read_osm_pbf(const std::string & filename, Visitor & visitor, bool wayOnly);
70 
// Scoped warning logger. Constructing it prints a yellow "[WARN] " prefix
// on std::cout, operator<< appends values, and destruction resets the
// colour and ends the line (flushing the stream).
struct warn {
    warn() {
        std::cout << "\033[33m[WARN] ";
    }

    // Appends `value` to the current warning line; returns *this so calls chain.
    template<typename T>
    warn & operator<<(const T & value) {
        std::cout << value;
        return *this;
    }

    ~warn() {
        std::cout << "\033[0m" << std::endl;
    }
};
76 
// Scoped informational logger writing to std::cout. The green "[INFO] "
// prefix and the colour reset are only emitted when DEBUG is defined;
// the streamed values and the terminating newline are always printed.
struct info {
    info() {
#ifdef DEBUG
        std::cout << "\033[32m[INFO] ";
#endif
    }

    // Appends `value` to the current line; returns *this so calls chain.
    template<typename T>
    info & operator<<(const T & value) {
        std::cout << value;
        return *this;
    }

    ~info() {
#ifdef DEBUG
        std::cout << "\033[0m";
#endif
        std::cout << std::endl;
    }
};
91 
// Scoped fatal-error logger. Constructing it prints a red "[FATAL] "
// prefix on std::cout and operator<< appends values. The destructor resets
// the colour, ends the line and terminates the process with exit status 1,
// so a `fatal() << ...;` statement never returns to its caller.
struct fatal {
    fatal() {
        std::cout << "\033[31m[FATAL] ";
    }

    // Appends `value` to the current error line; returns *this so calls chain.
    template<typename T>
    fatal & operator<<(const T & value) {
        std::cout << value;
        return *this;
    }

    ~fatal() {
        std::cout << "\033[0m" << std::endl;
        exit(1);
    }
};
97 
// Returns the size in bytes of the file at `filePath`, measured as the
// distance between the initial stream position and the end of the file.
//
// If the file cannot be opened both position queries fail identically and
// the result is 0.
//
// Declared inline because this header defines the function: without it,
// including the header from two translation units yields duplicate symbols.
inline std::streampos fileSize(const char* filePath) {
    std::ifstream stream(filePath, std::ios::binary);
    const std::streampos begin = stream.tellg();
    stream.seekg(0, std::ios::end);
    const std::streampos end = stream.tellg();
    // The stream is closed by its destructor.
    return end - begin;
}
107 
108 template<typename T>
109 Tags get_tags(const T& object, const OSMPBF::PrimitiveBlock &primblock){
110  Tags result;
111  for(int i = 0; i < object.keys_size(); ++i){
112  uint64_t key = object.keys(i);
113  uint64_t val = object.vals(i);
114  std::string key_string = primblock.stringtable().s(key);
115  std::string val_string = primblock.stringtable().s(val);
116  result[key_string] = val_string;
117  }
118  return result;
119 }
120 
121 template<typename Visitor>
122 struct Parser {
123 
124  void parse(){
125 #ifdef DEBUG
126  std::streampos todo = file_size;
127  std::streampos last_todo = file_size;
128 #endif
129 
130  while(!this->file.eof() && !finished) {
131  OSMPBF::BlobHeader header = this->read_header();
132  if(!this->finished){
133  int32_t sz = this->read_blob(header);
134  if (sz > 0) {
135  if(header.type() == "OSMData") {
136  this->parse_primitiveblock(sz);
137  }
138  else if(header.type() == "OSMHeader"){
139  }
140  else {
141  warn() << " unknown blob type: " << header.type();
142  }
143 #ifdef DEBUG
144  todo -= max_blob_header_size;
145  if ((todo+10000000) < last_todo) {
146  last_todo = todo;
147  info() << 100*(todo/(float)file_size) << " \% to do";
148  }
149 #endif
150  }
151  }
152  }
153  }
154 
155  Parser(const std::string & filename, Visitor & visitor, bool _wayOnly) : visitor(visitor) {
156 
157  wayOnly = _wayOnly;
158  finished = false;
159  file = std::ifstream(filename.c_str(), std::ios::binary );
160 
161  file_size = fileSize(filename.c_str());
162  if(!file.is_open())
163  fatal() << "Unable to open the file " << filename;
166  info() << "#Reading the file " << filename;
167  info() << "#" << file_size/1000000 << " MB to do";
168  }
169 
171  delete[] buffer;
172  delete[] unpack_buffer;
173  }
174 
175 private:
176  std::ifstream file;
177  Visitor & visitor;
178  bool wayOnly;
179  bool finished;
180  char* buffer;
182  std::streampos file_size;
183 
184  OSMPBF::BlobHeader read_header(){
185  int32_t sz;
186  OSMPBF::BlobHeader result;
187 
188  // read the first 4 bytes of the file, this is the size of the blob-header
189  if( !file.read((char*)&sz, 4) ){
190  info() << "# We finished reading the file";
191  this->finished = true;
192  return result;
193  }
194 
195  sz = ntohl(sz);// convert the size from network byte-order to host byte-order
196 
197  if(sz > max_blob_header_size)
198  fatal() << "blob-header-size is bigger then allowed " << sz << " > " << max_blob_header_size;
199 
200  this->file.read(this->buffer, sz);
201  if(!this->file.good())
202  fatal() << "unable to read blob-header from file";
203 
204  // parse the blob-header from the read-buffer
205  if(!result.ParseFromArray(this->buffer, sz))
206  fatal() << "unable to parse blob header";
207  return result;
208  }
209 
210  int32_t read_blob(const OSMPBF::BlobHeader & header){
211  OSMPBF::Blob blob;
212  // size of the following blob
213  int32_t sz = header.datasize();
214 
216  fatal() << "blob-size is bigger then allowed";
217 
218  if(!this->file.read(buffer, sz))
219  fatal() << "unable to read blob from file";
220  if(!blob.ParseFromArray(this->buffer, sz))
221  fatal() << "unable to parse blob";
222 
223  // if the blob has uncompressed data
224  if(blob.has_raw()) {
225  // size of the blob-data
226  sz = blob.raw().size();
227 
228  // check that raw_size is set correctly
229  if(sz != blob.raw_size())
230  warn() << " reports wrong raw_size: " << blob.raw_size() << " bytes";
231 
232  memcpy(unpack_buffer, blob.raw().c_str(), sz);
233  return sz;
234  }
235 
236  if(blob.has_zlib_data()) {
237  //info() << "blob has zlib stream";
238  sz = blob.zlib_data().size();
239 
240  z_stream z;
241  z.next_in = (unsigned char*) blob.zlib_data().c_str();
242  z.avail_in = sz;
243  z.next_out = (unsigned char*) unpack_buffer;
244  z.avail_out = blob.raw_size();
245  z.zalloc = Z_NULL;
246  z.zfree = Z_NULL;
247  z.opaque = Z_NULL;
248 
249  if(inflateInit(&z) != Z_OK) {
250  fatal() << "failed to init zlib stream";
251  }
252  if(inflate(&z, Z_FINISH) != Z_STREAM_END) {
253  fatal() << "failed to inflate zlib stream";
254  }
255  if(inflateEnd(&z) != Z_OK) {
256  fatal() << "failed to deinit zlib stream";
257  }
258  return z.total_out;
259  }
260 
261  if(blob.has_lzma_data()) {
262  fatal() << "lzma-decompression is not supported";
263  }
264  return 0;
265  }
266 
267  void parse_primitiveblock(int32_t sz) {
268  OSMPBF::PrimitiveBlock primblock;
269  if(!primblock.ParseFromArray(this->unpack_buffer, sz))
270  fatal() << "unable to parse primitive block";
271 
272  for(int i = 0, l = primblock.primitivegroup_size(); i < l; i++) {
273  const OSMPBF::PrimitiveGroup& pg = primblock.primitivegroup(i);
274 
275  if (wayOnly) {
276  //if (pg.ways_size()>0) info() << "ways " << pg.ways_size();
277  for(int i = 0; i < pg.ways_size(); ++i) {
278  const OSMPBF::Way& w = pg.ways(i);
279 
280  uint64_t ref = 0;
281  std::vector<uint64_t> refs;
282  for(int j = 0; j < w.refs_size(); ++j){
283  ref += w.refs(j);
284  refs.push_back(ref);
285  }
286  uint64_t id = w.id();
287  visitor.way_callback(id, get_tags(w, primblock), refs);
288  }
289 
290  } else {
291  //if (pg.nodes_size()>0) info() << "simple nodes " << pg.nodes_size();
292 
293  // Simple Nodes
294  for(int i = 0; i < pg.nodes_size(); ++i) {
295  const OSMPBF::Node& n = pg.nodes(i);
296 
297  double lon = 0.000000001 * (primblock.lon_offset() + (primblock.granularity() * n.lon())) ;
298  double lat = 0.000000001 * (primblock.lat_offset() + (primblock.granularity() * n.lat())) ;
299  visitor.node_callback(n.id(), lon, lat, get_tags(n, primblock));
300  }
301 
302  // Dense Nodes
303  if(pg.has_dense()) {
304  const OSMPBF::DenseNodes& dn = pg.dense();
305  //info() << "dense nodes " << dn.id_size();
306 
307  uint64_t id = 0;
308  double lon = 0;
309  double lat = 0;
310 
311  int current_kv = 0;
312 
313  for(int i = 0; i < dn.id_size(); ++i) {
314  id += dn.id(i);
315  lon += 0.000000001 * (primblock.lon_offset() + (primblock.granularity() * dn.lon(i)));
316  lat += 0.000000001 * (primblock.lat_offset() + (primblock.granularity() * dn.lat(i)));
317 
318  Tags tags;
319  while (current_kv < dn.keys_vals_size() && dn.keys_vals(current_kv) != 0){
320  uint64_t key = dn.keys_vals(current_kv);
321  uint64_t val = dn.keys_vals(current_kv + 1);
322  std::string key_string = primblock.stringtable().s(key);
323  std::string val_string = primblock.stringtable().s(val);
324  current_kv += 2;
325  tags[key_string] = val_string;
326  }
327  ++current_kv;
328  visitor.node_callback(id, lon, lat, tags);
329  }
330  }
331 
332  //if (pg.relations_size()>0) info() << "relations " << pg.relations_size();
333  /*
334  for(int i=0; i < pg.relations_size(); ++i){
335  const OSMPBF::Relation& rel = pg.relations(i);
336  uint64_t id = 0;
337  References refs;
338 
339  for(int l = 0; l < rel.memids_size(); ++l){
340  id += rel.memids(l);
341  refs.push_back(Reference(rel.types(l), id, primblock.stringtable().s(rel.roles_sid(l))));
342  }
343  visitor.relation_callback(rel.id(), get_tags(rel, primblock), refs);
344  }*/
345  }
346 
347  }
348  }
349 };
350 
358 template<typename Visitor>
359 void read_osm_pbf(const std::string & filename, Visitor & visitor, bool wayOnly){
360  Parser<Visitor> p(filename, visitor, wayOnly);
361  p.parse();
362 }
363 
364 }
Definition: osmpbfreader.hpp:48
std::vector< Reference > References
Definition: osmpbfreader.hpp:65
void read_osm_pbf(const std::string &filename, Visitor &visitor, bool wayOnly)
Definition: osmpbfreader.hpp:359
std::streampos fileSize(const char *filePath)
Definition: osmpbfreader.hpp:98
std::map< std::string, std::string > Tags
Definition: osmpbfreader.hpp:51
Tags get_tags(const T &object, const OSMPBF::PrimitiveBlock &primblock)
Definition: osmpbfreader.hpp:109
const int lonlat_resolution
Definition: osmpbfreader.hpp:46
const int max_blob_header_size
Definition: osmpbfreader.hpp:41
const int max_uncompressed_blob_size
Definition: osmpbfreader.hpp:43
Definition: osmpbfreader.hpp:122
char * unpack_buffer
Definition: osmpbfreader.hpp:181
void parse_primitiveblock(int32_t sz)
Definition: osmpbfreader.hpp:267
bool wayOnly
Definition: osmpbfreader.hpp:178
std::streampos file_size
Definition: osmpbfreader.hpp:182
~Parser()
Definition: osmpbfreader.hpp:170
int32_t read_blob(const OSMPBF::BlobHeader &header)
Definition: osmpbfreader.hpp:210
char * buffer
Definition: osmpbfreader.hpp:180
Visitor & visitor
Definition: osmpbfreader.hpp:177
Parser(const std::string &filename, Visitor &visitor, bool _wayOnly)
Definition: osmpbfreader.hpp:155
OSMPBF::BlobHeader read_header()
Definition: osmpbfreader.hpp:184
void parse()
Definition: osmpbfreader.hpp:124
bool finished
Definition: osmpbfreader.hpp:179
std::ifstream file
Definition: osmpbfreader.hpp:176
Definition: osmpbfreader.hpp:54
Reference(OSMPBF::Relation::MemberType member_type, uint64_t member_id, std::string role)
Definition: osmpbfreader.hpp:60
uint64_t member_id
Definition: osmpbfreader.hpp:56
Reference()
Definition: osmpbfreader.hpp:59
std::string role
Definition: osmpbfreader.hpp:57
OSMPBF::Relation::MemberType member_type
Definition: osmpbfreader.hpp:55
Definition: osmpbfreader.hpp:92
~fatal()
Definition: osmpbfreader.hpp:95
fatal()
Definition: osmpbfreader.hpp:93
fatal & operator<<(const T &t)
Definition: osmpbfreader.hpp:94
Definition: osmpbfreader.hpp:77
info()
Definition: osmpbfreader.hpp:78
~info()
Definition: osmpbfreader.hpp:84
info & operator<<(const T &t)
Definition: osmpbfreader.hpp:83
Definition: osmpbfreader.hpp:71
warn()
Definition: osmpbfreader.hpp:72
warn & operator<<(const T &t)
Definition: osmpbfreader.hpp:73
~warn()
Definition: osmpbfreader.hpp:74