-
Notifications
You must be signed in to change notification settings - Fork 1
/
db.hpp
327 lines (259 loc) · 11.8 KB
/
db.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
#pragma once

#include <string.h>

#include <chrono>
#include <cstdint>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#define RAPIDJSON_HAS_STDSTRING 1
#include <rapidjson/writer.h>
#include <rapidjson/stringbuffer.h>
#include "rapidjson/document.h"
#pragma GCC diagnostic pop

#include "rocksdb/db.h"
#include <rocksdb/table.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/cache.h>
#include <rocksdb/write_batch.h>
#include "rocksdb/slice.h"
#include "rocksdb/options.h"
#include "rocksdb/advanced_options.h"
#include "rocksdb/slice_transform.h"

#include <osmium/osm/types.hpp>
#include <osmium/visitor.hpp>

#include "pbf_encoding.hpp"
#include "json_encoding.hpp"
const std::string make_lookup(int64_t osm_id, const int version){
return std::to_string(osm_id) +"!"+ std::to_string(version);
}
const bool STORE_GEOMETRIES = true;
class ObjectStore {
rocksdb::DB* m_db;
rocksdb::ColumnFamilyHandle* m_cf_ways;
rocksdb::ColumnFamilyHandle* m_cf_nodes;
rocksdb::ColumnFamilyHandle* m_cf_relations;
rocksdb::ColumnFamilyHandle* m_cf_locations; //The location CF
//rocksdb::ColumnFamilyHandle* m_cf_changesets; //Not used (yet)
rocksdb::WriteOptions m_write_options;
rocksdb::WriteBatch m_buffer_batch;
void flush_family(const std::string type, rocksdb::ColumnFamilyHandle* cf) {
const auto start = std::chrono::steady_clock::now();
std::cerr << std::endl << "Flushing " << type << "..." ;
m_db->Flush(rocksdb::FlushOptions{}, cf);
const auto end = std::chrono::steady_clock::now();
const auto diff = end - start;
std::cerr << "done in " << std::chrono::duration <double, std::milli> (diff).count() << " ms" << std::endl;
}
void compact_family(const std::string type, rocksdb::ColumnFamilyHandle* cf) {
const auto start = std::chrono::steady_clock::now();
std::cerr << "Compacting " << type << "...";
m_db->CompactRange(rocksdb::CompactRangeOptions{}, cf, nullptr, nullptr);
const auto end = std::chrono::steady_clock::now();
const auto diff = end - start;
std::cerr << "done in " << std::chrono::duration <double, std::milli> (diff).count() << " ms" << std::endl;
}
void report_count_stats() {
uint64_t node_keys{0};
m_db->GetIntProperty(m_cf_nodes, "rocksdb.estimate-num-keys", &node_keys);
std::cerr << "Stored ~" << node_keys << "/" << stored_nodes_count << " nodes ";
uint64_t way_keys{0};
m_db->GetIntProperty(m_cf_ways, "rocksdb.estimate-num-keys", &way_keys);
std::cerr << "~" << way_keys << "/" << stored_ways_count << " ways ";
uint64_t relation_keys{0};
m_db->GetIntProperty(m_cf_relations, "rocksdb.estimate-num-keys", &relation_keys);
std::cerr << "~" << relation_keys << "/" << stored_relations_count << " relations" << std::endl;
uint64_t loc_keys{0};
m_db->GetIntProperty(m_cf_locations, "rocksdb.estimate-num-keys", &loc_keys);
std::cerr << "Stored ~" << loc_keys << " node keys for location " << std::endl;
}
public:
unsigned long empty_objects_count{0};
unsigned long stored_tags_count{0};
unsigned long stored_nodes_count{0};
unsigned long stored_locations_count{0};
unsigned long stored_ways_count{0};
unsigned long stored_relations_count{0};
unsigned long stored_objects_count() {
return stored_nodes_count + stored_ways_count + stored_relations_count;
}
ObjectStore(const std::string index_dir, const bool create) {
rocksdb::Options db_options;
db_options.allow_mmap_writes = false;
db_options.max_background_flushes = 4;
db_options.PrepareForBulkLoad();
db_options.target_file_size_base = 512 * 1024 * 1024;
m_write_options = rocksdb::WriteOptions();
m_write_options.disableWAL = true;
m_write_options.sync = false;
rocksdb::BlockBasedTableOptions table_options;
table_options.filter_policy = std::shared_ptr<const rocksdb::FilterPolicy>(rocksdb::NewBloomFilterPolicy(10));
// table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
db_options.table_factory.reset(NewBlockBasedTableFactory(table_options));
rocksdb::Status s;
if(create) {
// Open the DB in create mode:
//
// 1. Clear out the previous INDEX
// 2. Push back all column families
//
std::cerr << "Opening Database For Writing" << std::endl;;
rocksdb::DestroyDB(index_dir, db_options);
db_options.create_if_missing = true;
s = rocksdb::DB::Open(db_options, index_dir, &m_db);
s = m_db->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "nodes", &m_cf_nodes);
assert(s.ok());
s = m_db->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "locations", &m_cf_locations);
assert(s.ok());
s = m_db->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "ways", &m_cf_ways);
assert(s.ok());
s = m_db->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "relations", &m_cf_relations);
assert(s.ok());
// Open the database for read-only
} else {
db_options.error_if_exists = false;
db_options.create_if_missing = false;
std::cerr << "Opening Database READONLY" << std::endl;;
//Open column families
std::vector<rocksdb::ColumnFamilyDescriptor> column_families;
// Open default column family?
column_families.push_back(rocksdb::ColumnFamilyDescriptor(
rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions()));
// Specifiy the existing column families
column_families.push_back(rocksdb::ColumnFamilyDescriptor( "nodes", rocksdb::ColumnFamilyOptions()));
column_families.push_back(rocksdb::ColumnFamilyDescriptor( "locations", rocksdb::ColumnFamilyOptions()));
column_families.push_back(rocksdb::ColumnFamilyDescriptor( "ways", rocksdb::ColumnFamilyOptions()));
column_families.push_back(rocksdb::ColumnFamilyDescriptor( "relations", rocksdb::ColumnFamilyOptions()));
std::vector<rocksdb::ColumnFamilyHandle*> handles;
s = rocksdb::DB::OpenForReadOnly(db_options, index_dir, column_families, &handles, &m_db);
assert(s.ok());
m_cf_nodes = handles[1];
m_cf_locations = handles[2];
m_cf_ways = handles[3];
m_cf_relations = handles[4];
}
}
rocksdb::Status get_tags(const int64_t osm_id, const int osm_type, const int version, std::string* value) {
//
// Lookup a specific version of an object in the DB
//
const auto lookup = make_lookup(osm_id, version);
// Node
if(osm_type== 1) {
return m_db->Get(rocksdb::ReadOptions(), m_cf_nodes, lookup, value);
// Way
} else if (osm_type == 2) {
return m_db->Get(rocksdb::ReadOptions(), m_cf_ways, lookup, value);
// Relation
} else {
return m_db->Get(rocksdb::ReadOptions(), m_cf_relations, lookup, value);
}
}
rocksdb::Status get_node_locations(const std::string nodeID, std::string* value) {
return m_db->Get(rocksdb::ReadOptions(), m_cf_locations, nodeID, value);
}
/*
Store PBF Objects in RocksDB
*/
void store_pbf_node(const osmium::Node&node) {
std::string lookup = make_lookup( node.id(), node.version() );
if ( store_pbf_object( osmwayback::encode_node(node), lookup, m_cf_nodes) ){
stored_nodes_count++;
}
//PBF Nodes always include geometries, flush in bulks of 5M
if (stored_nodes_count != 0 && (stored_nodes_count % 5000000) == 0) {
flush_family("nodes", m_cf_nodes);
report_count_stats();
}
}
void store_pbf_way(const osmium::Way&way) {
std::string lookup = make_lookup( way.id(), way.version() );
if ( store_pbf_object( osmwayback::encode_way(way), lookup, m_cf_ways) ){
stored_ways_count++;
}
//PBF Ways... flush in bulks of 2M
if (stored_ways_count != 0 && (stored_ways_count % 2000000) == 0) {
flush_family("ways", m_cf_ways);
report_count_stats();
}
}
/* Looks up a NODE ID and performs an upsert to the locations CF, adding new historical
* versions to it, keyed by changeset.
*/
void upsert_node_location(const osmium::Node& node){
rapidjson::Document nodeLocations;
std::string rocksEntry;
//First, extract location information from this node.
std::string nodeKey = std::to_string(node.id());
rocksdb::Status s = m_db->Get(rocksdb::ReadOptions(), m_cf_locations, nodeKey, &rocksEntry);
if ( s.IsNotFound() ){
//There is not value at that key, so start the object
nodeLocations.SetObject();
}else{
//nodeLocations is a JSON object...
nodeLocations.Parse<0>(rocksEntry.c_str());
}
//Add this changeset to the node
if( jsonencoding::encode_location_json(node, nodeLocations) ){
rapidjson::StringBuffer buffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
nodeLocations.Accept(writer);
rocksdb::Status stat = m_db->Put(rocksdb::WriteOptions(), m_cf_locations, nodeKey, buffer.GetString());
if ( stat.ok() ){
stored_locations_count++;
}
}
if (stored_locations_count != 0 && (stored_locations_count % 1000000) == 0) {
flush_family("locations", m_cf_locations);
}
}
void store_json_relation(const osmium::Relation& relation) {
//Get basic properties, initialize json
rapidjson::Document json;
json = jsonencoding::extract_osm_properties(relation);
std::string lookup = make_lookup( relation.id(), relation.version() );
if(store_json_object(json, lookup, m_cf_relations)) {
stored_relations_count++;
}
if (stored_relations_count != 0 && (stored_relations_count % 1000000) == 0) {
flush_family("relations", m_cf_relations);
report_count_stats();
}
}
/*
Store objects to RocksDB
*/
//Store PBF Object
bool store_pbf_object( const std::string value, const std::string lookup, rocksdb::ColumnFamilyHandle* cf ) {
rocksdb::Status stat = m_buffer_batch.Put(cf, lookup, value);
//Write in chunks of 2000
if (m_buffer_batch.Count() > 2000) {
m_db->Write(m_write_options, &m_buffer_batch);
m_buffer_batch.Clear();
}
return true;
}
//Store the rapidjson object to rocksdb
bool store_json_object( const rapidjson::Document& doc, const std::string lookup, rocksdb::ColumnFamilyHandle* cf ) {
rapidjson::StringBuffer buffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
doc.Accept(writer);
rocksdb::Status stat = m_buffer_batch.Put(cf, lookup, buffer.GetString());
//Write in chunks of 1000
if (m_buffer_batch.Count() > 1000) {
m_db->Write(m_write_options, &m_buffer_batch);
m_buffer_batch.Clear();
}
return true;
}
void flush() {
m_db->Write(m_write_options, &m_buffer_batch);
m_buffer_batch.Clear();
flush_family("nodes", m_cf_nodes);
flush_family("ways", m_cf_ways);
flush_family("relations", m_cf_relations);
flush_family("locations", m_cf_locations);
compact_family("nodes", m_cf_nodes);
compact_family("ways", m_cf_ways);
compact_family("relations", m_cf_relations);
compact_family("locations", m_cf_locations);
report_count_stats();
}
};