Fast map matching  0.1.0
ubodt.cpp
1 //
2 // Created by Can Yang on 2020/3/22.
3 //
4 
5 #include "mm/fmm/ubodt.hpp"
6 #include "util/util.hpp"
7 #include <fstream>
8 #include <boost/archive/binary_iarchive.hpp>
9 
10 using namespace FMM;
11 using namespace FMM::CORE;
12 using namespace FMM::NETWORK;
13 using namespace FMM::MM;
14 UBODT::UBODT(int buckets_arg, int multiplier_arg) :
15  buckets(buckets_arg), multiplier(multiplier_arg) {
16  SPDLOG_TRACE("Intialization UBODT with buckets {} multiplier {}",
17  buckets, multiplier);
18  hashtable = (Record **) malloc(sizeof(Record *) * buckets);
19  for (int i = 0; i < buckets; i++) {
20  hashtable[i] = nullptr;
21  }
22  SPDLOG_TRACE("Intialization UBODT finished");
23 }
24 
25 UBODT::~UBODT() {
26  /* Clean hashtable */
27  SPDLOG_TRACE("Clean UBODT");
28  int i;
29  for (i = 0; i < buckets; ++i) {
30  Record *head = hashtable[i];
31  Record *curr;
32  while ((curr = head) != nullptr) {
33  head = head->next;
34  free(curr);
35  }
36  }
37  // Destory hash table pointer
38  free(hashtable);
39  SPDLOG_TRACE("Clean UBODT finished");
40 }
41 
42 Record *UBODT::look_up(NodeIndex source, NodeIndex target) const {
43  unsigned int h = cal_bucket_index(source, target);
44  Record *r = hashtable[h];
45  while (r != nullptr) {
46  if (r->source == source && r->target == target) {
47  return r;
48  } else {
49  r = r->next;
50  }
51  }
52  return r;
53 }
54 
55 std::vector<EdgeIndex> UBODT::look_sp_path(NodeIndex source,
56  NodeIndex target) const {
57  std::vector<EdgeIndex> edges;
58  if (source == target) { return edges; }
59  Record *r = look_up(source, target);
60  // No transition exist from source to target
61  if (r == nullptr) { return edges; }
62  while (r->first_n != target) {
63  edges.push_back(r->next_e);
64  r = look_up(r->first_n, target);
65  }
66  edges.push_back(r->next_e);
67  return edges;
68 }
69 
71  const std::vector<Edge> &edges,
72  std::vector<int> *indices) const {
73  C_Path cpath;
74  if (!indices->empty()) indices->clear();
75  if (path.empty()) return cpath;
76  int N = path.size();
77  cpath.push_back(path[0]->c->edge->id);
78  int current_idx = 0;
79  indices->push_back(current_idx);
80  for (int i = 0; i < N - 1; ++i) {
81  const Candidate *a = path[i]->c;
82  const Candidate *b = path[i + 1]->c;
83  if ((a->edge->id != b->edge->id) || (a->offset > b->offset)) {
84  // segs stores edge index
85  auto segs = look_sp_path(a->edge->target, b->edge->source);
86  // No transition exist in UBODT
87  if (segs.empty() && a->edge->target != b->edge->source) {
88  indices->clear();
89  return C_Path();
90  }
91  for (int e:segs) {
92  cpath.push_back(edges[e].id);
93  ++current_idx;
94  }
95  cpath.push_back(b->edge->id);
96  ++current_idx;
97  indices->push_back(current_idx);
98  } else {
99  indices->push_back(current_idx);
100  }
101  }
102  return cpath;
103 }
104 
105 double UBODT::get_delta() const {
106  return delta;
107 }
108 
109 unsigned int UBODT::cal_bucket_index(NodeIndex source, NodeIndex target) const {
110  return (source * multiplier + target) % buckets;
111 }
112 
113 
115  //int h = (r->source*multiplier+r->target)%buckets ;
116  int h = cal_bucket_index(r->source, r->target);
117  r->next = hashtable[h];
118  hashtable[h] = r;
119  if (r->cost > delta) delta = r->cost;
120 }
121 
122 long UBODT::estimate_ubodt_rows(const std::string &filename) {
123  struct stat stat_buf;
124  long rc = stat(filename.c_str(), &stat_buf);
125  if (rc == 0) {
126  int file_bytes = stat_buf.st_size;
127  SPDLOG_TRACE("UBODT file size is {} bytes", file_bytes);
128  std::string fn_extension = filename.substr(filename.find_last_of(".") + 1);
129  std::transform(fn_extension.begin(),
130  fn_extension.end(),
131  fn_extension.begin(),
132  [](unsigned char c) { return std::tolower(c); });
133  if (fn_extension == "csv" || fn_extension == "txt") {
134  int row_size = 36;
135  return file_bytes / row_size;
136  } else if (fn_extension == "bin" || fn_extension == "binary") {
137  Record r;
138  // When exporting to a file using boost binary writer,
139  // the padding is removed.
140  int row_size = 28;
141  return file_bytes / row_size;
142  }
143  }
144  return -1;
145 }
146 
147 int UBODT::find_prime_number(double value) {
148  std::vector<int> prime_numbers = {
149  5003, 10039, 20029, 50047, 100669, 200003, 500000,
150  1000039, 2000083, 5000101, 10000103, 20000033};
151  int N = prime_numbers.size();
152  for (int i = 0; i < N; ++i) {
153  if (value <= prime_numbers[i]) {
154  return prime_numbers[i];
155  }
156  }
157  return prime_numbers[N - 1];
158 }
159 
160 std::shared_ptr<UBODT> UBODT::read_ubodt_file(const std::string &filename,
161  int multiplier) {
162  if (UTIL::check_file_extension(filename,"bin")){
163  return read_ubodt_binary(filename,multiplier);
164  } else if (UTIL::check_file_extension(filename,"csv,txt")) {
165  return read_ubodt_csv(filename,multiplier);
166  } else {
167  SPDLOG_CRITICAL("File format not support: {}",filename);
168  std::exit(EXIT_FAILURE);
169  return nullptr;
170  }
171 }
172 
173 
174 std::shared_ptr<UBODT> UBODT::read_ubodt_csv(const std::string &filename,
175  int multiplier) {
176  SPDLOG_INFO("Reading UBODT file (CSV format) from {}", filename);
177  long rows = estimate_ubodt_rows(filename);
178  int buckets = find_prime_number(rows / LOAD_FACTOR);
179  SPDLOG_TRACE("Estimated buckets {}", buckets);
180  int progress_step = 1000000;
181  std::shared_ptr<UBODT> table = std::make_shared<UBODT>(buckets, multiplier);
182  FILE *stream = fopen(filename.c_str(), "r");
183  long NUM_ROWS = 0;
184  char line[BUFFER_LINE];
185  if (fgets(line, BUFFER_LINE, stream)) {
186  SPDLOG_TRACE("Header line skipped.");
187  }
188  while (fgets(line, BUFFER_LINE, stream)) {
189  ++NUM_ROWS;
190  Record *r = (Record *) malloc(sizeof(Record));
191  /* Parse line into a Record */
192  sscanf(
193  line, "%d;%d;%d;%d;%d;%lf",
194  &r->source,
195  &r->target,
196  &r->first_n,
197  &r->prev_n,
198  &r->next_e,
199  &r->cost
200  );
201  r->next = nullptr;
202  table->insert(r);
203  if (NUM_ROWS % progress_step == 0) {
204  SPDLOG_INFO("Read rows {}", NUM_ROWS);
205  }
206  }
207  fclose(stream);
208  double lf = NUM_ROWS / (double) buckets;
209  SPDLOG_TRACE("Estimated load factor #elements/#tablebuckets {}", lf);
210  if (lf > 10) { SPDLOG_WARN("Load factor is too large.") }
211  SPDLOG_INFO("Finish reading UBODT with rows {}", NUM_ROWS);
212  return table;
213 }
214 
215 std::shared_ptr<UBODT> UBODT::read_ubodt_binary(const std::string &filename,
216  int multiplier) {
217  SPDLOG_INFO("Reading UBODT file (binary format) from {}", filename);
218  long rows = estimate_ubodt_rows(filename);
219  int progress_step = 1000000;
220  SPDLOG_TRACE("Estimated rows is {}", rows);
221  int buckets = find_prime_number(rows / LOAD_FACTOR);
222  std::shared_ptr<UBODT> table = std::make_shared<UBODT>(buckets, multiplier);
223  long NUM_ROWS = 0;
224  std::ifstream ifs(filename.c_str());
225  // Check byte offset
226  std::streampos archiveOffset = ifs.tellg();
227  std::streampos streamEnd = ifs.seekg(0, std::ios_base::end).tellg();
228  ifs.seekg(archiveOffset);
229  boost::archive::binary_iarchive ia(ifs);
230  while (ifs.tellg() < streamEnd) {
231  ++NUM_ROWS;
232  Record *r = (Record *) malloc(sizeof(Record));
233  ia >> r->source;
234  ia >> r->target;
235  ia >> r->first_n;
236  ia >> r->prev_n;
237  ia >> r->next_e;
238  ia >> r->cost;
239  r->next = nullptr;
240  table->insert(r);
241  if (NUM_ROWS % progress_step == 0) {
242  SPDLOG_INFO("Read rows {}", NUM_ROWS);
243  }
244  }
245  ifs.close();
246  double lf = NUM_ROWS / (double) buckets;
247  SPDLOG_TRACE("Estimated load factor #elements/#tablebuckets {}", lf);
248  if (lf > 10) {
249  SPDLOG_WARN("Load factor is too large.")
250  }
251  SPDLOG_INFO("Finish reading UBODT with rows {}", NUM_ROWS)
252  return table;
253 }
FMM::MM::UBODT::cal_bucket_index
unsigned int cal_bucket_index(NETWORK::NodeIndex source, NETWORK::NodeIndex target) const
Find the bucket index for an OD pair.
Definition: ubodt.cpp:109
FMM::MM::UBODT::LOAD_FACTOR
constexpr static double LOAD_FACTOR
factor measuring the average number of elements in a bucket.
Definition: ubodt.hpp:140
FMM::MM::TGOpath
std::vector< const TGNode * > TGOpath
The optimal path of nodes in the transition graph.
Definition: transition_graph.hpp:46
FMM::MM::UBODT::look_up
Record * look_up(NETWORK::NodeIndex source, NETWORK::NodeIndex target) const
Look up the row according to a source node and a target node.
Definition: ubodt.cpp:42
FMM::MM::Candidate::edge
NETWORK::Edge * edge
candidate edge
Definition: mm_type.hpp:33
FMM::MM::UBODT::read_ubodt_binary
static std::shared_ptr< UBODT > read_ubodt_binary(const std::string &filename, int multiplier=50000)
Read UBODT from a binary file.
Definition: ubodt.cpp:215
FMM::MM::UBODT::construct_complete_path
C_Path construct_complete_path(const TGOpath &path, const std::vector< NETWORK::Edge > &edges, std::vector< int > *indices) const
Construct the complete path (a vector of edge ID) from an optimal path (a vector of optimal nodes in ...
Definition: ubodt.cpp:70
FMM::MM::UBODT::get_delta
double get_delta() const
Get the upperbound of the UBODT.
Definition: ubodt.cpp:105
FMM::UTIL::check_file_extension
bool check_file_extension(const std::string &filename, const std::string &extension_list_str)
Check if the filename has an extension in the list.
Definition: util.cpp:105
FMM::MM::UBODT::find_prime_number
static int find_prime_number(double value)
Find a large prime number according to input value.
Definition: ubodt.cpp:147
FMM::NETWORK::Edge::id
EdgeID id
Edge ID, can be discontinuous integers.
Definition: type.hpp:48
FMM::MM::C_Path
std::vector< FMM::NETWORK::EdgeID > C_Path
Complete path, ids of a sequence of topologically connected edges.
Definition: mm_type.hpp:47
FMM
Fast map matching.
Definition: geom_algorithm.hpp:17
FMM::MM::Record::target
NETWORK::NodeIndex target
target node
Definition: ubodt.hpp:25
FMM::MM::UBODT::BUFFER_LINE
static const int BUFFER_LINE
Number of characters to store in a line.
Definition: ubodt.hpp:143
FMM::MM::Record::prev_n
NETWORK::NodeIndex prev_n
last node visited before target
Definition: ubodt.hpp:27
FMM::MM::Candidate
Candidate edge matched to a GPS point
Definition: mm_type.hpp:26
FMM::MM::Candidate::offset
double offset
offset distance from the start of polyline to p'
Definition: mm_type.hpp:31
FMM::MM::Record::source
NETWORK::NodeIndex source
source node
Definition: ubodt.hpp:24
FMM::CORE
Core data types.
Definition: geometry.hpp:22
FMM::MM::UBODT::read_ubodt_csv
static std::shared_ptr< UBODT > read_ubodt_csv(const std::string &filename, int multiplier=50000)
Read UBODT from a CSV file.
Definition: ubodt.cpp:174
FMM::MM::Record::first_n
NETWORK::NodeIndex first_n
next node visited from source to target
Definition: ubodt.hpp:26
FMM::MM::UBODT::estimate_ubodt_rows
static long estimate_ubodt_rows(const std::string &filename)
Estimate the number of rows in a file.
Definition: ubodt.cpp:122
FMM::MM
Class related with map matching.
Definition: composite_graph.hpp:18
FMM::MM::UBODT::look_sp_path
std::vector< NETWORK::EdgeIndex > look_sp_path(NETWORK::NodeIndex source, NETWORK::NodeIndex target) const
Look up a shortest path (SP) containing edges from source to target.
Definition: ubodt.cpp:55
FMM::NETWORK
Classes related with network and graph.
Definition: graph.hpp:21
FMM::MM::Record::cost
double cost
distance from source to target
Definition: ubodt.hpp:29
FMM::MM::UBODT::read_ubodt_file
static std::shared_ptr< UBODT > read_ubodt_file(const std::string &filename, int multiplier=50000)
Read UBODT from a file.
Definition: ubodt.cpp:160
FMM::NETWORK::Edge::source
NodeIndex source
source node index
Definition: type.hpp:49
FMM::NETWORK::NodeIndex
unsigned int NodeIndex
Node Index in the network, range from [0,num_vertices-1 ].
Definition: type.hpp:24
FMM::MM::Record
Record type of the upper bounded origin destination table
Definition: ubodt.hpp:23
FMM::MM::Record::next_e
NETWORK::EdgeIndex next_e
next edge visited from source to target
Definition: ubodt.hpp:28
FMM::MM::UBODT::insert
void insert(Record *r)
Insert a record into the hash table.
Definition: ubodt.cpp:114
FMM::MM::Record::next
Record * next
the next record stored in hashtable
Definition: ubodt.hpp:30
FMM::NETWORK::Edge::target
NodeIndex target
target node index
Definition: type.hpp:50