5 #include "mm/fmm/ubodt.hpp"
6 #include "util/util.hpp"
8 #include <boost/archive/binary_iarchive.hpp>
14 UBODT::UBODT(
int buckets_arg,
int multiplier_arg) :
15 buckets(buckets_arg), multiplier(multiplier_arg) {
16 SPDLOG_TRACE(
"Intialization UBODT with buckets {} multiplier {}",
18 hashtable = (
Record **) malloc(
sizeof(
Record *) * buckets);
19 for (
int i = 0; i < buckets; i++) {
20 hashtable[i] =
nullptr;
22 SPDLOG_TRACE(
"Intialization UBODT finished");
27 SPDLOG_TRACE(
"Clean UBODT");
29 for (i = 0; i < buckets; ++i) {
30 Record *head = hashtable[i];
32 while ((curr = head) !=
nullptr) {
39 SPDLOG_TRACE(
"Clean UBODT finished");
45 while (r !=
nullptr) {
57 std::vector<EdgeIndex> edges;
58 if (source == target) {
return edges; }
61 if (r ==
nullptr) {
return edges; }
63 edges.push_back(r->
next_e);
66 edges.push_back(r->
next_e);
71 const std::vector<Edge> &edges,
72 std::vector<int> *indices)
const {
74 if (!indices->empty()) indices->clear();
75 if (path.empty())
return cpath;
77 cpath.push_back(path[0]->c->edge->id);
79 indices->push_back(current_idx);
80 for (
int i = 0; i < N - 1; ++i) {
92 cpath.push_back(edges[e].
id);
95 cpath.push_back(b->
edge->
id);
97 indices->push_back(current_idx);
99 indices->push_back(current_idx);
110 return (source * multiplier + target) % buckets;
117 r->
next = hashtable[h];
119 if (r->
cost > delta) delta = r->
cost;
123 struct stat stat_buf;
124 long rc = stat(filename.c_str(), &stat_buf);
126 int file_bytes = stat_buf.st_size;
127 SPDLOG_TRACE(
"UBODT file size is {} bytes", file_bytes);
128 std::string fn_extension = filename.substr(filename.find_last_of(
".") + 1);
129 std::transform(fn_extension.begin(),
131 fn_extension.begin(),
132 [](
unsigned char c) { return std::tolower(c); });
133 if (fn_extension ==
"csv" || fn_extension ==
"txt") {
135 return file_bytes / row_size;
136 }
else if (fn_extension ==
"bin" || fn_extension ==
"binary") {
141 return file_bytes / row_size;
148 std::vector<int> prime_numbers = {
149 5003, 10039, 20029, 50047, 100669, 200003, 500000,
150 1000039, 2000083, 5000101, 10000103, 20000033};
151 int N = prime_numbers.size();
152 for (
int i = 0; i < N; ++i) {
153 if (value <= prime_numbers[i]) {
154 return prime_numbers[i];
157 return prime_numbers[N - 1];
167 SPDLOG_CRITICAL(
"File format not support: {}",filename);
168 std::exit(EXIT_FAILURE);
176 SPDLOG_INFO(
"Reading UBODT file (CSV format) from {}", filename);
179 SPDLOG_TRACE(
"Estimated buckets {}", buckets);
180 int progress_step = 1000000;
181 std::shared_ptr<UBODT> table = std::make_shared<UBODT>(buckets, multiplier);
182 FILE *stream = fopen(filename.c_str(),
"r");
186 SPDLOG_TRACE(
"Header line skipped.");
193 line,
"%d;%d;%d;%d;%d;%lf",
203 if (NUM_ROWS % progress_step == 0) {
204 SPDLOG_INFO(
"Read rows {}", NUM_ROWS);
208 double lf = NUM_ROWS / (double) buckets;
209 SPDLOG_TRACE(
"Estimated load factor #elements/#tablebuckets {}", lf);
210 if (lf > 10) { SPDLOG_WARN(
"Load factor is too large.") }
211 SPDLOG_INFO(
"Finish reading UBODT with rows {}", NUM_ROWS);
217 SPDLOG_INFO(
"Reading UBODT file (binary format) from {}", filename);
219 int progress_step = 1000000;
220 SPDLOG_TRACE(
"Estimated rows is {}", rows);
222 std::shared_ptr<UBODT> table = std::make_shared<UBODT>(buckets, multiplier);
224 std::ifstream ifs(filename.c_str());
226 std::streampos archiveOffset = ifs.tellg();
227 std::streampos streamEnd = ifs.seekg(0, std::ios_base::end).tellg();
228 ifs.seekg(archiveOffset);
229 boost::archive::binary_iarchive ia(ifs);
230 while (ifs.tellg() < streamEnd) {
241 if (NUM_ROWS % progress_step == 0) {
242 SPDLOG_INFO(
"Read rows {}", NUM_ROWS);
246 double lf = NUM_ROWS / (double) buckets;
247 SPDLOG_TRACE(
"Estimated load factor #elements/#tablebuckets {}", lf);
249 SPDLOG_WARN(
"Load factor is too large.")
251 SPDLOG_INFO(
"Finish reading UBODT with rows {}", NUM_ROWS)
unsigned int cal_bucket_index(NETWORK::NodeIndex source, NETWORK::NodeIndex target) const
Find the bucket index for an OD pair.
constexpr static double LOAD_FACTOR
factor measuring the average number of elements in a bucket.
std::vector< const TGNode * > TGOpath
The optimal path of nodes in the transition graph.
Record * look_up(NETWORK::NodeIndex source, NETWORK::NodeIndex target) const
Look up the row according to a source node and a target node.
NETWORK::Edge * edge
candidate edge
static std::shared_ptr< UBODT > read_ubodt_binary(const std::string &filename, int multiplier=50000)
Read UBODT from a binary file.
C_Path construct_complete_path(const TGOpath &path, const std::vector< NETWORK::Edge > &edges, std::vector< int > *indices) const
Construct the complete path (a vector of edge ID) from an optimal path (a vector of optimal nodes in ...
double get_delta() const
Get the upperbound of the UBODT.
bool check_file_extension(const std::string &filename, const std::string &extension_list_str)
Check if the filename has an extension in the list.
static int find_prime_number(double value)
Find a large prime number according to input value.
EdgeID id
Edge ID, can be discontinuous integers.
std::vector< FMM::NETWORK::EdgeID > C_Path
Complete path, ids of a sequence of topologically connected edges.
NETWORK::NodeIndex target
target node
static const int BUFFER_LINE
Number of characters to store in a line.
NETWORK::NodeIndex prev_n
last node visited before target
Candidate edge matched to a GPS point
double offset
offset distance from the start of polyline to p'
NETWORK::NodeIndex source
source node
static std::shared_ptr< UBODT > read_ubodt_csv(const std::string &filename, int multiplier=50000)
Read UBODT from a CSV file.
NETWORK::NodeIndex first_n
next node visited from source to target
static long estimate_ubodt_rows(const std::string &filename)
Estimate the number of rows in a file.
Class related with map matching.
std::vector< NETWORK::EdgeIndex > look_sp_path(NETWORK::NodeIndex source, NETWORK::NodeIndex target) const
Look up a shortest path (SP) containing edges from source to target.
Classes related with network and graph.
double cost
distance from source to target
static std::shared_ptr< UBODT > read_ubodt_file(const std::string &filename, int multiplier=50000)
Read UBODT from a file.
NodeIndex source
source node index
unsigned int NodeIndex
Node Index in the network, range from [0,num_vertices-1 ].
Record type of the upper bounded origin destination table
NETWORK::EdgeIndex next_e
next edge visited from source to target
void insert(Record *r)
Insert a record into the hash table.
Record * next
the next record stored in hashtable
NodeIndex target
target node index