Created
April 13, 2017 06:42
-
-
Save antonmks/63f1984451f240142cf6ef52c97a671a to your computer and use it in GitHub Desktop.
Query 3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h>
#include <time.h>

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>

#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/device_vector.h>
#include <thrust/extrema.h>
#include <thrust/for_each.h>
#include <thrust/functional.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
#include <thrust/tuple.h>
#include <thrust/unique.h>
using namespace std; | |
struct my_unique_eq { | |
template <typename T> | |
__host__ __device__ bool operator()(const T d1, const T d2) const { | |
return ((thrust::get<0>(d1) == thrust::get<0>(d2)) && | |
(thrust::get<1>(d1) == thrust::get<1>(d2))); | |
} | |
}; | |
// Row filter implementing the WHERE conditions.
//
// For a row index i, writes res[i] = true when
//   time6_from[0] <= time6[i] <= time6_to[0]  and  bool5[i] == bool5_val[0],
// and res[i] = false otherwise. The functor only stores the pointers it is
// given; it neither allocates nor frees anything, and every pointer must stay
// valid (and be dereferenceable wherever operator() executes) for as long as
// the functor is in use.
struct check_records {
  const unsigned int *time6;       // per-row value compared against the bounds
  const char *bool5;               // per-row flag character
  bool *res;                       // per-row output, written by operator()
  const unsigned int *time6_from;  // points at the inclusive lower bound
  const unsigned int *time6_to;    // points at the inclusive upper bound
  const char *bool5_val;           // points at the flag value a row must equal

  check_records(const unsigned int *_time6, const char *_bool5, bool *_res,
                const unsigned int *_time6_from,
                const unsigned int *_time6_to, const char *_bool5_val)
      : time6(_time6),
        bool5(_bool5),
        res(_res),
        time6_from(_time6_from),
        time6_to(_time6_to),
        bool5_val(_bool5_val) {}

  // Evaluates the filter for row `i` and stores the verdict in res[i].
  template <typename IndexType>
  __host__ __device__ void operator()(const IndexType &i) {
    const unsigned int stamp = time6[i];
    const bool inside_window = stamp >= *time6_from && stamp <= *time6_to;
    // The flag is only inspected for rows that fall inside the window.
    res[i] = inside_window && bool5[i] == *bool5_val;
  }
};
int main(int ac, char **av) { | |
unsigned int time6_from = 19000101, time6_to = 20300101; | |
char bool5_val = 0; | |
string usage = "Usage : query1 [-time6_from TIME6_FROM] [-time6_to TIME6_TO " | |
"] [-bool5 BOOL5]"; | |
if (ac == 1) { | |
cout << usage << endl; | |
exit(1); | |
}; | |
for (unsigned int i = 1; i < ac; i++) { | |
if (strcmp(av[i], "-time6_from") == 0) { | |
if (i + 1 < ac) { | |
time6_from = atoi(av[i + 1]); | |
i++; | |
} else { | |
cout << usage << endl; | |
exit(1); | |
}; | |
} | |
if (strcmp(av[i], "-time6_to") == 0) { | |
if (i + 1 < ac) { | |
time6_to = atoi(av[i + 1]); | |
i++; | |
} else { | |
cout << usage << endl; | |
exit(1); | |
}; | |
} else if (strcmp(av[i], "-bool5") == 0) { | |
if (i + 1 < ac) { | |
bool5_val = av[i + 1][0]; | |
i++; | |
} else { | |
cout << usage << endl; | |
exit(1); | |
}; | |
} | |
}; | |
cout << "Parsing columns " << endl; | |
string file_name = "col2.txt"; | |
std::fstream f(file_name, std::ios_base::in | ios::binary); | |
uint64_t key; | |
thrust::device_vector<uint64_t> id2; | |
std::vector<uint64_t> keys; | |
thrust::device_vector<bool> res; | |
if (f) { | |
while (f >> key) { | |
keys.push_back(key); | |
}; | |
id2.resize(keys.size()); | |
thrust::copy(keys.begin(), keys.end(), id2.begin()); | |
cout << "read " << file_name << endl; | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
file_name = "col1.txt"; | |
std::fstream f5(file_name, std::ios_base::in | ios::binary); | |
thrust::device_vector<uint64_t> id1; | |
keys.clear(); | |
if (f5) { | |
while (f5 >> key) { | |
keys.push_back(key); | |
}; | |
id1.resize(keys.size()); | |
thrust::copy(keys.begin(), keys.end(), id1.begin()); | |
cout << "read " << file_name << endl; | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
file_name = "col6.txt"; | |
std::fstream f1(file_name, std::ios_base::in | ios::binary); | |
string date_string; | |
thrust::device_vector<unsigned int> time6; | |
std::vector<unsigned int> date_keys; | |
if (f1) { | |
while (f1 >> date_string) { | |
date_keys.push_back(std::stoi(date_string.substr(0, 4) + | |
date_string.substr(5, 2) + | |
date_string.substr(8, 2))); | |
}; | |
time6.resize(date_keys.size()); | |
thrust::copy(date_keys.begin(), date_keys.end(), time6.begin()); | |
cout << "read " << file_name << endl; | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
file_name = "col7.txt"; | |
std::fstream f7(file_name, std::ios_base::in | ios::binary); | |
thrust::device_vector<unsigned int> enum7; | |
unsigned int myenum; | |
date_keys.clear(); | |
if (f7) { | |
while (f7 >> myenum) { | |
date_keys.push_back(myenum); | |
}; | |
enum7.resize(date_keys.size()); | |
thrust::copy(date_keys.begin(), date_keys.end(), enum7.begin()); | |
cout << "read " << file_name << endl; | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
file_name = "col5.txt"; | |
std::fstream f2(file_name, std::ios_base::in | ios::binary); | |
char bool_char; | |
thrust::device_vector<char> bool5; | |
std::vector<char> bool_keys; | |
if (f2) { | |
while (f2 >> bool_char) { | |
bool_keys.push_back(bool_char); | |
}; | |
cout << "read " << file_name << endl; | |
bool5.resize(bool_keys.size()); | |
thrust::copy(bool_keys.begin(), bool_keys.end(), bool5.begin()); | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
// Now we have all 5 arrays in a device memory | |
std::clock_t start1 = std::clock(); | |
res.resize(bool_keys.size()); | |
// SQL WHERE condition check | |
thrust::device_vector<unsigned int> dev_time6_from(1); | |
thrust::device_vector<unsigned int> dev_time6_to(1); | |
thrust::device_vector<char> dev_bool5(1); | |
dev_time6_from[0] = time6_from; | |
dev_time6_to[0] = time6_to; | |
dev_bool5[0] = bool5_val; | |
thrust::counting_iterator<unsigned int> begin(0); | |
check_records ff( | |
(const unsigned int *)thrust::raw_pointer_cast(time6.data()), | |
(const char *)thrust::raw_pointer_cast(bool5.data()), | |
thrust::raw_pointer_cast(res.data()), | |
(const unsigned int *)thrust::raw_pointer_cast(dev_time6_from.data()), | |
(const unsigned int *)thrust::raw_pointer_cast(dev_time6_to.data()), | |
(const char *)thrust::raw_pointer_cast(dev_bool5.data())); | |
thrust::for_each(begin, begin + res.size(), ff); | |
time6.resize(0); | |
time6.shrink_to_fit(); | |
bool5.resize(0); | |
bool5.shrink_to_fit(); | |
auto w_count = thrust::count(res.begin(), res.end(), 0); | |
// copy_if the results | |
thrust::device_vector<uint64_t> id2_cpy(w_count); | |
thrust::copy_if(id2.begin(), id2.end(), res.begin(), id2_cpy.begin(), | |
thrust::identity<bool>()); | |
id2.resize(0); | |
id2.shrink_to_fit(); | |
thrust::device_vector<uint64_t> id1_cpy(w_count); | |
thrust::copy_if(id1.begin(), id1.end(), res.begin(), id1_cpy.begin(), | |
thrust::identity<bool>()); | |
id1.resize(0); | |
id1.shrink_to_fit(); | |
thrust::device_vector<unsigned int> time6_cpy(w_count); | |
thrust::copy_if(time6.begin(), time6.end(), res.begin(), time6_cpy.begin(), | |
thrust::identity<bool>()); | |
time6.resize(0); | |
time6.shrink_to_fit(); | |
thrust::device_vector<char> bool5_cpy(w_count); | |
thrust::copy_if(bool5.begin(), bool5.end(), res.begin(), bool5_cpy.begin(), | |
thrust::identity<bool>()); | |
bool5.resize(0); | |
bool5.shrink_to_fit(); | |
thrust::device_vector<unsigned int> enum7_cpy(w_count); | |
thrust::copy_if(enum7.begin(), enum7.end(), res.begin(), enum7_cpy.begin(), | |
thrust::identity<bool>()); | |
enum7.resize(0); | |
enum7.shrink_to_fit(); | |
thrust::device_vector<unsigned int> enum7_cpy_2 = enum7_cpy; | |
// SQL DISTINCT | |
thrust::stable_sort_by_key(id2_cpy.begin(), id2_cpy.end(), enum7_cpy.begin()); | |
thrust::stable_sort_by_key(enum7_cpy.begin(), enum7_cpy.end(), | |
id2_cpy.begin()); | |
thrust::device_vector<uint64_t> d_ukeys(w_count); | |
thrust::device_vector<unsigned int> d_usegs(w_count); | |
auto ip1 = thrust::unique_copy(thrust::make_zip_iterator(thrust::make_tuple( | |
id2_cpy.begin(), enum7_cpy.begin())), | |
thrust::make_zip_iterator(thrust::make_tuple( | |
id2_cpy.end(), enum7_cpy.end())), | |
thrust::make_zip_iterator(thrust::make_tuple( | |
d_ukeys.begin(), d_usegs.begin())), | |
my_unique_eq()); | |
auto sz2 = ip1 - thrust::make_zip_iterator( | |
thrust::make_tuple(d_ukeys.begin(), d_usegs.begin())); | |
d_ukeys.resize(sz2); | |
d_usegs.resize(sz2); | |
thrust::device_vector<int> d_seg_nums(sz2); | |
thrust::device_vector<int> d_seg_lens(sz2); | |
auto ip2 = thrust::reduce_by_key(d_usegs.begin(), d_usegs.end(), | |
thrust::make_constant_iterator(1), | |
d_seg_nums.begin(), d_seg_lens.begin()); | |
auto sz3 = thrust::get<1>(ip2) - d_seg_lens.begin(); | |
d_seg_nums.resize(sz3); | |
d_seg_lens.resize(sz3); | |
cout << "enum7 COUNT(DISTINCT(id2))" << endl; | |
for (unsigned int i = 0; i < sz3; i++) | |
cout << d_seg_nums[i] << " " << d_seg_lens[i] << endl; | |
thrust::stable_sort_by_key(id1_cpy.begin(), id1_cpy.end(), | |
enum7_cpy_2.begin()); | |
thrust::stable_sort_by_key(enum7_cpy_2.begin(), enum7_cpy_2.end(), | |
id1_cpy.begin()); | |
d_ukeys.resize(w_count); | |
d_usegs.resize(w_count); | |
ip1 = thrust::unique_copy(thrust::make_zip_iterator(thrust::make_tuple( | |
id1_cpy.begin(), enum7_cpy_2.begin())), | |
thrust::make_zip_iterator(thrust::make_tuple( | |
id1_cpy.end(), enum7_cpy_2.end())), | |
thrust::make_zip_iterator(thrust::make_tuple( | |
d_ukeys.begin(), d_usegs.begin())), | |
my_unique_eq()); | |
sz2 = ip1 - thrust::make_zip_iterator( | |
thrust::make_tuple(d_ukeys.begin(), d_usegs.begin())); | |
d_seg_nums.resize(sz2); | |
d_seg_lens.resize(sz2); | |
ip2 = thrust::reduce_by_key(d_usegs.begin(), d_usegs.end(), | |
thrust::make_constant_iterator(1), | |
d_seg_nums.begin(), d_seg_lens.begin()); | |
sz3 = thrust::get<1>(ip2) - d_seg_lens.begin(); | |
d_seg_nums.resize(sz3); | |
d_seg_lens.resize(sz3); | |
cout << "enum7 COUNT(DISTINCT(id1))" << endl; | |
for (unsigned int i = 0; i < sz3; i++) | |
cout << d_seg_nums[i] << " " << d_seg_lens[i] << endl; | |
std::cout << "time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) | |
<< '\n'; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment