Skip to content

Instantly share code, notes, and snippets.

@antonmks
Created April 13, 2017 06:42
Show Gist options
  • Save antonmks/63f1984451f240142cf6ef52c97a671a to your computer and use it in GitHub Desktop.
Save antonmks/63f1984451f240142cf6ef52c97a671a to your computer and use it in GitHub Desktop.
Query 3
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>

#include <stdint.h>
#include <time.h>

#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/device_vector.h>
#include <thrust/extrema.h>
#include <thrust/for_each.h>
#include <thrust/functional.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/pair.h>
#include <thrust/reduce.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
#include <thrust/tuple.h>
#include <thrust/unique.h>
using namespace std;
// Binary predicate over two-element tuples: two tuples compare equal only
// when both their first and their second components are equal. Used as the
// equality test for thrust::unique_copy over zipped column pairs.
struct my_unique_eq {
  template <typename T>
  __host__ __device__ bool operator()(const T lhs, const T rhs) const {
    // Bail out on the first component before looking at the second one.
    if (!(thrust::get<0>(lhs) == thrust::get<0>(rhs))) {
      return false;
    }
    return thrust::get<1>(lhs) == thrust::get<1>(rhs);
  }
};
// Row filter implementing the WHERE clause.
//
// For a row index i, writes into res[i] whether
//   time6_from[0] <= time6[i] <= time6_to[0]  and  bool5[i] == bool5_val[0].
// All six pointers are stored as given and dereferenced inside operator(),
// so they must be valid in whatever execution space the functor is invoked
// from. The bounds and the wanted flag are read from element 0 of their
// respective arrays.
struct check_records {
  const unsigned int *time6;
  const char *bool5;
  bool *res;
  const unsigned int *time6_from;
  const unsigned int *time6_to;
  const char *bool5_val;

  check_records(const unsigned int *time6_in, const char *bool5_in,
                bool *res_out, const unsigned int *from_in,
                const unsigned int *to_in, const char *bool5_wanted)
      : time6(time6_in), bool5(bool5_in), res(res_out), time6_from(from_in),
        time6_to(to_in), bool5_val(bool5_wanted) {}

  // Evaluates the predicate for row i and stores the outcome in res[i].
  template <typename IndexType>
  __host__ __device__ void operator()(const IndexType &i) {
    const unsigned int stamp = time6[i];
    const bool in_window = stamp >= time6_from[0] && stamp <= time6_to[0];
    // bool5 is only consulted when the timestamp is inside the window.
    res[i] = in_window && bool5[i] == bool5_val[0];
  }
};
// Reads every whitespace-separated value of type T from `file_name` and
// uploads the values to `column`. Prints "read <file>" on success; if the
// file cannot be opened, prints a message and leaves `column` untouched.
template <typename T>
static void load_column(const std::string &file_name,
                        thrust::device_vector<T> &column) {
  std::fstream in(file_name, std::ios_base::in | std::ios::binary);
  if (!in) {
    cout << "Could not open file " << file_name << endl;
    return;
  }
  std::vector<T> host_values;
  T value;
  while (in >> value) {
    host_values.push_back(value);
  }
  column.resize(host_values.size());
  thrust::copy(host_values.begin(), host_values.end(), column.begin());
  cout << "read " << file_name << endl;
}

// Same as load_column, but each whitespace-separated token is turned into an
// integer built from its characters [0,4), [5,7) and [8,10) concatenated
// (e.g. a token starting with "2017-04-13" becomes 20170413).
static void load_date_column(const std::string &file_name,
                             thrust::device_vector<unsigned int> &column) {
  std::fstream in(file_name, std::ios_base::in | std::ios::binary);
  if (!in) {
    cout << "Could not open file " << file_name << endl;
    return;
  }
  std::vector<unsigned int> host_values;
  std::string token;
  while (in >> token) {
    host_values.push_back(std::stoi(token.substr(0, 4) + token.substr(5, 2) +
                                    token.substr(8, 2)));
  }
  column.resize(host_values.size());
  thrust::copy(host_values.begin(), host_values.end(), column.begin());
  cout << "read " << file_name << endl;
}

// Empties `column` and returns its storage to the device allocator.
template <typename T>
static void release_column(thrust::device_vector<T> &column) {
  column.clear();
  column.shrink_to_fit();
}

// Copies the rows of `column` whose `keep` flag is true into `kept_rows`
// (resized to `kept_count`), then releases `column`.
template <typename T>
static void compact_column(thrust::device_vector<T> &column,
                           const thrust::device_vector<bool> &keep,
                           size_t kept_count,
                           thrust::device_vector<T> &kept_rows) {
  kept_rows.resize(kept_count);
  thrust::copy_if(column.begin(), column.end(), keep.begin(),
                  kept_rows.begin(), thrust::identity<bool>());
  release_column(column);
}

// Prints `header`, then one "<enum> <count>" line per distinct value in
// `enums`, where count is the number of distinct ids paired with that value.
// Both vectors are reordered in place (sorted by enum, then by id).
static void print_distinct_per_enum(const char *header,
                                    thrust::device_vector<uint64_t> &ids,
                                    thrust::device_vector<unsigned int> &enums) {
  // Two stable sorts give an ordering by (enum, id), so that equal pairs
  // are adjacent and rows of the same enum form one contiguous run.
  thrust::stable_sort_by_key(ids.begin(), ids.end(), enums.begin());
  thrust::stable_sort_by_key(enums.begin(), enums.end(), ids.begin());

  // SQL DISTINCT: collapse adjacent duplicate (id, enum) pairs.
  thrust::device_vector<uint64_t> uniq_ids(ids.size());
  thrust::device_vector<unsigned int> uniq_enums(enums.size());
  auto uniq_begin = thrust::make_zip_iterator(
      thrust::make_tuple(uniq_ids.begin(), uniq_enums.begin()));
  auto uniq_end = thrust::unique_copy(
      thrust::make_zip_iterator(thrust::make_tuple(ids.begin(), enums.begin())),
      thrust::make_zip_iterator(thrust::make_tuple(ids.end(), enums.end())),
      uniq_begin, my_unique_eq());
  const size_t n_pairs = uniq_end - uniq_begin;
  // Trim to the pairs actually written; otherwise the reduction below would
  // also count the unused tail and could overrun its output buffers.
  uniq_ids.resize(n_pairs);
  uniq_enums.resize(n_pairs);

  // COUNT(...) GROUP BY enum: sum a constant 1 over each run of equal enums.
  thrust::device_vector<int> group_keys(n_pairs);
  thrust::device_vector<int> group_counts(n_pairs);
  auto group_ends = thrust::reduce_by_key(
      uniq_enums.begin(), uniq_enums.end(), thrust::make_constant_iterator(1),
      group_keys.begin(), group_counts.begin());
  const size_t n_groups = group_ends.second - group_counts.begin();

  // Bring the (small) result to the host in one transfer per column instead
  // of one transfer per printed element.
  std::vector<int> host_keys(n_groups);
  std::vector<int> host_counts(n_groups);
  thrust::copy(group_keys.begin(), group_keys.begin() + n_groups,
               host_keys.begin());
  thrust::copy(group_counts.begin(), group_counts.begin() + n_groups,
               host_counts.begin());

  cout << header << endl;
  for (size_t g = 0; g < n_groups; g++)
    cout << host_keys[g] << " " << host_counts[g] << endl;
}

// Entry point. Loads five text columns (col1, col2, col5, col6, col7) from
// the current directory, keeps the rows whose time6 lies in
// [time6_from, time6_to] and whose bool5 equals the requested character, and
// prints COUNT(DISTINCT id2) and COUNT(DISTINCT id1) grouped by enum7,
// followed by the CPU time spent after loading.
//
// Options: -time6_from N, -time6_to N, -bool5 C. Unrecognised arguments are
// ignored. Exits with status 1 when called without arguments, when an option
// is missing its value, or when the columns have different row counts.
int main(int ac, char **av) {
  unsigned int time6_from = 19000101, time6_to = 20300101;
  // NOTE(review): col5 is read as text characters, so a default of NUL
  // presumably matches no row unless -bool5 is given - confirm whether the
  // default was meant to be '0'.
  char bool5_val = 0;
  string usage = "Usage : query1 [-time6_from TIME6_FROM] [-time6_to TIME6_TO "
                 "] [-bool5 BOOL5]";
  if (ac == 1) {
    cout << usage << endl;
    exit(1);
  }

  for (int i = 1; i < ac; i++) {
    const bool is_from = strcmp(av[i], "-time6_from") == 0;
    const bool is_to = strcmp(av[i], "-time6_to") == 0;
    const bool is_bool5 = strcmp(av[i], "-bool5") == 0;
    if (!is_from && !is_to && !is_bool5) {
      continue;
    }
    if (i + 1 >= ac) {
      // A recognised option must be followed by its value.
      cout << usage << endl;
      exit(1);
    }
    // Consume the value so it is never re-interpreted as an option name.
    const char *value = av[++i];
    if (is_from) {
      time6_from = atoi(value);
    } else if (is_to) {
      time6_to = atoi(value);
    } else {
      bool5_val = value[0];
    }
  }

  cout << "Parsing columns " << endl;
  thrust::device_vector<uint64_t> id2;
  load_column("col2.txt", id2);
  thrust::device_vector<uint64_t> id1;
  load_column("col1.txt", id1);
  thrust::device_vector<unsigned int> time6;
  load_date_column("col6.txt", time6);
  thrust::device_vector<unsigned int> enum7;
  load_column("col7.txt", enum7);
  thrust::device_vector<char> bool5;
  load_column("col5.txt", bool5);

  // Every step below indexes the columns with a shared row index, so a
  // length mismatch would read or write outside the device buffers.
  const size_t n_rows = bool5.size();
  if (id1.size() != n_rows || id2.size() != n_rows ||
      time6.size() != n_rows || enum7.size() != n_rows) {
    cout << "Column files have different row counts" << endl;
    exit(1);
  }

  // Now we have all 5 arrays in a device memory
  std::clock_t start1 = std::clock();
  thrust::device_vector<bool> res(n_rows);

  // SQL WHERE condition check. check_records reads its bounds through
  // pointers, so the scalars are staged in one-element device vectors.
  thrust::device_vector<unsigned int> dev_time6_from(1);
  thrust::device_vector<unsigned int> dev_time6_to(1);
  thrust::device_vector<char> dev_bool5(1);
  dev_time6_from[0] = time6_from;
  dev_time6_to[0] = time6_to;
  dev_bool5[0] = bool5_val;
  thrust::counting_iterator<unsigned int> begin(0);
  check_records ff(
      (const unsigned int *)thrust::raw_pointer_cast(time6.data()),
      (const char *)thrust::raw_pointer_cast(bool5.data()),
      thrust::raw_pointer_cast(res.data()),
      (const unsigned int *)thrust::raw_pointer_cast(dev_time6_from.data()),
      (const unsigned int *)thrust::raw_pointer_cast(dev_time6_to.data()),
      (const char *)thrust::raw_pointer_cast(dev_bool5.data()));
  thrust::for_each(begin, begin + res.size(), ff);

  // The filter columns are not part of the output; free them right away.
  release_column(time6);
  release_column(bool5);

  // Number of rows that PASSED the filter: this is what copy_if will write,
  // so it is the size the compacted columns must have.
  const size_t w_count = thrust::count(res.begin(), res.end(), true);

  // copy_if the results
  thrust::device_vector<uint64_t> id2_cpy;
  compact_column(id2, res, w_count, id2_cpy);
  thrust::device_vector<uint64_t> id1_cpy;
  compact_column(id1, res, w_count, id1_cpy);
  thrust::device_vector<unsigned int> enum7_cpy;
  compact_column(enum7, res, w_count, enum7_cpy);
  release_column(res);

  // Each aggregation reorders its enum column together with its id column,
  // so the second aggregation needs its own copy in the original row order.
  thrust::device_vector<unsigned int> enum7_cpy_2 = enum7_cpy;

  print_distinct_per_enum("enum7 COUNT(DISTINCT(id2))", id2_cpy, enum7_cpy);
  print_distinct_per_enum("enum7 COUNT(DISTINCT(id1))", id1_cpy, enum7_cpy_2);

  std::cout << "time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC)
            << '\n';
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment