Last active
May 4, 2017 14:22
-
-
Save antonmks/762f8edee5422e518d70093f9b12fb2b to your computer and use it in GitHub Desktop.
Query 1 scalable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <ctime> | |
#include <fstream> | |
#include <iomanip> | |
#include <iostream> | |
#include <iterator> | |
#include <map> | |
#include <numeric> | |
#include <sstream> | |
#include <stdint.h> | |
#include <thrust/device_vector.h> | |
#include <thrust/sort.h> | |
#include <thrust/merge.h> | |
#include <thrust/unique.h> | |
#include <time.h> | |
using namespace std; | |
using namespace thrust::placeholders; | |
// Row predicate used as a for_each functor.
// For an index i it writes into res[i] whether time6[i] lies in the closed
// interval [time6_from[0], time6_to[0]] and bool5[i] equals bool5_val[0].
// The struct only stores the raw pointers it is given; it owns none of them.
struct check_records {
  const unsigned int *time6;      // per-row values tested against the bounds
  const char *bool5;              // per-row flag characters
  bool *res;                      // per-row output of the predicate
  const unsigned int *time6_from; // element 0: inclusive lower bound
  const unsigned int *time6_to;   // element 0: inclusive upper bound
  const char *bool5_val;          // element 0: flag character to match

  check_records(const unsigned int *_time6, const char *_bool5, bool *_res,
                const unsigned int *_time6_from, const unsigned int *_time6_to,
                const char *_bool5_val)
      : time6(_time6), bool5(_bool5), res(_res), time6_from(_time6_from),
        time6_to(_time6_to), bool5_val(_bool5_val) {}

  // Evaluates the predicate for row i and stores the outcome in res[i].
  template <typename IndexType>
  __host__ __device__ void operator()(const IndexType &i) {
    const bool not_too_early = !(time6[i] < time6_from[0]);
    // && short-circuits, so the later operands are only read when the
    // earlier ones hold, exactly like the original if-condition.
    res[i] = not_too_early && !(time6_to[0] < time6[i]) &&
             (bool5[i] == bool5_val[0]);
  }
};
int main(int ac, char **av) { | |
unsigned int time6_from = 19000101, time6_to = 20300101; | |
char bool5_val = 0; | |
string usage = "Usage : query1 [-time6_from TIME6_FROM] [-time6_to TIME6_TO " | |
"] [-bool5 BOOL5]"; | |
if (ac == 1) { | |
cout << usage << endl; | |
exit(1); | |
}; | |
for (unsigned int i = 1; i < ac; i++) { | |
if (strcmp(av[i], "-time6_from") == 0) { | |
if (i + 1 < ac) { | |
time6_from = atoi(av[i + 1]); | |
i++; | |
} else { | |
cout << usage << endl; | |
exit(1); | |
}; | |
} | |
if (strcmp(av[i], "-time6_to") == 0) { | |
if (i + 1 < ac) { | |
time6_to = atoi(av[i + 1]); | |
i++; | |
} else { | |
cout << usage << endl; | |
exit(1); | |
#include <algorithm> | |
#include <ctime> | |
#include <fstream> | |
#include <iomanip> | |
#include <iostream> | |
#include <iterator> | |
#include <map> | |
#include <numeric> | |
#include <sstream> | |
#include <stdint.h> | |
#include <thrust/device_vector.h> | |
#include <thrust/sort.h> | |
#include <thrust/merge.h> | |
#include <thrust/unique.h> | |
#include <time.h> | |
using namespace std; | |
using namespace thrust::placeholders; | |
// Functor that evaluates the row filter for one index at a time.
// res[i] becomes true when time6[i] is within [time6_from[0], time6_to[0]]
// (both ends inclusive) and bool5[i] equals bool5_val[0]; otherwise false.
// All members are non-owning raw pointers supplied by the caller.
struct check_records {
  const unsigned int *time6;      // per-row values tested against the bounds
  const char *bool5;              // per-row flag characters
  bool *res;                      // per-row output of the predicate
  const unsigned int *time6_from; // element 0: inclusive lower bound
  const unsigned int *time6_to;   // element 0: inclusive upper bound
  const char *bool5_val;          // element 0: flag character to match

  check_records(const unsigned int *_time6, const char *_bool5, bool *_res,
                const unsigned int *_time6_from, const unsigned int *_time6_to,
                const char *_bool5_val)
      : time6(_time6), bool5(_bool5), res(_res), time6_from(_time6_from),
        time6_to(_time6_to), bool5_val(_bool5_val) {}

  // Evaluates the predicate for row i and stores the outcome in res[i].
  template <typename IndexType>
  __host__ __device__ void operator()(const IndexType &i) {
    bool keep = false;
    if (time6[i] >= time6_from[0]) {
      if (time6[i] <= time6_to[0]) {
        keep = (bool5[i] == bool5_val[0]);
      }
    }
    res[i] = keep;
  }
};
int main(int ac, char **av) { | |
unsigned int time6_from = 19000101, time6_to = 20300101; | |
char bool5_val = 0; | |
string usage = "Usage : query1 [-time6_from TIME6_FROM] [-time6_to TIME6_TO " | |
"] [-bool5 BOOL5]"; | |
if (ac == 1) { | |
cout << usage << endl; | |
exit(1); | |
}; | |
for (unsigned int i = 1; i < ac; i++) { | |
if (strcmp(av[i], "-time6_from") == 0) { | |
if (i + 1 < ac) { | |
time6_from = atoi(av[i + 1]); | |
i++; | |
} else { | |
cout << usage << endl; | |
exit(1); | |
}; | |
} | |
if (strcmp(av[i], "-time6_to") == 0) { | |
if (i + 1 < ac) { | |
time6_to = atoi(av[i + 1]); | |
i++; | |
} else { | |
cout << usage << endl; | |
exit(1); | |
}; | |
} else if (strcmp(av[i], "-bool5") == 0) { | |
if (i + 1 < ac) { | |
bool5_val = av[i + 1][0]; | |
i++; | |
} else { | |
cout << usage << endl; | |
exit(1); | |
}; | |
} | |
}; | |
// cout << time6_from << " " << time6_to << " " << bool5_val << endl; | |
cout << "Parsing columns " << endl; | |
string file_name = "col1.txt"; | |
std::fstream f(file_name, std::ios_base::in | ios::binary); | |
uint64_t key; | |
thrust::device_vector<uint64_t> id1; | |
std::vector<uint64_t> keys; | |
thrust::device_vector<bool> res; | |
if (f) { | |
while (f >> key) { | |
keys.push_back(key); | |
}; | |
id1.resize(keys.size()); | |
thrust::copy(keys.begin(), keys.end(), id1.begin()); | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
file_name = "col6.txt"; | |
std::fstream f1(file_name, std::ios_base::in | ios::binary); | |
string date_string; | |
thrust::device_vector<unsigned int> time6; | |
std::vector<unsigned int> date_keys; | |
if (f1) { | |
while (f1 >> date_string) { | |
date_keys.push_back(std::stoi(date_string.substr(0, 4) + | |
date_string.substr(5, 2) + | |
date_string.substr(8, 2))); | |
}; | |
time6.resize(date_keys.size()); | |
thrust::copy(date_keys.begin(), date_keys.end(), time6.begin()); | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
file_name = "col5.txt"; | |
std::fstream f2(file_name, std::ios_base::in | ios::binary); | |
char bool_char; | |
thrust::device_vector<char> bool5; | |
std::vector<char> bool_keys; | |
if (f2) { | |
while (f2 >> bool_char) { | |
bool_keys.push_back(bool_char); | |
}; | |
bool5.resize(bool_keys.size()); | |
thrust::copy(bool_keys.begin(), bool_keys.end(), bool5.begin()); | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
// Now we have all 3 arrays in a device memory | |
cout << "Done parsing " << endl; | |
std::clock_t start1 = std::clock(); | |
res.resize(bool_keys.size()); | |
// SQL WHERE condition check | |
#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <random>
#include <sstream>
#include <stdint.h>
#include <string>
#include <time.h>
#include <vector>
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/for_each.h>
#include <thrust/functional.h>
#include <thrust/generate.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/merge.h>
#include <thrust/sort.h>
#include <thrust/unique.h>
using namespace std; | |
using namespace thrust::placeholders; | |
// Returns the amount of free device memory, in bytes, as reported by
// cudaMemGetInfo for the current device.
// Returns 0 when the query fails; the previous version ignored the status
// code and would then return an uninitialized value.
size_t getFreeMem()
{
    size_t available = 0, total = 0;
    if (cudaMemGetInfo(&available, &total) != cudaSuccess) {
        return 0;
    }
    return available;
}
// WHERE-clause functor: decides, row by row, whether a record is selected.
// A row i is selected (res[i] == true) when
//   time6_from[0] <= time6[i] <= time6_to[0]  and  bool5[i] == bool5_val[0].
// The struct keeps only the raw pointers passed to its constructor.
struct check_records {
  const unsigned int *time6;      // per-row values tested against the bounds
  const char *bool5;              // per-row flag characters
  bool *res;                      // per-row output of the predicate
  const unsigned int *time6_from; // element 0: inclusive lower bound
  const unsigned int *time6_to;   // element 0: inclusive upper bound
  const char *bool5_val;          // element 0: flag character to match

  check_records(const unsigned int *_time6, const char *_bool5, bool *_res,
                const unsigned int *_time6_from, const unsigned int *_time6_to,
                const char *_bool5_val)
      : time6(_time6), bool5(_bool5), res(_res), time6_from(_time6_from),
        time6_to(_time6_to), bool5_val(_bool5_val) {}

  // Evaluates the predicate for row i and stores the outcome in res[i].
  template <typename IndexType>
  __host__ __device__ void operator()(const IndexType &i) {
    // The conditional chain reads its operands in the same order, and under
    // the same conditions, as the original && expression.
    res[i] = (time6[i] >= time6_from[0])
                 ? ((time6[i] <= time6_to[0]) ? (bool5[i] == bool5_val[0])
                                              : false)
                 : false;
  }
};
// Prints the usage line and terminates with a non-zero status.
static void usage_and_exit(const string &usage) {
  cout << usage << endl;
  exit(1);
}

// Parses a non-negative decimal integer from a command-line argument.
// Returns false (leaving `out` untouched) when the text does not start with
// a digit, has trailing characters, or does not fit in 64 bits.
static bool parse_u64_arg(const char *text, uint64_t &out) {
  if (text == NULL || text[0] < '0' || text[0] > '9') return false;
  errno = 0;
  char *end = NULL;
  const unsigned long long value = strtoull(text, &end, 10);
  if (errno == ERANGE || end == text || *end != '\0') return false;
  out = value;
  return true;
}

// Builds the integer YYYYMMDD from the characters at positions 0-3, 5-6 and
// 8-9 of `token` (the same positions the substr()-based parsing read).
// Returns false when the token is shorter than 10 characters or one of those
// positions is not a decimal digit, instead of throwing.
static bool parse_date_key(const string &token, unsigned int &out) {
  static const int digit_pos[8] = {0, 1, 2, 3, 5, 6, 8, 9};
  if (token.size() < 10) return false;
  unsigned int value = 0;
  for (int k = 0; k < 8; k++) {
    const char c = token[digit_pos[k]];
    if (c < '0' || c > '9') return false;
    value = value * 10 + static_cast<unsigned int>(c - '0');
  }
  out = value;
  return true;
}

// Entry point.
//
// Reads the three column files col1.txt (64-bit ids), col6.txt (dates) and
// col5.txt (one flag character per row) in segments of at most
// `segment_size` rows, up to `line_count` rows in total.  For each segment
// it keeps the ids of rows that pass check_records, sorts and de-duplicates
// them, and merges them into a managed buffer.  After the last segment the
// merged buffer is de-duplicated again and the count is printed.
//
// Command line:
//   -time6_from N   inclusive lower date bound (default 19000101)
//   -time6_to N     inclusive upper date bound (default 20300101)
//   -bool5 C        flag character to match (first character of the value)
//   -line_count N   maximum number of rows to process; required, > 0
//   -segment_size N rows per segment (default 200000000), > 0
// Unrecognized arguments are ignored.
//
// Returns 0 on success; exits with status 1 on bad arguments, unreadable or
// inconsistent input, or allocation failure.
int main(int ac, char **av) {
  unsigned int time6_from = 19000101, time6_to = 20300101;
  // line_count is initialized so that a missing -line_count is detected
  // instead of sizing the buffers from an indeterminate value.
  uint64_t line_count = 0, segment_size = 200000000;
  char bool5_val = 0;
  const string usage = "Usage : query1 [-time6_from TIME6_FROM] [-time6_to TIME6_TO] [-bool5 BOOL5] [-line_count LINE_COUNT] [-segment_size SEGMENT_SIZE]";
  if (ac == 1) {
    usage_and_exit(usage);
  }

  for (int i = 1; i < ac; i++) {
    const bool is_from = strcmp(av[i], "-time6_from") == 0;
    const bool is_to = strcmp(av[i], "-time6_to") == 0;
    const bool is_bool5 = strcmp(av[i], "-bool5") == 0;
    const bool is_lines = strcmp(av[i], "-line_count") == 0;
    const bool is_segment = strcmp(av[i], "-segment_size") == 0;
    if (!(is_from || is_to || is_bool5 || is_lines || is_segment)) {
      continue; // unknown arguments are skipped, as before
    }
    if (i + 1 >= ac) {
      usage_and_exit(usage); // option given without its value
    }
    const char *value = av[++i];
    if (is_bool5) {
      bool5_val = value[0];
      continue;
    }
    // strtoull-based parsing keeps the full 64-bit range (atoi truncated
    // line_count / segment_size to int) and rejects malformed numbers.
    uint64_t number = 0;
    bool ok = parse_u64_arg(value, number);
    if (ok && (is_from || is_to)) {
      ok = number <= std::numeric_limits<unsigned int>::max();
    }
    if (!ok) {
      usage_and_exit(usage);
    }
    if (is_from) {
      time6_from = static_cast<unsigned int>(number);
    } else if (is_to) {
      time6_to = static_cast<unsigned int>(number);
    } else if (is_lines) {
      line_count = number;
    } else {
      segment_size = number;
    }
  }

  // Both values drive buffer sizes and loop progress: zero would mean either
  // nothing to do or no progress per segment, and the byte count passed to
  // cudaMallocManaged must not overflow size_t.
  if (line_count == 0 || segment_size == 0 ||
      line_count > std::numeric_limits<size_t>::max() / sizeof(uint64_t)) {
    usage_and_exit(usage);
  }

  // Open all three inputs up front so that a missing file is reported with
  // its own name and is not confused with reaching end-of-file later.
  const string id_file = "col1.txt";
  const string date_file = "col6.txt";
  const string flag_file = "col5.txt";
  std::fstream f(id_file, std::ios_base::in | ios::binary);
  std::fstream f1(date_file, std::ios_base::in | ios::binary);
  std::fstream f2(flag_file, std::ios_base::in | ios::binary);
  if (!f) {
    cout << "Could not open file " << id_file << endl;
    exit(1);
  }
  if (!f1) {
    cout << "Could not open file " << date_file << endl;
    exit(1);
  }
  if (!f2) {
    cout << "Could not open file " << flag_file << endl;
    exit(1);
  }

  uint64_t key;
  string date_string;
  char bool_char;
  std::vector<uint64_t> keys;
  std::vector<unsigned int> date_keys;
  std::vector<char> bool_keys;
  thrust::device_vector<uint64_t> id1;
  thrust::device_vector<unsigned int> time6;
  thrust::device_vector<char> bool5;

  // Managed buffers holding the running sorted result and the merge scratch.
  // Each has room for line_count ids, which is sufficient because segments
  // are capped so that no more than line_count rows are ever read.
  uint64_t *m_A = NULL;
  uint64_t *m_A_tmp = NULL;
  cudaError_t alloc_status =
      cudaMallocManaged(&m_A, sizeof(uint64_t) * line_count);
  if (alloc_status == cudaSuccess) {
    alloc_status = cudaMallocManaged(&m_A_tmp, sizeof(uint64_t) * line_count);
  }
  if (alloc_status != cudaSuccess) {
    cout << "Could not allocate managed memory: "
         << cudaGetErrorString(alloc_status) << endl;
    cudaFree(m_A);
    exit(1);
  }
  thrust::device_ptr<uint64_t> h_values(m_A);
  thrust::device_ptr<uint64_t> h_values_tmp(m_A_tmp);

  // The filter parameters never change, so they are uploaded once instead of
  // being reallocated for every segment.
  thrust::device_vector<unsigned int> dev_time6_from(1, time6_from);
  thrust::device_vector<unsigned int> dev_time6_to(1, time6_to);
  thrust::device_vector<char> dev_bool5(1, bool5_val);

  uint64_t total_count = 0;    // ids accumulated in h_values so far
  uint64_t total_line_cnt = 0; // rows consumed from the input so far
  std::clock_t start1 = 0, start2;

  while (total_line_cnt < line_count) {
    // Never read past line_count rows: the managed buffers are sized for
    // exactly that many ids.
    const uint64_t want =
        std::min<uint64_t>(segment_size, line_count - total_line_cnt);

    keys.clear();
    while (keys.size() < want && f >> key) {
      keys.push_back(key);
    }

    date_keys.clear();
    while (date_keys.size() < want && f1 >> date_string) {
      unsigned int date_key = 0;
      if (!parse_date_key(date_string, date_key)) {
        cout << "Malformed date '" << date_string << "' in " << date_file
             << endl;
        exit(1);
      }
      date_keys.push_back(date_key);
    }

    bool_keys.clear();
    while (bool_keys.size() < want && f2 >> bool_char) {
      bool_keys.push_back(bool_char);
    }

    // The three columns are indexed in lock-step on the device, so a length
    // mismatch would cause out-of-range accesses.
    const size_t rows = keys.size();
    if (date_keys.size() != rows || bool_keys.size() != rows) {
      cout << "Column files " << id_file << ", " << date_file << " and "
           << flag_file << " have different row counts" << endl;
      exit(1);
    }
    if (rows == 0) {
      break; // input exhausted before line_count rows were seen
    }
    total_line_cnt += rows;

    id1.resize(rows);
    thrust::copy(keys.begin(), keys.end(), id1.begin());
    time6.resize(rows);
    thrust::copy(date_keys.begin(), date_keys.end(), time6.begin());
    bool5.resize(rows);
    thrust::copy(bool_keys.begin(), bool_keys.end(), bool5.begin());

    // Now we have all 3 arrays in a device memory
    thrust::device_vector<bool> res(rows);

    // SQL WHERE condition check
    start2 = std::clock();
    // 64-bit indices: segment_size is a 64-bit option.
    thrust::counting_iterator<uint64_t> first_row(0);
    check_records ff(
        (const unsigned int *)thrust::raw_pointer_cast(time6.data()),
        (const char *)thrust::raw_pointer_cast(bool5.data()),
        thrust::raw_pointer_cast(res.data()),
        (const unsigned int *)thrust::raw_pointer_cast(dev_time6_from.data()),
        (const unsigned int *)thrust::raw_pointer_cast(dev_time6_to.data()),
        (const char *)thrust::raw_pointer_cast(dev_bool5.data()));
    thrust::for_each(first_row, first_row + rows, ff);

    // Release the filter inputs before allocating the compaction buffer.
    time6.resize(0);
    time6.shrink_to_fit();
    bool5.resize(0);
    bool5.shrink_to_fit();

    // copy_if the results
    thrust::device_vector<uint64_t> id1_cpy(rows);
    auto w_count = thrust::copy_if(id1.begin(), id1.end(), res.begin(),
                                   id1_cpy.begin(), thrust::identity<bool>()) -
                   id1_cpy.begin();

    // SQL DISTINCT
    thrust::sort(id1_cpy.begin(), id1_cpy.begin() + w_count);
    auto distinct_cnt =
        thrust::unique(id1_cpy.begin(), id1_cpy.begin() + w_count) -
        id1_cpy.begin();
    cout << "distinct count " << distinct_cnt << endl;

    if (total_count == 0) {
      thrust::copy(id1_cpy.begin(), id1_cpy.begin() + distinct_cnt, h_values);
    } else {
      // Both inputs are sorted, so the merged output stays sorted.
      thrust::merge(h_values, h_values + total_count, id1_cpy.begin(),
                    id1_cpy.begin() + distinct_cnt, h_values_tmp);
      thrust::copy(h_values_tmp, h_values_tmp + distinct_cnt + total_count,
                   h_values);
    }
    total_count = total_count + distinct_cnt;
    start1 = start1 + (std::clock() - start2);
  }

  start2 = std::clock();
  id1.resize(0);
  id1.shrink_to_fit();
  time6.resize(0);
  time6.shrink_to_fit();
  bool5.resize(0);
  bool5.shrink_to_fit();
  cudaFree(m_A_tmp);

  // Per-segment results were unique only within their segment; remove the
  // duplicates across segments from the merged, sorted buffer.
  auto final_distinct_cnt =
      thrust::unique(h_values, h_values + total_count) - h_values;
  cout << "merge distinct count " << final_distinct_cnt << endl;
  start1 = start1 + (std::clock() - start2);
  std::cout << "gpu running time " << start1 / (double)CLOCKS_PER_SEC << '\n';
  cudaFree(m_A);
  return 0;
}
// SQL DISTINCT | |
thrust::sort(id1_cpy.begin(), id1_cpy.begin() + w_count); | |
std::cout << "sort time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) << '\n'; | |
auto distinct_cnt = | |
thrust::unique(id1_cpy.begin(), id1_cpy.begin() + w_count) - | |
id1_cpy.begin(); | |
cout << "distinct count " << distinct_cnt << endl; | |
std::cout << "time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) | |
<< '\n'; | |
unsigned int cycle_cnt = 2; | |
uint64_t * m_A; | |
cudaMallocManaged(&m_A, sizeof(uint64_t)*distinct_cnt*cycle_cnt); | |
thrust::device_ptr<uint64_t> h_values(m_A); | |
uint64_t * m_A_tmp; | |
cudaMallocManaged(&m_A_tmp, sizeof(uint64_t)*distinct_cnt*cycle_cnt); | |
thrust::device_ptr<uint64_t> h_values_tmp(m_A_tmp); | |
thrust::copy(id1_cpy.begin(), id1_cpy.begin() + distinct_cnt, h_values); | |
for(unsigned int i = 0; i < cycle_cnt-1; i++) { | |
thrust::transform(id1_cpy.begin(), id1_cpy.begin() + distinct_cnt, id1_cpy.begin(), _1+1+i); //changing all values | |
thrust::merge(h_values, h_values + distinct_cnt*(1+i), id1_cpy.begin(), id1_cpy.begin() + distinct_cnt, h_values_tmp); | |
thrust::copy(h_values_tmp, h_values_tmp + distinct_cnt*(2+i), h_values); | |
std::cout << "cycle time " << i << " " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) << '\n'; | |
}; | |
auto final_distinct_cnt = | |
thrust::unique(h_values, h_values + distinct_cnt*cycle_cnt) - h_values; | |
std::cout << "unique time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) | |
<< '\n'; | |
cout << "merge distinct count " << final_distinct_cnt << endl; | |
std::cout << "merge time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) | |
<< '\n'; | |
return 0; | |
} | |
date_string.substr(5, 2) + | |
date_string.substr(8, 2))); | |
}; | |
time6.resize(date_keys.size()); | |
thrust::copy(date_keys.begin(), date_keys.end(), time6.begin()); | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
file_name = "col5.txt"; | |
std::fstream f2(file_name, std::ios_base::in | ios::binary); | |
char bool_char; | |
thrust::device_vector<char> bool5; | |
std::vector<char> bool_keys; | |
if (f2) { | |
while (f2 >> bool_char) { | |
bool_keys.push_back(bool_char); | |
}; | |
bool5.resize(bool_keys.size()); | |
thrust::copy(bool_keys.begin(), bool_keys.end(), bool5.begin()); | |
} else { | |
cout << "Could not open file " << file_name << endl; | |
}; | |
// Now we have all 3 arrays in a device memory | |
cout << "Done parsing " << endl; | |
std::clock_t start1 = std::clock(); | |
res.resize(bool_keys.size()); | |
// SQL WHERE condition check | |
thrust::device_vector<unsigned int> dev_time6_from(1); | |
thrust::device_vector<unsigned int> dev_time6_to(1); | |
thrust::device_vector<char> dev_bool5(1); | |
dev_time6_from[0] = time6_from; | |
dev_time6_to[0] = time6_to; | |
dev_bool5[0] = bool5_val; | |
thrust::counting_iterator<unsigned int> begin(0); | |
check_records ff( | |
(const unsigned int *)thrust::raw_pointer_cast(time6.data()), | |
(const char *)thrust::raw_pointer_cast(bool5.data()), | |
thrust::raw_pointer_cast(res.data()), | |
(const unsigned int *)thrust::raw_pointer_cast(dev_time6_from.data()), | |
(const unsigned int *)thrust::raw_pointer_cast(dev_time6_to.data()), | |
(const char *)thrust::raw_pointer_cast(dev_bool5.data())); | |
thrust::for_each(begin, begin + res.size(), ff); | |
std::cout << "check time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) | |
<< '\n'; | |
time6.resize(0); | |
time6.shrink_to_fit(); | |
bool5.resize(0); | |
bool5.shrink_to_fit(); | |
std::cout << "shrink 1 time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) << '\n'; | |
// copy_if the results | |
thrust::device_vector<uint64_t> id1_cpy(res.size()); | |
auto w_count = thrust::copy_if(id1.begin(), id1.end(), res.begin(), | |
id1_cpy.begin(), thrust::identity<bool>()) - | |
id1_cpy.begin(); | |
std::cout << "cpy1 time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) << '\n'; | |
res.resize(0); | |
res.shrink_to_fit(); | |
id1.resize(0); | |
id1.shrink_to_fit(); | |
std::cout << "shrink2 time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) << '\n'; | |
// SQL DISTINCT | |
thrust::sort(id1_cpy.begin(), id1_cpy.begin() + w_count); | |
std::cout << "sort time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) << '\n'; | |
auto distinct_cnt = | |
thrust::unique(id1_cpy.begin(), id1_cpy.begin() + w_count) - | |
id1_cpy.begin(); | |
cout << "distinct count " << distinct_cnt << endl; | |
std::cout << "time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) | |
<< '\n'; | |
unsigned int cycle_cnt = 2; | |
uint64_t * m_A; | |
cudaMallocManaged(&m_A, sizeof(uint64_t)*distinct_cnt*cycle_cnt); | |
thrust::device_ptr<uint64_t> h_values(m_A); | |
uint64_t * m_A_tmp; | |
cudaMallocManaged(&m_A_tmp, sizeof(uint64_t)*distinct_cnt*cycle_cnt); | |
thrust::device_ptr<uint64_t> h_values_tmp(m_A_tmp); | |
thrust::copy(id1_cpy.begin(), id1_cpy.begin() + distinct_cnt, h_values); | |
for(unsigned int i = 0; i < cycle_cnt-1; i++) { | |
thrust::transform(id1_cpy.begin(), id1_cpy.begin() + distinct_cnt, id1_cpy.begin(), _1+1+i); //changing all values | |
thrust::merge(h_values, h_values + distinct_cnt*(1+i), id1_cpy.begin(), id1_cpy.begin() + distinct_cnt, h_values_tmp); | |
thrust::copy(h_values_tmp, h_values_tmp + distinct_cnt*(2+i), h_values); | |
std::cout << "cycle time " << i << " " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) << '\n'; | |
}; | |
auto final_distinct_cnt = | |
thrust::unique(h_values, h_values + distinct_cnt*cycle_cnt) - h_values; | |
std::cout << "unique time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) | |
<< '\n'; | |
cout << "merge distinct count " << final_distinct_cnt << endl; | |
std::cout << "merge time " << ((std::clock() - start1) / (double)CLOCKS_PER_SEC) | |
<< '\n'; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment