Created
November 29, 2019 13:59
-
-
Save mr-eyes/2b9884f52d1951ac47fefa108546ae37 to your computer and use it in GitHub Desktop.
random kmers generation with phmap insertion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Simple random k-mer generator that stores the k-mers in a parallel-hashmap (phmap) hash map.
Clone the dependency next to main.cpp: git clone https://github.com/greg7mdp/parallel-hashmap.git
Build: g++ -O3 main.cpp
*/
#include "parallel-hashmap/parallel_hashmap/phmap.h"

#include <algorithm>  // for std::generate_n
#include <cstdint>
#include <functional> // for std::function
#include <iostream>
#include <random>
#include <string>
#include <utility>    // for std::move
#include <vector>
using namespace std; | |
/// Container type holding the alphabet that random k-mers are drawn from.
using char_array = std::vector<char>;

/// Returns the alphabet used for k-mer generation: 'A', 'C', 'G', 'T'.
/// Edit the list below to use a different alphabet.
char_array charset()
{
    char_array alphabet{'A', 'C', 'G', 'T'};
    return alphabet;
}
/// Builds a string of `length` characters.
///
/// @param length     number of characters in the returned string
/// @param rand_char  generator invoked once per position, from the first
///                   position to the last; its return value fills that position
/// @return the generated string (empty when `length` is 0)
std::string random_string(size_t length, std::function<char(void)> rand_char )
{
    std::string result(length, '\0');
    for (char& slot : result) {
        slot = rand_char();
    }
    return result;
}
void dump(phmap::flat_hash_map<uint64_t, string> & kmers){ | |
for(auto kmer : kmers) | |
cout << kmer.second << ": " << kmer.first << endl; | |
} | |
int main(){ | |
// Constanats | |
const int kSize = 31; | |
const int kmers_no = 1e6; // 1 million kmers | |
const int loops = 10; | |
// kmers parallel hashmap | |
phmap::flat_hash_map<uint64_t, string> kmers; | |
// Random kmers generator | |
const auto ch_set = charset(); | |
std::default_random_engine rng(std::random_device{}()); | |
std::uniform_int_distribution<> dist(0, ch_set.size()-1); | |
auto randchar = [ ch_set,&dist,&rng ](){return ch_set[ dist(rng) ];}; | |
// hasher | |
std::hash<std::string> kmer_hasher; | |
for(int i = 0; i < loops; i++){ | |
// Insertion | |
for(int j=0; j < kmers_no; j++){ | |
string kmer_str = random_string(kSize,randchar); | |
uint64_t kmer_hash = kmer_hasher(kmer_str); | |
kmers[kmer_hash] = kmer_str; | |
} | |
// kmers.clear(); | |
} | |
// dump(kmers); | |
// cout << "Inserted: " << kmers.size() << " kmers" << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment