Last active
January 11, 2018 04:33
-
-
Save fataltes/0b19fd2f62d8eebcd8868be4aebd5157 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
int main(int argc, char *argv[]) { | |
uint16_t num_samples = 2586; | |
std::string filename = argv[1]; | |
BitVectorRRR eqcls(filename); | |
size_t totalEqClsCnt = eqcls.bit_size()/num_samples; //222584822; | |
std::cout << "Total bit size: " << eqcls.bit_size() << "\ntotal # of equivalence classes: " << totalEqClsCnt << "\n"; | |
size_t gigs = 8; | |
std::string dir = "bvs_8g"; | |
uint64_t totalBitsAllowed = ((gigs*8*1024*1024*1024)/num_samples)*num_samples; | |
std::cout << "total bits allowed is " << totalBitsAllowed << "\n\n"; | |
std::string output_dir = "/mnt/scratch4/fatemeh/"+ dir; | |
uint64_t fileCntr = 0; | |
uint64_t bvIdx = 0; | |
BitVector* bv = new BitVector(totalBitsAllowed); | |
std::cout << "Go over equivalence classes ..\n"; | |
sdslhash<BitVector> hasher; | |
spp::sparse_hash_map<uint64_t, uint32_t> hashMap; | |
for (size_t eqclsCntr = 0; eqclsCntr < totalEqClsCnt; eqclsCntr++) { | |
//std::cout << eqclsCntr << ":" << totalBitsAllowed << " " << bvIdx << " - "; | |
// save to file after each 8 gig of data | |
if (totalBitsAllowed - bvIdx + 1 < num_samples) { | |
std::cout << eqclsCntr << ":" << totalBitsAllowed << "," << bvIdx << " diff: " << totalBitsAllowed-bvIdx | |
<< " size in bytes: " << bv->size_in_bytes() << "\n"; | |
BitVectorRRR bvr(*bv); | |
std::string outfile_name = output_dir + "/bv-" + std::to_string(fileCntr++) + ".rrr" ; | |
bvr.serialize(outfile_name); | |
delete bv; | |
totalBitsAllowed = std::min(totalBitsAllowed, (totalEqClsCnt-eqclsCntr)*num_samples); | |
bv = new BitVector(totalBitsAllowed); | |
bvIdx = 0; | |
} | |
BitVector colorCls(num_samples); | |
size_t i = 0; | |
while (i < num_samples) { | |
size_t bitCnt = std::min(num_samples-i, (size_t)64); | |
size_t wrd = eqcls.get_int(eqclsCntr*num_samples+i, bitCnt); | |
for (size_t j = 0; j < bitCnt; j++) { | |
if ((wrd >> j) & 0x01) { | |
bv->set(bvIdx); | |
colorCls.set(i); | |
} | |
bvIdx++; | |
i++; | |
} | |
} | |
uint64_t hashVal = hasher(colorCls); | |
if (hashMap.find(hashVal) == hashMap.end()) { | |
hashMap[hashVal] = 1; | |
} | |
else { | |
std::cerr << "collision: " << eqclsCntr << " hash: " << hashVal << "\n"; | |
hashMap[hashVal]+= 1; | |
} | |
} | |
// remaining (last) part | |
BitVectorRRR bvr(*bv); | |
std::string outfile_name = output_dir + "/bv-" + std::to_string(fileCntr) + ".rrr" ; | |
bvr.serialize(outfile_name); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment