Created
May 5, 2011 17:55
-
-
Save ilyaraz/957527 to your computer and use it in GitHub Desktop.
Cache-aware binary search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "timer.h" | |
#include "random_int.h" | |
#include <boost/bind.hpp> | |
#include <boost/foreach.hpp> | |
#include <algorithm> | |
#include <iostream> | |
#include <stdexcept> | |
#include <vector> | |
void generateVector(int n, int lowerBound, int upperBound, std::vector<int> &result) { | |
using utils::RandomInt; | |
RandomInt random(lowerBound, upperBound); | |
result.resize(n); | |
std::generate(result.begin(), result.end(), boost::bind(&RandomInt::generate, &random)); | |
} | |
void testLowerBound(const std::vector<int> &data, const std::vector<int> &queries) { | |
utils::Timer timer; | |
int result = 0; | |
BOOST_FOREACH(int query, queries) { | |
result += static_cast<int>(std::lower_bound(data.begin(), data.end(), query) - data.begin()); | |
} | |
std::cout << result << std::endl | |
<< timer.getElapsedTime() / queries.size() << std::endl; | |
} | |
void testSmartSearch(const std::vector<int> &data, const std::vector<int> &queries) { | |
int blockSize = 0; | |
while (blockSize * blockSize * blockSize < static_cast<int>(data.size())) { | |
blockSize++; | |
} | |
if (blockSize * blockSize * blockSize != static_cast<int>(data.size())) { | |
throw std::runtime_error("data.size() is not a cube"); | |
} | |
std::vector<int> level1(blockSize), level2(blockSize * blockSize); | |
for (int i = 0; i < blockSize; ++i) { | |
level1[i] = data[(i + 1) * blockSize * blockSize - 1]; | |
} | |
for (int i = 0; i < blockSize * blockSize; ++i) { | |
level2[i] = data[(i + 1) * blockSize - 1]; | |
} | |
utils::Timer timer; | |
int result = 0; | |
BOOST_FOREACH(int query, queries) { | |
int x = static_cast<int>(std::lower_bound(level1.begin(), level1.end(), query) - level1.begin()); | |
if (x != blockSize) { | |
x = static_cast<int>(std::lower_bound(level2.begin() + x * blockSize, level2.begin() + (x + 1) * blockSize, query) - level2.begin()); | |
x = static_cast<int>(std::lower_bound(data.begin() + x * blockSize, data.begin() + (x + 1) * blockSize, query) - data.begin()); | |
result += x; | |
} | |
else { | |
result += static_cast<int>(data.size()); | |
} | |
} | |
std::cout << result << std::endl | |
<< timer.getElapsedTime() / queries.size() << std::endl; | |
} | |
int main() { | |
const int N = 1 << 27; | |
const int MAX_NUM = 2000000000; | |
const int NUM_QUERIES = 10000000; | |
std::vector<int> data; | |
generateVector(N, 0, MAX_NUM, data); | |
std::sort(data.begin(), data.end()); | |
std::cout << "generated data" << std::endl; | |
std::vector<int> queries; | |
generateVector(NUM_QUERIES, 0, MAX_NUM, queries); | |
std::cout << "generated queries" << std::endl; | |
testSmartSearch(data, queries); | |
testLowerBound(data, queries); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef _RANDOM_INT_H_ | |
#define _RANDOM_INT_H_ | |
#include <boost/date_time/posix_time/posix_time_types.hpp> | |
#include <boost/random/mersenne_twister.hpp> | |
#include <boost/random/uniform_int.hpp> | |
#include <boost/random/variate_generator.hpp> | |
namespace utils { | |
namespace pt = boost::posix_time; | |
class RandomInt { | |
public: | |
RandomInt(int lowerBound, int upperBound, int seed): | |
distribution(lowerBound, upperBound), | |
uniformGenerator(rawGenerator, distribution) { | |
rawGenerator.seed(seed); | |
} | |
RandomInt(int lowerBound, int upperBound): | |
distribution(lowerBound, upperBound), | |
uniformGenerator(rawGenerator, distribution) { | |
pt::ptime currentTime(pt::microsec_clock::local_time()); | |
rawGenerator.seed(currentTime.time_of_day().total_microseconds()); | |
} | |
int generate() { | |
return uniformGenerator(); | |
} | |
private: | |
boost::mt19937 rawGenerator; | |
boost::uniform_int<> distribution; | |
boost::variate_generator<boost::mt19937&, boost::uniform_int<> > uniformGenerator; | |
}; | |
} | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef _TIMER_H_ | |
#define _TIMER_H_ | |
#include <boost/date_time/posix_time/posix_time_types.hpp> | |
namespace utils { | |
namespace pt = boost::posix_time; | |
class Timer { | |
public: | |
Timer() { | |
restart(); | |
} | |
~Timer() { | |
} | |
void restart() { | |
startTime = pt::ptime(pt::microsec_clock::local_time()); | |
} | |
const double getElapsedTime() const { | |
const double MICROSECONDS_IN_SECOND = 1000000.0; | |
pt::ptime currentTime(pt::microsec_clock::local_time()); | |
return (currentTime - startTime).total_microseconds() / MICROSECONDS_IN_SECOND; | |
} | |
private: | |
pt::ptime startTime; | |
}; | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment