Last active
August 29, 2015 14:16
-
-
Save socantre/0b20c09877c875b2db6d to your computer and use it in GitHub Desktop.
Take a uniform sampling of whitespace delimted strings from the standard input. algorithm R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <random> | |
#include <algorithm> | |
#include <iterator> | |
#include <numeric> | |
#include <iomanip> | |
#include <string> | |
int main(int argc, char *argv[]) { | |
if (argc < 2 || argv[1] == "--help" || argv[1] == "-h") { | |
std::cout << "Usage: " << argv[0] << "<sample size>\n"; | |
std::cout << " " << argv[0] << "[--help|-h]\n\n"; | |
std::cout << "Description: Take a uniform sampling of whitespace delimted strings from the standard input.\n"; | |
return argc < 2 ? EXIT_FAILURE : EXIT_SUCCESS; | |
} | |
int sample_size; | |
try { | |
sample_size = std::stoi(argv[1]); | |
} | |
catch (...) { | |
std::cerr << "Unable to parse sample size '" << argv[1] << "'\n"; | |
return EXIT_FAILURE; | |
} | |
if (sample_size < 1) { | |
std::cerr << "Invalid sample size '" << argv[1] << "'. Sample size must be larger than zero.\n"; | |
return EXIT_FAILURE; | |
} | |
std::default_random_engine eng{ std::random_device{}() }; | |
std::uniform_real_distribution<> dist; | |
std::vector<std::string> sample(sample_size); | |
std::istream_iterator<std::string> input(std::cin); | |
std::istream_iterator<std::string> input_end; | |
// initialize the resevoir with the first k elements | |
for (int i = 0; i < sample_size && input != input_end; ++i) { | |
sample[i] = *input++; | |
} | |
std::string s; | |
long long input_count = sample_size; | |
while (input != input_end) { | |
s = *input++; | |
if (input_count == std::numeric_limits<decltype(input_count)>::max()) { | |
std::cerr << "Error: input too long. Overflowing internal count."; | |
return EXIT_FAILURE; | |
} | |
input_count++; | |
// for each element thereafter, add it to the resevoir with probability k / (i+1) | |
// input_count = i + 1 | |
// i would be the zero based index of the input | |
if (std::uniform_int_distribution<>(1, input_count)(eng) <= sample_size) { | |
sample[std::uniform_int_distribution<>(0, sample.size() - 1)(eng)] = s; // Choose the element in the resevoir to replace randomly | |
} | |
} | |
std::copy(begin(sample), end(sample), std::ostream_iterator<std::string>(std::cout, "\n")); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment