Skip to content

Instantly share code, notes, and snippets.

@socantre
Last active August 29, 2015 14:16
Show Gist options
  • Save socantre/0b20c09877c875b2db6d to your computer and use it in GitHub Desktop.
Save socantre/0b20c09877c875b2db6d to your computer and use it in GitHub Desktop.
Take a uniform sampling of whitespace delimted strings from the standard input. algorithm R
#include <iostream>
#include <random>
#include <algorithm>
#include <iterator>
#include <numeric>
#include <iomanip>
#include <string>
int main(int argc, char *argv[]) {
if (argc < 2 || argv[1] == "--help" || argv[1] == "-h") {
std::cout << "Usage: " << argv[0] << "<sample size>\n";
std::cout << " " << argv[0] << "[--help|-h]\n\n";
std::cout << "Description: Take a uniform sampling of whitespace delimted strings from the standard input.\n";
return argc < 2 ? EXIT_FAILURE : EXIT_SUCCESS;
}
int sample_size;
try {
sample_size = std::stoi(argv[1]);
}
catch (...) {
std::cerr << "Unable to parse sample size '" << argv[1] << "'\n";
return EXIT_FAILURE;
}
if (sample_size < 1) {
std::cerr << "Invalid sample size '" << argv[1] << "'. Sample size must be larger than zero.\n";
return EXIT_FAILURE;
}
std::default_random_engine eng{ std::random_device{}() };
std::uniform_real_distribution<> dist;
std::vector<std::string> sample(sample_size);
std::istream_iterator<std::string> input(std::cin);
std::istream_iterator<std::string> input_end;
// initialize the resevoir with the first k elements
for (int i = 0; i < sample_size && input != input_end; ++i) {
sample[i] = *input++;
}
std::string s;
long long input_count = sample_size;
while (input != input_end) {
s = *input++;
if (input_count == std::numeric_limits<decltype(input_count)>::max()) {
std::cerr << "Error: input too long. Overflowing internal count.";
return EXIT_FAILURE;
}
input_count++;
// for each element thereafter, add it to the resevoir with probability k / (i+1)
// input_count = i + 1
// i would be the zero based index of the input
if (std::uniform_int_distribution<>(1, input_count)(eng) <= sample_size) {
sample[std::uniform_int_distribution<>(0, sample.size() - 1)(eng)] = s; // Choose the element in the resevoir to replace randomly
}
}
std::copy(begin(sample), end(sample), std::ostream_iterator<std::string>(std::cout, "\n"));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment