Skip to content

Instantly share code, notes, and snippets.

@ribomation
Created January 15, 2024 13:44
Show Gist options
  • Save ribomation/3117c2a634c7955e23b1c0d9af79fd4f to your computer and use it in GitHub Desktop.
Save ribomation/3117c2a634c7955e23b1c0d9af79fd4f to your computer and use it in GitHub Desktop.
Generates weather data in C++ for use as input to the 1 Billion Row Challenge (1BRC)
#include <cmath>
#include <cstddef>
#include <format>
#include <fstream>
#include <iostream>
#include <random>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "util.hxx"
using std::cout;
using std::string;
using namespace std::string_literals;
namespace rm = ribomation::util;
/**
 * A weather station entry: the station name and its reference temperature.
 *
 * generate() uses `temperature` as the mean of the distribution it samples from.
 *
 * The members are deliberately non-const: const data members disable move
 * assignment and make the implicit move constructor copy the string, which
 * forces std::vector to copy every element when it reallocates.
 */
struct Station {
    std::string name{};
    double temperature{};

    /// Takes `name` by value and moves it into place (sink parameter).
    Station(std::string name, double temperature)
        : name(std::move(name)), temperature(temperature) {}
};
auto loadStations(string const& filename) -> std::vector<Station> {
auto f = std::ifstream{filename};
if (!f) throw std::invalid_argument{"cannot open "s + filename};
auto stations = std::vector<Station>{};
stations.reserve(500);
for (string line; std::getline(f, line);) {
//Austin;20.7
auto sep = line.find(';');
auto name = line.substr(0, sep);
auto temp = std::stod(line.substr(sep + 1));
stations.emplace_back(name, temp);
}
return stations;
}
/**
 * Produces one measurement row of the form "<station name>;<temperature>".
 *
 * A station is picked uniformly at random; the temperature is drawn from a
 * normal distribution whose mean is the station's temperature and whose
 * standard deviation is 10.0, then rounded to one decimal.
 *
 * @param stations candidate stations; must not be empty
 * @param r        random engine, advanced by this call
 * @return the formatted row, without a trailing newline
 * @throws std::invalid_argument if `stations` is empty
 */
auto generate(std::vector<Station> const& stations, std::default_random_engine& r) -> std::string {
    // size() - 1 would wrap around for an empty vector and index out of bounds.
    if (stations.empty()) throw std::invalid_argument{"no stations to generate from"};

    // The explicit std::size_t avoids deducing the type from 0UL and size(),
    // which are different types on platforms where size_t is not unsigned long.
    auto nextStation = std::uniform_int_distribution<std::size_t>{0, stations.size() - 1};
    auto const& station = stations[nextStation(r)];

    auto nextTemperature = std::normal_distribution<double>{station.temperature, 10.0};
    auto temperature = std::round(nextTemperature(r) * 10.0) / 10.0;
    // Small negative samples round to -0.0, which would be printed as "-0.0";
    // -0.0 compares equal to 0.0, so this normalises it to positive zero.
    if (temperature == 0.0) temperature = 0.0;

    return std::format("{};{:.1f}", station.name, temperature);
}
int main(int argc, char* argv[]) {
auto stationsFile = "src/resources/stations.txt"s;
auto numValues = 1000U;
auto filename = "data/weather-data.csv"s;
for (auto k = 1; k < argc; ++k) {
auto arg = string{argv[k]};
if (arg == "-n"s) {
numValues = std::stoi(argv[++k]);
} else if (arg == "-f"s) {
filename = argv[++k];
} else if (arg == "-s"s) {
stationsFile = argv[++k];
} else {
std::cerr << "usage: " << argv[0] << " [-n <int>] [-f <str>] [-s <str>]\n";
return 1;
}
}
cout << "# values: " << numValues << "\n";
cout << "filename: " << filename << "\n";
rm::elapsed([stationsFile, numValues, filename]() {
auto stations = loadStations(stationsFile);
cout << "loaded " << stations.size() << " names\n";
auto f = std::ofstream{filename};
if (!f) throw std::invalid_argument{"cannot open output file "s + filename};
auto devRandom = std::random_device{};
auto r = std::default_random_engine{devRandom()};
for (auto k = 1U; k <= numValues; ++k) {
f << generate(stations, r) << "\n";
}
});
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment