Created
January 15, 2024 13:44
-
-
Save ribomation/3117c2a634c7955e23b1c0d9af79fd4f to your computer and use it in GitHub Desktop.
Generates weather data in C++ to use as input for the 1 Billion Row Challenge (1BRC)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath>
#include <exception>
#include <format>
#include <fstream>
#include <iostream>
#include <random>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "util.hxx"

using std::cout;
using std::string;
using namespace std::string_literals;
namespace rm = ribomation::util;
/**
 * A named weather station together with its reference temperature.
 *
 * The members are deliberately non-const: const data members delete the
 * assignment operators and force the implicit move constructor to copy the
 * string, which makes the type awkward and slow inside standard containers.
 * Immutability is expressed by holding the object through a const
 * reference or a const container instead.
 */
struct Station {
    string name{};
    double temperature{};

    /**
     * @param name        station name (taken by value and moved into place)
     * @param temperature reference temperature for the station
     */
    Station(string name, double temperature) : name(std::move(name)), temperature(temperature) {}
};
/**
 * Reads weather stations from a text file holding one `name;temperature`
 * record per line, e.g. `Austin;20.7`.
 *
 * Blank lines are skipped and a trailing carriage return (CRLF files) is
 * stripped before parsing.
 *
 * @param filename path of the stations file
 * @return the stations in file order
 * @throws std::invalid_argument if the file cannot be opened, a line has no
 *         ';' separator, or the temperature field is not a number
 * @throws std::out_of_range if a temperature does not fit in a double
 *         (propagated from std::stod)
 */
auto loadStations(string const& filename) -> std::vector<Station> {
    auto f = std::ifstream{filename};
    if (!f) throw std::invalid_argument{"cannot open "s + filename};

    auto stations = std::vector<Station>{};
    stations.reserve(500);

    auto lineNo = 0UL;
    for (string line; std::getline(f, line);) {
        ++lineNo;
        if (!line.empty() && line.back() == '\r') line.pop_back();
        if (line.empty()) continue;

        //Austin;20.7
        auto const sep = line.find(';');
        if (sep == string::npos) {
            // Without this check `sep + 1` wraps to 0 and the whole line is fed to stod.
            throw std::invalid_argument{
                filename + ":" + std::to_string(lineNo) + ": missing ';' separator: " + line};
        }

        auto name = line.substr(0, sep);
        auto temp = 0.0;
        try {
            temp = std::stod(line.substr(sep + 1));
        } catch (std::invalid_argument const&) {
            // Re-throw the same exception type with a message that locates the bad record.
            throw std::invalid_argument{
                filename + ":" + std::to_string(lineNo) + ": invalid temperature: " + line};
        }
        stations.emplace_back(std::move(name), temp);
    }
    return stations;
}
/**
 * Produces one measurement record of the form `name;temperature`.
 *
 * A station is picked uniformly at random; the temperature is drawn from a
 * normal distribution centred on that station's temperature with a standard
 * deviation of 10.0, then rounded to one decimal.
 *
 * @param stations candidate stations; must not be empty
 * @param r        random engine, advanced by this call
 * @return the formatted record, without a trailing newline
 * @throws std::invalid_argument if `stations` is empty
 */
auto generate(std::vector<Station> const& stations, std::default_random_engine& r) -> string {
    // size() - 1 would wrap around for an empty vector and index out of bounds.
    if (stations.empty()) throw std::invalid_argument{"no stations to generate from"};

    // Name the index type explicitly: deducing it from `0UL` and `size()` fails
    // on platforms where unsigned long and size_t are different types.
    using Index = std::vector<Station>::size_type;
    auto nextStation = std::uniform_int_distribution<Index>{0, stations.size() - 1};
    auto const& station = stations[nextStation(r)];

    auto nextTemperature = std::normal_distribution<double>{station.temperature, 10.0};
    auto temperature = std::round(nextTemperature(r) * 10.0) / 10.0;
    // Small negative samples round to -0.0, which would be printed as "-0.0";
    // -0.0 compares equal to 0.0, so this normalises it to positive zero.
    if (temperature == 0.0) temperature = 0.0;

    return std::format("{};{:.1f}", station.name, temperature);
}
int main(int argc, char* argv[]) { | |
auto stationsFile = "src/resources/stations.txt"s; | |
auto numValues = 1000U; | |
auto filename = "data/weather-data.csv"s; | |
for (auto k = 1; k < argc; ++k) { | |
auto arg = string{argv[k]}; | |
if (arg == "-n"s) { | |
numValues = std::stoi(argv[++k]); | |
} else if (arg == "-f"s) { | |
filename = argv[++k]; | |
} else if (arg == "-s"s) { | |
stationsFile = argv[++k]; | |
} else { | |
std::cerr << "usage: " << argv[0] << " [-n <int>] [-f <str>] [-s <str>]\n"; | |
return 1; | |
} | |
} | |
cout << "# values: " << numValues << "\n"; | |
cout << "filename: " << filename << "\n"; | |
rm::elapsed([stationsFile, numValues, filename]() { | |
auto stations = loadStations(stationsFile); | |
cout << "loaded " << stations.size() << " names\n"; | |
auto f = std::ofstream{filename}; | |
if (!f) throw std::invalid_argument{"cannot open output file "s + filename}; | |
auto devRandom = std::random_device{}; | |
auto r = std::default_random_engine{devRandom()}; | |
for (auto k = 1U; k <= numValues; ++k) { | |
f << generate(stations, r) << "\n"; | |
} | |
}); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment