Skip to content

Instantly share code, notes, and snippets.

@ccbrown
Last active February 13, 2016 05:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ccbrown/215722eb48d2cfced1e3 to your computer and use it in GitHub Desktop.
Save ccbrown/215722eb48d2cfced1e3 to your computer and use it in GitHub Desktop.
extract_palindromes.cpp
#include <gsl.h>
#include <omp.h>
#include <thread>
#include <vector>
#include <sys/stat.h>
using namespace std::literals;
std::size_t file_size(gsl::not_null<gsl::czstring<>> name) {
struct stat stat_buf;
if (stat(name, &stat_buf)) {
throw std::runtime_error("unable to stat "s + name.get());
}
return stat_buf.st_size;
}
void read_file(gsl::not_null<gsl::czstring<>> name, gsl::span<gsl::byte> destination, gsl::not_null<std::atomic<std::size_t>*> bytes_read) {
auto f = fopen(name, "r");
if (!f) {
throw std::runtime_error("unable to open "s + name.get());
}
auto _ = gsl::finally([f] { fclose(f); });
constexpr std::size_t read_size = 16 * 1024;
std::size_t written = 0;
while (!feof(f)) {
auto max_read_size = std::min(read_size, destination.size() - written);
if (!max_read_size) {
char eof_test;
if (fread(&eof_test, 1, 1, f)) {
throw std::runtime_error("destination too small for "s + name.get());
}
} else {
auto result = fread(destination.data() + written, 1, max_read_size, f);
written += result;
*bytes_read += result;
}
if (ferror(f)) {
throw std::runtime_error("error reading "s + name.get());
}
}
}
bool is_palindrome(gsl::span<const gsl::byte> bytes) noexcept {
return std::equal(bytes.begin(), bytes.begin() + bytes.size() / 2, bytes.rbegin());
}
std::vector<gsl::span<const gsl::byte>> palindromes(gsl::span<const gsl::byte> contents) noexcept {
auto ptr = contents.data();
std::size_t rem = contents.size();
std::size_t lines = 0;
std::vector<gsl::span<const gsl::byte>> ret;
while (rem) {
auto newline = reinterpret_cast<gsl::byte*>(memchr(ptr, '\n', rem));
auto length = newline ? newline - ptr : rem;
++lines;
if (is_palindrome(gsl::as_span(ptr, length))) {
ret.emplace_back(ptr, length);
}
if (!newline) { break; }
rem -= length + 1;
ptr = ++newline;
}
return ret;
}
void extract_palindromes(gsl::not_null<gsl::czstring<>> input_path, gsl::not_null<gsl::czstring<>> output_path) {
auto output_file = fopen(output_path, "w");
if (!output_file) {
throw std::runtime_error("unable to open "s + output_path.get() + " for writing");
}
auto _1 = gsl::finally([output_file] { fclose(output_file); });
gsl::byte* input_buffer = nullptr;
std::size_t input_size = 0;
std::tie(input_buffer, input_size) = std::get_temporary_buffer<gsl::byte>(file_size(input_path));
auto _2 = gsl::finally([=] { std::return_temporary_buffer(input_buffer); });
std::atomic<std::size_t> input_bytes_read{0};
read_file(input_path, gsl::as_span(input_buffer, input_size), &input_bytes_read);
auto results = palindromes(gsl::as_span(input_buffer, input_bytes_read));
for (auto& palindrome : results) {
if (fwrite(palindrome.data(), 1, palindrome.size(), output_file) < palindrome.size() || fwrite("\n", 1, 1, output_file) < 1) {
throw std::runtime_error("error writing to "s + output_path.get());
}
}
}
void extract_palindromes_mp(gsl::not_null<gsl::czstring<>> input_path, gsl::not_null<gsl::czstring<>> output_path) {
auto output_file = fopen(output_path, "w");
if (!output_file) {
throw std::runtime_error("unable to open "s + output_path.get() + " for writing");
}
auto _1 = gsl::finally([output_file] { fclose(output_file); });
gsl::byte* input_buffer = nullptr;
std::size_t input_size = 0;
std::tie(input_buffer, input_size) = std::get_temporary_buffer<gsl::byte>(file_size(input_path));
auto _2 = gsl::finally([=] { std::return_temporary_buffer(input_buffer); });
std::atomic<std::size_t> input_bytes_read{0};
std::atomic<bool> input_read_finished{false};
std::thread input_thread([&] {
read_file(input_path, gsl::as_span(input_buffer, input_size), &input_bytes_read);
input_read_finished = true;
});
std::size_t input_bytes_consumed = 0;
#pragma omp parallel
{
bool is_finished = false;
while (!is_finished) {
gsl::span<const gsl::byte> input;
constexpr std::size_t preferred_min_input_size = 4 * 1024;
#pragma omp critical
{
auto input_read_finished_snapshot = input_read_finished.load();
auto input_bytes_read_snapshot = input_bytes_read.load();
auto input_bytes_available = input_bytes_read_snapshot - input_bytes_consumed;
if (input_bytes_available > preferred_min_input_size) {
auto preferred_input_size = std::max(input_bytes_available / omp_get_num_threads(), preferred_min_input_size);
if (auto end = reinterpret_cast<const gsl::byte*>(memchr(input_buffer + input_bytes_consumed + preferred_input_size, '\n', input_bytes_available - preferred_input_size))) {
input = gsl::as_span(input_buffer + input_bytes_consumed, end - (input_buffer + input_bytes_consumed) + 1);
input_bytes_consumed += input.size();
}
} else if (input_read_finished_snapshot) {
if (input_bytes_available) {
input = gsl::as_span(input_buffer + input_bytes_consumed, input_bytes_available);
input_bytes_consumed = input_bytes_read_snapshot;
}
is_finished = true;
}
}
if (!input.empty()) {
auto results = palindromes(input);
for (auto& palindrome : results) {
flockfile(output_file);
auto _ = gsl::finally([=] { funlockfile(output_file); });
if (fwrite(palindrome.data(), 1, palindrome.size(), output_file) < palindrome.size() || fwrite("\n", 1, 1, output_file) < 1) {
throw std::runtime_error("error writing to "s + output_path.get());
}
}
} else {
std::this_thread::sleep_for(100ms);
}
}
}
input_thread.join();
}
int main(int argc, gsl::czstring<> argv[]) {
if (argc < 3) {
throw std::runtime_error("usage: "s + argv[0] + " input_file output_file"s);
}
extract_palindromes_mp(argv[1], argv[2]);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment