Skip to content

Instantly share code, notes, and snippets.

@maksverver
Last active December 8, 2022 20:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maksverver/54c0d12043659c8ab2b885560d36132d to your computer and use it in GitHub Desktop.
Save maksverver/54c0d12043659c8ab2b885560d36132d to your computer and use it in GitHub Desktop.
#ifndef MEMORY_MAPPED_FILE_H_INCLUDED
#define MEMORY_MAPPED_FILE_H_INCLUDED
#include <utility>
#include <cstdlib>
#include <span>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
class MemoryMappedFile
{
public:
MemoryMappedFile() {}
MemoryMappedFile(const char* path) { Open(path); }
MemoryMappedFile(MemoryMappedFile&& other) { *this = std::move(other); }
~MemoryMappedFile() { Close(); }
MemoryMappedFile& operator=(MemoryMappedFile&& other) {
Close();
size = std::exchange(other.size, 0);
data = std::exchange(other.data, nullptr);
return *this;
}
bool Open(const char* path) {
int fd = open(path, O_RDONLY);
bool result = false;
if (fd != -1) {
struct stat st;
int res = fstat(fd, &st);
if (res == 0) {
size_t new_size = st.st_size;
void *new_data = mmap(nullptr, new_size, PROT_READ, MAP_SHARED, fd, 0);
if (new_data != MAP_FAILED) {
if (data != nullptr) munmap(data, size);
size = new_size;
data = new_data;
result = true;
}
}
close(fd);
}
return result;
}
void Close() {
if (!IsOpen()) return;
munmap(data, size);
size = 0;
data = nullptr;
}
bool IsOpen() const { return data != nullptr; }
size_t GetSize() const { return size; }
explicit operator bool() const { return IsOpen(); }
template<class T>
std::span<T> GetSpan() const {
return std::span<T>((T*)data, (T*)data + (size + sizeof(T) - 1) / sizeof(T));
}
private:
size_t size = 0;
void *data = nullptr;
};
#endif // ndef MEMORY_MAPPED_FILE_H_INCLUDED
#include <immintrin.h>
#include <algorithm>
#include <array>
#include <bit>
#include <charconv>
#include <chrono>
#include <iostream>
#include <vector>
#include <deque>
#include <string>
#include <string_view>
#include <span>
#include <thread>
#include <unordered_set>
#include <unordered_map>
#include <future>
#include <variant>
#include "oisyn-timer.h"
#include "oisyn-parallel.h"
#include "MemoryMappedFile.h"
/*
int findchar(const char* ptr, char c)
{
auto allC = _mm256_set1_epi8(c);
constexpr uint RegSize = sizeof(allC);
for (uint offset = 0; ; offset += RegSize)
{
auto chars = _mm256_cmpeq_epi8(_mm256_loadu_epi8(ptr + offset), allC);
if (uint mvmask = _mm256_movemask_epi8(chars))
return std::countr_zero(mvmask) + offset;
}
}
*/
template<class C>
void Process(const char* data, long long* treeScore, int majorMax, int majorStride, int minorMin, int minorMax, int minorStride)
{
int nmajor = majorMax / majorStride;
auto algo = [=](int threadIdx, int numThreads)
{
int from = threadIdx * nmajor / numThreads;
int to = (threadIdx + 1) * nmajor / numThreads;
from *= majorStride;
to *= majorStride;
C comparator;
for (int major = from; major < to; major += majorStride)
{
int treeDists[10] = { };
for (int minor = minorMin, d = 0; comparator(minor, minorMax); minor += minorStride, d++)
{
int offset = major + minor;
int height = data[offset] - '0';
int distance = d - treeDists[height];
treeScore[offset] *= distance;
std::fill_n(treeDists, height + 1, d);
}
}
};
/*
if (nmajor < 200)
algo(0, 1./
else
{
auto futures = DoParallel(algo);
for (auto& f : futures)
f.wait();
}
*/
algo(0, 1);
}
bool run(const char* file)
{
Timer total(AutoStart);
Timer load(AutoStart);
MemoryMappedFile mmap(file);
if (!mmap)
return false;
auto data = mmap.GetSpan<const char>();
//int width = findchar(data.data(), '\n');
int width = std::find(data.begin(), data.end(), '\n') - data.begin();
int stride = width + 1;
int height = int(mmap.GetSize() + 1) / stride;
load.Stop();
Timer part1(AutoStart);
std::vector<bool> bits(stride * height);
int numTrees = 0;
for (int y = 0; y < height; y++)
{
int max = 0;
int offset = y * stride;
for (int x = 0; x < width; x++, offset++)
{
if (data[offset] > max)
{
if (!bits[offset])
numTrees++;
bits[offset] = 1;
max = data[offset];
}
}
int maxHeight = max;
max = 0;
offset--;
for (int x = width - 1; x >= 0; x--, offset--)
{
char c = data[offset];
if (c > max)
{
if (!bits[offset])
numTrees++;
bits[offset] = 1;
max = data[offset];
}
if (c >= maxHeight)
break;
}
}
for (int x = 0; x < width; x++)
{
int max = 0;
int offset = x;
for (int y = 0; y < height; y++, offset += stride)
{
if (data[offset] > max)
{
if (!bits[offset])
numTrees++;
bits[offset] = 1;
max = data[offset];
}
}
int maxHeight = max;
max = 0;
offset -= stride;
for (int y = height - 1; y >= 0; y--, offset -= stride)
{
char c = data[offset];
if (c > max)
{
if (!bits[offset])
numTrees++;
bits[offset] = 1;
max = data[offset];
}
if (c >= maxHeight)
break;
}
}
part1.Stop();
Timer part2(AutoStart);
std::vector<long long> treeScore(height * stride, 1);
Process<std::less<>>(data.data(), treeScore.data(), height * stride, stride, 0, width, 1); // left
Process<std::greater_equal<>>(data.data(), treeScore.data(), height * stride, stride, width - 1, 0, -1); // right
Process<std::less<>>(data.data(), treeScore.data(), width, 1, 0, height * stride, stride); // up
Process<std::greater_equal<>>(data.data(), treeScore.data(), width, 1, (height - 1) * stride, 0, -stride); // down
auto maxScore = std::ranges::max(treeScore);
part2.Stop();
total.Stop();
std::cout << "Time: " << total.GetTime() << "us (load:" << load.GetTime() << "us, part1:" << part1.GetTime() << "us, part2:" << part2.GetTime() << "us)\n" << numTrees << "\n" << maxScore << "\n";
return true;
}
int main()
{
const char* inputs[] =
{
//"example.txt",
"../testdata/08.in",
"../extradata/aoc_2022_day08_rect.txt",
"../extradata/aoc_2022_day08_sparse.txt",
"../extradata/day8-2000x2000.txt",
};
for (auto f : inputs)
{
std::cout << "\n===[ " << f << " ]==========\n";
if (!run(f))
std::cerr << "Can't open " << f << std::endl;
}
}
#include <algorithm>
#include <thread>
#include <type_traits>
#include <vector>
inline constexpr int MaxThreads = 32;
template<class F> auto DoParallel(F&& func)
{
int numThreads = std::clamp((int)std::thread::hardware_concurrency() - 2, 1, MaxThreads);
using result_t = std::invoke_result_t<std::decay_t<F>, int, int>;
std::vector<std::future<result_t>> results;
results.reserve(numThreads);
for (int i = 0; i < numThreads; i++)
results.push_back(std::async(std::launch::async, std::forward<F>(func), i, numThreads));
return results;
}
#include <chrono>
inline constexpr struct AutoStart_t { } AutoStart;
class Timer
{
using duration = std::chrono::high_resolution_clock::duration;
public:
Timer() = default;
Timer(AutoStart_t) { Start(); }
void Start() { m_t -= std::chrono::high_resolution_clock::now().time_since_epoch(); }
void Stop() { m_t += std::chrono::high_resolution_clock::now().time_since_epoch(); }
template<class D = std::chrono::microseconds>
long long GetTime() const { return std::chrono::duration_cast<D>(m_t).count(); }
private:
duration m_t = { };
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment