Skip to content

Instantly share code, notes, and snippets.

@mrange
Last active October 10, 2016 10:11
Show Gist options
  • Save mrange/0d25f223ec1f14cacd1e17307b3881cb to your computer and use it in GitHub Desktop.
Save mrange/0d25f223ec1f14cacd1e17307b3881cb to your computer and use it in GitHub Desktop.
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <chrono>
#include <exception>
#include <memory>
#include <stdexcept>
#include <utility>
#include <windows.h>
#include <intrin.h>
#define CHECK(expr) \
if (!(expr)) \
{ \
throw std::runtime_error (#expr); \
}
namespace
{
template<typename TOnExit>
struct scope_guard
{
scope_guard (TOnExit const & on_exit) noexcept
: on_exit (on_exit)
{
}
scope_guard (TOnExit && on_exit) noexcept
: on_exit (std::move (on_exit))
{
}
~scope_guard () noexcept
{
on_exit ();
}
scope_guard (scope_guard const &) = delete ;
scope_guard (scope_guard && so)
: on_exit (so.on_exit)
{
so.on_exit = [] () {};
}
scope_guard operator= (scope_guard const &) = delete ;
scope_guard operator= (scope_guard &&) = delete ;
private:
TOnExit on_exit;
};
template<typename TOnExit>
auto on_exit (TOnExit && on_exit) noexcept
{
return scope_guard<TOnExit> (std::forward<TOnExit> (on_exit));
}
struct unit_t
{
};
constexpr unit_t unit = unit_t ();
unit_t look_for_duplicates (wchar_t const * file_name)
{
std::printf ("Looking for duplicates in %S\n", file_name);
auto in_the_beginning = std::chrono::high_resolution_clock::now ();
auto print__elapsed_time= on_exit ([=] ()
{
auto right_now = std::chrono::high_resolution_clock::now ();
auto diff = right_now - in_the_beginning;
auto ms = std::chrono::duration_cast<std::chrono::milliseconds> (diff);
std::printf (" it took %lld ms\n", ms.count ());
});
auto file_handle = CreateFile (
file_name
, GENERIC_READ
, 0
, nullptr
, OPEN_EXISTING
, FILE_ATTRIBUTE_NORMAL // | FILE_FLAG_NO_BUFFERING
, nullptr
);
CHECK (file_handle != INVALID_HANDLE_VALUE);
auto close__file_handle = on_exit ([=] () { CloseHandle (file_handle); });
auto file_mapping = CreateFileMapping (
file_handle
, nullptr
, PAGE_READONLY // | SEC_IMAGE_NO_EXECUTE
, 0
, 0
, nullptr);
CHECK (file_mapping);
auto close__file_mapping= on_exit ([=] () { CloseHandle (file_mapping); });
auto buffer = MapViewOfFile (
file_mapping
, FILE_MAP_READ
, 0
, 0
, 0
);
CHECK(buffer);
auto close__buffer = on_exit ([=] () { UnmapViewOfFile (buffer); });
LARGE_INTEGER file_size {};
auto file_size_result = GetFileSizeEx(file_handle, &file_size);
CHECK (file_size_result);
std::uint8_t const * begin = static_cast<std::uint8_t const*> (buffer);
std::uint8_t const * end = begin + file_size.QuadPart;
std::uint8_t const * current = begin;
using set_value_t = LONGLONG;
constexpr auto set_value_shift= 6;
constexpr auto set_value_size = sizeof (set_value_t);
constexpr auto set_value_mask = (1 << set_value_shift) - 1;
static_assert (set_value_size == (1 << (set_value_shift - 3)), "set_value_size and set_value_shift must match");
std::size_t remaining = file_size.QuadPart / 8;
constexpr auto set_size = 26*26*26*1000 / (1 << set_value_shift);
std::unique_ptr<set_value_t> set_up (new set_value_t [set_size]);
auto set = set_up.get ();
std::memset (set, 0, set_size * set_value_size);
while (remaining > 0)
{
auto index = 0U;
index += (*current++ - 'A')*26*26*1000;
index += (*current++ - 'A')*26*1000;
index += (*current++ - 'A')*1000;
index += (*current++ - '0')*100;
index += (*current++ - '0')*10;
index += (*current++ - '0')*1;
current += 2;
auto pos = index >> set_value_shift;
auto bit = index & set_value_mask;
if (_bittestandset64 (set + pos, bit))
{
char dupe[8] {};
auto prev = current - 8;
std::copy (prev, prev + 6, dupe);
std::printf ("Duplicate found: %s\n", dupe);
}
--remaining;
}
return unit;
}
}
int main ()
{
try
{
look_for_duplicates (LR"(C:\temp\GoForPerf\Rgn00.txt)");
look_for_duplicates (LR"(C:\temp\GoForPerf\Rgn01.txt)");
look_for_duplicates (LR"(C:\temp\GoForPerf\Rgn02.txt)");
return 0;
}
catch (std::exception const & e)
{
std::printf ("Caught exception: %s\n", e.what ());
return 998;
}
catch (...)
{
return 999;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment