Last active
October 10, 2016 10:11
-
-
Save mrange/0d25f223ec1f14cacd1e17307b3881cb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio> | |
#include <cstdlib> | |
#include <algorithm> | |
#include <chrono> | |
#include <exception> | |
#include <memory> | |
#include <stdexcept> | |
#include <utility> | |
#include <windows.h> | |
#include <intrin.h> | |
#define CHECK(expr) \ | |
if (!(expr)) \ | |
{ \ | |
throw std::runtime_error (#expr); \ | |
} | |
namespace | |
{ | |
template<typename TOnExit> | |
struct scope_guard | |
{ | |
scope_guard (TOnExit const & on_exit) noexcept | |
: on_exit (on_exit) | |
{ | |
} | |
scope_guard (TOnExit && on_exit) noexcept | |
: on_exit (std::move (on_exit)) | |
{ | |
} | |
~scope_guard () noexcept | |
{ | |
on_exit (); | |
} | |
scope_guard (scope_guard const &) = delete ; | |
scope_guard (scope_guard && so) | |
: on_exit (so.on_exit) | |
{ | |
so.on_exit = [] () {}; | |
} | |
scope_guard operator= (scope_guard const &) = delete ; | |
scope_guard operator= (scope_guard &&) = delete ; | |
private: | |
TOnExit on_exit; | |
}; | |
template<typename TOnExit> | |
auto on_exit (TOnExit && on_exit) noexcept | |
{ | |
return scope_guard<TOnExit> (std::forward<TOnExit> (on_exit)); | |
} | |
struct unit_t | |
{ | |
}; | |
constexpr unit_t unit = unit_t (); | |
unit_t look_for_duplicates (wchar_t const * file_name) | |
{ | |
std::printf ("Looking for duplicates in %S\n", file_name); | |
auto in_the_beginning = std::chrono::high_resolution_clock::now (); | |
auto print__elapsed_time= on_exit ([=] () | |
{ | |
auto right_now = std::chrono::high_resolution_clock::now (); | |
auto diff = right_now - in_the_beginning; | |
auto ms = std::chrono::duration_cast<std::chrono::milliseconds> (diff); | |
std::printf (" it took %lld ms\n", ms.count ()); | |
}); | |
auto file_handle = CreateFile ( | |
file_name | |
, GENERIC_READ | |
, 0 | |
, nullptr | |
, OPEN_EXISTING | |
, FILE_ATTRIBUTE_NORMAL // | FILE_FLAG_NO_BUFFERING | |
, nullptr | |
); | |
CHECK (file_handle != INVALID_HANDLE_VALUE); | |
auto close__file_handle = on_exit ([=] () { CloseHandle (file_handle); }); | |
auto file_mapping = CreateFileMapping ( | |
file_handle | |
, nullptr | |
, PAGE_READONLY // | SEC_IMAGE_NO_EXECUTE | |
, 0 | |
, 0 | |
, nullptr); | |
CHECK (file_mapping); | |
auto close__file_mapping= on_exit ([=] () { CloseHandle (file_mapping); }); | |
auto buffer = MapViewOfFile ( | |
file_mapping | |
, FILE_MAP_READ | |
, 0 | |
, 0 | |
, 0 | |
); | |
CHECK(buffer); | |
auto close__buffer = on_exit ([=] () { UnmapViewOfFile (buffer); }); | |
LARGE_INTEGER file_size {}; | |
auto file_size_result = GetFileSizeEx(file_handle, &file_size); | |
CHECK (file_size_result); | |
std::uint8_t const * begin = static_cast<std::uint8_t const*> (buffer); | |
std::uint8_t const * end = begin + file_size.QuadPart; | |
std::uint8_t const * current = begin; | |
using set_value_t = LONGLONG; | |
constexpr auto set_value_shift= 6; | |
constexpr auto set_value_size = sizeof (set_value_t); | |
constexpr auto set_value_mask = (1 << set_value_shift) - 1; | |
static_assert (set_value_size == (1 << (set_value_shift - 3)), "set_value_size and set_value_shift must match"); | |
std::size_t remaining = file_size.QuadPart / 8; | |
constexpr auto set_size = 26*26*26*1000 / (1 << set_value_shift); | |
std::unique_ptr<set_value_t> set_up (new set_value_t [set_size]); | |
auto set = set_up.get (); | |
std::memset (set, 0, set_size * set_value_size); | |
while (remaining > 0) | |
{ | |
auto index = 0U; | |
index += (*current++ - 'A')*26*26*1000; | |
index += (*current++ - 'A')*26*1000; | |
index += (*current++ - 'A')*1000; | |
index += (*current++ - '0')*100; | |
index += (*current++ - '0')*10; | |
index += (*current++ - '0')*1; | |
current += 2; | |
auto pos = index >> set_value_shift; | |
auto bit = index & set_value_mask; | |
if (_bittestandset64 (set + pos, bit)) | |
{ | |
char dupe[8] {}; | |
auto prev = current - 8; | |
std::copy (prev, prev + 6, dupe); | |
std::printf ("Duplicate found: %s\n", dupe); | |
} | |
--remaining; | |
} | |
return unit; | |
} | |
} | |
int main () | |
{ | |
try | |
{ | |
look_for_duplicates (LR"(C:\temp\GoForPerf\Rgn00.txt)"); | |
look_for_duplicates (LR"(C:\temp\GoForPerf\Rgn01.txt)"); | |
look_for_duplicates (LR"(C:\temp\GoForPerf\Rgn02.txt)"); | |
return 0; | |
} | |
catch (std::exception const & e) | |
{ | |
std::printf ("Caught exception: %s\n", e.what ()); | |
return 998; | |
} | |
catch (...) | |
{ | |
return 999; | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment