Skip to content

Instantly share code, notes, and snippets.

@tvandijck
Created October 24, 2018 08:31
Show Gist options
  • Save tvandijck/e8ac50f01b6c656f5599d50b83e35ca9 to your computer and use it in GitHub Desktop.
Save tvandijck/e8ac50f01b6c656f5599d50b83e35ca9 to your computer and use it in GitHub Desktop.
// MeowHash.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include "pch.h"
#include <inttypes.h>
#include <intrin.h>
#include <stdio.h>
#include <string>
#include <map>
#include <vector>
#include <Windows.h>
#include <shlwapi.h>
#pragma comment(lib, "Shlwapi.lib")
#include "meow_hash.h"
/*union meow_lane
{
struct
{
__m128i L0;
__m128i L1;
__m128i L2;
__m128i L3;
};
uint64_t Sub[8];
uint32_t Sub32[16];
};
static inline void AESRotate(meow_lane &A, meow_lane &B)
{
A.L0 = _mm_aesdec_si128(A.L0, B.L0);
A.L1 = _mm_aesdec_si128(A.L1, B.L1);
A.L2 = _mm_aesdec_si128(A.L2, B.L2);
A.L3 = _mm_aesdec_si128(A.L3, B.L3);
__m128i Temp = B.L0;
B.L0 = B.L1;
B.L1 = B.L2;
B.L2 = B.L3;
B.L3 = Temp;
}
static inline void AESLoad(meow_lane& S, const uint8_t* from)
{
S.L0 = _mm_aesdec_si128(S.L0, *(__m128i *)(from));
S.L1 = _mm_aesdec_si128(S.L1, *(__m128i *)(from + 16));
S.L2 = _mm_aesdec_si128(S.L2, *(__m128i *)(from + 32));
S.L3 = _mm_aesdec_si128(S.L3, *(__m128i *)(from + 48));
}
static inline void AESMerge(meow_lane& A, const meow_lane& B)
{
A.L0 = _mm_aesdec_si128(A.L0, B.L0);
A.L1 = _mm_aesdec_si128(A.L1, B.L1);
A.L2 = _mm_aesdec_si128(A.L2, B.L2);
A.L3 = _mm_aesdec_si128(A.L3, B.L3);
}
struct meow_state
{
meow_lane S0123;
meow_lane S4567;
meow_lane S89AB;
meow_lane SCDEF;
};
static meow_lane MeowHash(uint64_t seed, const void* sourceInit, uint64_t len, meow_state& state)
{
// NOTE(casey): The initialization vector follows falkhash's lead and uses the seed twice, but the second time
// the length plus one is added to differentiate. This seemed sensible, but I haven't thought too hard about this,
// there may be better things to use as an IV.
meow_lane IV;
IV.Sub[0] = IV.Sub[2] = IV.Sub[4] = IV.Sub[6] = seed;
IV.Sub[1] = IV.Sub[3] = IV.Sub[5] = IV.Sub[7] = seed + len + 1;
// NOTE(casey): Initialize all 16 streams with the initialization vector
state.S0123 = IV;
state.S4567 = IV;
state.S89AB = IV;
state.SCDEF = IV;
// NOTE(casey): Handle as many full 256-byte blocks as possible
const uint8_t* source = static_cast<const uint8_t*>(sourceInit);
uint64_t blockCount = (len >> 8);
len -= (blockCount << 8);
while (blockCount--)
{
AESLoad(state.S0123, source);
AESLoad(state.S4567, source + 64);
AESLoad(state.S89AB, source + 128);
AESLoad(state.SCDEF, source + 192);
source += (1 << 8);
}
// NOTE(casey): If residual data remains, hash one final 256-byte block padded with the initialization vector
if (len)
{
// TODO(casey): Can this just be zeroes?
meow_lane partial[] = { IV, IV, IV, IV };
uint8_t* dest = reinterpret_cast<uint8_t*>(partial);
while (len--)
{
*dest++ = *source++;
}
uint8_t* P = reinterpret_cast<uint8_t*>(partial);
AESMerge(state.S0123, partial[0]);
AESMerge(state.S4567, partial[1]);
AESMerge(state.S89AB, partial[2]);
AESMerge(state.SCDEF, partial[3]);
}
// NOTE(casey): Combine the 16 streams into a single hash to spread the bits out evenly
meow_lane R0 = IV;
AESRotate(R0, state.S0123);
AESRotate(R0, state.S4567);
AESRotate(R0, state.S89AB);
AESRotate(R0, state.SCDEF);
AESRotate(R0, state.S0123);
AESRotate(R0, state.S4567);
AESRotate(R0, state.S89AB);
AESRotate(R0, state.SCDEF);
AESRotate(R0, state.S0123);
AESRotate(R0, state.S4567);
AESRotate(R0, state.S89AB);
AESRotate(R0, state.SCDEF);
AESRotate(R0, state.S0123);
AESRotate(R0, state.S4567);
AESRotate(R0, state.S89AB);
AESRotate(R0, state.SCDEF);
// NOTE(casey): Repeat AES enough times to ensure diffusion to all bits in each 128-bit lane
AESMerge(R0, IV);
AESMerge(R0, IV);
AESMerge(R0, IV);
AESMerge(R0, IV);
AESMerge(R0, IV);
return(R0);
}*/
#define HASH_64
std::string toString(const meow_lane& A)
{
char buffer[256];
#if defined(HASH_256)
sprintf_s(buffer, "%08X%08X%08X%08X-%08X%08X%08X%08X",
A.Sub32[0], A.Sub32[1], A.Sub32[2], A.Sub32[3],
A.Sub32[4], A.Sub32[5], A.Sub32[6], A.Sub32[7]);
#elif defined(HASH_128)
sprintf_s(buffer, "%08X%08X%08X%08X",
A.Sub32[0], A.Sub32[1], A.Sub32[2], A.Sub32[3]);
#elif defined(HASH_64)
sprintf_s(buffer, "%08X%08X",
A.Sub32[0], A.Sub32[1]);
#else
sprintf_s(buffer, "%08X%08X%08X%08X-%08X%08X%08X%08X-%08X%08X%08X%08X-%08X%08X%08X%08X",
A.Sub32[0], A.Sub32[1], A.Sub32[2], A.Sub32[3],
A.Sub32[4], A.Sub32[5], A.Sub32[6], A.Sub32[7],
A.Sub32[8], A.Sub32[9], A.Sub32[10], A.Sub32[11],
A.Sub32[12], A.Sub32[13], A.Sub32[14], A.Sub32[15]);
#endif
return buffer;
}
/*std::string toString(const meow_state& A)
{
std::string str = "\n";
str += " S0123: " + toString(A.S0123) + "\n";
str += " S4567: " + toString(A.S4567) + "\n";
str += " S89AB: " + toString(A.S89AB) + "\n";
str += " SCDEF: " + toString(A.SCDEF) + "\n";
return str;
}*/
struct Info
{
std::string fileName;
std::string state;
int fileSize;
};
static std::map<std::string, Info> s_collisionMap;
static std::map<std::string, std::vector<Info>> s_collisions;
void Hash(std::string fileName)
{
FILE* file = fopen(fileName.c_str(), "rb");
if (file != nullptr)
{
fseek(file, 0, SEEK_END);
int size = (int)ftell(file);
fseek(file, 0, SEEK_SET);
char* buffer = (char*)_aligned_malloc(size, 16);
fread(buffer, 1, size, file);
fclose(file);
//meow_state state;
//meow_lane hash = MeowHash(0, buffer, size, state);
meow_lane hash = MeowHash1(0, size, buffer);
_aligned_free(buffer);
std::string hashStr = toString(hash);
auto iter = s_collisionMap.find(hashStr);
if (iter != s_collisionMap.end())
{
if (s_collisions[hashStr].empty())
{
s_collisions[hashStr].push_back(iter->second);
}
Info info;
info.fileName = fileName;
info.fileSize = size;
//info.state = toString(state);
s_collisions[hashStr].push_back(info);
} else
{
Info info;
info.fileName = fileName;
info.fileSize = size;
//info.state = toString(state);
s_collisionMap.insert(std::make_pair(hashStr, info));
}
}
}
void EnumFiles(std::string folderPath, std::string searchParameter)
{
std::string searchFile = folderPath + searchParameter;
WIN32_FIND_DATAA info;
HANDLE hFile = FindFirstFileA(searchFile.c_str(), &info);
if (hFile != INVALID_HANDLE_VALUE)
{
do
{
if (info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
{
if (strcmp(info.cFileName, ".") != 0 && strcmp(info.cFileName, "..") != 0)
{
EnumFiles(folderPath + info.cFileName + "\\", searchParameter);
}
}
else
{
Hash(folderPath + info.cFileName);
}
} while (FindNextFileA(hFile, &info));
FindClose(hFile);
}
}
int main()
{
EnumFiles("C:\\extracted\\", "*.*");
int collisionCount = 0;
for (auto& item : s_collisions)
{
collisionCount++;
printf("COLLISION [%d]:\n", collisionCount);
printf(" hash : %s\n", item.first.c_str());
printf(" collisions : %d\n", (int)item.second.size());
for (auto& info : item.second)
{
printf(" file : %s\n", info.fileName.c_str());
printf(" size : %d\n", info.fileSize);
//printf(" state : %s\n", info.state.c_str());
char output[260];
char* ptr = PathFindFileNameA(info.fileName.c_str());
sprintf(output, "c:\\coll\\%s", ptr);
CopyFileA(info.fileName.c_str(), output, false);
}
}
printf("\n");
printf("Number of collisions: %d\n", collisionCount);
printf("Number of files : %d\n", (int)s_collisionMap.size());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment