Skip to content

Instantly share code, notes, and snippets.

@leochou0729
Last active May 7, 2020 07:17
Show Gist options
  • Save leochou0729/4983fedebffef733fdfc97f4b1b6696a to your computer and use it in GitHub Desktop.
Save leochou0729/4983fedebffef733fdfc97f4b1b6696a to your computer and use it in GitHub Desktop.
#include <sys/types.h>
#include <dirent.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <unistd.h>

#include <chrono>
#include <cstdio>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <memory>
#include <set>
#include <sstream>
#include <string>

#include "xxhash.hpp"
// Hex digests of every file hashed so far; a failed insert means the digest
// was already present.
std::set<std::string> fileSet{};

// Number of files that were hashed successfully.
unsigned long long ullFileCount{0};

// Number of hashed files whose digest was already in fileSet.
unsigned long long ullCollision{0};
bool HashFile(const std::string& strFile, std::string& strHash)
{
FILE *f;
size_t n;
unsigned char buf[4096];
if ((f = fopen(strFile.c_str(), "rb")) == nullptr)
{
return false;
}
xxh::hash3_state_t<128> hash_stream;
while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
{
hash_stream.update(buf, n);
}
fclose(f);
xxh::hash_t<128> result = hash_stream.digest();
unsigned char *pResult = (unsigned char *)&result;
std::stringstream ss;
ss << std::hex;
for (int i = 0; i < 16; ++i)
{
ss << std::setw(2) << std::setfill('0') << (0xff & (unsigned int)pResult[i]);
}
strHash = ss.str();
return true;
}
// Reports whether strPath names a directory.
//
// Uses stat(), so a symbolic link is judged by what it points to.
// Returns false when stat() fails (e.g. the path does not exist or is not
// accessible) instead of inspecting an uninitialised stat buffer.
bool IsDirectory(const std::string& strPath)
{
    struct stat pathStat;
    if (stat(strPath.c_str(), &pathStat) != 0)
    {
        return false;
    }
    return S_ISDIR(pathStat.st_mode);
}
// Reports whether strPath names a regular file.
//
// Uses stat(), so a symbolic link is judged by what it points to.
// Returns false when stat() fails (e.g. the path does not exist or is not
// accessible) instead of inspecting an uninitialised stat buffer.
bool IsRegularFile(const std::string& strPath)
{
    struct stat pathStat;
    if (stat(strPath.c_str(), &pathStat) != 0)
    {
        return false;
    }
    return S_ISREG(pathStat.st_mode);
}
// Recursively walks the directory tree rooted at strBasePath and hashes
// every regular file whose name does not start with '.'.
//
// For each file that HashFile() processes successfully, ullFileCount is
// incremented and the digest is inserted into fileSet; if the digest was
// already present, ullCollision is incremented as well.
//
// Directories that cannot be opened are skipped silently. Sub-directories
// are always descended into (including ones whose name starts with '.');
// entries that are neither directories nor regular files are ignored.
void ListFiles(const std::string& strBasePath)
{
    // Deleter so the directory stream is closed on every exit path.
    struct DirCloser
    {
        void operator()(DIR *d) const noexcept { closedir(d); }
    };

    std::unique_ptr<DIR, DirCloser> dir(opendir(strBasePath.c_str()));
    if (!dir)
    {
        return;
    }

    // Build "<base>/" once instead of for every entry.
    std::string strPrefix = strBasePath;
    if (strPrefix.empty() || strPrefix.back() != '/')
    {
        strPrefix += '/';
    }

    struct dirent *dp;
    while ((dp = readdir(dir.get())) != nullptr)
    {
        const char *name = dp->d_name;
        if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
        {
            continue;
        }

        unsigned char type = dp->d_type;
        if (type != DT_DIR && type != DT_REG && type != DT_UNKNOWN)
        {
            continue;
        }

        const std::string strEntry = strPrefix + name;

        if (type == DT_UNKNOWN)
        {
            // Some file systems do not report the entry type through
            // readdir(). lstat() is used rather than stat() so symbolic
            // links keep being ignored, as they are when d_type is DT_LNK.
            struct stat entryStat;
            if (lstat(strEntry.c_str(), &entryStat) != 0)
            {
                continue;
            }
            if (S_ISDIR(entryStat.st_mode))
            {
                type = DT_DIR;
            }
            else if (S_ISREG(entryStat.st_mode))
            {
                type = DT_REG;
            }
            else
            {
                continue;
            }
        }

        if (type == DT_DIR)
        {
            ListFiles(strEntry);
            continue;
        }

        // Regular file: hidden files are not hashed.
        if (name[0] == '.')
        {
            continue;
        }

        std::string strHash;
        if (HashFile(strEntry, strHash))
        {
            ullFileCount++;
            if (!fileSet.insert(std::move(strHash)).second)
            {
                ullCollision++;
            }
        }
    }
}
// Entry point: hashes every file under a root directory and prints the
// elapsed wall-clock time, the number of files hashed and the number of
// files whose digest had already been seen.
//
// Usage: <program> [root-directory]
//   root-directory - optional; the tree to scan. Defaults to "/" when the
//                    argument is absent or empty.
int main(int argc, const char * argv[])
{
    std::string rootPath = (argc > 1 && argv[1][0] != '\0') ? argv[1] : "/";

    const auto start = std::chrono::steady_clock::now();
    ListFiles(rootPath);
    const auto end = std::chrono::steady_clock::now();

    const auto elapsedSeconds =
        std::chrono::duration_cast<std::chrono::seconds>(end - start).count();

    // '\n' instead of std::endl: the stream is flushed at exit anyway.
    std::cout << "Elapsed time in seconds = " << elapsedSeconds << '\n';
    std::cout << "File count = " << ullFileCount << '\n';
    std::cout << "Hash collision = " << ullCollision << '\n';
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment