Created
June 12, 2019 13:30
-
-
Save neko-neko-nyan/a54b2c8a5bac330365c29e6cb479e6c0 to your computer and use it in GitHub Desktop.
Скрипт поиска одинаковых файлов (по sha256)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import os | |
import stat | |
import hashlib | |
def index_dir(d):
    """Yield the path of every regular file found at or below ``d``.

    ``d`` is inspected with ``os.stat``. A regular file is yielded as-is,
    a directory is listed and each of its entries is visited recursively,
    and any other kind of entry yields nothing.
    """
    mode = os.stat(d).st_mode

    if stat.S_ISREG(mode):
        yield d
        return

    if not stat.S_ISDIR(mode):
        # Neither a regular file nor a directory: nothing to report.
        return

    for name in os.listdir(d):
        child = os.path.join(d, name)
        yield from index_dir(child)
def hash_file(i):
    """Return the raw SHA-256 digest (``bytes``) of the file at path ``i``.

    The whole file is hashed. It is read in 2 MiB chunks so that memory use
    stays bounded regardless of file size. Reading only a single chunk would
    make large files that share their first 2 MiB look identical.
    """
    chunk_size = 2 * 1024 * 1024
    h = hashlib.sha256()
    with open(i, "rb") as f:
        # iter() with a sentinel keeps calling f.read until it returns b""
        # (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.digest()
def get_key(h, v):
    """Return the first key of mapping ``h`` whose value equals ``v``.

    Entries are examined in the mapping's iteration order. ``None`` is
    returned when no value matches.
    """
    matching_keys = (key for key, value in h.items() if value == v)
    return next(matching_keys, None)
def main():
    """Print pairs of files with identical content hashes.

    Every command-line argument is a file or directory to scan. For each
    file, taken in reverse discovery order, that shares its hash with a file
    discovered earlier, one line is printed: the file's path followed by the
    path of the earliest-discovered file with the same hash, both relative
    to the current directory.
    """
    # Collect each distinct path once, keeping first-discovery order, so a
    # path reachable through several arguments is not hashed repeatedly.
    paths = dict.fromkeys(
        path for root in sys.argv[1:] for path in index_dir(root)
    )
    hashes = {path: hash_file(path) for path in paths}

    # Group paths by digest (in discovery order) so each duplicate lookup is
    # O(1) rather than a scan over every remaining file.
    groups = {}
    for path, digest in hashes.items():
        groups.setdefault(digest, []).append(path)

    # Walk from the most recently discovered file backwards. At each step
    # the current path is the last remaining member of its group.
    for path in reversed(list(hashes)):
        group = groups[hashes[path]]
        group.pop()
        if group:
            print(os.path.relpath(path, '.'), os.path.relpath(group[0], '.'))
# Run the duplicate search only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment