Skip to content

Instantly share code, notes, and snippets.

@neko-neko-nyan
Created June 12, 2019 13:30
Show Gist options
  • Save neko-neko-nyan/a54b2c8a5bac330365c29e6cb479e6c0 to your computer and use it in GitHub Desktop.
Save neko-neko-nyan/a54b2c8a5bac330365c29e6cb479e6c0 to your computer and use it in GitHub Desktop.
Скрипт поиска одинаковых файлов (по sha256)
#!/usr/bin/env python3
import sys
import os
import stat
import hashlib
def index_dir(d):
    """Yield the path of every regular file located at or below *d*.

    If *d* is itself a regular file, it is the only path yielded. If it is
    a directory, each entry is visited recursively. Anything else (as
    classified by ``os.stat``, which follows symbolic links) yields nothing.
    """
    mode = os.stat(d).st_mode
    if stat.S_ISREG(mode):
        yield d
        return
    if not stat.S_ISDIR(mode):
        # Neither a regular file nor a directory: nothing to index.
        return
    for entry in os.listdir(d):
        child = os.path.join(d, entry)
        yield from index_dir(child)
def hash_file(i, chunk_size=2 * 1024 * 1024):
    """Return the raw SHA-256 digest (bytes) of the whole file at path *i*.

    The file is read in blocks of *chunk_size* bytes so that large files do
    not have to fit in memory, and every block is fed to the hash. Hashing
    only the first block would make files that share a prefix but differ
    later look identical.

    Raises:
        ValueError: if *chunk_size* is not a positive integer.
        OSError: if the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) returns b"" immediately, which would silently hash nothing.
        raise ValueError("chunk_size must be a positive integer")
    h = hashlib.sha256()
    with open(i, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.digest()
def get_key(h, v):
    """Return the first key of mapping *h* whose value equals *v*.

    Entries are examined in the mapping's own iteration order. ``None`` is
    returned when no value matches.
    """
    matching_keys = (key for key, value in h.items() if value == v)
    return next(matching_keys, None)
def main():
    """Print duplicate files found under the paths given on the command line.

    Each argument in ``sys.argv[1:]`` is expanded with ``index_dir`` and
    every resulting file is hashed with ``hash_file``. For each file whose
    digest equals that of an earlier-indexed file, one line is printed: the
    file's path followed by the path of the first indexed file with the same
    digest, both relative to the current directory. Files are reported most
    recently indexed first.
    """
    # Keyed by path, so a file reached through more than one argument is
    # hashed and reported only once.
    hashes = {}
    for root in sys.argv[1:]:
        for path in index_dir(root):
            if path not in hashes:
                hashes[path] = hash_file(path)

    # First path seen for each digest. One dict lookup per file replaces a
    # scan over all remaining entries, making the pass linear, not quadratic.
    first_seen = {}
    for path, digest in hashes.items():
        first_seen.setdefault(digest, path)

    for path, digest in reversed(list(hashes.items())):
        earliest = first_seen[digest]
        if earliest != path:
            print(os.path.relpath(path, '.'), os.path.relpath(earliest, '.'))
# Run the duplicate scan only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment