Skip to content

Instantly share code, notes, and snippets.

@kissarat
Created November 16, 2014 10:57
Show Gist options
  • Save kissarat/d02858eb1fbfcff1e74d to your computer and use it in GitHub Desktop.
Save kissarat/d02858eb1fbfcff1e74d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from os import listdir, chdir, stat
from stat import *
from platform import uname, python_implementation, python_version_tuple
from os.path import abspath, basename
from sys import argv
from hashlib import md5
from base64 import b64encode
from json import dumps
from datetime import datetime
from time import sleep
import gzip
# Command-line handling: walker.py <dirname> [filename]
#   dirname  -- directory to scan, kept exactly as given on the command line.
#   filename -- output archive; when omitted it defaults to
#               <directory base name>-YY-MM-DD-HH.ficher.
if len(argv) < 2 or len(argv) > 3:
    print("Usage walker.py <dirname> [filename]")
    print("if the filename is not specified than dirname-YY-MM-DD-HH.ficher is used")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # site module and is not available in every interpreter setup.
    raise SystemExit(-1)

dirname = argv[1]
if 2 == len(argv):
    # Only the default output name is derived from the directory's base name.
    # dirname itself must remain a usable path: replacing it with the bare
    # base name would make the scan fail for any directory that is not a
    # direct child of the current working directory (including ".").
    stamp = datetime.now().strftime("%y-%m-%d-%H")
    filename = basename(abspath(dirname)) + '-' + stamp + '.ficher'
else:
    filename = argv[2]

# Running totals of directories and regular files recorded by the scan.
dirs = 0
files = 0
# Upper bound, in bytes, on how much file data is read in one go (128 MiB).
max_file_size = 128 * 1024 * 1024
def _md5_base64(path, chunk_size=1024 * 1024):
    """Return the base64-encoded MD5 digest of the file at *path* as ASCII text.

    The file is hashed in *chunk_size*-byte pieces so memory use stays flat
    no matter how large the file is.
    """
    digest = md5()
    with open(path, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return str(b64encode(digest.digest()), 'ascii')


def sub(d):
    """Recursively describe the contents of directory *d*.

    Entries whose name starts with '.' are skipped. Each sub-directory maps
    to a nested dict produced by a recursive call; each regular file maps to
    the list ``[md5_base64, size, mtime, mode, uid, gid]``. Entries of any
    other type are ignored. The module-level counters ``dirs`` and ``files``
    are incremented for every directory and file recorded.

    The working directory is changed to *d* while scanning and is always
    restored to what it was on entry, even when an error occurs.

    Entries that cannot be inspected, entered or read are reported with
    print() and skipped, so one bad entry does not abort the whole scan.

    Raises:
        OSError: if *d* itself cannot be entered or listed.
    """
    # Local import: the module's top-level imports do not include getcwd.
    from os import getcwd

    global dirs, files
    # Remember where we started; chdir('..') would land in the wrong place
    # when d has several path components, is absolute, or is a symlink.
    origin = getcwd()
    chdir(d)
    try:
        tree = {}
        for entry in sorted(listdir('.')):
            if entry.startswith('.'):
                continue
            try:
                info = stat(entry)
            except OSError as ex:
                # e.g. a dangling symlink or an entry removed mid-scan
                print(str(ex))
                continue
            mode = info.st_mode
            if S_ISDIR(mode):
                try:
                    tree[entry] = sub(entry)
                except OSError as ex:
                    # unreadable sub-directory: report it and keep going
                    print(str(ex))
                    continue
                dirs += 1
            elif S_ISREG(mode):
                try:
                    checksum = _md5_base64(entry)
                except OSError as ex:
                    print(str(ex))
                    continue
                tree[entry] = [
                    checksum,
                    info.st_size,
                    int(info.st_mtime),
                    mode,
                    info.st_uid,
                    info.st_gid,
                ]
                files += 1
                # Brief pause after every 32nd recorded file to throttle the scan.
                if 0 == files % 32:
                    sleep(0.01)
        return tree
    finally:
        chdir(origin)
# Scan the directory and write the result as a gzip-compressed JSON report.
#
# Both paths are resolved before scanning: sub() changes the working
# directory while it runs, so a relative path resolved afterwards could end
# up pointing somewhere other than where the user meant.
root_abspath = abspath(dirname)
filename = abspath(filename)

tree = sub(dirname)
report = {
    'version': 0.1,
    'time': datetime.now().isoformat(),
    'path': dirname,
    'abspath': root_abspath,
    'system': uname(),
    'python': [python_implementation(), python_version_tuple()],
    'dirs': dirs,
    'files': files,
    'ficher': tree,
}
data = dumps(report, ensure_ascii=False, indent=0)
# File names that are not valid in the filesystem encoding reach us as lone
# surrogate code points, which a strict UTF-8 encode rejects. Writing them as
# \uXXXX escapes keeps the output valid UTF-8 and valid JSON instead of
# discarding the whole scan with a UnicodeEncodeError.
data = data.encode("utf8", "backslashreplace")
with gzip.open(filename, "wb") as gz:
    gz.write(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment