Last active
February 15, 2018 22:30
-
-
Save kebman/c161238831d97da85cdea9ada329e8c4 to your computer and use it in GitHub Desktop.
A small Python script meant to find file duplicates by listing hashes, file (birth) creation times and file sizes. Copy and edit as you wish.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2
import datetime
import hashlib
import os
import sys
# Warning: recursive program. If run from, or pointed at, the root directory,
# it will list ALL THE FILES on the drive. Hit Ctrl+C if you get bored.
path = "."
# Unix (and OS X) specific path names
homedir = os.path.expanduser('~')
def sha256(fname, chunk_size=65536):
    """Return the SHA-256 hex digest of the file at ``fname``.

    The file is opened in binary mode and read in pieces of ``chunk_size``
    bytes, so large files are hashed without being loaded into memory at once.

    Args:
        fname: Path of the file to hash.
        chunk_size: Number of bytes requested per read. Defaults to 65536.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive number.
        IOError, OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would end the loop at once and
    # silently yield the digest of empty input; reject such sizes up front.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")
    hash_sha256 = hashlib.sha256()
    with open(fname, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_sha256.update(chunk)
    return hash_sha256.hexdigest()
def walkOs(path):
    """Walk the file system from input path (path), and print selected stats.

    For every file found below ``path`` one line is printed to stdout in the
    form ``<timestamp> <sha256> <absolute path> <size>b``, where the timestamp
    is formatted as ``YYYY-MM-DD HH:MM:SS`` in local time.

    The timestamp is the file's birth (creation) time when the platform's
    ``os.stat`` result exposes ``st_birthtime``; otherwise ``st_ctime`` is used
    as a fallback. NOTE(review): on most Unix systems ``st_ctime`` is the last
    metadata change, not the creation time - confirm this fallback is
    acceptable for the intended use.

    The process working directory is left unchanged. Files that cannot be
    stat'ed or read (for example dangling symlinks or permission errors) are
    reported on stderr and skipped so that one bad file does not abort the walk.

    Args:
        path: Directory at which the recursive walk starts.

    Returns:
        None.
    """
    # dependency: sha256()
    # Resolve once so every printed path is absolute without having to chdir.
    root = os.path.realpath(path)
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            fullpathfile = os.path.join(dirpath, filename)
            try:
                info = os.stat(fullpathfile)  # single stat call per file
                digest = sha256(fullpathfile)
            except (IOError, OSError) as err:
                sys.stderr.write("skipping " + fullpathfile + ": " + str(err) + "\n")
                continue
            # st_birthtime is not available on every platform (e.g. Linux).
            birthstamp = getattr(info, "st_birthtime", info.st_ctime)
            value = datetime.datetime.fromtimestamp(birthstamp)
            print(value.strftime('%Y-%m-%d %H:%M:%S') + " " + digest + " " + fullpathfile + " " + str(info.st_size) + "b")
# Script entry point: list every file below the configured start directory.
walkOs(path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also my first real Python project :p