Last active
May 26, 2019 10:41
-
-
Save sq5rix/c61c474e4fd98a7d55ce4280ad2aa787 to your computer and use it in GitHub Desktop.
Script to hash all files in a directory and all directories below
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Very dangerous!!
#
# Take care - run this script only if you are sure of what you are doing
# this script can WREAK HAVOC in your file system and is irreversible
#
# it hashes all your file names ending with ext in the directory rootDir and below
# once you run it, there is no way to recover the original names, unless you have them stored elsewhere
#
# I use it to hash picture names for machine learning, I don't print names, because I work on thousands of files
#
# Use it only on data which require hashing, like file names
#
# Free to use
#
# Author: Tom Wawer
#
# <script src="https://gist.github.com/sq5rix/c61c474e4fd98a7d55ce4280ad2aa787.js"></script>
#
# https://gist.github.com/sq5rix/c61c474e4fd98a7d55ce4280ad2aa787
#
import hashlib | |
import os | |
# change this extension if you need to hash other files
ext = ".jpg"
# this is the root directory, change it with care, check that it contains only data which require hashing
rootDir = "/home/tom/Python/bugs/pix-short"


def hash_file_names(root_dir, extension):
    """Rename matching files under *root_dir* to the SHA-256 of their own name.

    Walks *root_dir* with ``os.walk`` and, for every file whose lower-cased
    name ends with *extension*, renames it in place to
    ``sha256(<original name>).hexdigest() + extension``.

    Each name is hashed on its own, so the new name depends only on the old
    name and not on the order in which the files happen to be visited.

    Args:
        root_dir: Directory whose tree is processed.
        extension: Suffix (including the dot) that selects the files to
            rename; it is also appended to the digest.

    Returns:
        The number of files that were renamed.

    Raises:
        OSError: Propagated from ``os.rename`` if a rename fails.
    """
    renamed = 0
    for dir_name, _subdirs, file_names in os.walk(root_dir):
        for fname in file_names:
            if not fname.lower().endswith(extension):
                continue
            # A fresh digest per file: a shared, continuously updated hash
            # object would mix all previously seen names into this one.
            digest = hashlib.sha256(fname.encode()).hexdigest()
            os.rename(
                os.path.join(dir_name, fname),
                os.path.join(dir_name, digest + extension),
            )
            renamed += 1
    return renamed


# simple os.walk to rename file names with their hashes
hash_file_names(rootDir, ext)
#### End of file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Very dangerous!!
#
# Take care - run this script only if you are sure of what you are doing
# this script can WREAK HAVOC in your file system and is irreversible
#
# it checks and hashes all your pictures ending with ext in the directory rootDir and below
# once you run it, there is no way to recover the original names, unless you have them stored elsewhere
#
# I use it to check and hash picture names for machine learning, I don't print names, because I work on thousands of files
#
# Free to use
#
# Author: Tom Wawer
#
# <script src="https://gist.github.com/sq5rix/c61c474e4fd98a7d55ce4280ad2aa787.js"></script>
#
# https://gist.github.com/sq5rix/c61c474e4fd98a7d55ce4280ad2aa787
#
import hashlib | |
import os | |
from PIL import Image | |
# change this extension if you need to hash other files
ext = ".jpg"
# this is the root directory, change it with care, check that it contains only data which require hashing
rootDir = "/home/tom/Python/painters/train"


def check_and_hash_images(root_dir, extension):
    """Delete unreadable images under *root_dir* and hash the names of the rest.

    Walks *root_dir* with ``os.walk`` and prints each directory visited. For
    every file whose lower-cased name ends with *extension*:

    * if ``Image.open`` cannot open it, the file is reported and deleted;
    * otherwise it is renamed in place to
      ``sha256(<original name>).hexdigest() + extension``.

    Only a failure to open the image triggers deletion. A failing rename or a
    keyboard interrupt propagates instead of removing a valid file.

    Args:
        root_dir: Directory whose tree is processed.
        extension: Suffix (including the dot) that selects the files to
            process; it is also appended to the digest.

    Returns:
        A ``(renamed, deleted)`` tuple with the number of files in each group.

    Raises:
        OSError: Propagated from ``os.rename`` or ``os.remove`` on failure.
    """
    renamed = 0
    deleted = 0
    for dir_name, _subdirs, file_names in os.walk(root_dir):
        print(dir_name)
        for fname in file_names:
            if not fname.lower().endswith(extension):
                continue
            image_path = os.path.join(dir_name, fname)
            try:
                # Open and immediately close: the handle must not stay open
                # while the file is renamed.
                # NOTE(review): Image.open is documented as lazy, so this
                # presumably validates the header only - confirm that is enough.
                with Image.open(image_path):
                    pass
            except (OSError, ValueError, Image.DecompressionBombError):
                print(f"deleting bad image {fname}")
                os.remove(image_path)
                deleted += 1
                continue
            # A fresh digest per file: a shared, continuously updated hash
            # object would mix all previously seen names into this one.
            digest = hashlib.sha256(fname.encode()).hexdigest()
            os.rename(image_path, os.path.join(dir_name, digest + extension))
            renamed += 1
    return renamed, deleted


# simple os.walk to check images and rename them with their hashes
check_and_hash_images(rootDir, ext)
#### End of file |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment