Skip to content

Instantly share code, notes, and snippets.

@sq5rix
Last active May 26, 2019 10:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sq5rix/c61c474e4fd98a7d55ce4280ad2aa787 to your computer and use it in GitHub Desktop.
Save sq5rix/c61c474e4fd98a7d55ce4280ad2aa787 to your computer and use it in GitHub Desktop.
Script to hash all files in a directory and all directories below
# Very dangerous!!
#
# Take care - run this script only if you are sure of what you are doing
# this script can WREAK HAVOC in your file system and it is irreversible
#
# it hashes all your file names ending with ext in the directory rootDir and below
# when you run it, there is no way to reverse the names, unless you have them stored elsewhere
#
# I use it to hash picture names for machine learning, I don't print names, because I work on thousands of files
#
# Use it only on data which require hashing, like file names
#
# Free to use
#
# Author: Tom Wawer
#
# <script src="https://gist.github.com/sq5rix/c61c474e4fd98a7d55ce4280ad2aa787.js"></script>
#
# https://gist.github.com/sq5rix/c61c474e4fd98a7d55ce4280ad2aa787
#
import hashlib
import os
# change this extension if you need to hash other files
# (keep it lower-case: file names are lower-cased before being compared to it)
ext = ".jpg"
# this is the root directory; change it with care and make sure it contains
# only data which require hashing
rootDir = "/home/tom/Python/bugs/pix-short"


def hashed_name(fname, extension):
    """Return the SHA-256 hex digest of *fname* followed by *extension*.

    A fresh hash object is created for every call, so the result depends
    only on *fname* itself, not on which names were hashed before it.
    """
    return hashlib.sha256(fname.encode()).hexdigest() + extension


def rename_to_hashes(root, extension):
    """Rename matching files under *root* to the hash of their own name.

    Walks *root* and every directory below it. A file matches when its
    lower-cased name ends with *extension*; it is renamed, inside its own
    directory, to ``hashed_name(fname, extension)``. Files whose target
    name already exists are reported and left untouched.

    The renaming is irreversible unless the original names are stored
    elsewhere.
    """
    for dirName, _subdirList, fileList in os.walk(root):
        for fname in fileList:
            if not fname.lower().endswith(extension):
                continue
            source = os.path.join(dirName, fname)
            target = os.path.join(dirName, hashed_name(fname, extension))
            # os.rename silently replaces an existing file on POSIX; never
            # destroy data that is already sitting under the target name.
            if os.path.exists(target):
                print("skipping {}: {} already exists".format(source, target))
                continue
            os.rename(source, target)


# Only touch the file system when run as a script, never on import.
if __name__ == "__main__":
    rename_to_hashes(rootDir, ext)
#### End of file
# Very dangerous!!
#
# Take care - run this script only if you are sure of what you are doing
# this script can WREAK HAVOC in your file system and it is irreversible
#
# it checks and hashes all your pictures ending with ext in the directory rootDir and below
# when you run it, there is no way to reverse the names, unless you have them stored elsewhere
#
# I use it to check and hash picture names for machine learning, I don't print names, because I work on thousands of files
#
# Free to use
#
# Author: Tom Wawer
#
# <script src="https://gist.github.com/sq5rix/c61c474e4fd98a7d55ce4280ad2aa787.js"></script>
#
# https://gist.github.com/sq5rix/c61c474e4fd98a7d55ce4280ad2aa787
#
import hashlib
import os
from PIL import Image
# change this extension if you need to hash other files
# (keep it lower-case: file names are lower-cased before being compared to it)
ext = ".jpg"
# this is the root directory; change it with care and make sure it contains
# only data which require hashing
rootDir = "/home/tom/Python/painters/train"


def hashed_name(fname, extension):
    """Return the SHA-256 hex digest of *fname* followed by *extension*.

    A fresh hash object is created for every call, so the result depends
    only on *fname* itself, not on which names were hashed before it.
    """
    return hashlib.sha256(fname.encode()).hexdigest() + extension


def hash_valid_images(root, extension):
    """Check matching images under *root*, then rename or delete them.

    Walks *root* and every directory below it, printing each directory
    name. A file matches when its lower-cased name ends with *extension*.
    Each match is opened with Pillow:

    * if opening fails with ``OSError`` the file is reported and deleted;
    * otherwise it is renamed, inside its own directory, to
      ``hashed_name(fname, extension)``, unless that target name already
      exists, in which case it is reported and left untouched.

    Errors from the rename itself propagate to the caller; they never
    cause the image to be deleted.
    """
    for dirName, _subdirList, fileList in os.walk(root):
        print(dirName)
        for fname in fileList:
            if not fname.lower().endswith(extension):
                continue
            imagename = os.path.join(dirName, fname)
            # Keep the try body to the open alone: only a file Pillow cannot
            # open counts as a bad image. The with-block closes the handle
            # so the later rename/remove does not run against an open file.
            try:
                with Image.open(imagename):
                    pass
            except OSError:
                print(f"deleting bad image {fname}")
                os.remove(imagename)
                continue
            target = os.path.join(dirName, hashed_name(fname, extension))
            # os.rename silently replaces an existing file on POSIX; never
            # destroy data that is already sitting under the target name.
            if os.path.exists(target):
                print(f"skipping {imagename}: {target} already exists")
                continue
            os.rename(imagename, target)


# Only touch the file system when run as a script, never on import.
if __name__ == "__main__":
    hash_valid_images(rootDir, ext)
#### End of file
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment