Created
January 3, 2022 22:25
-
-
Save BonBonSlick/70ee2d9e5542c89c31439308e05f765d to your computer and use it in GitHub Desktop.
Python script that checks a folder of images for duplicates and removes them
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import time

import imagehash
from PIL import Image
# Comma-separated list of the file extensions (with leading dot) that may stay
# in the watched folder; files with any other extension are deleted.
allowedFileFormats = '.jpg, .png, .webp, .mp4, .gif'

# Folder that is scanned for duplicates (only its direct children are checked).
folderFailesPath = 'C:/Users/user/Downloads/test'

# Module-level counter; removeSimilarFiles() assigns a local of the same name,
# so this global is never updated by it.
removedFilesForIteration = 0

# Number of completed scan iterations.
numberofChecks = 0

# Running total of the values returned by removeSimilarFiles().
totalRemovedFiles = 0
def isAllowedFormat(fileExtension, allowedFormats=None):
    """Return True when fileExtension is one of the allowed extensions.

    The comparison is an exact, case-insensitive match against each entry of
    the allowed list. A plain substring test against the comma-separated
    string would wrongly accept '', '.jp' or ', ' and wrongly reject '.JPG'.

    Note that an empty extension is therefore NOT allowed.

    Args:
        fileExtension: Extension including the leading dot, e.g. '.png'
            (the second element returned by os.path.splitext).
        allowedFormats: Optional override of the allowed extensions, either a
            comma-separated string or an iterable of extensions. Defaults to
            the module-level allowedFileFormats.

    Returns:
        bool: True if the extension is allowed, False otherwise.
    """
    if allowedFormats is None:
        allowedFormats = allowedFileFormats
    if isinstance(allowedFormats, str):
        allowedFormats = allowedFormats.split(',')

    normalizedFormats = {entry.strip().lower() for entry in allowedFormats}
    # A trailing comma or blank entry must not make the empty extension valid.
    normalizedFormats.discard('')
    return fileExtension.strip().lower() in normalizedFormats
def _removeFile(filePath):
    """Delete filePath; return True only if the file was actually removed."""
    try:
        os.remove(filePath)
    except FileNotFoundError:
        # Already gone, e.g. deleted by someone else after the folder listing.
        return False
    except OSError as error:
        # Report instead of aborting the whole scan (file locked, no permission...).
        print('Could not remove file: ', filePath, error)
        return False
    return True


def removeSimilarFiles():
    """Scan folderFailesPath once and delete unwanted files.

    For every regular file directly inside folderFailesPath (in os.listdir
    order):

    * files whose extension is rejected by isAllowedFormat() are deleted;
    * the remaining files are hashed with imagehash.average_hash(); the first
      file seen with a given hash is kept and every later file with an
      identical hash is deleted as a duplicate;
    * files that Pillow cannot open are reported and left in place.

    Each file is hashed once and looked up in a dictionary, instead of being
    re-opened and re-hashed for every pair of files.

    WARNING: this function permanently deletes files.

    Returns:
        int: Number of duplicate files actually removed in this scan. Files
        removed because of their extension are not counted.
    """
    candidatePaths = [
        os.path.join(folderFailesPath, entryName)
        for entryName in os.listdir(folderFailesPath)
    ]
    filePaths = [path for path in candidatePaths if os.path.isfile(path)]
    print('Total files to check for current iteration: ', len(filePaths))

    removedFilesForIteration = 0
    keptPathByHash = {}  # hash text -> path of the first file with that hash

    for filePath in filePaths:
        fileExtension = os.path.splitext(filePath)[1]
        if not isAllowedFormat(fileExtension):
            print('Removing file with wrong extension: ', filePath)
            _removeFile(filePath)
            continue

        print('Comparing file name: ', filePath)
        try:
            # The context manager closes the file handle right after hashing,
            # so a later os.remove() is not blocked by an open handle.
            with Image.open(filePath) as image:
                fileHash = str(imagehash.average_hash(image))
        except OSError as error:
            # Pillow's UnidentifiedImageError is an OSError subclass; this is
            # hit for allowed but non-image files and for corrupt images.
            print('Skipping unreadable file: ', filePath, error)
            continue

        if fileHash not in keptPathByHash:
            keptPathByHash[fileHash] = filePath
            continue

        print('Removing similar file by name: ', filePath)
        if _removeFile(filePath):
            removedFilesForIteration += 1

    print('Total removed files per check iteration: ', removedFilesForIteration)
    return removedFilesForIteration
# Seconds to wait between two consecutive scans of the folder.
checkIntervalSeconds = 60

# Poll forever: run one clean-up pass, report the running totals, then sleep.
# Ctrl+C ends the loop without a traceback.
try:
    while True:
        print('STARTING CHECK ')
        totalRemovedFiles += removeSimilarFiles()
        numberofChecks += 1
        print('Total check iterations: ', numberofChecks)
        print('Total removed files: ', totalRemovedFiles)
        time.sleep(checkIntervalSeconds)
except KeyboardInterrupt:
    print('Stopped by user')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment