Skip to content

Instantly share code, notes, and snippets.

@BonBonSlick
Created January 3, 2022 22:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BonBonSlick/70ee2d9e5542c89c31439308e05f765d to your computer and use it in GitHub Desktop.
Save BonBonSlick/70ee2d9e5542c89c31439308e05f765d to your computer and use it in GitHub Desktop.
Python: check images for duplicates and remove them
from PIL import Image
import imagehash
import time
import os
# Comma-separated list of file extensions that the clean-up pass accepts.
allowedFileFormats = '.jpg, .png, .webp, .mp4, .gif'
# Folder whose files are scanned.
folderFailesPath = 'C:/Users/user/Downloads/test'

# Counters, all starting at zero.
removedFilesForIteration = numberofChecks = totalRemovedFiles = 0
def isAllowedFormat(fileExtension, allowedFormats=None):
    """Return True when ``fileExtension`` is one of the allowed extensions.

    The comparison is an exact, case-insensitive match against each entry,
    so ``'.JPG'`` is accepted when ``'.jpg'`` is allowed, while fragments
    such as ``''``, ``'.jp'`` or ``', '`` are rejected (a plain substring
    test against the comma-separated string would accept them).

    Args:
        fileExtension: Extension including the leading dot, e.g. the second
            item returned by ``os.path.splitext``.
        allowedFormats: Optional override of the allowed extensions, either
            a comma-separated string or an iterable of extension strings.
            Defaults to the module-level ``allowedFileFormats``.

    Returns:
        bool: whether the extension is allowed.
    """
    if allowedFormats is None:
        allowedFormats = allowedFileFormats
    if isinstance(allowedFormats, str):
        allowedFormats = allowedFormats.split(',')
    # Normalise every entry: drop surrounding blanks, ignore empty items.
    normalized = {
        entry.strip().lower() for entry in allowedFormats if entry.strip()
    }
    return fileExtension.lower() in normalized
def _removeIfExists(filePath):
    """Delete ``filePath``, ignoring the case where it is already gone."""
    try:
        os.remove(filePath)
    except FileNotFoundError:
        # The file vanished between listing and deletion; nothing to do.
        pass


def removeSimilarFiles():
    """Run one clean-up pass over ``folderFailesPath`` and delete duplicates.

    Every regular file directly inside the folder is visited once, in
    ``os.listdir`` order:

    * a file whose extension is rejected by ``isAllowedFormat`` is deleted;
    * otherwise its ``imagehash.average_hash`` is computed and the file is
      deleted when an earlier file of this pass produced an identical hash,
      so the first file of each group of identical hashes is the one kept;
    * a file that cannot be opened as an image is reported and left in place
      instead of aborting the whole pass.

    Returns:
        int: number of files deleted as duplicates during this pass. Files
        deleted because of their extension are not counted.
    """
    dirFileNames = os.listdir(folderFailesPath)
    files = [
        fileName for fileName in dirFileNames
        if os.path.isfile(os.path.join(folderFailesPath, fileName))
    ]
    # Report regular files only: sub-directories are never checked.
    print('Total files to check for current iteration: ', len(files))

    removedFilesForIteration = 0
    # Hex form of every hash seen so far -> path of the file kept for it.
    # Hashing each file once replaces re-hashing it against every other file.
    keptByHash = {}
    for fileName in files:
        compareFileName = os.path.join(folderFailesPath, fileName)
        print('Comparing file name: ', compareFileName)

        fileExtension = os.path.splitext(compareFileName)[1]
        if not isAllowedFormat(fileExtension):
            print('Removing file with wrong extension: ', compareFileName)
            _removeIfExists(compareFileName)
            continue

        try:
            # The context manager closes the file handle before any removal,
            # which matters on platforms that refuse to delete open files.
            with Image.open(compareFileName) as image:
                compareFileHash = str(imagehash.average_hash(image))
        except OSError as error:
            # Covers missing files and files the imaging library cannot read.
            print('Skipping unreadable file: ', compareFileName, error)
            continue

        if compareFileHash in keptByHash:
            print('Removing similar file by name: ', compareFileName)
            removedFilesForIteration += 1
            _removeIfExists(compareFileName)
        else:
            keptByHash[compareFileHash] = compareFileName

    print('Total removed files per check iteration: ', removedFilesForIteration)
    return removedFilesForIteration
# Poll forever: run one clean-up pass, print the running totals, then wait
# 60 seconds before the next pass.
while True:
    print('STARTING CHECK ')
    totalRemovedFiles = totalRemovedFiles + removeSimilarFiles()
    numberofChecks = numberofChecks + 1
    print('Total check iterations: ', numberofChecks)
    print('Total removed files: ', totalRemovedFiles)
    time.sleep(60)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment