Skip to content

Instantly share code, notes, and snippets.

@lukifer195
Created July 30, 2020 07:40
Show Gist options
  • Save lukifer195/7271b1bd4bac872e5831717b58c7809d to your computer and use it in GitHub Desktop.
Save lukifer195/7271b1bd4bac872e5831717b58c7809d to your computer and use it in GitHub Desktop.
Check for duplicate files by comparing a slice of their bytes
import os,glob
def br(data):
    """Print ``data`` repeated 80 times, used as a visual separator line.

    Args:
        data: Any value that supports ``* 80`` (e.g. a one-character string
            such as ``'#'``).
    """
    print(data * 80)
#==============================================#
folder_path = r'D:\Downloads'
#==============================================#
# Byte window used as each file's fingerprint: offsets [200, 500).
SAMPLE_OFFSET = 200
SAMPLE_SIZE = 300

# NOTE: two files are treated as duplicates when only this 300-byte window
# matches, and duplicates are deleted permanently below. Distinct files that
# happen to share the window would be removed too.
set_compare = set()      # fingerprints seen so far
set_duplicated = set()   # paths whose fingerprint was already seen

# glob returns entries in arbitrary order; sorting makes the choice of which
# copy is kept (the first one encountered) deterministic.
for filename in sorted(glob.glob(os.path.join(folder_path, '*.jpg'))):
    print(filename)
    # JPEG content is binary, so read raw bytes instead of decoding as UTF-8,
    # and seek to the window rather than loading the whole file.
    with open(filename, 'rb') as f:
        f.seek(SAMPLE_OFFSET)
        text = f.read(SAMPLE_SIZE)
    if not text:
        # The file ends before the window starts, so it has no fingerprint.
        # Skip it: otherwise every such short file would match the same empty
        # sample and be deleted as a "duplicate".
        continue
    if text in set_compare:
        print(filename, ' duplicated ')
        set_duplicated.add(filename)
    else:
        set_compare.add(text)
br('#')

for x in sorted(set_duplicated):
    os.remove(x)
    # str.replace returns a new string; keep the result so the printed path
    # actually reflects the replacement.
    x = x.replace(r'\\\\', r'\\')
    print(x)
br('#')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment