Skip to content

Instantly share code, notes, and snippets.

@410063005
Last active June 4, 2018 06:31
Show Gist options
  • Save 410063005/040e2c956948b15b3993101e852b91b1 to your computer and use it in GitHub Desktop.
Save 410063005/040e2c956948b15b3993101e852b91b1 to your computer and use it in GitHub Desktop.
一个用于查找目录中重复图片文件的脚本，可用来找出 Android 项目中重复的图片资源
import os
import os.path
import hashlib
import sys
# Module-level result table filled in by find_png(): maps an MD5 hex digest
# to the list of image file paths whose content produced that digest.
# Despite the name it is a dict, not an array.
hash_array = {}
def file_md5(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is opened in binary mode and hashed chunk by chunk, so large
    files are never loaded into memory in one piece.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The MD5 digest as a string of 32 hexadecimal characters.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    chunk_size = 64 * 1024
    md5 = hashlib.md5()
    # ``open`` replaces the ``file`` builtin, which no longer exists in
    # Python 3.
    with open(file_path, "rb") as f:
        # The sentinel has to be bytes: a binary read returns b"" at EOF, and
        # on Python 3 that never compares equal to the text string "", which
        # would make this loop spin forever.
        for block in iter(lambda: f.read(chunk_size), b""):
            md5.update(block)
    return md5.hexdigest()
def find_png(target_dir):
    """Walk ``target_dir`` recursively and group its image files by MD5 digest.

    Every file whose name ends in ``.png``, ``.webp`` or ``.jpg`` is hashed
    with ``file_md5`` and its path is appended to the list stored in the
    module-level ``hash_array`` dict under that digest. Nothing is returned;
    callers read the groups back from ``hash_array``.

    Args:
        target_dir: Root directory of the tree to scan.
    """
    image_suffixes = ('.png', '.webp', '.jpg')
    for parent, _subdirs, names in os.walk(target_dir):
        for name in names:
            if not name.endswith(image_suffixes):
                continue
            full_path = os.path.join(parent, name)
            digest = file_md5(full_path)
            same_content = hash_array.get(digest)
            if same_content is None:
                # No usable list under this digest yet (key missing or set
                # to None): start a fresh one.
                same_content = hash_array[digest] = []
            same_content.append(full_path)
if __name__ == '__main__':
    # Command-line entry point: scan the directory given as the first
    # argument and report every group of files with identical content.
    # Written with print(...) on a single pre-formatted string and
    # dict.items() so it runs on both Python 2 and Python 3 with the same
    # output text.
    if len(sys.argv) < 2:
        print('Usage: png_finder <dir>')
    else:
        find_png(sys.argv[1])
        file_count = 0  # redundant copies: (group size - 1) summed over groups
        file_size = 0   # total bytes occupied by the counted copies
        for digest, paths in hash_array.items():
            group_size = len(paths)
            if group_size > 1:
                # One file per group has to stay; all but one are counted
                # as duplicates.
                file_count += group_size - 1
                for path in paths[:-1]:
                    file_size += os.path.getsize(path)
                print('%s %s %s' % (digest, paths, group_size))
        print('duplicated file count %s' % file_count)
        print('duplicated file size %s B' % file_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment