Last active
June 4, 2018 06:31
-
-
Save 410063005/040e2c956948b15b3993101e852b91b1 to your computer and use it in GitHub Desktop.
一个可寻找目录中重复图片文件的脚本,用于找到 Android 项目中重复的图片资源
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib
import os
import os.path
import sys

# Maps an MD5 hex digest to the list of file paths whose content produced
# that digest. find_png() appends to it; the script entry point reads it to
# report groups that contain more than one path.
hash_array = {}
def file_md5(file_path, block_size=65536):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and consumed in chunks of
    *block_size* bytes, so the whole file is never held in memory at once.
    The chunk size does not affect the resulting digest.

    Any error raised while opening or reading the file (IOError/OSError)
    propagates to the caller.
    """
    md5 = hashlib.md5()
    # open() works on both Python 2 and 3; the file() builtin is Python 2 only.
    with open(file_path, "rb") as f:
        # The sentinel must be bytes: a binary read() returns b"" at EOF, and
        # on Python 3 that never compares equal to the text string "", which
        # would keep the loop running forever.
        for block in iter(lambda: f.read(block_size), b""):
            md5.update(block)
    return md5.hexdigest()
def find_png(target_dir):
    """Walk *target_dir* recursively and group image files by content hash.

    Each file whose name ends with ``.png``, ``.webp`` or ``.jpg`` is hashed
    with file_md5(), and its full path is appended to the list stored under
    that digest in the module-level ``hash_array`` dict. Files with identical
    content therefore end up in the same list.

    Returns None; the results are accumulated in ``hash_array``.
    """
    # The suffix comparison is case-sensitive, exactly as written here.
    image_suffixes = ('.png', '.webp', '.jpg')
    for root, _dirs, files in os.walk(target_dir):
        for item in files:
            if not item.endswith(image_suffixes):
                continue
            file_path = os.path.join(root, item)
            # setdefault creates the list on first sight of a digest and
            # returns the existing list afterwards.
            hash_array.setdefault(file_md5(file_path), []).append(file_path)
if __name__ == '__main__':
    # Script entry point: scan the directory named by the first command-line
    # argument, print each group of identical files, then print totals.
    if len(sys.argv) < 2:
        print('Usage: png_finder <dir>')
    else:
        find_png(sys.argv[1])
        file_count = 0
        file_size = 0
        # items() and single-argument print(...) run on Python 2 and 3 alike.
        for digest, paths in hash_array.items():
            group_len = len(paths)
            if group_len < 2:
                continue
            # One file per group is kept as the original; the remaining
            # group_len - 1 files are counted as duplicates.
            file_count += group_len - 1
            # Sum the sizes of every path in the group except the last one.
            file_size += sum(os.path.getsize(path) for path in paths[:-1])
            # NOTE(review): the source's indentation was lost; this line is
            # assumed to print only for groups with duplicates - confirm.
            print('%s %s %d' % (digest, paths, group_len))
        print('duplicated file count %d' % file_count)
        print('duplicated file size %d B' % file_size)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment