Last active
November 5, 2018 13:40
-
-
Save samsee/91632653e5c5f97f3e18f4734c071e5a to your computer and use it in GitHub Desktop.
Duplicate image search on Synology Photo Station
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Exploratory walk: print each directory, its remaining sub-directories and
# the number of files it contains.
import os

rootdir = "/var/nas/photo/"
for root, subdirs, files in os.walk(rootdir):
    # Removing an entry from `subdirs` in place stops os.walk (top-down) from
    # descending into it. Not every directory contains '@eaDir', so check
    # first; an unconditional remove() raises ValueError.
    if '@eaDir' in subdirs:
        subdirs.remove('@eaDir')
    print(root, subdirs, len(files))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Main code: walk the photo tree, hash every image file and store one
# (full path, hash string) row per image in the database.
import os

rootdir = "/var/nas/photo"
try:
    for root, subdirs, files in os.walk(rootdir):
        # Prune '@eaDir' in place so os.walk does not descend into it. The
        # entry is absent in many directories, so guard the removal instead
        # of letting remove() raise ValueError.
        if '@eaDir' in subdirs:
            subdirs.remove('@eaDir')
        files = get_image_files(root)
        for f in files:
            count()
            image_path = os.path.join(root, f)
            try:
                h = get_image_average_hash(image_path)
                create_row(conn, (image_path, str(h)))
            except Exception as exc:
                # Best effort: report the failing file and keep going.
                # Catching Exception (not a bare except) lets Ctrl-C and
                # SystemExit still stop the run.
                print("Error on", image_path, type(exc))
        conn.commit()  # commit once per processed directory
finally:
    # Release the connection even when the walk is interrupted.
    conn.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_image_average_hash(file_fullpath):
    """Compute the average hash of the image stored at ``file_fullpath``.

    Args:
        file_fullpath (str): path of the image file to open.

    Returns:
        The result of ``imagehash.average_hash`` for the opened image.

    Raises:
        Whatever ``PIL.Image.open`` or ``imagehash.average_hash`` raise for
        a missing or unreadable file; nothing is caught here.
    """
    # TODO Check file_fullpath
    from PIL import Image
    import imagehash

    # Use the image as a context manager so its file handle is released
    # after hashing; otherwise one handle leaks per processed image.
    with Image.open(file_fullpath) as im:
        return imagehash.average_hash(im)
# Collect the image files of a directory as a list
def get_image_files(path):
    """Return the names of the image files found directly in ``path``.

    Args:
        path (str): directory to search.

    Returns:
        list: entries of ``os.listdir(path)`` whose names end in one of the
        extensions bmp, gif, jpeg, jpg, png or tif (case-insensitive), in
        the order ``os.listdir`` produced them.
    """
    import os
    import re

    # Filter by image file extension
    image_name = re.compile(r'\.(bmp|gif|jpeg|jpg|png|tif)$', re.IGNORECASE)
    matches = []
    for entry in os.listdir(path):
        if image_name.search(entry):
            matches.append(entry)
    return matches
cnt = 0


# Tally of processed images; progress is printed every 100 images
def count():
    """Increment the module-level ``cnt`` and report progress.

    On every call where ``cnt`` becomes a multiple of 100, prints the
    current timestamp followed by the running total.
    """
    import datetime
    global cnt
    cnt = cnt + 1
    if cnt % 100:
        return
    print(datetime.datetime.now(), cnt)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ImageHash smoke test: hash one known photo and print the result.
import imagehash  # needed at module scope; the original cell used it without importing it
from PIL import Image

img_path = '/var/nas/photo/Family/'
img_name = '20141122_195058.jpg'
# Open the image as a context manager so the file handle is released.
with Image.open(img_path + img_name) as im:
    h = imagehash.average_hash(im)
print(h)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Database setup
# Based on http://www.sqlitetutorial.net/sqlite-python/create-tables/
import sqlite3

# Open the SQLite database file (created if it does not exist yet)
conn = sqlite3.connect("img_dup_find.db")
# Table-creation helper
def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement

    Database errors are printed instead of being raised, so the caller
    always gets ``None`` back.

    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return: None
    """
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except sqlite3.Error as e:
        # The bare name `Error` is never imported in this file; qualifying it
        # with the sqlite3 module avoids a NameError masking the real error.
        print(e)
def create_row(conn, img_dup):
    """
    Insert one record into the img_dup table.

    The statement is executed but not committed here.

    :param conn: database connection used to run the INSERT
    :param img_dup: (file name, hash value) pair bound to the two placeholders
    :return: id of the inserted row (the cursor's ``lastrowid``)
    """
    insert_stmt = ''' INSERT INTO img_dup(file_name,hash)
              VALUES(?,?) '''
    # Connection.execute() creates a cursor, runs the statement on it and
    # returns that cursor.
    return conn.execute(insert_stmt, img_dup).lastrowid
# Schema of the img_dup table: an integer primary key plus the file name
# and hash string stored for each image.
sql_create_img_dup_table = """ CREATE TABLE IF NOT EXISTS img_dup (
                                    id integer PRIMARY KEY,
                                    file_name text NOT NULL,
                                    hash varchar(16) NOT NULL
                                ); """

# Create the table
create_table(conn, sql_create_img_dup_table)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment