wyb330/imgmerge.py

## imgmerge.py
'''
python 패키지 설치

pip install opencv-python
pip install numpy
pip install bs4
'''

# sub/idx 자막인 경우 아래처럼 명령을 실행
# python imgmerge.py -i C:\sub\*.* -o C:\save_path

import glob
import os
import cv2
import numpy as np
from argparse import ArgumentParser
import re
from bs4 import BeautifulSoup


# Patterns for the "#<n>:<start>-><end>" timecode lines read from a VobSub index.html.
# Group 2 captures the "<start>-><end>" range; the three variants differ in how many
# colon-separated fields each time carries.
# Raw strings are used so the regex escapes (\d) are not parsed as (invalid) string escapes.
VOBTIMECODE1 = r'(#\d+:)(\d+:*\d+,\d+->\d+:\d+,\d+)(.)*'
VOBTIMECODE2 = r'(#\d+:)(\d+:\d+:*\d+,\d+->\d+:\d+:\d+,\d+)(.)*'
VOBTIMECODE3 = r'(#\d+:)(\d+,\d+->\d+,\d+)(.)*'

# Font face and scale handed to cv2.putText when text is stamped onto an image.
font = cv2.FONT_HERSHEY_TRIPLEX
font_size = 1.2


def display_text(img, text):
    """Draw ``text`` onto ``img`` in place with the module-level font settings.

    :param img: image array that cv2.putText draws on
    :param text: string to render
    """
    origin = (30, 40)
    black = (0, 0, 0)
    cv2.putText(img, text, origin, font, font_size, black, thickness=2)


def merge_images(images, output):
    """Join ``images`` along axis 0 and write the combined array to ``output``.

    :param images: sequence of arrays accepted by ``np.concatenate``
    :param output: destination file path passed to ``cv2.imwrite``
    """
    stacked = np.concatenate(images, axis=0)
    cv2.imwrite(output, stacked)


def to_srt_timestamp(total_seconds):
    """Format a duration as ``HH:MM:SS.mmm``.

    Despite the parameter name, the value is a count of *milliseconds*
    (the function divides it down to hours/minutes/seconds itself).

    :param total_seconds: non-negative duration in milliseconds
    :return: zero-padded timestamp string
    """
    # Work in whole milliseconds: the previous float arithmetic could round the
    # fractional part up to 1000 and emit a four-digit millisecond field.
    total_ms = int(round(total_seconds))
    whole_seconds, milliseconds = divmod(total_ms, 1000)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)

    return '{:02d}:{:02d}:{:02d}.{:03d}'.format(hours, minutes, seconds, milliseconds)


def str2time(time):
    """Convert an ``H_M_S_mmm`` string into integer milliseconds.

    :param time: underscore-separated hours, minutes, seconds and milliseconds
    :return: total number of milliseconds as an int
    :raises IndexError: if fewer than four fields are present
    :raises ValueError: if a field is not an integer
    """
    t = time.split('_')
    whole_seconds = int(t[0]) * 3600 + 60 * int(t[1]) + int(t[2])
    # Stay in integers: going through float seconds and truncating with int()
    # could drop a millisecond (e.g. 1.001 * 1000 -> 1000.9999999999999 -> 1000).
    return whole_seconds * 1000 + int(t[3])


def sub_image(img, rect):
    """Return the part of ``img`` selected by ``rect``.

    :param img: array indexed as ``img[row, column]``
    :param rect: ``[column, row, width, height]`` of the wanted region
    :return: the slice ``img[row:row + height, column:column + width]``
    """
    left, top, box_w, box_h = rect[0], rect[1], rect[2], rect[3]
    return img[top:top + box_h, left:left + box_w]


def copy_image(img1, img2):
    """Paste ``img2`` into the centre of ``img1`` and return ``img1``.

    ``img1`` is modified in place; the offsets are the floor of half the
    difference between the two heights and the two widths.

    :param img1: destination array
    :param img2: array to paste
    :return: ``img1`` (the same object that was passed in)
    """
    outer_h, outer_w = img1.shape[:2]
    inner_h, inner_w = img2.shape[:2]
    top = (outer_h - inner_h) // 2
    left = (outer_w - inner_w) // 2
    bottom = top + inner_h
    right = left + inner_w
    img1[top:bottom, left:right] = img2
    return img1


def detect_sub_area(image):
    """
    Locate the top of the subtitle region in an image.

    Areas without subtitles are filled with white, so the mean and standard
    deviation of each horizontal strip are used to decide whether the strip
    contains subtitle pixels. Strips are 5 rows high and scanned from the top;
    scanning stops at the first strip whose mean is below 240 and whose
    standard deviation is above 10.

    :param image: image that contains a subtitle
    :return: ``(width, height, top)`` where ``top`` is the row offset of the
             strip at which scanning stopped
    """
    height, width = image.shape[:2]
    strip_h = 5
    mean_limit = 240
    std_limit = 10

    strip = 0
    while (strip + 1) * strip_h < height:
        band = sub_image(image, [0, strip * strip_h, width, strip_h])
        if np.mean(band) < mean_limit and np.std(band) > std_limit:
            break
        strip += 1

    return width, height, strip * strip_h


def calc_batch_size(w, h):
    """Derive how many images go into one merged sheet from the image area.

    The result is ``int(10000 * 1000 / (w * h * 0.8))``; it is also printed.

    :param w: image width
    :param h: image height
    :return: batch size as an int
    """
    budget = 10000 * 1000
    size = w * h * 0.8
    batch = int(budget / size)
    print("batch size :{}".format(batch))
    return batch


def vob2timecode(vob):
    """Convert a ``<start>-><end>`` VobSub time range into ``<start> --> <end>``.

    Each side has the form ``[[H:]M:]S,mmm`` and is re-formatted with
    ``to_srt_timestamp``.

    :param vob: time range string with the two times separated by ``->``
    :return: ``'{start} --> {end}'`` with both times formatted as timestamps
    """

    def s2t(time):
        """Return the time ``[[H:]M:]S,mmm`` as integer milliseconds."""
        ts = time.split(',')
        ms = int(ts[1])
        t = ts[0].split(':')
        if len(t) == 3:
            whole_seconds = int(t[0]) * 3600 + 60 * int(t[1]) + int(t[2])
        elif len(t) == 2:
            whole_seconds = 60 * int(t[0]) + int(t[1])
        else:
            whole_seconds = int(t[0])
        # Integer arithmetic only: truncating float seconds * 1000 with int()
        # could lose a millisecond (e.g. 1.001 * 1000 -> 1000.9999999999999).
        return whole_seconds * 1000 + ms

    sub_t = vob.split('->')
    start = to_srt_timestamp(s2t(sub_t[0]))
    end = to_srt_timestamp(s2t(sub_t[1]))
    return '{} --> {}'.format(start, end)


def sub_timecode(regex1, regex2, regex3, line):
    """Extract and convert the timecode of ``line`` using the first matching pattern.

    The patterns are tried in the order given. For the first one that matches
    at the start of ``line``, its second group is passed to ``vob2timecode``
    and the result returned. If none matches, ``line`` is returned unchanged.

    :param regex1: compiled pattern tried first
    :param regex2: compiled pattern tried second
    :param regex3: compiled pattern tried last
    :param line: text line to inspect
    :return: converted timecode string, or ``line`` when nothing matched
    """
    for pattern in (regex1, regex2, regex3):
        found = pattern.match(line)
        if found is not None:
            return vob2timecode(found.group(2))
    return line


def vobsub_timecodes(html_file):
    """Read the timecodes listed in a VobSub HTML index.

    The file's text is extracted with BeautifulSoup; every line that starts
    with ``#`` is run through ``sub_timecode`` with the three VOBTIMECODE
    patterns.

    :param html_file: path of the HTML file, opened as UTF-8
    :return: list with one entry per ``#`` line, in file order
    """
    with open(html_file, 'r', encoding='utf8') as f:
        text = BeautifulSoup(f, "html.parser").get_text()
    patterns = [re.compile(source) for source in (VOBTIMECODE1, VOBTIMECODE2, VOBTIMECODE3)]
    return [
        sub_timecode(patterns[0], patterns[1], patterns[2], row)
        for row in text.split('\n')
        if row.startswith('#')
    ]


def filename2timecode(file):
    """Turn a ``<start>__<end>.<ext>`` file name into ``<start> --> <end>``.

    The extension (text after the last dot) and one trailing ``!`` are
    dropped, then the rest is split on ``__``. Both parts are parsed with
    ``str2time`` and formatted with ``to_srt_timestamp``. A name without
    ``__`` in that stem is returned unchanged.

    :param file: file name (without directory)
    :return: timecode range string, or ``file`` itself
    """
    stem = file.rpartition('.')[0]
    if stem.endswith('!'):
        stem = stem[:-1]
    pieces = stem.split('__')
    if len(pieces) == 1:
        return file
    begin = to_srt_timestamp(str2time(pieces[0]))
    finish = to_srt_timestamp(str2time(pieces[1]))
    return '{} --> {}'.format(begin, finish)


def vobsub_img_size(files):
    """Return the largest height and width among the given image files.

    Entries named ``index.html`` are skipped, as are files that
    ``cv2.imread`` cannot decode.

    :param files: iterable of file paths
    :return: ``(max_height, max_width)``; ``(0, 0)`` when no image was read
    """
    max_h, max_w = 0, 0
    for file in files:
        if os.path.basename(file) == 'index.html':
            continue
        img = cv2.imread(file)
        if img is None:
            # cv2.imread reports an unreadable / non-image file by returning
            # None instead of raising; ignore it rather than crash on .shape.
            continue
        h, w = img.shape[:2]
        max_h = max(max_h, h)
        max_w = max(max_w, w)

    return max_h, max_w


def main(path, save_path, batch_size=100, output=None):
    """Stamp each subtitle image with its timecode and merge them into tall PNG sheets.

    Two input layouts are handled:

    * sub/idx export: the matched files include an ``index.html``; timecodes
      are read from it and every image is centred on a white canvas sized
      from the largest image.
    * otherwise: timecodes come from the file names and every image is
      cropped to the subtitle area detected in the first readable image.

    :param path: glob pattern selecting the input files
    :param save_path: directory that receives ``sub_images<N>.png``
    :param batch_size: images per output sheet; ``0`` derives it from the
                       image area via ``calc_batch_size``
    :param output: unused; kept so existing callers keep working
    """
    # glob gives no ordering guarantee; sort so that the timecode/file pairing
    # below does not depend on the file system.
    files = sorted(glob.glob(path))
    if not files:
        print("합칠 이미지가 존재하지 않습니다.")
        return

    max_h, max_w = 0, 0
    index_files = [f for f in files if os.path.basename(f) == 'index.html']
    sub_idx = len(index_files) > 0
    if sub_idx:
        # For the sub/idx layout the timecodes come from the HTML file.
        # glob already returns usable paths, so the entry is opened directly
        # (joining it onto the glob pattern produced a bogus path).
        timecodes = vobsub_timecodes(index_files[-1])
        files = [f for f in files if os.path.basename(f) != 'index.html']
        max_h, max_w = vobsub_img_size(files)
        max_h += 120
        max_w = max(700, max_w)
        print('자막 크기: {} * {}'.format(max_w, max_h))
        rect = [0, 0, max_w, max_h]
    else:
        # Use the first file that actually decodes as an image as reference.
        reference = next(
            (img for img in (cv2.imread(f) for f in files) if img is not None),
            None,
        )
        if reference is None:
            print("합칠 이미지가 존재하지 않습니다.")
            return
        width, height, top = detect_sub_area(reference)
        timecodes = [filename2timecode(os.path.basename(file)) for file in files]
        h_t = 40 * 2  # height reserved above the subtitle for the timecode text
        # Clamp at 0: a negative start row would make the slice wrap around.
        crop_top = max(0, top - h_t)
        rect = [0, crop_top, width, height - crop_top]
        print("자막 영역 : {}".format(rect))

    if batch_size == 0:
        batch_size = calc_batch_size(rect[2], rect[3])
    if save_path:
        # cv2.imwrite does not create missing directories.
        os.makedirs(save_path, exist_ok=True)

    images = []
    index = 0
    h, w = 0, 0

    for timecode, file in zip(timecodes, files):
        img = cv2.imread(file)
        if img is None:
            print("이미지를 읽을 수 없어 건너뜁니다: {}".format(file))
            continue
        if sub_idx:
            # sub/idx exports contain only the subtitle bitmap and every file
            # has a different size, so paste it onto a fresh white canvas.
            # uint8 is required: 255 does not fit into int8.
            canvas = np.full((max_h, max_w, 3), 255, np.uint8)
            img = copy_image(canvas, img)
        else:
            img = sub_image(img, rect)

        display_text(img, timecode)

        if index == 0:
            h, w = img.shape[:2]
        elif img.shape[:2] != (h, w):
            # np.concatenate needs identical shapes; match the first image.
            img = cv2.resize(img, (w, h))
        images.append(img)
        index += 1
        if len(images) >= batch_size:
            merge_images(images, os.path.join(save_path, "sub_images{}.png".format(index)))
            images = []

    if len(images) > 0:
        merge_images(images, os.path.join(save_path, "sub_images{}.png".format(index)))


if __name__ == "__main__":
    # Command-line entry point: -i input glob, -o output directory,
    # -b number of images per merged sheet (0 lets main() compute it).
    cli = ArgumentParser()
    cli.add_argument("-i", help="자막 이미지", required=True)
    cli.add_argument("-o", help="저장할 디렉토리", required=True)
    cli.add_argument("-b", default=0, type=int, help="합칠 이미지 단위")
    options = cli.parse_args()
    main(path=options.i, save_path=options.o, batch_size=options.b)
	'''
	python 패키지 설치

	pip install opencv-python
	pip install numpy
	pip install bs4
	'''

	# sub/idx 자막인 경우 아래처럼 명령을 실행
	# python imgmerge.py -i C:\sub\*.* -o C:\save_path

	import glob
	import os
	import cv2
	import numpy as np
	from argparse import ArgumentParser
	import re
	from bs4 import BeautifulSoup


	# Patterns for the "#<n>:<start>-><end>" timecode lines read from a VobSub index.html.
	# Group 2 captures the "<start>-><end>" range; the three variants differ in how many
	# colon-separated fields each time carries.
	# The "*" quantifiers that were lost from the first two patterns are restored, and raw
	# strings are used so the regex escapes (\d) are not parsed as string escapes.
	VOBTIMECODE1 = r'(#\d+:)(\d+:*\d+,\d+->\d+:\d+,\d+)(.)*'
	VOBTIMECODE2 = r'(#\d+:)(\d+:\d+:*\d+,\d+->\d+:\d+:\d+,\d+)(.)*'
	VOBTIMECODE3 = r'(#\d+:)(\d+,\d+->\d+,\d+)(.)*'

	# Font face and scale handed to cv2.putText when text is stamped onto an image.
	font = cv2.FONT_HERSHEY_TRIPLEX
	font_size = 1.2


	def display_text(img, text):
		"""Draw ``text`` onto ``img`` in place with the module-level font settings.

		:param img: image array that cv2.putText draws on
		:param text: string to render
		"""
		cv2.putText(img, text, (30, 40), font, font_size, (0, 0, 0), thickness=2)


	def merge_images(images, output):
		"""Join ``images`` along axis 0 and write the combined array to ``output``.

		:param images: sequence of arrays accepted by ``np.concatenate``
		:param output: destination file path passed to ``cv2.imwrite``
		"""
		result = np.concatenate(images, axis=0)
		cv2.imwrite(output, result)


	def to_srt_timestamp(total_seconds):
		"""Format a duration as ``HH:MM:SS.mmm``.

		Despite the parameter name, the value is a count of *milliseconds*
		(the function divides it down to hours/minutes/seconds itself).

		:param total_seconds: non-negative duration in milliseconds
		:return: zero-padded timestamp string
		"""
		# Work in whole milliseconds: float arithmetic could round the
		# fractional part up to 1000 and emit a four-digit millisecond field.
		total_ms = int(round(total_seconds))
		whole_seconds, milliseconds = divmod(total_ms, 1000)
		whole_minutes, seconds = divmod(whole_seconds, 60)
		hours, minutes = divmod(whole_minutes, 60)

		return '{:02d}:{:02d}:{:02d}.{:03d}'.format(hours, minutes, seconds, milliseconds)


	def str2time(time):
		"""Convert an ``H_M_S_mmm`` string into integer milliseconds.

		:param time: underscore-separated hours, minutes, seconds and milliseconds
		:return: total number of milliseconds as an int
		:raises IndexError: if fewer than four fields are present
		:raises ValueError: if a field is not an integer
		"""
		t = time.split('_')
		whole_seconds = int(t[0]) * 3600 + 60 * int(t[1]) + int(t[2])
		# Stay in integers: going through float seconds and truncating with
		# int() could drop a millisecond.
		return whole_seconds * 1000 + int(t[3])


	def sub_image(img, rect):
		"""Return the part of ``img`` selected by ``rect``.

		:param img: array indexed as ``img[row, column]``
		:param rect: ``[column, row, width, height]`` of the wanted region
		:return: the slice ``img[row:row + height, column:column + width]``
		"""
		column, row, width, height = rect[0], rect[1], rect[2], rect[3]
		return img[row:row + height, column:column + width]


	def copy_image(img1, img2):
		"""Paste ``img2`` into the centre of ``img1`` and return ``img1``.

		``img1`` is modified in place; the offsets are the floor of half the
		difference between the two heights and the two widths.

		:param img1: destination array
		:param img2: array to paste
		:return: ``img1`` (the same object that was passed in)
		"""
		h1, w1 = img1.shape[:2]
		h2, w2 = img2.shape[:2]
		row = (h1 - h2) // 2
		column = (w1 - w2) // 2
		img1[row:row + h2, column:column + w2] = img2
		return img1


	def detect_sub_area(image):
		"""
		Locate the top of the subtitle region in an image.

		Areas without subtitles are filled with white, so the mean and standard
		deviation of each horizontal strip are used to decide whether the strip
		contains subtitle pixels. Strips are 5 rows high and scanned from the
		top; scanning stops at the first strip whose mean is below 240 and whose
		standard deviation is above 10.

		:param image: image that contains a subtitle
		:return: ``(width, height, top)`` where ``top`` is the row offset of
		         the strip at which scanning stopped
		"""
		(H, W) = image.shape[:2]
		index = 0
		d = 5
		threshold = 240

		while (index + 1) * d < H:
			img = sub_image(image, [0, index * d, W, d])
			avg = np.mean(img)
			std = np.std(img)
			if (avg < threshold) and (std > 10):
				break
			index += 1

		return W, H, index * d


	def calc_batch_size(w, h):
		"""Derive how many images go into one merged sheet from the image area.

		The result is ``int(10000 * 1000 / (w * h * 0.8))``; it is also printed.

		:param w: image width
		:param h: image height
		:return: batch size as an int
		"""
		size = w * h * 0.8
		batch = int(10000 * 1000 / size)
		print("batch size :{}".format(batch))
		return batch


	def vob2timecode(vob):
		"""Convert a ``<start>-><end>`` VobSub time range into ``<start> --> <end>``.

		Each side has the form ``[[H:]M:]S,mmm`` and is re-formatted with
		``to_srt_timestamp``.

		:param vob: time range string with the two times separated by ``->``
		:return: ``'{start} --> {end}'`` with both times formatted as timestamps
		"""

		def s2t(time):
			"""Return the time ``[[H:]M:]S,mmm`` as integer milliseconds."""
			ts = time.split(',')
			ms = int(ts[1])
			t = ts[0].split(':')
			if len(t) == 3:
				whole_seconds = int(t[0]) * 3600 + 60 * int(t[1]) + int(t[2])
			elif len(t) == 2:
				whole_seconds = 60 * int(t[0]) + int(t[1])
			else:
				whole_seconds = int(t[0])
			# Integer arithmetic only: truncating float seconds * 1000 with
			# int() could lose a millisecond.
			return whole_seconds * 1000 + ms

		sub_t = vob.split('->')
		start = to_srt_timestamp(s2t(sub_t[0]))
		end = to_srt_timestamp(s2t(sub_t[1]))
		return '{} --> {}'.format(start, end)


	def sub_timecode(regex1, regex2, regex3, line):
		"""Extract and convert the timecode of ``line`` using the first matching pattern.

		The patterns are tried in the order given. For the first one that
		matches at the start of ``line``, its second group is passed to
		``vob2timecode`` and the result returned. If none matches, ``line`` is
		returned unchanged.

		:param regex1: compiled pattern tried first
		:param regex2: compiled pattern tried second
		:param regex3: compiled pattern tried last
		:param line: text line to inspect
		:return: converted timecode string, or ``line`` when nothing matched
		"""
		for regex in (regex1, regex2, regex3):
			m = regex.match(line)
			if m is not None:
				return vob2timecode(m.group(2))
		return line


	def vobsub_timecodes(html_file):
		"""Read the timecodes listed in a VobSub HTML index.

		The file's text is extracted with BeautifulSoup; every line that starts
		with ``#`` is run through ``sub_timecode`` with the three VOBTIMECODE
		patterns.

		:param html_file: path of the HTML file, opened as UTF-8
		:return: list with one entry per ``#`` line, in file order
		"""
		with open(html_file, 'r', encoding='utf8') as f:
			soup = BeautifulSoup(f, "html.parser")
			lines = soup.get_text().split('\n')
		regex1 = re.compile(VOBTIMECODE1)
		regex2 = re.compile(VOBTIMECODE2)
		regex3 = re.compile(VOBTIMECODE3)
		return [sub_timecode(regex1, regex2, regex3, line) for line in lines if line.startswith('#')]


	def filename2timecode(file):
		"""Turn a ``<start>__<end>.<ext>`` file name into ``<start> --> <end>``.

		The extension (text after the last dot) and one trailing ``!`` are
		dropped, then the rest is split on ``__``. Both parts are parsed with
		``str2time`` and formatted with ``to_srt_timestamp``. A name without
		``__`` in that stem is returned unchanged.

		:param file: file name (without directory)
		:return: timecode range string, or ``file`` itself
		"""
		sub_t = '.'.join(file.split('.')[:-1])
		if sub_t.endswith('!'):
			sub_t = sub_t[:-1]
		sub_t = sub_t.split('__')
		if len(sub_t) == 1:
			return file
		start = to_srt_timestamp(str2time(sub_t[0]))
		end = to_srt_timestamp(str2time(sub_t[1]))
		return '{} --> {}'.format(start, end)


	def vobsub_img_size(files):
		"""Return the largest height and width among the given image files.

		Entries named ``index.html`` are skipped, as are files that
		``cv2.imread`` cannot decode.

		:param files: iterable of file paths
		:return: ``(max_height, max_width)``; ``(0, 0)`` when no image was read
		"""
		max_h, max_w = 0, 0
		for file in files:
			if os.path.basename(file) == 'index.html':
				continue
			img = cv2.imread(file)
			if img is None:
				# cv2.imread reports an unreadable / non-image file by
				# returning None instead of raising.
				continue
			h, w = img.shape[:2]
			max_h = max(max_h, h)
			max_w = max(max_w, w)

		return max_h, max_w


	def main(path, save_path, batch_size=100, output=None):
		"""Stamp each subtitle image with its timecode and merge them into tall PNG sheets.

		Two input layouts are handled:

		* sub/idx export: the matched files include an ``index.html``;
		  timecodes are read from it and every image is centred on a white
		  canvas sized from the largest image.
		* otherwise: timecodes come from the file names and every image is
		  cropped to the subtitle area detected in the first readable image.

		:param path: glob pattern selecting the input files
		:param save_path: directory that receives ``sub_images<N>.png``
		:param batch_size: images per output sheet; ``0`` derives it from the
		                   image area via ``calc_batch_size``
		:param output: unused; kept so existing callers keep working
		"""
		# glob gives no ordering guarantee; sort so that the timecode/file
		# pairing below does not depend on the file system.
		files = sorted(glob.glob(path))
		if not files:
			print("합칠 이미지가 존재하지 않습니다.")
			return

		max_h, max_w = 0, 0
		index_files = [f for f in files if os.path.basename(f) == 'index.html']
		sub_idx = len(index_files) > 0
		if sub_idx:
			# For the sub/idx layout the timecodes come from the HTML file.
			# glob already returns usable paths, so the entry is opened
			# directly instead of being joined onto the glob pattern.
			timecodes = vobsub_timecodes(index_files[-1])
			files = [f for f in files if os.path.basename(f) != 'index.html']
			max_h, max_w = vobsub_img_size(files)
			max_h += 120
			max_w = max(700, max_w)
			print('자막 크기: {} * {}'.format(max_w, max_h))
			rect = [0, 0, max_w, max_h]
		else:
			# Use the first file that actually decodes as an image as reference.
			reference = next(
				(img for img in (cv2.imread(f) for f in files) if img is not None),
				None,
			)
			if reference is None:
				print("합칠 이미지가 존재하지 않습니다.")
				return
			width, height, top = detect_sub_area(reference)
			timecodes = [filename2timecode(os.path.basename(file)) for file in files]
			h_t = 40 * 2  # height reserved above the subtitle for the timecode text
			# Clamp at 0: a negative start row would make the slice wrap around.
			crop_top = max(0, top - h_t)
			rect = [0, crop_top, width, height - crop_top]
			print("자막 영역 : {}".format(rect))

		if batch_size == 0:
			batch_size = calc_batch_size(rect[2], rect[3])
		if save_path:
			# cv2.imwrite does not create missing directories.
			os.makedirs(save_path, exist_ok=True)

		images = []
		index = 0
		h, w = 0, 0

		for timecode, file in zip(timecodes, files):
			img = cv2.imread(file)
			if img is None:
				print("이미지를 읽을 수 없어 건너뜁니다: {}".format(file))
				continue
			if sub_idx:
				# sub/idx exports contain only the subtitle bitmap and every
				# file has a different size, so paste it onto a fresh white
				# canvas. uint8 is required: 255 does not fit into int8.
				canvas = np.full((max_h, max_w, 3), 255, np.uint8)
				img = copy_image(canvas, img)
			else:
				img = sub_image(img, rect)

			display_text(img, timecode)

			if index == 0:
				h, w = img.shape[:2]
			elif img.shape[:2] != (h, w):
				# np.concatenate needs identical shapes; match the first image.
				img = cv2.resize(img, (w, h))
			images.append(img)
			index += 1
			if len(images) >= batch_size:
				merge_images(images, os.path.join(save_path, "sub_images{}.png".format(index)))
				images = []

		if len(images) > 0:
			merge_images(images, os.path.join(save_path, "sub_images{}.png".format(index)))


	if __name__ == "__main__":
		# Command-line entry point: -i input glob, -o output directory,
		# -b number of images per merged sheet (0 lets main() compute it).
		parser = ArgumentParser()
		parser.add_argument("-i", help="자막 이미지", required=True)
		parser.add_argument("-o", help="저장할 디렉토리", required=True)
		parser.add_argument("-b", default=0, type=int, help="합칠 이미지 단위")
		args = parser.parse_args()
		main(args.i, args.o, args.b)