Skip to content

Instantly share code, notes, and snippets.

@Taehun
Created April 3, 2012 12:19
Show Gist options
  • Save Taehun/2291528 to your computer and use it in GitHub Desktop.
Save Taehun/2291528 to your computer and use it in GitHub Desktop.
Python: 리눅스용 한/영 통합자막 생성 스크립트
#!/usr/bin/python
# -*- coding:utf-8 -*-
# Usage: ./merge_sub.py <subtitle file>
import sys
import os
import re
def _read_cues(lines, kr_pat, en_pat, end_pat):
    """Split SAMI lines into Korean and English cues.

    Returns a ``(korean, english)`` pair of lists of ``(sync_ms, text)``
    tuples.  A cue's text is the remainder of its ``<SYNC ...>`` line plus
    every following line (line endings removed) up to the next cue or the
    ``</BODY>`` line.
    """
    korean, english = [], []
    target = None  # list that will receive the cue currently being read
    sync = 0
    parts = []
    for ln in lines:
        match = kr_pat.search(ln)
        bucket = korean
        if match is None:
            match = en_pat.search(ln)
            bucket = english
        # A new cue header or the end of the body closes the open cue, so
        # the final cue of each language is recorded like any other.
        if match is not None or end_pat.search(ln):
            if target is not None:
                target.append((sync, b"".join(parts)))
            target = None
        if match is not None:
            target = bucket
            sync = int(match.group(1))
            # rstrip (not a fixed [:-2] slice) so CRLF, LF and a missing
            # final newline are all handled without eating real characters.
            parts = [match.group(2).rstrip(b"\r\n")]
        elif target is not None:
            parts.append(ln.rstrip(b"\r\n"))
    if target is not None:
        target.append((sync, b"".join(parts)))
    return korean, english


def _match_english(korean, english):
    """Map Korean cue sync times to the English texts displayed near them.

    An English cue is attached to a Korean cue when the Korean cue lives in
    the same one-second bucket as the English sync time or as that time
    shifted by one second either way.  Cues containing ``&nbsp`` (blank
    "clear the screen" cues) are ignored on both sides.
    """
    by_second = {}  # one-second bucket -> sync (ms) of a non-blank Korean cue
    for sync, text in korean:
        if b"&nbsp" not in text:
            # Floor division keeps integer buckets on Python 2 and 3 alike.
            by_second[sync // 1000] = sync
    matched = {}
    for sync, text in english:
        if b"&nbsp" in text:
            continue
        for near in (sync - 1000, sync, sync + 1000):
            kr_sync = by_second.get(near // 1000)
            if kr_sync is not None:
                matched.setdefault(kr_sync, []).append(text)
    return matched


def main(argv):
    """Merge the English track of a SAMI subtitle file into its Korean track.

    ``argv`` must hold exactly one element, the subtitle file path; otherwise
    a usage line is printed and nothing is touched.  The input is renamed to
    ``<path>.ori`` (kept as a backup) and a new file is written at ``<path>``
    in which

    * every line from the first English (``ENCC``) cue up to, but not
      including, the ``</BODY>`` line is dropped, and
    * each Korean (``KRCC``) cue header is followed by the matching English
      text plus ``<br>``, provided exactly one English cue matched it.

    The file is processed as raw bytes, so its text encoding and line
    endings are carried over unchanged.

    Raises whatever ``os.rename`` / ``open`` raise, e.g. when the input file
    does not exist.
    """
    if len(argv) != 1:
        print("Usage: merge_sub.py <subtitle file>")
        return

    # Bytes patterns: the SAMI markup is ASCII while the cue text may be in
    # any encoding.  The trailing group is ``.*`` so a header line with
    # nothing after the <P ...> tag also matches when lines end in bare LF.
    kr_pat = re.compile(br'<SYNC.+[sS].+=(\d+)><[pP].+KRCC>(.*)')
    en_pat = re.compile(br'<SYNC.+[sS].+=(\d+)><[pP].+ENCC>(.*)')
    end_pat = re.compile(br'</BODY>')

    path = argv[0]
    backup = path + ".ori"
    os.rename(path, backup)
    with open(backup, "rb") as src:
        lines = src.readlines()

    korean, english = _read_cues(lines, kr_pat, en_pat, end_pat)
    matched = _match_english(korean, english)

    with open(path, "wb") as dst:
        in_english = False
        for ln in lines:
            if en_pat.search(ln):
                in_english = True
            if end_pat.search(ln):
                in_english = False
            if not in_english:
                dst.write(ln)
            kr_match = kr_pat.search(ln)
            if kr_match:
                texts = matched.get(int(kr_match.group(1)), ())
                # Only an unambiguous match is merged; a Korean cue that
                # attracted several English cues is left untranslated.
                if len(texts) == 1:
                    dst.write(texts[0] + b"<br>")
# Script entry point: hand main() the command-line arguments without the
# program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
@Taehun
Copy link
Author

Taehun commented Apr 3, 2012

I'm a Python newbie. I'd appreciate feedback from Python folks.

@sanxiyn
Copy link

sanxiyn commented Apr 3, 2012

Use kr_match.group(1) instead of p1.sub(r'\1', kr_match.group()).

@sanxiyn
Copy link

sanxiyn commented Apr 3, 2012

Replace filter(lambda...) with plain if check inside for loop.

@Taehun
Copy link
Author

Taehun commented Apr 3, 2012

Thanks, sanxiyn.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment