Skip to content

Instantly share code, notes, and snippets.

@leyafo
Created June 12, 2015 15:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leyafo/5cad8e3689c96ef61e61 to your computer and use it in GitHub Desktop.
Save leyafo/5cad8e3689c96ef61e61 to your computer and use it in GitHub Desktop.
srt subtitles parse
import os, sys;
import distutils.file_util as file_util
def iterate_path(source):
    """Yield the names of the regular files found directly inside *source*.

    Only the bare file name is yielded, not a path joined with *source*.
    Sub-directories are not descended into.  Entries whose name starts
    with a dot, and the file named ``tmp.py``, are skipped.

    Args:
        source: Path of the directory to list.

    Yields:
        File names in whatever order ``os.listdir`` reports them.

    Raises:
        OSError: If *source* cannot be listed.
    """
    for name in os.listdir(source):
        # Hidden files and the 'tmp.py' file are never reported.
        if name.startswith(".") or name == 'tmp.py':
            continue
        if os.path.isfile(os.path.join(source, name)):
            yield name
def gbk2utf(s):
    """Re-encode a GB18030 byte string as UTF-8.

    Args:
        s: Byte string (``str`` on Python 2, ``bytes`` on Python 3)
            holding GB18030-encoded text.

    Returns:
        The same text as a UTF-8 encoded byte string.

    Raises:
        UnicodeDecodeError: If *s* is not valid GB18030.
    """
    return s.decode('gb18030').encode('utf-8')
def parse(fname):
    """Extract the cue text of one GB18030 .srt file into a UTF-8 .txt file.

    Every line containing ``-->`` (the cue timing line) starts a cue; the
    non-empty lines that follow are its text, and the next blank line (or
    the end of the file) ends it.  Each cue becomes one output line, with
    the lines of a multi-line cue separated by a tab.  Everything outside
    a cue (such as the numeric cue index) is ignored.

    The output file is written next to the input.  Its name is the second
    dot-separated component of the input name when there are at least
    three components (``Show.S01E01.srt`` -> ``S01E01.txt``); otherwise it
    is the input name without its extension (``movie.srt`` ->
    ``movie.txt``).

    Args:
        fname: Path of the subtitle file to read.

    Raises:
        IOError/OSError: If either file cannot be opened.
        UnicodeDecodeError: If a line is not valid GB18030.
    """
    directory, base = os.path.split(fname)
    pieces = base.split('.')
    if len(pieces) >= 3:
        stem = pieces[1]
    else:
        # Fewer than two dots: there is no "middle" component to pick.
        stem = os.path.splitext(base)[0]
    out_path = os.path.join(directory, stem + ".txt")

    # Binary mode on both sides: gbk2utf takes raw bytes and returns UTF-8
    # bytes, which behaves the same on Python 2 and Python 3.
    with open(fname, "rb") as src, open(out_path, "wb") as dst:
        collecting = False
        cue_lines = []
        for raw in src:
            line = gbk2utf(raw.strip())
            if b'-->' in line:
                collecting = True
                continue
            if not collecting:
                continue
            if line:
                cue_lines.append(line)
            else:
                # A blank line terminates the current cue.
                dst.write(b'\t'.join(cue_lines) + b'\n')
                cue_lines = []
                collecting = False
        if collecting:
            # The file ended without a trailing blank line; do not lose
            # the final cue.
            dst.write(b'\t'.join(cue_lines) + b'\n')
if __name__ == "__main__":
    # Usage: <script> <directory containing the subtitle files>
    if len(sys.argv) < 2:
        sys.exit("usage: %s <subtitle-directory>" % sys.argv[0])
    source = sys.argv[1]
    # iterate_path yields bare file names and parse opens exactly the name
    # it is given, so run from inside the directory; otherwise the names
    # would be resolved against the wrong working directory.
    os.chdir(source)
    for fname in iterate_path(os.curdir):
        parse(fname)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment