Skip to content

Instantly share code, notes, and snippets.

@t2psyto
Created June 3, 2015 15:35
Show Gist options
  • Save t2psyto/18f7188415086f7e70c1 to your computer and use it in GitHub Desktop.
Save t2psyto/18f7188415086f7e70c1 to your computer and use it in GitHub Desktop.
.srt ファイルを日本語翻訳するツール。注意:節度をもって使うこと。google translate にChrome(selenium webdriver)の自動操作でアクセスします。
# -*- coding:utf-8 -*-
from selenium import webdriver
# Directory holding the subtitle files; translated copies are saved here too.
sourcedir = "/tmp"

# Names of the .srt files (relative to ``sourcedir``) to translate.
sourcefiles = [
    "subfile1.srt",
    "subfile2.srt",
]

# Desktop-Chrome user agent string. Nothing in the visible part of this file
# reads it.
user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36"
)
import time
class GoogletTanslate(object):
    """Translate text by driving the Google Translate web page with Selenium.

    One browser session is started lazily and then reused for every call to
    ``translate``. The caller is responsible for shutting ``driver`` down.
    """

    # Class-level default; an instance gets its own driver on first use.
    driver = None
    # Path of the chromedriver executable passed to ``webdriver.Chrome``.
    chromedriver = "/Users/t2psyto/Downloads/chromedriver"
    # Path of a PhantomJS executable; no active code in this class reads it.
    phantomjs = "/Users/t2psyto/bin/phantomjs"
    # Seconds to pause after the source text has been filled in.
    input_wait = 2
    # Seconds to pause after submitting, before the result is read.
    result_wait = 3

    def create_driver(self):
        """Return the browser driver, starting Chrome if none exists yet."""
        if self.driver is None:
            self.driver = webdriver.Chrome(executable_path=self.chromedriver)
        return self.driver

    def open_google_translate(self):
        """Load the Google Translate page (Japanese UI) in the browser."""
        self.create_driver()
        self.driver.get("https://translate.google.com/?hl=ja")

    def _find(self, xpath):
        """Return the first page element matching *xpath*."""
        # The generic find_element(by, value) call is used because the
        # find_element_by_* shortcuts are deprecated and missing from recent
        # Selenium releases; "xpath" is the value of By.XPATH.
        return self.driver.find_element("xpath", xpath)

    def translate(self, inputtext):
        """Submit *inputtext* to the open page and return the translated text.

        The page is opened first when no browser session exists yet, so the
        method no longer fails on a ``None`` driver if it is called before
        ``open_google_translate``.

        :param inputtext: text to translate; may contain several lines.
        :returns: the text shown in the page's result box.
        """
        if self.driver is None:
            self.open_google_translate()

        # Click the first entry of the "gt-sl-sugg" and "gt-tl-sugg"
        # containers (presumably the source / target language suggestions).
        self._find('//*[@id="gt-sl-sugg"]/div[1]').click()
        self._find('//*[@id="gt-tl-sugg"]/div[1]').click()

        # Set the source field's value through JavaScript instead of typing
        # the text in key by key.
        source_box = self._find('//*[@id="source"]')
        self.driver.execute_script(
            "arguments[0].value=arguments[1]", source_box, inputtext)
        time.sleep(self.input_wait)

        # Trigger the translation, then give the page time to render it.
        self._find('//*[@id="gt-lang-submit"]').click()
        time.sleep(self.result_wait)

        # Read the translated text back from the result box.
        return self._find('//*[@id="result_box"]').text
if __name__ == '__main__':
    # Script entry point: for every file in ``sourcefiles``, send all subtitle
    # texts to Google Translate in one request and save a bilingual copy
    # named "ja_<file>" next to the original.
    import os

    import pysrt

    gt = GoogletTanslate()
    try:
        gt.open_google_translate()
        for sourcefile in sourcefiles:
            print("%s : open." % sourcefile)
            subs = pysrt.open(os.path.join(sourcedir, sourcefile))

            # One line of text per subtitle, so that line i of the
            # translation can be paired with subtitle i afterwards.
            suben = []
            for sub in subs:
                stren = sub.text.replace("\n", " ")
                # Fill empty subtitles with a placeholder character so the
                # number of lines stays aligned.
                if stren.strip() == "":
                    stren = "-"
                suben.append(stren)
            srten = u"\n".join(suben)

            print("%s : translate begin." % sourcefile)
            retja = gt.translate(srten)
            print("%s : translate done." % sourcefile)

            lija = retja.split(u"\n")
            if len(lija) != len(suben):
                # Indexing past the end of a short result used to abort the
                # whole run with IndexError; report the mismatch and leave
                # the missing translations empty instead.
                print("%s : warning: sent %d lines, got %d back."
                      % (sourcefile, len(suben), len(lija)))

            for i, sub in enumerate(subs):
                subtextja = lija[i] if i < len(lija) else u""
                # Subtitle layout: line 1 English, line 2 Japanese.
                sub.text = suben[i] + "\n" + subtextja

            subs.save(os.path.join(sourcedir, "ja_" + sourcefile), "utf-8")
            print("%s : saved." % sourcefile)
    finally:
        # quit() rather than close(): it ends the whole browser session
        # instead of only closing the current window, and the finally clause
        # runs it even when a file fails part-way through.
        if gt.driver is not None:
            gt.driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment