Created
June 3, 2015 15:35
-
-
Save t2psyto/18f7188415086f7e70c1 to your computer and use it in GitHub Desktop.
.srt ファイルを日本語に翻訳するツール。注意: 節度をもって使うこと。Chrome (Selenium WebDriver) の自動操作で Google Translate にアクセスします。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*-
from selenium import webdriver
# Browser User-Agent string. No active code in this file reads it; it was
# presumably meant for the disabled PhantomJS setup -- TODO confirm.
user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36"
)

# Directory that holds the input .srt files; the "ja_"-prefixed results are
# written to the same directory by the script section.
sourcedir = "/tmp"
# Names of the subtitle files (inside ``sourcedir``) to translate.
sourcefiles = ["subfile1.srt", "subfile2.srt"]

import time
class GoogletTanslate(object):
    """Drive the Google Translate web page through a Selenium WebDriver.

    Typical use: call ``open_google_translate()`` once, then ``translate()``
    for each text, and finally shut down ``driver`` yourself.
    """

    # WebDriver instance; stays None until create_driver() starts a browser.
    driver = None
    # Machine-specific paths to the driver executables.
    chromedriver = "/Users/t2psyto/Downloads/chromedriver"
    phantomjs = "/Users/t2psyto/bin/phantomjs"

    def create_driver(self):
        """Return this instance's WebDriver, starting Chrome on first use."""
        if self.driver is None:
            self.driver = webdriver.Chrome(executable_path=self.chromedriver)
            # A PhantomJS-based driver (see ``phantomjs``) was used here at
            # some point; that variant is disabled.
        return self.driver

    def open_google_translate(self):
        """Load the Google Translate page (Japanese UI) in the browser."""
        self.create_driver()
        self.driver.get("https://translate.google.com/?hl=ja")

    def translate(self, inputtext, input_wait=2, result_wait=3):
        """Translate ``inputtext`` via the loaded page and return the result.

        Args:
            inputtext: Text to translate; newlines separate the lines.
            input_wait: Seconds to sleep after filling the source box.
            result_wait: Seconds to sleep after submitting, before the
                result is read.

        Returns:
            The text of the page's ``result_box`` element.

        The element ids used below are those of the page layout this script
        was written against; Selenium raises its own lookup error if an
        element is missing.
        """
        # Open the page on demand so a forgotten open_google_translate()
        # does not end in an AttributeError on None.
        if self.driver is None:
            self.open_google_translate()
        driver = self.driver

        # Click the first entry of the source-language suggestions, then the
        # first entry of the target-language suggestions.
        driver.find_element_by_xpath('//*[@id="gt-sl-sugg"]/div[1]').click()
        driver.find_element_by_xpath('//*[@id="gt-tl-sugg"]/div[1]').click()

        # Put the text into the source box by setting its value from
        # JavaScript rather than typing it.
        source_box = driver.find_element_by_xpath('//*[@id="source"]')
        driver.execute_script(
            "arguments[0].value=arguments[1]", source_box, inputtext
        )
        time.sleep(input_wait)

        # Submit the form and give the page time to produce the result.
        driver.find_element_by_xpath('//*[@id="gt-lang-submit"]').click()
        time.sleep(result_wait)

        return driver.find_element_by_xpath('//*[@id="result_box"]').text
def flatten_subtitle_texts(texts):
    """Return one single-line string per subtitle text.

    Newlines inside a text become spaces. A text that is blank afterwards is
    replaced by "-" so that every subtitle still occupies exactly one line
    when the texts are joined for translation.
    """
    single_lines = (text.replace("\n", " ") for text in texts)
    return [line if line.strip() != "" else "-" for line in single_lines]


def pair_with_translations(originals, translations):
    """Return "original<newline>translation" for each original line.

    An original with no matching translation line is returned unchanged, and
    surplus translation lines are ignored, so a line-count mismatch cannot
    raise IndexError.
    """
    paired = []
    for position, original in enumerate(originals):
        if position < len(translations):
            paired.append(original + "\n" + translations[position])
        else:
            paired.append(original)
    return paired


if __name__ == '__main__':
    import pysrt

    gt = GoogletTanslate()
    try:
        gt.open_google_translate()
        for sourcefile in sourcefiles:
            print("%s : open." % sourcefile)
            subs = pysrt.open(sourcedir + "/" + sourcefile)
            suben = flatten_subtitle_texts(sub.text for sub in subs)
            if not suben:
                # Nothing to send to the translator.
                print("%s : no subtitles, skipped." % sourcefile)
                continue

            print("%s : translate begin." % sourcefile)
            retja = gt.translate(u"\n".join(suben))
            print("%s : translate done." % sourcefile)

            lija = retja.split(u"\n")
            if len(lija) != len(suben):
                # Lines are matched by position, so a different count means
                # the pairing below may be misaligned.
                print(
                    "%s : warning: sent %d lines, received %d."
                    % (sourcefile, len(suben), len(lija))
                )

            # Each subtitle becomes: line 1 original text, line 2 translation.
            for sub, text in zip(subs, pair_with_translations(suben, lija)):
                sub.text = text
            subs.save(sourcedir + "/" + "ja_" + sourcefile, "utf-8")
            print("%s : saved." % sourcefile)
    finally:
        # Always end the browser session, even after an error; quit() also
        # stops the driver process, which close() does not.
        if gt.driver is not None:
            gt.driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment