Skip to content

Instantly share code, notes, and snippets.

@mayhewsw
Created August 10, 2016 22:04
Show Gist options
  • Save mayhewsw/9d14bd5218bce1b57f050ccc79ce08e5 to your computer and use it in GitHub Desktop.
Save mayhewsw/9d14bd5218bce1b57f050ccc79ce08e5 to your computer and use it in GitHub Desktop.
Google API Word Translation
#!/usr/bin/python
from googleapiclient.discovery import build
import codecs
import HTMLParser
import shelve
# As of Aug 1 2016
# Google API key handed to the client library as ``developerKey`` when the
# translate service is built in translatefile(). Replace the placeholder with
# your own key before running, and do not commit a real key.
API_KEY = "YOUR_API_KEY_HERE"
def _source_word(line):
    """Return the first tab-separated field of *line*, stripped of whitespace."""
    return line.split("\t")[0].strip()


def _memo_key(word):
    """Return the shelve key for the unicode string *word*.

    Python 2's shelve only accepts byte-string keys, and ``str(word)`` raises
    UnicodeEncodeError for non-ASCII text, so the word is UTF-8 encoded
    instead. For pure-ASCII words this is the same key ``str(word)`` gave, so
    shelf files written by earlier versions remain usable.
    """
    return word.encode("utf-8")


def translatefile(fname, outfname, source, target, batch_size=75):
    """
    Translate the first word of each tab-separated row of a file.

    Reads ``fname`` as UTF-8, takes the first tab-separated field of every
    row, translates it from ``source`` to ``target`` through the Google
    Translate v2 API and writes one ``word<TAB>translation`` row per input
    row to ``outfname`` (UTF-8). Only the first whitespace-separated token of
    each translation is kept.

    Translations are cached in a shelve file named
    ``translatedict-<source>-<target>.shelf`` in the current directory, so
    only words missing from the cache are sent to the API. The API client is
    built only when at least one word actually needs translating.

    A failed request is reported on stdout and skipped (best effort); the
    words of that batch are written as ``"not in memo" + word``. Rows whose
    first field is blank are never sent to the API and get an empty
    translation.

    Args:
        fname: Path of the UTF-8 input file.
        outfname: Path of the UTF-8 output file (overwritten).
        source: Google language code of the input (en, uz, tr, de, etc.).
        target: Google language code to translate into.
        batch_size: Maximum number of words per API request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Read the input before opening the shelf so that a bad input path does
    # not leave a freshly created, never-closed shelf behind.
    with codecs.open(fname, "r", "utf-8") as f:
        srcwords = [_source_word(line) for line in f.readlines()]

    unescape = HTMLParser.HTMLParser().unescape
    outlines = []

    memo = shelve.open("translatedict-" + source + "-" + target + ".shelf")
    try:
        # Collect each uncached, non-blank word once, in first-seen order.
        pending = []
        seen = set()
        for word in srcwords:
            if not word or word in seen:
                continue
            seen.add(word)
            if _memo_key(word) not in memo:
                pending.append(word)

        if pending:
            service = build('translate', 'v2', developerKey=API_KEY)
            for start in range(0, len(pending), batch_size):
                batch = pending[start:start + batch_size]
                print("size of request: %d" % len(batch))
                # Best effort: one failing batch must not abort the others.
                try:
                    response = service.translations().list(
                        source=source, target=target, q=batch).execute()
                    translations = response["translations"]
                    if translations:
                        for word, item in zip(batch, translations):
                            memo[_memo_key(word)] = item["translatedText"]
                    else:
                        print("WHAAAAT")
                except Exception as e:
                    print("Whoops... exception")
                    print(e)

        for word in srcwords:
            if not word:
                # Nothing to translate on a blank row; keep the row itself.
                trans = ""
            elif _memo_key(word) in memo:
                # The API returns HTML-escaped text; a translation may also
                # be empty, in which case there is no first token to take.
                tokens = unescape(memo[_memo_key(word)]).split()
                trans = tokens[0] if tokens else ""
            else:
                trans = "not in memo" + word
            outlines.append(word + "\t" + trans + "\n")
    finally:
        # Always flush and release the cache, even if something above raised.
        memo.close()

    with codecs.open(outfname, "w", "utf-8") as out:
        out.writelines(outlines)
if __name__ == "__main__":
    import argparse

    # Positional command-line arguments, in the order they must be given,
    # paired with their help text.
    positionals = (
        ("fname", "Input file name (first word of each row is translated)"),
        ("outfname", "Output file. Format: origword transword"),
        ("source", "Source language code (2 letter)"),
        ("target", "Target language code (2 letter)"),
    )

    cli = argparse.ArgumentParser(description="")
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)

    opts = cli.parse_args()
    translatefile(opts.fname, opts.outfname, opts.source, opts.target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment