Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@dpapathanasiou
Created May 25, 2012 22:05
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save dpapathanasiou/2790853 to your computer and use it in GitHub Desktop.
Save dpapathanasiou/2790853 to your computer and use it in GitHub Desktop.
Using Microsoft’s Translator API with Python
#!/usr/bin/python
"""
msmt.py
Functions to access the Microsoft Translator API HTTP Interface, using python's urllib/urllib2 libraries
"""
import urllib, urllib2
import json
from datetime import datetime
def datestring (display_format="%a, %d %b %Y %H:%M:%S", datetime_object=None):
    """Format a date/datetime object as a string.

    display_format  -- strftime format string (the default renders like
                       "Fri, 25 May 2012 22:05:00")
    datetime_object -- a datetime.datetime or datetime.date instance; when
                       None, the current UTC time is used

    Returns the formatted string.
    """
    if datetime_object is None:
        datetime_object = datetime.utcnow()
    # Call strftime on the object itself rather than through the datetime
    # class: the unbound form datetime.strftime(obj, fmt) raises TypeError
    # for plain datetime.date instances, which the contract above allows.
    return datetime_object.strftime(display_format)
def get_access_token (client_id, client_secret):
    """Make an HTTP POST request to the token service, and return the access_token,
    as described in number 3, here: http://msdn.microsoft.com/en-us/library/hh454949.aspx

    client_id     -- the application's client id
    client_secret -- the application's client secret

    Returns the value of 'access_token' from the JSON response, or None when
    the request fails, the response is not a JSON object, or the key is
    missing. Failures are reported on stdout (prefixed with datestring())
    rather than raised.
    """
    data = urllib.urlencode({
        'client_id' : client_id,
        'client_secret' : client_secret,
        'grant_type' : 'client_credentials',
        'scope' : 'http://api.microsofttranslator.com'
    })
    try:
        # Supplying a data payload makes urllib2 issue a POST.
        request = urllib2.Request('https://datamarket.accesscontrol.windows.net/v2/OAuth2-13', data)
        response = urllib2.urlopen(request)
        try:
            response_data = json.loads(response.read())
        finally:
            # Always release the connection, even if decoding fails.
            response.close()
        if isinstance(response_data, dict):
            return response_data.get('access_token')
        print('%s Bad data from server' % (datestring(),))
    except urllib2.URLError as e:
        # HTTPError subclasses URLError and carries both 'code' and 'reason',
        # so 'code' must be tested first or HTTP status errors would be
        # misreported as connection failures.
        if hasattr(e, 'code'):
            print('%s Server error:  %s' % (datestring(), e.code))
        else:
            print('%s Could not connect to the server: %s' % (datestring(), getattr(e, 'reason', e)))
    except (TypeError, ValueError):
        # json.loads signals malformed JSON with ValueError.
        print('%s Bad data from server' % (datestring(),))
    return None
# Language codes accepted by the translator, mapped to their display names,
# as defined here: http://msdn.microsoft.com/en-us/library/hh456380.aspx
supported_languages = {
    'ar' : 'Arabic',  # was ' Arabic'; the stray leading space leaked into displayed listings
    'bg' : 'Bulgarian',
    'ca' : 'Catalan',
    'zh-CHS' : 'Chinese (Simplified)',
    'zh-CHT' : 'Chinese (Traditional)',
    'cs' : 'Czech',
    'da' : 'Danish',
    'nl' : 'Dutch',
    'en' : 'English',
    'et' : 'Estonian',
    'fi' : 'Finnish',
    'fr' : 'French',
    'de' : 'German',
    'el' : 'Greek',
    'ht' : 'Haitian Creole',
    'he' : 'Hebrew',
    'hi' : 'Hindi',
    'hu' : 'Hungarian',
    'id' : 'Indonesian',
    'it' : 'Italian',
    'ja' : 'Japanese',
    'ko' : 'Korean',
    'lv' : 'Latvian',
    'lt' : 'Lithuanian',
    'mww' : 'Hmong Daw',
    'no' : 'Norwegian',
    'pl' : 'Polish',
    'pt' : 'Portuguese',
    'ro' : 'Romanian',
    'ru' : 'Russian',
    'sk' : 'Slovak',
    'sl' : 'Slovenian',
    'es' : 'Spanish',
    'sv' : 'Swedish',
    'th' : 'Thai',
    'tr' : 'Turkish',
    'uk' : 'Ukrainian',
    'vi' : 'Vietnamese',
}
def print_supported_languages ():
    """Build the listing of supported language codes and their descriptions.

    Despite the name nothing is printed: the listing is returned as one
    string with a "code<TAB>=<TAB>description" line per language (used when
    a call to translate requests an unsupported code).
    """
    return '\n'.join(
        '\t'.join((code, '=', description))
        for code, description in supported_languages.items()
    )
def to_bytestring (s):
    """Convert the given unicode string to a bytestring, using utf-8 encoding,
    unless it's already a bytestring.

    s -- a unicode string, a bytestring, or None

    Returns the utf-8 encoded bytestring; a bytestring argument is returned
    unchanged and None is passed through as None. An empty string yields an
    empty bytestring (previously it yielded None, which urlencode would then
    serialise as the literal text "None").
    """
    # 'bytes' is an alias of 'str' on Python 2.6+, so this check identifies
    # an existing bytestring.
    if isinstance(s, bytes):
        return s
    if s is None:
        return None
    return s.encode('utf-8')
def translate (access_token, text, to_lang, from_lang=None):
    """Use the HTTP Interface to translate text, as described here:
    http://msdn.microsoft.com/en-us/library/ff512387.aspx
    and return an xml string if successful

    access_token -- bearer token, e.g. as returned by get_access_token()
    text         -- the text to translate (unicode or bytestring)
    to_lang      -- target language code; must be a key of supported_languages
    from_lang    -- optional source language code; when given it must also be
                    a key of supported_languages

    Returns the raw response body on success, or None when an argument is
    rejected or the request fails. Problems are reported on stdout rather
    than raised.
    """
    if not access_token:
        print('Sorry, the access token is invalid')
        return None

    if to_lang not in supported_languages:
        print('Sorry, the API cannot translate to %s' % (to_lang,))
        print('Please use one of these instead:')
        print(print_supported_languages())
        return None

    # Fall back to an empty string so that missing text is not url-encoded
    # as the literal word "None".
    data = { 'text' : to_bytestring(text) or '', 'to' : to_lang }

    if from_lang:
        if from_lang not in supported_languages:
            print('Sorry, the API cannot translate from %s' % (from_lang,))
            print('Please use one of these instead:')
            print(print_supported_languages())
            return None
        data['from'] = from_lang

    try:
        # NOTE(review): this endpoint is plain http, so the bearer token is
        # sent unencrypted -- confirm whether the https endpoint can be used.
        request = urllib2.Request('http://api.microsofttranslator.com/v2/Http.svc/Translate?'+urllib.urlencode(data))
        request.add_header('Authorization', 'Bearer '+access_token)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            # Release the connection once the body has been read.
            response.close()
    except urllib2.URLError as e:
        # HTTPError subclasses URLError and carries both 'code' and 'reason',
        # so 'code' must be tested first or HTTP status errors would be
        # misreported as connection failures.
        if hasattr(e, 'code'):
            print('%s Server error:  %s' % (datestring(), e.code))
        else:
            print('%s Could not connect to the server: %s' % (datestring(), getattr(e, 'reason', e)))
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment