Skip to content

Instantly share code, notes, and snippets.

@dirkk0
Created November 17, 2012 09:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dirkk0/4094607 to your computer and use it in GitHub Desktop.
Save dirkk0/4094607 to your computer and use it in GitHub Desktop.
translate by scraping dict.cc
# found at http://blog.dispatched.ch/2009/03/15/webscraping-with-python-and-beautifulsoup/
import urllib
import urllib2
import string
import sys
from BeautifulSoup import BeautifulSoup
# Look up a word on dict.cc by scraping the HTML result page and print
# "<source> => <translation>" lines to stdout.
#
# Usage: python <script> [search-term]

# Send a browser-like User-Agent header with the request.
# NOTE(review): presumably the site rejects the default urllib2 agent -- confirm.
user_agent = 'Mozilla/5 (Solaris 10) Gecko'
headers = {'User-Agent': user_agent}

# Search term: first command-line argument, otherwise a built-in demo word.
if len(sys.argv) > 1:
    values = {'s': sys.argv[1]}
else:
    values = {'s': 'kaffeetasse'}

# Supplying a data payload makes urllib2 send the query as a POST body.
data = urllib.urlencode(values)
request = urllib2.Request("http://www.dict.cc/", data, headers)
response = urllib2.urlopen(request)
try:
    the_page = response.read()
finally:
    # Release the connection even if reading the body fails.
    response.close()

# Every <td class="td7nl"> cell is treated as one entry of the result table.
pool = BeautifulSoup(the_page)
results = pool.findAll('td', attrs={'class': 'td7nl'})

source = ''
translations = []
for result in results:
    # Collect all text nodes of the cell as UTF-8 byte strings; each fragment
    # is prefixed with a single space (so a non-empty word starts with one).
    fragments = [unicode(tmp).encode("utf-8")
                 for tmp in result.findAll(text=True)]
    word = "".join(" " + fragment for fragment in fragments)
    if source == '':
        # The first non-empty cell becomes the source term.
        source = word
    else:
        # NOTE(review): source is never reset, so every later cell is paired
        # with the first one -- confirm this is intended rather than pairing
        # the cells row by row.
        translations.append((source, word))

# Single-argument parenthesised print gives the same output on Python 2 and
# also parses on Python 3.
for source_word, translated_word in translations:
    print("%s => %s" % (source_word, translated_word))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment