Skip to content

Instantly share code, notes, and snippets.

@gamesbook
Created December 30, 2013 10:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gamesbook/8180423 to your computer and use it in GitHub Desktop.
Save gamesbook/8180423 to your computer and use it in GitHub Desktop.
Words...
import urllib2
import string
from BeautifulSoup import BeautifulSoup
#from bs4 import BeautifulSoup # if using BeautifulSoup4
# Scrape the A-Z word lists from phrontistery.info and save every
# (word, definition) pair as one fully quoted, two-column CSV row.
name_out = '/path/to/Desktop/words.csv'


def _cell_text(cell):
    """Return the text of a table cell with surrounding newlines removed.

    ``cell.string`` can be None (for example when the cell holds nested
    markup); in that case all text nodes inside the cell are joined.
    """
    text = cell.string
    if text is None:
        text = u''.join(cell.findAll(text=True))
    return text.strip('\n')


def _csv_field(text):
    """Quote *text* as a CSV field, doubling any embedded double quotes."""
    return u'"%s"' % text.replace(u'"', u'""')


# Binary mode: every line is encoded to UTF-8 explicitly before writing, so
# non-ASCII entries land in the file instead of raising UnicodeEncodeError.
# No newline translation is applied; rows always end in '\n'.
# The ``with`` block closes the file even if a download or parse step fails.
with open(name_out, 'wb') as the_file_out:
    for alpha in string.ascii_lowercase:
        print('processing %s' % alpha)
        response = urllib2.urlopen('http://phrontistery.info/%s.html' % alpha)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            response.close()
        table = soup.find('table', {"class": "words"})
        if table is None:
            # Page layout differs from what this script expects; report it
            # and keep going with the remaining letters.
            print('no "words" table found for %s; skipping' % alpha)
            continue
        for row in table.findAll('tr'):
            cells = row.findAll('td')
            # Skip rows without two data cells (e.g. header rows) and rows
            # whose first cell has no plain string content.
            if len(cells) < 2 or not cells[0].string:
                continue
            line = u'%s,%s\n' % (
                _csv_field(_cell_text(cells[0])),
                _csv_field(_cell_text(cells[1])),
            )
            the_file_out.write(line.encode('utf-8'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment