Skip to content

Instantly share code, notes, and snippets.

@bcarpio
Created December 2, 2012 06:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bcarpio/4187385 to your computer and use it in GitHub Desktop.
Save bcarpio/4187385 to your computer and use it in GitHub Desktop.
Mega Millions into MongoDB
#!/usr/bin/python
# vim: set expandtab:
import urllib2
from BeautifulSoup import BeautifulSoup
from pymongo import Connection
# MongoDB connection settings used by mongo_connection().
host = "localhost"              # server passed to pymongo's Connection
database = "lotto"              # database name looked up on the connection
collection = "mega_millions"    # collection that receives one document per insert
def mongo_connection():
    """Return the MongoDB collection the scraper writes to.

    Opens a pymongo ``Connection`` to the module-level ``host`` and selects
    ``database`` and then ``collection`` on it.
    """
    client = Connection(host)
    return client[database][collection]
def _parse_drawing(cells):
    """Build one drawing document from the ``<td>`` cells of a table row.

    Returns a dict with ``'numbers'`` (list of ints), ``'mega_number'`` (int)
    and, when the second cell contains a link, ``'date'`` (the link text).
    Returns ``None`` when the row has fewer than four cells or its fourth
    cell has no ``<b>`` element, i.e. when there are no numbers to store.
    """
    if len(cells) < 4:
        # Indexing cells[1] / cells[3] below would raise IndexError.
        return None
    date_cell, numbers_cell = cells[1], cells[3]
    if numbers_cell.b is None:
        return None

    drawing = {}
    if date_cell.a is not None:
        drawing['date'] = date_cell.a.string
    # The bold text holds the numbers separated by a middle dot (U+00B7).
    # It is written as a unicode escape because this file declares no source
    # encoding, and because the parsed text is unicode, not a byte string.
    # NOTE(review): assumes the parser yields the character itself rather
    # than the literal entity text "&middot;" -- confirm against a live page.
    drawing['numbers'] = [
        int(num) for num in numbers_cell.b.string.split(u'\xb7')
    ]
    drawing['mega_number'] = int(numbers_cell.strong.string)
    return drawing


def main(total_pages=63):
    """Scrape the Mega Millions history pages into MongoDB.

    Fetches pages ``1`` through ``total_pages`` of the usamega.com history
    listing, parses the rows of the fifth ``<table>`` on each page and
    inserts one document per row that contains winning numbers into the
    collection returned by ``mongo_connection()``.

    total_pages -- number of history pages to fetch (defaults to 63, the
                   value previously hard-coded here).
    """
    # urllib2 responses are not context managers on Python 2, so closing()
    # is what guarantees the HTTP response is released after each page.
    from contextlib import closing

    col = mongo_connection()
    for page_num in range(1, total_pages + 1):
        url = ('http://www.usamega.com/mega-millions-history.asp?p='
               + str(page_num))
        with closing(urllib2.urlopen(url)) as response:
            soup = BeautifulSoup(response.read())
        for row in soup('table')[4].findAll('tr'):
            drawing = _parse_drawing(row('td'))
            # Rows without numbers are skipped instead of being stored as
            # empty or date-only documents.
            if drawing is not None:
                col.insert(drawing)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment