Skip to content

Instantly share code, notes, and snippets.

@rosskarchner
Created February 15, 2012 00:14
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rosskarchner/1831953 to your computer and use it in GitHub Desktop.
Save rosskarchner/1831953 to your computer and use it in GitHub Desktop.
CooksIllustrated.com recipe database to Evernote
import mechanize, codecs
from BeautifulSoup import BeautifulSoup, Comment
# --- Site endpoints -------------------------------------------------------
ci_login_url = "https://auth.cooksillustrated.com/"
recipe_search_root = "http://www.cooksillustrated.com/search/results.asp"
# Query string for the first results page (filtered to type:Recipe).
initial_query = '?query=+&filters=&sort=&filters=type:Recipe'

# --- Evernote export (.enex) templates ------------------------------------
# Opening and closing of the export document that wraps all notes.
intro = u"""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
<en-export>"""
outro = u"""</en-export>"""
# One note; filled with (title, content markup, source url) in that order.
note_template = u"""<note><title>%s</title><content><![CDATA[<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
<en-note>%s</en-note>]]></content>
<note-attributes><source>web.clip</source>
<source-url>%s</source-url></note-attributes>
</note>"""

# --- Shared browser session and output file -------------------------------
# A single Browser instance is reused for the login page and every search
# request made later in the script.
br = mechanize.Browser()
br.open(ci_login_url)

# UTF-8 encoded destination for the generated export.
outfile = codecs.open('cooksillustrated.enex', 'wb', 'utf-8')
# Log in to Cooks Illustrated: locate the form that posts to the login URL,
# fill in the credentials and submit it through the shared browser.
for form in br.forms():
    if form.action == ci_login_url:
        form['logInInfo.UserName'] = 'your login'
        form['logInInfo.Password'] = 'your password'
        # form.click() only *builds* the request for the submission; it has
        # to be opened with the browser for the login to actually be sent
        # and for the resulting session cookies to land in br's cookie jar.
        response = br.open(form.click())
        break
def process_recipe_page(query, outfile):
    """Write every recipe reachable from a search-results page as ENEX notes.

    Fetches ``recipe_search_root + query`` with the module-level browser,
    writes one ``note_template`` entry to *outfile* for each element whose
    class is ``CooksIllustrated Recipe``, then follows the last pagination
    link for as long as its text is ``Next``.

    Arguments:
    query   -- query string appended to ``recipe_search_root``.
    outfile -- object with a ``write`` method accepting unicode text.
    """
    # Iterate rather than recurse once per page: a long result set would
    # otherwise run into the interpreter's recursion limit.
    while query is not None:
        br.open(recipe_search_root + query)
        soup = BeautifulSoup(br.response().read())

        for recipe in soup.findAll(attrs={'class': 'CooksIllustrated Recipe'}):
            # Drop class attributes from the recipe's descendants before
            # serialising them (presumably because the note markup should
            # not carry the site's CSS classes -- confirm against ENML).
            for classed in recipe.findAll(attrs={'class': True}):
                del classed['class']

            link = recipe.find('a')
            if link is None:
                # Without an anchor there is neither a title nor a source
                # URL to build a note from.
                continue

            title = link.text
            url = 'http://www.cooksillustrated.com' + link['href']
            # Make the link absolute so it still resolves inside the note.
            link['href'] = url
            content = u' '.join([unicode(tag) for tag in recipe])
            outfile.write(note_template % (title, content, url))

        # Continue only when the last pagination link is the "Next" link;
        # a page without pagination (or without links in it) ends the walk.
        query = None
        pagination = soup.find(attrs={'class': 'pagination'})
        if pagination is not None:
            page_links = pagination.findAll('a')
            if page_links and page_links[-1].text == u'Next':
                query = page_links[-1]['href']
# Emit the export: opening wrapper, one note per recipe, closing wrapper.
# The file handle is released even if scraping fails partway through.
try:
    outfile.write(intro)
    process_recipe_page(initial_query, outfile)
    outfile.write(outro)
finally:
    outfile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment