Skip to content

Instantly share code, notes, and snippets.

@reflog
Created January 22, 2012 13:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reflog/1657112 to your computer and use it in GitHub Desktop.
Save reflog/1657112 to your computer and use it in GitHub Desktop.
import os
import re
import sys
import urllib2
# Scrape The Oatmeal comic index (pages 7 down to 1), download every comic
# image, and write one local HTML gallery per index page.
#
# NOTE(review): the copy under review had lost its indentation; the nesting
# below is reconstructed from the statements' data flow.  The placement of
# the debug ``print(result)`` and of ``out.flush()`` is a best reading --
# confirm against the original if exact console output matters.
#
# ``print(...)`` is always called with a single argument so the text printed
# under Python 2 is identical to the original ``print`` statements.

# Compiled once instead of on every loop iteration.
_COMIC_LINK_RE = re.compile("""<a href="/comics/(.*?)"><img src="(.*?)" alt="(.*?)" class="border0" /></a>""")
_IMAGE_URL_RE = re.compile('"(http://s3.amazonaws.com/theoatmeal-img/comics/.*?)"', re.S)


def _fetch_or_skip(url):
    """Return the body of *url*, or None if the connection cannot be opened.

    Only the ``urlopen`` call is guarded, so a failure while reading an
    already-open response still propagates.  ``Exception`` is caught rather
    than using a bare ``except`` so that Ctrl-C (KeyboardInterrupt) and
    SystemExit are no longer swallowed.
    """
    print("getting " + url)
    try:
        response = urllib2.urlopen(url)
    except Exception:
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        return None
    try:
        return response.read()
    finally:
        response.close()


# The output paths below use a literal backslash separator, so the on-disk
# layout is Windows-specific.  Create the target directory up front so the
# first open() does not fail when it is missing.
if not os.path.isdir("oatmeal"):
    os.makedirs("oatmeal")

# Running counter used to name downloaded images; shared across all pages.
k = 0
for i in range(7, 0, -1):
    page_url = "http://theoatmeal.com/comics_pg/page:%d" % i
    # ``with`` guarantees the gallery file is closed even if a download raises.
    with open("oatmeal\\out%d.html" % i, "w") as out:
        out.write("<html><body>\n")
        print("getting " + page_url)
        # A failure fetching the index page itself is deliberately not caught.
        page = urllib2.urlopen(page_url)
        try:
            page_html = page.read()
        finally:
            page.close()

        results = []
        for params in _COMIC_LINK_RE.findall(page_html):
            # params = (comic slug, thumbnail src, alt text used as title)
            sub_url = "http://theoatmeal.com/comics/" + params[0]
            comic_html = _fetch_or_skip(sub_url)
            if comic_html is None:
                continue
            result = {
                'title': params[2],
                'sub_images': _IMAGE_URL_RE.findall(comic_html),
            }
            if not result['sub_images']:
                print("crap")
            else:
                results.append(result)
            print(result)

        # Emit comics in the reverse of the order they were listed on the page.
        for result in reversed(results):
            out.write("<hr/>\n")
            out.write(result['title'] + "<br/>\n")
            for si in result['sub_images']:
                image_data = _fetch_or_skip(si)
                if image_data is None:
                    continue
                # The extension is taken as the last three characters of the URL.
                fn = 'oatmeal\\%d.%s' % (k, si[-3:])
                k += 1
                with open(fn, 'wb') as local_file:
                    local_file.write(image_data)
                # NOTE(review): src repeats the "oatmeal\" prefix although the
                # HTML file itself lives in that directory -- confirm whether
                # the pages are meant to be viewed from the parent directory.
                out.write("<img src=\"%s\"/> <br/>\n" % fn)
            out.flush()
        out.write("</body></html>\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment