Skip to content

Instantly share code, notes, and snippets.

@eapen
Last active November 19, 2016 00:14
Show Gist options
  • Save eapen/606d8822dd3624227f52 to your computer and use it in GitHub Desktop.
Save eapen/606d8822dd3624227f52 to your computer and use it in GitHub Desktop.
import urllib2
import lxml.html as lh
import re
# Endpoint that serves the zoom view of one frame; the frame id is filled in
# with str.format, e.g.
#   http://www.marathonfoto.com/Ajax/Zoom/?frameid=12345
BASE_URL = 'http://www.marathonfoto.com/Ajax/Zoom/?frameid={}'

# Proofs listing page that is scanned for frames.
# NOTE(review): the XXXX values look like placeholders for a PIN and a last
# name -- fill them in before running.
url = 'http://www.marathonfoto.com/Proofs?PIN=XXXX&LastName=XXXX'

# Browser-like User-Agent header value sent with the frame and image requests.
USERAGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/46.0.2490.86 Safari/537.36'
)

# Example of the image URL that is extracted from a zoom response:
#   http://render.marathonfoto.com/Render/Z.ashx?O=123456&R=1234&F=0005&K=123234123
# Captures the render-host path of the image from a zoom response body: the
# text after "//" starting at render.marathonfoto.com, up to the closing "')".
# Compiled once here because the pattern is identical for every frame.
IMAGE_URL_PATTERN = re.compile(r'\/\/(render\.marathonfoto\.com\/.*)\'\)')

# Download the proofs page and parse it so its anchors can be walked.
doc = lh.parse(urllib2.urlopen(url))

# Sequence number used in the output file name; it only advances after an
# image has actually been written, so the saved files are numbered 1..N.
counter = 1

for frame in doc.iter('a'):
    # Only anchors whose visible text starts with "Enlarge" are processed.
    if not frame.text_content().strip().startswith("Enlarge"):
        continue

    # Skip an "Enlarge" anchor without a frame id instead of aborting the
    # whole run with a KeyError.
    frameid = frame.attrib.get('data-frameid')
    if frameid is None:
        continue

    # Request the zoom view for this frame, sending the proofs page as the
    # Referer together with the browser-like User-Agent.
    frame_url = BASE_URL.format(frameid)
    print(frame_url)
    req = urllib2.Request(frame_url)
    req.add_header('Referer', url)
    req.add_header('User-Agent', USERAGENT)
    frame_content = urllib2.urlopen(req).read()

    # A response without an image URL would otherwise crash on .group().
    matches = IMAGE_URL_PATTERN.search(frame_content)
    if matches is None:
        print('No image URL found for frame ' + frameid)
        continue

    image_url = "http://" + matches.group(1)
    # The URL is lifted out of markup, so its ampersands may be HTML-escaped;
    # turn "&amp;" back into "&" before requesting it.
    image_url = image_url.replace("&amp;", "&")
    print(image_url)

    # Fetch the image itself, this time with the zoom URL as the Referer.
    req = urllib2.Request(image_url)
    req.add_header('Referer', frame_url)
    req.add_header('User-Agent', USERAGENT)
    image_data = urllib2.urlopen(req).read()

    # Open the output file only once the bytes are in hand, so a failed
    # download does not leave an empty .jpg behind.
    with open('/tmp/marathon-' + str(counter) + '.jpg', 'wb') as f:
        f.write(image_data)
    counter += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment