Skip to content

Instantly share code, notes, and snippets.

@nmandery
Created May 19, 2015 16:02
Show Gist options
  • Save nmandery/894caf02db3af595ebd5 to your computer and use it in GitHub Desktop.
Save nmandery/894caf02db3af595ebd5 to your computer and use it in GitHub Desktop.
download the images of a document from issuu.com
#!python
# encoding=utf8
# download the images of a document from issuu.com
import os
import os.path

try:
    # Python 3: expose urllib.parse under the Python 2 module name used below.
    import urllib.parse as urlparse
except ImportError:
    # Python 2 fallback.
    import urlparse

import requests
# URL of the JPG image of the document's first page. download() derives the
# output directory name and the base URL for all further pages from it.
# Use the web inspector in the browser to find this link.
page_one_link="http://image.issuu.com/140117093509-09e7ed9f22e117a50abada8baf38cc02/jpg/page_1.jpg"
# Alternative example links (disabled):
#"http://image.issuu.com/100702105253-7529ac7d83034636b81db86c735c1cec/jpg/page_1.jpg"
#"http://image.issuu.com/140623082141-a75d4e43b7e3100e128e3d4595ee9e81/jpg/page_1.jpg"
def forever_page_seq():
    """Yield page image file names ``page_1.jpg``, ``page_2.jpg``, ... forever.

    The generator never terminates on its own; the consumer is expected to
    stop iterating once it runs out of pages.
    """
    i = 1
    while True:
        yield 'page_{0}.jpg'.format(i)
        i += 1
def get_document_name(page_one_link):
    """Return the document name encoded in a page image URL.

    The name is the part of the first path segment that precedes the first
    ``-``; a segment without ``-`` is returned whole.

    Args:
        page_one_link: URL of a page image, e.g.
            ``http://image.issuu.com/<name>-<hash>/jpg/page_1.jpg``.

    Returns:
        The document name as a string.

    Raises:
        IndexError: if the URL has no path component at all.
    """
    path = urlparse.urlparse(page_one_link).path
    # path starts with '/', so index 0 of the split is always the empty string.
    first_segment = path.split('/')[1]
    return first_segment.split('-')[0]
def download(page_one_link):
    """Download all page images of an issuu document into a local directory.

    Pages are requested as ``page_1.jpg``, ``page_2.jpg``, ... from the same
    URL directory as ``page_one_link``. The loop stops at the first response
    whose status code is not 200, which means either the end of the document
    or a failed request. Files are written to ``issuu_<document name>``
    relative to the current working directory; the directory is created if
    it does not exist yet.

    Args:
        page_one_link: URL of the JPG image of the first page.

    Raises:
        OSError: if the output directory cannot be created.
        requests.exceptions.RequestException: propagated from ``requests``
            on connection errors or timeouts.
    """
    down_dir = 'issuu_{0}'.format(get_document_name(page_one_link))
    try:
        os.makedirs(down_dir)
    except OSError:
        # An already existing directory is fine; anything else is a real
        # error. (Avoids the check-then-create race of os.path.exists.)
        if not os.path.isdir(down_dir):
            raise

    # Everything up to (not including) the last '/' is shared by all pages.
    page_base_url = page_one_link[:page_one_link.rfind('/')]

    for page in forever_page_seq():
        print('downloading {0}'.format(page))
        # A timeout keeps a stalled server from blocking the script forever.
        r = requests.get(
            '{0}/{1}'.format(page_base_url, page),
            stream=True,
            timeout=30,
        )
        try:
            if r.status_code != 200:
                print('dowloading finished or failed')
                break
            with open(os.path.join(down_dir, page), 'wb') as fh:
                for chunk in r.iter_content(chunk_size=10 * 1024):
                    if chunk:  # skip keep-alive chunks
                        fh.write(chunk)
        finally:
            # With stream=True the connection is only released once the body
            # is fully consumed or the response is closed explicitly.
            r.close()
if __name__ == "__main__":
    # Run as a script: download the document configured in page_one_link.
    download(page_one_link)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment