nmandery/download_issuu_document.py

## download_issuu_document.py
#!python
# encoding=utf8
# download the images of a document from issuu.com

import os
import os.path
try:
    import urlparse  # Python 2 module name
except ImportError:  # Python 3 moved this module to urllib.parse
    import urllib.parse as urlparse

import requests

# Link to the jpg of the first page of the document to download.
# Use the web inspector in the browser to find this link.
page_one_link="http://image.issuu.com/140117093509-09e7ed9f22e117a50abada8baf38cc02/jpg/page_1.jpg"
# Alternative first-page links (presumably other documents), kept for reference:
#"http://image.issuu.com/100702105253-7529ac7d83034636b81db86c735c1cec/jpg/page_1.jpg"
#"http://image.issuu.com/140623082141-a75d4e43b7e3100e128e3d4595ee9e81/jpg/page_1.jpg"

def forever_page_seq():
    """Yield the page file names 'page_1.jpg', 'page_2.jpg', ... endlessly.

    The generator never terminates on its own; the consumer has to stop
    iterating.
    """
    page_number = 0
    while True:
        page_number += 1
        yield 'page_{0}.jpg'.format(page_number)

def get_document_name(page_one_link):
    """Return the leading part of the first path segment of a page-image URL.

    The first path segment of the URL has the form ``<name>-<rest>``; the text
    before the first ``-`` is returned (the whole segment if it has no ``-``).

    :param page_one_link: URL of a page image, e.g.
        ``http://image.issuu.com/<name>-<rest>/jpg/page_1.jpg``.
    :returns: the part of the first path segment before the first ``-``.
    :raises IndexError: if the URL path contains no ``/`` at all.
    """
    # Resolved locally so the function works on Python 3 (urllib.parse) as
    # well as on Python 2 (urlparse).
    try:
        from urllib.parse import urlparse as parse_url
    except ImportError:
        from urlparse import urlparse as parse_url

    path = parse_url(page_one_link).path
    # path starts with '/', so index 0 is '' and index 1 is the first segment.
    first_segment = path.split('/')[1]
    return first_segment.split('-')[0]

def download(page_one_link, timeout=60):
    """Download all page images of a document into a local directory.

    Pages are requested in order (page_1.jpg, page_2.jpg, ...) from the same
    URL directory as *page_one_link* and written to ``issuu_<name>`` relative
    to the current working directory, where ``<name>`` comes from
    ``get_document_name``. The loop ends at the first response whose status
    code is not 200.

    :param page_one_link: URL of the jpg of the first page.
    :param timeout: timeout in seconds handed to ``requests.get`` so that a
        stalled connection raises instead of blocking forever.
    """
    down_dir = 'issuu_{0}'.format(get_document_name(page_one_link))
    if not os.path.exists(down_dir):
        os.makedirs(down_dir)
    # Everything up to (excluding) the last '/' is the common base of all pages.
    page_base_url = page_one_link[:page_one_link.rfind('/')]
    for page in forever_page_seq():
        print('downloading {0}'.format(page))
        r = requests.get('{0}/{1}'.format(page_base_url, page),
                         stream=True, timeout=timeout)
        try:
            if r.status_code != 200:
                # A non-200 answer is the only end-of-document signal available.
                print('downloading finished or failed')
                break
            with open(os.path.join(down_dir, page), 'wb') as fh:
                for chunk in r.iter_content(chunk_size=10*1024):
                    if chunk:  # skip empty keep-alive chunks
                        fh.write(chunk)
        finally:
            # With stream=True the connection is only released once the body
            # is fully consumed or the response is closed explicitly.
            r.close()

# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    download(page_one_link)
	#!python
	# encoding=utf8
	# download the images of a document from issuu.com

	import requests
	import urlparse
	import os
	import os.path

	# Link to the jpg of the first page of the document to download.
	# Use the web inspector in the browser to find this link.
	page_one_link="http://image.issuu.com/140117093509-09e7ed9f22e117a50abada8baf38cc02/jpg/page_1.jpg"
	# Alternative first-page links (presumably other documents), kept for reference:
	#"http://image.issuu.com/100702105253-7529ac7d83034636b81db86c735c1cec/jpg/page_1.jpg"
	#"http://image.issuu.com/140623082141-a75d4e43b7e3100e128e3d4595ee9e81/jpg/page_1.jpg"

	def forever_page_seq():
	    """Yield the page file names 'page_1.jpg', 'page_2.jpg', ... endlessly.

	    The generator never terminates on its own; the consumer has to stop
	    iterating.
	    """
	    i = 1
	    while True:
	        yield 'page_{0}.jpg'.format(i)
	        i += 1

	def get_document_name(page_one_link):
	    """Return the leading part of the first path segment of a page-image URL.

	    The first path segment of the URL has the form ``<name>-<rest>``; the
	    text before the first ``-`` is returned (the whole segment if it has
	    no ``-``).

	    :param page_one_link: URL of a page image, e.g.
	        ``http://image.issuu.com/<name>-<rest>/jpg/page_1.jpg``.
	    :returns: the part of the first path segment before the first ``-``.
	    :raises IndexError: if the URL path contains no ``/`` at all.
	    """
	    # Resolved locally so the function works on Python 3 (urllib.parse) as
	    # well as on Python 2 (urlparse).
	    try:
	        from urllib.parse import urlparse as parse_url
	    except ImportError:
	        from urlparse import urlparse as parse_url

	    path = parse_url(page_one_link).path
	    # path starts with '/', so index 0 is '' and index 1 is the first segment.
	    first_segment = path.split('/')[1]
	    return first_segment.split('-')[0]

	def download(page_one_link, timeout=60):
	    """Download all page images of a document into a local directory.

	    Pages are requested in order (page_1.jpg, page_2.jpg, ...) from the
	    same URL directory as *page_one_link* and written to ``issuu_<name>``
	    relative to the current working directory, where ``<name>`` comes from
	    ``get_document_name``. The loop ends at the first response whose
	    status code is not 200.

	    :param page_one_link: URL of the jpg of the first page.
	    :param timeout: timeout in seconds handed to ``requests.get`` so that
	        a stalled connection raises instead of blocking forever.
	    """
	    down_dir = 'issuu_{0}'.format(get_document_name(page_one_link))
	    if not os.path.exists(down_dir):
	        os.makedirs(down_dir)
	    # Everything up to (excluding) the last '/' is the base of all pages.
	    page_base_url = page_one_link[:page_one_link.rfind('/')]
	    for page in forever_page_seq():
	        print('downloading {0}'.format(page))
	        r = requests.get('{0}/{1}'.format(page_base_url, page),
	                         stream=True, timeout=timeout)
	        try:
	            if r.status_code != 200:
	                # A non-200 answer is the only end-of-document signal.
	                print('downloading finished or failed')
	                break
	            with open(os.path.join(down_dir, page), 'wb') as fh:
	                for chunk in r.iter_content(chunk_size=10*1024):
	                    if chunk:  # skip empty keep-alive chunks
	                        fh.write(chunk)
	        finally:
	            # With stream=True the connection is only released once the
	            # body is fully consumed or the response is closed explicitly.
	            r.close()

	# Run the download only when executed as a script, not when imported.
	if __name__ == "__main__":
	    download(page_one_link)