# legendmohe/downloadwallpaper.py

## downloadwallpaper.py
#!/usr/bin/env python
# encoding: utf-8

import os
import sys
import errno
import urllib
import urllib2
import re


# http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
def chunk_report(bytes_so_far, chunk_size, total_size):
    """Write a single-line download progress message to stdout.

    The message ends with a carriage return so that successive calls
    overwrite one another on a terminal; a newline is written once
    ``bytes_so_far`` reaches ``total_size``.

    Args:
        bytes_so_far: Number of bytes received so far.
        chunk_size: Size of each read request. Unused here; it is part of
            the three-argument report-hook signature.
        total_size: Expected total number of bytes. When it is zero,
            negative or ``None`` no percentage can be computed, so only
            the byte count is reported.
    """
    if not total_size or total_size < 0:
        # Unknown or empty payload: skip the percentage instead of
        # dividing by zero.
        sys.stdout.write("Downloaded %d bytes\r" % bytes_so_far)
        sys.stdout.flush()
        return

    percent = round(float(bytes_so_far) / total_size * 100, 2)
    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, total_size, percent))
    if bytes_so_far >= total_size:
        # Finished: move to the next line so the final status stays visible.
        sys.stdout.write('\n')
    sys.stdout.flush()


def chunk_read(response, chunk_size=8192, report_hook=None):
    """Read a response body in chunks and return it as one byte string.

    Args:
        response: Object exposing ``info()`` (returning the response
            headers) and ``read(size)``, such as the value returned by
            ``urllib2.urlopen``.
        chunk_size: Maximum number of bytes requested per ``read`` call.
        report_hook: Optional callable invoked after every non-empty chunk
            as ``report_hook(bytes_so_far, chunk_size, total_size)``. It is
            skipped when the response has no usable ``Content-Length``
            header, because there is no total to report against.

    Returns:
        The complete body as a byte string (empty if nothing was read).
    """
    headers = response.info()
    # Python 2 header objects expose getheader(); fall back to the
    # mapping-style get() offered by other header containers.
    get_header = getattr(headers, 'getheader', None) or headers.get
    raw_length = get_header('Content-Length')

    total_size = None
    if raw_length is not None:
        try:
            total_size = int(raw_length.strip())
        except ValueError:
            # Malformed header: treat the size as unknown.
            total_size = None

    # Collect the chunks and join once; repeated "+=" copies the whole
    # buffer on every iteration.
    chunks = []
    bytes_so_far = 0
    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            break
        chunks.append(chunk)
        bytes_so_far += len(chunk)

        if report_hook and total_size is not None:
            report_hook(bytes_so_far, chunk_size, total_size)

    return b''.join(chunks)


def download(frompage=1, topage=0):
    """Download 1920x1080 wallpapers listed on wallhaven search pages.

    Every search-result page from ``frompage`` to ``topage`` (inclusive) is
    fetched, wallpaper ids are scraped from the thumbnail URLs, and each
    wallpaper that is not already on disk is saved as
    ``wallpaper/<id>.jpg`` relative to the current working directory.
    Image requests that fail with an HTTP error are printed and skipped.

    Args:
        frompage: First result page to scan.
        topage: Last result page to scan; with the default of 0 the page
            range is empty and nothing is downloaded.

    Raises:
        OSError: If the output directory cannot be created for a reason
            other than already existing.
    """
    downloadpath = "http://wallpapers.wallhaven.cc/wallpapers/full/wallhaven-"
    output_path = 'wallpaper'
    try:
        os.mkdir(output_path)
    except OSError as exception:
        # An existing directory is fine; anything else is a real failure.
        if exception.errno != errno.EEXIST:
            raise

    # hosturl = 'http://wallbase.cc/toplist/index/{0}?section=wallpapers&res=1920x1080'
    hosturl = 'http://alpha.wallhaven.cc/search?categories=111&purity=110&resolutions=1920x1080&ratios=16x9&sorting=random&order=desc&page={0}'
    # p = re.compile(r'id="thumb(\d+)"')
    # Compiled once for all pages; the dot is escaped so that only a
    # literal ".jpg" suffix matches.
    thumb_pattern = re.compile(r'small/th-(\d+)\.jpg')

    for index in range(frompage, topage + 1):
        page_url = hosturl.format(index)
        print("page  %s %s" % (index, page_url))

        page = urllib2.urlopen(page_url)
        try:
            content = page.read()
        finally:
            page.close()

        for image_id in thumb_pattern.findall(content):
            print("id: %s" % image_id)
            filename = image_id + ".jpg"
            outputpath = os.path.join(output_path, filename)
            if os.path.exists(outputpath):
                print("%s exists." % filename)
                continue

            image_url = downloadpath + filename
            print("downloading: %s" % image_url)
            # Fetch the whole image before touching the disk: a failed
            # request must not leave an empty file behind, because the
            # existence check above would then skip it on every later run.
            try:
                response = urllib2.urlopen(image_url)
                try:
                    data = chunk_read(response, report_hook=chunk_report)
                finally:
                    response.close()
            except urllib2.HTTPError as e:
                print(e)
                continue

            with open(outputpath, "wb") as ofile:
                ofile.write(data)

if __name__ == '__main__':
    # Script entry point: scan search-result pages 1 through 10.
    download(frompage=1, topage=10)
	#!/usr/bin/env python
	# encoding: utf-8

	import os
	import sys
	import errno
	import urllib
	import urllib2
	import re


	# http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
	def chunk_report(bytes_so_far, chunk_size, total_size):
	    """Write a single-line download progress message to stdout.

	    The message ends with a carriage return so that successive calls
	    overwrite one another on a terminal; a newline is written once
	    ``bytes_so_far`` reaches ``total_size``.

	    Args:
	        bytes_so_far: Number of bytes received so far.
	        chunk_size: Size of each read request. Unused here; it is part of
	            the three-argument report-hook signature.
	        total_size: Expected total number of bytes. When it is zero,
	            negative or ``None`` no percentage can be computed, so only
	            the byte count is reported.
	    """
	    if not total_size or total_size < 0:
	        # Unknown or empty payload: skip the percentage instead of
	        # dividing by zero.
	        sys.stdout.write("Downloaded %d bytes\r" % bytes_so_far)
	        sys.stdout.flush()
	        return

	    percent = round(float(bytes_so_far) / total_size * 100, 2)
	    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
	                     (bytes_so_far, total_size, percent))
	    if bytes_so_far >= total_size:
	        # Finished: move to the next line so the final status stays visible.
	        sys.stdout.write('\n')
	    sys.stdout.flush()


	def chunk_read(response, chunk_size=8192, report_hook=None):
	    """Read a response body in chunks and return it as one byte string.

	    Args:
	        response: Object exposing ``info()`` (returning the response
	            headers) and ``read(size)``, such as the value returned by
	            ``urllib2.urlopen``.
	        chunk_size: Maximum number of bytes requested per ``read`` call.
	        report_hook: Optional callable invoked after every non-empty chunk
	            as ``report_hook(bytes_so_far, chunk_size, total_size)``. It is
	            skipped when the response has no usable ``Content-Length``
	            header, because there is no total to report against.

	    Returns:
	        The complete body as a byte string (empty if nothing was read).
	    """
	    headers = response.info()
	    # Python 2 header objects expose getheader(); fall back to the
	    # mapping-style get() offered by other header containers.
	    get_header = getattr(headers, 'getheader', None) or headers.get
	    raw_length = get_header('Content-Length')

	    total_size = None
	    if raw_length is not None:
	        try:
	            total_size = int(raw_length.strip())
	        except ValueError:
	            # Malformed header: treat the size as unknown.
	            total_size = None

	    # Collect the chunks and join once; repeated "+=" copies the whole
	    # buffer on every iteration.
	    chunks = []
	    bytes_so_far = 0
	    while True:
	        chunk = response.read(chunk_size)
	        if not chunk:
	            break
	        chunks.append(chunk)
	        bytes_so_far += len(chunk)

	        if report_hook and total_size is not None:
	            report_hook(bytes_so_far, chunk_size, total_size)

	    return b''.join(chunks)


	def download(frompage=1, topage=0):
	    """Download 1920x1080 wallpapers listed on wallhaven search pages.

	    Every search-result page from ``frompage`` to ``topage`` (inclusive) is
	    fetched, wallpaper ids are scraped from the thumbnail URLs, and each
	    wallpaper that is not already on disk is saved as
	    ``wallpaper/<id>.jpg`` relative to the current working directory.
	    Image requests that fail with an HTTP error are printed and skipped.

	    Args:
	        frompage: First result page to scan.
	        topage: Last result page to scan; with the default of 0 the page
	            range is empty and nothing is downloaded.

	    Raises:
	        OSError: If the output directory cannot be created for a reason
	            other than already existing.
	    """
	    downloadpath = "http://wallpapers.wallhaven.cc/wallpapers/full/wallhaven-"
	    output_path = 'wallpaper'
	    try:
	        os.mkdir(output_path)
	    except OSError as exception:
	        # An existing directory is fine; anything else is a real failure.
	        if exception.errno != errno.EEXIST:
	            raise

	    # hosturl = 'http://wallbase.cc/toplist/index/{0}?section=wallpapers&res=1920x1080'
	    hosturl = 'http://alpha.wallhaven.cc/search?categories=111&purity=110&resolutions=1920x1080&ratios=16x9&sorting=random&order=desc&page={0}'
	    # p = re.compile(r'id="thumb(\d+)"')
	    # Compiled once for all pages; the dot is escaped so that only a
	    # literal ".jpg" suffix matches.
	    thumb_pattern = re.compile(r'small/th-(\d+)\.jpg')

	    for index in range(frompage, topage + 1):
	        page_url = hosturl.format(index)
	        print("page  %s %s" % (index, page_url))

	        page = urllib2.urlopen(page_url)
	        try:
	            content = page.read()
	        finally:
	            page.close()

	        for image_id in thumb_pattern.findall(content):
	            print("id: %s" % image_id)
	            filename = image_id + ".jpg"
	            outputpath = os.path.join(output_path, filename)
	            if os.path.exists(outputpath):
	                print("%s exists." % filename)
	                continue

	            image_url = downloadpath + filename
	            print("downloading: %s" % image_url)
	            # Fetch the whole image before touching the disk: a failed
	            # request must not leave an empty file behind, because the
	            # existence check above would then skip it on every later run.
	            try:
	                response = urllib2.urlopen(image_url)
	                try:
	                    data = chunk_read(response, report_hook=chunk_report)
	                finally:
	                    response.close()
	            except urllib2.HTTPError as e:
	                print(e)
	                continue

	            with open(outputpath, "wb") as ofile:
	                ofile.write(data)

	if __name__ == '__main__':
	    # Script entry point: scan search-result pages 1 through 10.
	    download(topage=10)