Skip to content

Instantly share code, notes, and snippets.

@legendmohe
Last active March 16, 2016 01:58
Show Gist options
  • Save legendmohe/9767060 to your computer and use it in GitHub Desktop.
Save legendmohe/9767060 to your computer and use it in GitHub Desktop.
Download 1920x1080 wallpapers from wallhaven.cc (originally written for wallbase.cc)
#!/usr/bin/env python
# encoding: utf-8
import os
import sys
import errno
import urllib
import urllib2
import re
# http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
def chunk_report(bytes_so_far, chunk_size, total_size):
    """Write a single-line download progress indicator to stdout.

    The line ends with a carriage return so that successive calls overwrite
    each other in a terminal; a newline is emitted once the download is
    complete.

    Args:
        bytes_so_far: Number of bytes received so far.
        chunk_size: Size of each read request. Unused here; it is part of
            the progress-hook signature.
        total_size: Expected total number of bytes. When it is zero,
            negative or None, no percentage can be computed and only the
            byte count is printed.
    """
    if total_size and total_size > 0:
        percent = round(float(bytes_so_far) / total_size * 100, 2)
        sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
                         (bytes_so_far, total_size, percent))
        if bytes_so_far >= total_size:
            sys.stdout.write('\n')
    else:
        # Avoid dividing by zero when the total size is unknown or empty.
        sys.stdout.write("Downloaded %d bytes\r" % bytes_so_far)
    # The progress line has no trailing newline, so flush explicitly to make
    # it visible on line-buffered terminals.
    sys.stdout.flush()
def chunk_read(response, chunk_size=8192, report_hook=None):
    """Read a whole HTTP response body in fixed-size chunks.

    Args:
        response: Object exposing ``read(size)`` and ``info()``, where
            ``info().getheader(name)`` returns the header value or None
            (as the object returned by ``urllib2.urlopen`` does).
        chunk_size: Maximum number of bytes requested per ``read`` call.
        report_hook: Optional callable invoked after every non-empty chunk
            as ``report_hook(bytes_so_far, chunk_size, total_size)``. It is
            not called when the response carries no Content-Length header,
            because the total size is unknown in that case.

    Returns:
        The complete body as a byte string.
    """
    # Content-Length is absent for e.g. chunked responses; treat the total
    # as unknown instead of failing on None.strip().
    content_length = response.info().getheader('Content-Length')
    total_size = None
    if content_length is not None:
        total_size = int(content_length.strip())

    # Collect the chunks and join once rather than growing a string with +=.
    chunks = []
    bytes_so_far = 0
    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            break
        chunks.append(chunk)
        bytes_so_far += len(chunk)
        if report_hook and total_size is not None:
            report_hook(bytes_so_far, chunk_size, total_size)
    return b''.join(chunks)
def download(frompage=1, topage=0):
    """Download the wallpapers listed on a range of wallhaven search pages.

    Each search result page from ``frompage`` to ``topage`` (inclusive) is
    fetched, the numeric wallpaper ids are extracted from the thumbnail URLs
    it contains, and the corresponding full-size JPEG is saved as
    ``wallpaper/<id>.jpg`` relative to the current working directory.
    Wallpapers whose target file already exists are skipped.

    Args:
        frompage: First search result page to process.
        topage: Last search result page to process. With the default of 0
            no page is processed.

    Raises:
        OSError: If the output directory cannot be created for a reason
            other than already existing.
    """
    downloadpath = "http://wallpapers.wallhaven.cc/wallpapers/full/wallhaven-"
    output_path = 'wallpaper'
    try:
        os.mkdir(output_path)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

    hosturl = 'http://alpha.wallhaven.cc/search?categories=111&purity=110&resolutions=1920x1080&ratios=16x9&sorting=random&order=desc&page={0}'
    # Thumbnail URLs embed the wallpaper id; compile once for all pages.
    thumb_pattern = re.compile(r'small/th-(\d+)\.jpg')

    for index in range(frompage, topage + 1):
        page_url = hosturl.format(index)
        print("page  %s %s" % (index, page_url))
        page = urllib2.urlopen(page_url)
        try:
            content = page.read()
        finally:
            page.close()

        for wallpaper_id in thumb_pattern.findall(content):
            print("id: %s" % wallpaper_id)
            filename = wallpaper_id + ".jpg"
            outputpath = os.path.join(output_path, filename)
            if os.path.exists(outputpath):
                print("%s exists." % filename)
                continue

            image_url = downloadpath + filename
            print("downloading: %s" % image_url)
            try:
                response = urllib2.urlopen(image_url)
            except urllib2.HTTPError as e:
                print(e)
                continue
            try:
                data = chunk_read(response, report_hook=chunk_report)
            finally:
                response.close()

            # Create the file only after the image was fetched completely:
            # an empty file left behind by a failed request would make the
            # "exists" check above skip this wallpaper on every later run.
            with open(outputpath, "wb") as ofile:
                ofile.write(data)
# Script entry point: process search result pages 1 through 10.
if __name__ == "__main__":
    download(frompage=1, topage=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment