jasonwhite/desktopogra.py

## desktopogra.py
#!/usr/bin/python3
"""
Desktopography provides no way of downloading all of their wallpapers at once.
This script will scrape the Desktopography website and download all of the
wallpapers for the specified resolutions and years. Wallpapers that have already
been downloaded are skipped.

Usage: ./desktopogra.py [DIR]

The wallpapers are saved in the following format:

    DIR/year/wallpaper_name_widthxheight.jpg

where DIR is specified on the command line. By default, DIR is the current
directory.
"""

import urllib.request
import re
import os
import sys

# Root of the Desktopography site; every page and download URL requested by
# this script is built by appending a path to it.
base_url = "http://desktopography.net"

def wallpapers(year=2014):
    """
    Lists all the wallpapers for the given year.

    Fetches the exhibition index page for ``year`` and yields one
    ``Wallpaper`` for every link on it that points below
    ``/exhibition/<year>/``. This is a generator, so the page is only
    requested once iteration starts.

    Args:
        year: Exhibition year. It is used both to build the page URL and in
            the link pattern that is searched for.

    Yields:
        Wallpaper: One object per matching link, in the order the links
        appear in the page.

    Raises:
        urllib.error.URLError: If the index page cannot be fetched.
    """
    url = base_url + "/exhibition/%s" % year

    # Read the page inside a with-block so the HTTP response is closed right
    # away instead of lingering until it is garbage collected.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('utf-8')

    # The year is escaped so that it is always matched literally.
    pattern = r'href="/exhibition/%s/(.+?)"' % re.escape(str(year))
    for m in re.finditer(pattern, html):
        yield Wallpaper(year, m.group(1))

class Wallpaper():
    """A single wallpaper of a Desktopography exhibition.

    An instance only stores the exhibition ``year`` and the wallpaper's
    ``name`` as it appears in the site's URLs; pages are requested when
    ``resolutions()`` is iterated or ``download()`` is called.
    """

    def __init__(self, year, name):
        """Stores the exhibition year and the URL name of the wallpaper."""
        self.year = year
        self.name = name

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.year, self.name)

    def _parse_resolutions(self, html):
        """Yields a (width, height) tuple of ints for every download link of
        this wallpaper found in the given page source."""
        # Year and name are escaped so that characters such as '.' or '+' in
        # a wallpaper name are matched literally. The pattern is a raw string
        # so that '\d' reaches the regex engine instead of being parsed as a
        # (invalid) string escape.
        pattern = r'href="/exhibition/%s/%s/(\d+)x(\d+)/download"' % (
                re.escape(str(self.year)), re.escape(str(self.name)))
        for m in re.finditer(pattern, html):
            yield (int(m.group(1)), int(m.group(2)))

    def resolutions(self):
        """Lists all the possible resolutions for this wallpaper.

        Fetches the wallpaper's page and yields one (width, height) tuple of
        ints per download link found on it. This is a generator, so the page
        is only requested once iteration starts.

        Raises:
            urllib.error.URLError: If the page cannot be fetched.
        """
        url = base_url + "/exhibition/%s/%s" % (self.year, self.name)
        # Close the HTTP response as soon as the page has been read.
        with urllib.request.urlopen(url) as response:
            html = response.read().decode('utf-8')
        yield from self._parse_resolutions(html)

    def download(self, resolution, path):
        """Downloads the specified resolution for this wallpaper and saves it to
        the specified path.

        Args:
            resolution: Sequence whose first two items are the integer width
                and height of the image to fetch.
            path: Destination file name.

        Raises:
            urllib.error.URLError: If the image cannot be fetched.
            OSError: If the file cannot be written or moved into place.
        """
        url = base_url + "/exhibition/%s/%s/%dx%d/download" % (
                self.year, self.name, resolution[0], resolution[1])

        # Download next to the destination and rename afterwards, so that an
        # interrupted transfer never leaves a truncated file under the final
        # name (which a later run would mistake for a finished download).
        target = os.fspath(path)
        partial = target + (b".part" if isinstance(target, bytes) else ".part")
        try:
            urllib.request.urlretrieve(url, filename=partial)
            os.replace(partial, target)
        except BaseException:
            # Best-effort cleanup (also on Ctrl-C); the original error is
            # what matters to the caller, so it is always re-raised.
            try:
                os.remove(partial)
            except OSError:
                pass
            raise

if __name__ == "__main__":
    # Destination root: first command-line argument, or the current directory.
    directory = sys.argv[1] if len(sys.argv) > 1 else '.'

    # Preferred resolutions. Set to None to download ALL resolutions.
    resolutions = [(2560,1440), (1920,1200), (1920,1080), (1680,1050), (1600,900)]

    # Years to download
    years = [2014]

    for year in years:
        # Create the directory if necessary. exist_ok only tolerates a
        # directory that is already there; any other failure (for example
        # missing permissions) is reported here instead of being silently
        # ignored and resurfacing later as a confusing download error.
        year_dir = os.path.join(directory, str(year))
        os.makedirs(year_dir, exist_ok=True)

        print("Downloading Desktopography %s:" % year)
        for wp in wallpapers(year):
            for w,h in wp.resolutions():
                # Filter out resolutions that were not asked for. An empty
                # or None filter means "take everything".
                if resolutions and (w,h) not in resolutions:
                    continue

                path = os.path.join(year_dir, "%s_%dx%d.jpg" % (wp.name, w, h))

                # Files that are already on disk are not fetched again.
                if os.path.isfile(path):
                    print("  Skipping    '%s' at %dx%d" % (wp.name, w, h))
                    continue

                print("  Downloading '%s' at %dx%d" % (wp.name, w, h))
                wp.download((w,h), path)
	#!/usr/bin/python3
	"""
	Desktopography provides no way of downloading all of their wallpapers at once.
	This script will scrape the Desktopography website and download all of the
	wallpapers for the specified resolutions and years. Wallpapers that have already
	been downloaded are skipped.

	Usage: ./desktopogra.py [DIR]

	The wallpapers are saved in the following format:

	DIR/year/wallpaper_name_widthxheight.jpg

	where DIR is specified on the command line. By default, DIR is the current
	directory.
	"""

	import urllib.request
	import re
	import os
	import sys

	# Root of the Desktopography site; every page and download URL requested by
	# this script is built by appending a path to it.
	base_url = "http://desktopography.net"

	def wallpapers(year=2014):
		"""
		Lists all the wallpapers for the given year.

		Fetches the exhibition index page for ``year`` and yields one
		``Wallpaper`` for every link on it that points below
		``/exhibition/<year>/``. This is a generator, so the page is only
		requested once iteration starts.

		Args:
			year: Exhibition year. It is used both to build the page URL and
				in the link pattern that is searched for.

		Yields:
			Wallpaper: One object per matching link, in the order the links
			appear in the page.

		Raises:
			urllib.error.URLError: If the index page cannot be fetched.
		"""
		url = base_url + "/exhibition/%s" % year

		# Read the page inside a with-block so the HTTP response is closed
		# right away instead of lingering until it is garbage collected.
		with urllib.request.urlopen(url) as response:
			html = response.read().decode('utf-8')

		# The year is escaped so that it is always matched literally.
		pattern = r'href="/exhibition/%s/(.+?)"' % re.escape(str(year))
		for m in re.finditer(pattern, html):
			yield Wallpaper(year, m.group(1))

	class Wallpaper():
		"""A single wallpaper of a Desktopography exhibition.

		An instance only stores the exhibition ``year`` and the wallpaper's
		``name`` as it appears in the site's URLs; pages are requested when
		``resolutions()`` is iterated or ``download()`` is called.
		"""

		def __init__(self, year, name):
			"""Stores the exhibition year and the URL name of the wallpaper."""
			self.year = year
			self.name = name

		def __repr__(self):
			return "%s(%r, %r)" % (type(self).__name__, self.year, self.name)

		def _parse_resolutions(self, html):
			"""Yields a (width, height) tuple of ints for every download link
			of this wallpaper found in the given page source."""
			# Year and name are escaped so that characters such as '.' or '+'
			# in a wallpaper name are matched literally. The pattern is a raw
			# string so that '\d' reaches the regex engine instead of being
			# parsed as a (invalid) string escape.
			pattern = r'href="/exhibition/%s/%s/(\d+)x(\d+)/download"' % (
				re.escape(str(self.year)), re.escape(str(self.name)))
			for m in re.finditer(pattern, html):
				yield (int(m.group(1)), int(m.group(2)))

		def resolutions(self):
			"""Lists all the possible resolutions for this wallpaper.

			Fetches the wallpaper's page and yields one (width, height) tuple
			of ints per download link found on it. This is a generator, so
			the page is only requested once iteration starts.

			Raises:
				urllib.error.URLError: If the page cannot be fetched.
			"""
			url = base_url + "/exhibition/%s/%s" % (self.year, self.name)
			# Close the HTTP response as soon as the page has been read.
			with urllib.request.urlopen(url) as response:
				html = response.read().decode('utf-8')
			yield from self._parse_resolutions(html)

		def download(self, resolution, path):
			"""Downloads the specified resolution for this wallpaper and saves it to
			the specified path.

			Args:
				resolution: Sequence whose first two items are the integer
					width and height of the image to fetch.
				path: Destination file name.

			Raises:
				urllib.error.URLError: If the image cannot be fetched.
				OSError: If the file cannot be written or moved into place.
			"""
			url = base_url + "/exhibition/%s/%s/%dx%d/download" % (
				self.year, self.name, resolution[0], resolution[1])

			# Download next to the destination and rename afterwards, so that
			# an interrupted transfer never leaves a truncated file under the
			# final name (which a later run would mistake for a finished
			# download).
			target = os.fspath(path)
			partial = target + (b".part" if isinstance(target, bytes) else ".part")
			try:
				urllib.request.urlretrieve(url, filename=partial)
				os.replace(partial, target)
			except BaseException:
				# Best-effort cleanup (also on Ctrl-C); the original error is
				# what matters to the caller, so it is always re-raised.
				try:
					os.remove(partial)
				except OSError:
					pass
				raise

	if __name__ == "__main__":
		# Destination root: first command-line argument, or the current
		# directory.
		directory = sys.argv[1] if len(sys.argv) > 1 else '.'

		# Preferred resolutions. Set to None to download ALL resolutions.
		resolutions = [(2560,1440), (1920,1200), (1920,1080), (1680,1050), (1600,900)]

		# Years to download
		years = [2014]

		for year in years:
			# Create the directory if necessary. exist_ok only tolerates a
			# directory that is already there; any other failure (for example
			# missing permissions) is reported here instead of being silently
			# ignored and resurfacing later as a confusing download error.
			year_dir = os.path.join(directory, str(year))
			os.makedirs(year_dir, exist_ok=True)

			print("Downloading Desktopography %s:" % year)
			for wp in wallpapers(year):
				for w,h in wp.resolutions():
					# Filter out resolutions that were not asked for. An
					# empty or None filter means "take everything".
					if resolutions and (w,h) not in resolutions:
						continue

					path = os.path.join(year_dir, "%s_%dx%d.jpg" % (wp.name, w, h))

					# Files that are already on disk are not fetched again.
					if os.path.isfile(path):
						print(" Skipping '%s' at %dx%d" % (wp.name, w, h))
						continue

					print(" Downloading '%s' at %dx%d" % (wp.name, w, h))
					wp.download((w,h), path)