# andylshort/simon_stalenhag.py

## simon_stalenhag.py
import os
import re
import sys
import urllib.error
import urllib.request


# Page that is scraped for image links; each full-size image is fetched from
# <url>bilderbig/<name>.jpg.
url = "http://www.simonstalenhag.se/"
# Captures the base name (without ".jpg") of every linked full-size image whose
# name contains no dot and ends in an underscore followed by four digits.
image_regex = r'href="bilderbig/([^\.]*_\d\d\d\d)\.jpg"'


def _fetch_page(page_url):
    """Download *page_url* and return its body decoded to text.

    The body is decoded with the charset announced in the response headers
    (UTF-8 when none is announced or the announced one is unknown) rather than
    passed through ``str()``, which would produce the ``b'...'`` repr of the
    raw bytes. Undecodable bytes are replaced instead of raising.
    """
    with urllib.request.urlopen(page_url) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        raw = response.read()
    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The server announced a codec Python does not know.
        return raw.decode("utf-8", errors="replace")


def _find_image_names(page_text):
    """Return the sorted, de-duplicated image base names linked in *page_text*."""
    return sorted(set(re.findall(image_regex, page_text)))


def _download_image(image_url, destination):
    """Fetch *image_url* and write it to *destination*.

    Returns True on success and False when the request fails with a
    ``urllib.error.URLError`` (which includes ``HTTPError``). The file is only
    created after the whole body has been read, so a failed transfer does not
    leave an empty file behind.
    """
    try:
        with urllib.request.urlopen(image_url) as response:
            data = response.read()
    except urllib.error.URLError:
        return False

    with open(destination, "wb") as out_file:
        out_file.write(data)
    return True


def main(argv=None):
    """Download every full-size image linked from ``url`` into a folder.

    Args:
        argv: Argument vector in ``sys.argv`` layout (program name followed by
            the destination directory). Defaults to ``sys.argv``.

    Exits with status -1 (after printing a message) when the argument count is
    wrong or the destination is not an existing directory.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 2:
        print("Please specify destination directory as an argument")
        sys.exit(-1)

    dest_folder = args[1]
    if not os.path.isdir(dest_folder):
        print("Destination is not a folder")
        sys.exit(-1)

    for name in _find_image_names(_fetch_page(url)):
        image_url = url + "bilderbig/" + name + ".jpg"
        # os.path.join supplies the separator when dest_folder has no trailing
        # slash; plain concatenation wrote the files next to the folder.
        destination = os.path.join(dest_folder, name + ".jpg")
        print(image_url)

        if not _download_image(image_url, destination):
            print("Could not download " + image_url)


if __name__ == "__main__":
    main()
	# NOTE(review): this section repeats the script that appears earlier in this file; confirm whether the duplicate is intended or should be removed.
import os
import re
import sys
import urllib.error
import urllib.request


# Page that is scraped for image links; each full-size image is fetched from
# <url>bilderbig/<name>.jpg.
url = "http://www.simonstalenhag.se/"
# Captures the base name (without ".jpg") of every linked full-size image whose
# name contains no dot and ends in an underscore followed by four digits.
image_regex = r'href="bilderbig/([^\.]*_\d\d\d\d)\.jpg"'


def _fetch_page(page_url):
    """Download *page_url* and return its body decoded to text.

    The body is decoded with the charset announced in the response headers
    (UTF-8 when none is announced or the announced one is unknown) rather than
    passed through ``str()``, which would produce the ``b'...'`` repr of the
    raw bytes. Undecodable bytes are replaced instead of raising.
    """
    with urllib.request.urlopen(page_url) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        raw = response.read()
    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The server announced a codec Python does not know.
        return raw.decode("utf-8", errors="replace")


def _find_image_names(page_text):
    """Return the sorted, de-duplicated image base names linked in *page_text*."""
    return sorted(set(re.findall(image_regex, page_text)))


def _download_image(image_url, destination):
    """Fetch *image_url* and write it to *destination*.

    Returns True on success and False when the request fails with a
    ``urllib.error.URLError`` (which includes ``HTTPError``). The file is only
    created after the whole body has been read, so a failed transfer does not
    leave an empty file behind.
    """
    try:
        with urllib.request.urlopen(image_url) as response:
            data = response.read()
    except urllib.error.URLError:
        return False

    with open(destination, "wb") as out_file:
        out_file.write(data)
    return True


def main(argv=None):
    """Download every full-size image linked from ``url`` into a folder.

    Args:
        argv: Argument vector in ``sys.argv`` layout (program name followed by
            the destination directory). Defaults to ``sys.argv``.

    Exits with status -1 (after printing a message) when the argument count is
    wrong or the destination is not an existing directory.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 2:
        print("Please specify destination directory as an argument")
        sys.exit(-1)

    dest_folder = args[1]
    if not os.path.isdir(dest_folder):
        print("Destination is not a folder")
        sys.exit(-1)

    for name in _find_image_names(_fetch_page(url)):
        image_url = url + "bilderbig/" + name + ".jpg"
        # os.path.join supplies the separator when dest_folder has no trailing
        # slash; plain concatenation wrote the files next to the folder.
        destination = os.path.join(dest_folder, name + ".jpg")
        print(image_url)

        if not _download_image(image_url, destination):
            print("Could not download " + image_url)


if __name__ == "__main__":
    main()