cnDelbert/SimpleDesktops.py

## SimpleDesktops.py
# -*- coding: utf-8 -*-
__author__ = 'Delbert'
# You can pass 2 parameters to init_config():
# `path` is the directory in which you would like to save the images.
# `image_id` is the image ID to start downloading from.
from bs4 import BeautifulSoup
import requests
import os


def init_config(path='', image_id=0):
    """Build the download configuration.

    :param path: Directory to save images in. A relative path is anchored
        at the current directory with a leading ``./``; an absolute path is
        kept as given. A trailing ``/`` is appended when missing. An empty
        value selects the default ``./simple/``.
    :param image_id: Image ID to start downloading from; returned unchanged.
    :return: Tuple ``(dir_to_save_images, image_id_to_start)``.
    """
    if not path:
        return './simple/', image_id

    # Only relative paths get the "./" anchor. Prepending it blindly would
    # turn "/tmp/x" into ".//tmp/x" and save the images somewhere unexpected.
    if not path.startswith('./') and not os.path.isabs(path):
        path = './' + path
    if not path.endswith('/'):
        path += '/'
    return path, image_id


def download_image(path, image_id=0):
    """Download every desktop from ``image_id`` down to 1 into ``path``.

    Each ID is requested from the site's download endpoint. A successful
    response is saved under the last ``/``-separated segment of the URL the
    response reports, and the ID/URL pair is appended to ``down.log``. A
    non-OK status, or a request that times out twice, is appended to
    ``error.log`` and that ID is skipped.

    :param path: Target directory. File names are appended to it directly,
        so it must end with a path separator.
    :param image_id: First ID to fetch. IDs are walked downwards; a value
        of 0 or less downloads nothing and only creates the log files.
    """
    base_url = "http://simpledesktops.com/download/?desktop="
    request_timeout = 10  # seconds, applied to every attempt
    down_log_path = path + 'down.log'
    error_log_path = path + 'error.log'
    # The header row belongs only at the top of a freshly created log.
    needs_header = not os.path.isfile(down_log_path)

    # Append mode creates missing files, and ``with`` closes both logs even
    # when a download raises midway through the loop.
    with open(down_log_path, 'at', encoding='utf-8') as down_log_file, \
            open(error_log_path, 'at', encoding='utf-8') as error_log_file:
        if needs_header:
            down_log_file.write('Image ID\t Image URLs\n')

        while image_id > 0:
            full_url = base_url + str(image_id)
            try:
                resp = requests.get(full_url, timeout=request_timeout)
            except requests.exceptions.Timeout:
                # requests raises its own Timeout class, not the builtin
                # TimeoutError, and only when a timeout was requested.
                print("A timeout retry...")
                try:
                    resp = requests.get(full_url, timeout=request_timeout)
                except requests.exceptions.Timeout:
                    error_log_file.write(full_url + ' Error timeout\n')
                    print(full_url + ' Error timeout')
                    image_id -= 1
                    continue

            if resp.status_code != requests.codes.ok:
                error_log_file.write(full_url + ' Error ' + str(resp.status_code) + '\n')
                print(full_url + ' Error ' + str(resp.status_code))
                image_id -= 1
                continue

            # resp.url is the address the response actually came from; its
            # last segment is used as the file name.
            full_url = resp.url
            image_name = full_url.split('/')[-1]
            print("Downloading {file}...".format(file=image_name))
            with open(path + image_name, 'wb') as image:
                image.write(resp.content)
            down_log_file.write('{id}\t{url}\n'.format(id=image_id, url=full_url))
            image_id -= 1

    print('Download Complete.')


def check_directory(path):
    """Ensure ``path`` exists, creating it (and missing parents) if needed.

    :param path: Directory to create when nothing exists at that location.
    """
    if not os.path.exists(path):
        # makedirs copes with nested targets such as "a/b/", which mkdir
        # rejects; exist_ok avoids a crash if the directory appears between
        # the existence check and the call.
        os.makedirs(path, exist_ok=True)


def parse_detail_page(path, url):
    """Read the download ID from a detail page and start downloading.

    The page is fetched with browser-like headers. The first anchor whose
    ``href`` starts with ``/download/?desktop=`` supplies the image ID that
    is handed to ``download_image``. When no such anchor exists the ID
    stays 0.

    :param path: Directory passed through to ``download_image``.
    :param url: Site-relative URL of the detail page; it is appended to
        ``http://simpledesktops.com``.
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
        "Cookie": "__utma=36407714.1844768811.1422784067.1422784067.1422784067.1; "
                  "__utmb=36407714.1.10.1422784067; __utmc=36407714; "
                  "__utmz=36407714.1422784067.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
        "Host": "simpledesktops.com",
        "Referer": "http://simpledesktops.com/browse/",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) Gecko/20100101 Firefox/35.0"
    }
    full_url = "http://simpledesktops.com" + url
    # Without a timeout a stalled server would block this call forever.
    resp = requests.get(full_url, headers=headers, timeout=10)
    if resp.status_code != requests.codes.ok:
        # Same "<url> Error <status>" layout that download_image prints.
        print(full_url + ' Error ' + str(resp.status_code))
        return

    download_prefix = '/download/?desktop='
    image_id = 0
    # Naming the parser keeps results identical across installations.
    for anchor in BeautifulSoup(resp.text, 'html.parser').find_all('a'):
        href = anchor.get('href')
        # Anchors without an href attribute yield None; skip them.
        if href and href.startswith(download_prefix):
            # Slice the prefix off: str.strip() treats its argument as a set
            # of characters, not as a prefix.
            image_id = int(href[len(download_prefix):])
            break
    download_image(path, image_id)


def parse_homepage(path, image_id):
    """Start a download run, discovering the newest desktop when needed.

    The target directory is created first. A truthy ``image_id`` is
    downloaded from directly. Otherwise the browse page is fetched, and the
    first anchor inside the first ``div`` with class ``desktop`` is followed
    to its detail page.

    :param path: Directory the images are saved in.
    :param image_id: ID to start from, or a falsy value to look it up.
    """
    check_directory(path)
    if image_id:
        download_image(path, image_id)
        return

    homepage_url = 'http://simpledesktops.com/browse/'
    # Without a timeout a stalled server would block this call forever.
    resp = requests.get(homepage_url, timeout=10)
    if resp.status_code != requests.codes.ok:
        print(homepage_url + ' Error ' + str(resp.status_code))
        return

    homepage_code = BeautifulSoup(resp.text, 'html.parser')
    latest_image_div = homepage_code.find('div', {'class': 'desktop'})
    # Search inside the tag directly; re-parsing its text is unnecessary.
    details_link = None
    if latest_image_div is not None:
        details_link = latest_image_div.find('a')
    if details_link is None or not details_link.get('href'):
        # Page layout changed or is empty: report it instead of crashing
        # with an IndexError or KeyError.
        print(homepage_url + ' Error no desktop link found')
        return
    parse_detail_page(path, details_link['href'])


def main():
    """Run the downloader with the default configuration."""
    # init_config() accepts two optional arguments: the directory to save
    # images in and the image ID to start from.
    config = init_config()
    parse_homepage(*config)


# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
	# -*- coding: utf-8 -*-
	__author__ = 'Delbert'
	# You can pass 2 parameters to init_config():
	# `path` is the directory in which you would like to save the images.
	# `image_id` is the image ID to start downloading from.
	from bs4 import BeautifulSoup
	import requests
	import os


	def init_config(path='', image_id=0):
	    """Build the download configuration.

	    :param path: Directory to save images in. A relative path is anchored
	        at the current directory with a leading ``./``; an absolute path is
	        kept as given. A trailing ``/`` is appended when missing. An empty
	        value selects the default ``./simple/``.
	    :param image_id: Image ID to start downloading from; returned unchanged.
	    :return: Tuple ``(dir_to_save_images, image_id_to_start)``.
	    """
	    if not path:
	        return './simple/', image_id

	    # Only relative paths get the "./" anchor. Prepending it blindly would
	    # turn "/tmp/x" into ".//tmp/x" and save the images somewhere unexpected.
	    if not path.startswith('./') and not os.path.isabs(path):
	        path = './' + path
	    if not path.endswith('/'):
	        path += '/'
	    return path, image_id


	def download_image(path, image_id=0):
	    """Download every desktop from ``image_id`` down to 1 into ``path``.

	    Each ID is requested from the site's download endpoint. A successful
	    response is saved under the last ``/``-separated segment of the URL the
	    response reports, and the ID/URL pair is appended to ``down.log``. A
	    non-OK status, or a request that times out twice, is appended to
	    ``error.log`` and that ID is skipped.

	    :param path: Target directory. File names are appended to it directly,
	        so it must end with a path separator.
	    :param image_id: First ID to fetch. IDs are walked downwards; a value
	        of 0 or less downloads nothing and only creates the log files.
	    """
	    base_url = "http://simpledesktops.com/download/?desktop="
	    request_timeout = 10  # seconds, applied to every attempt
	    down_log_path = path + 'down.log'
	    error_log_path = path + 'error.log'
	    # The header row belongs only at the top of a freshly created log.
	    needs_header = not os.path.isfile(down_log_path)

	    # Append mode creates missing files, and ``with`` closes both logs even
	    # when a download raises midway through the loop.
	    with open(down_log_path, 'at', encoding='utf-8') as down_log_file, \
	            open(error_log_path, 'at', encoding='utf-8') as error_log_file:
	        if needs_header:
	            down_log_file.write('Image ID\t Image URLs\n')

	        while image_id > 0:
	            full_url = base_url + str(image_id)
	            try:
	                resp = requests.get(full_url, timeout=request_timeout)
	            except requests.exceptions.Timeout:
	                # requests raises its own Timeout class, not the builtin
	                # TimeoutError, and only when a timeout was requested.
	                print("A timeout retry...")
	                try:
	                    resp = requests.get(full_url, timeout=request_timeout)
	                except requests.exceptions.Timeout:
	                    error_log_file.write(full_url + ' Error timeout\n')
	                    print(full_url + ' Error timeout')
	                    image_id -= 1
	                    continue

	            if resp.status_code != requests.codes.ok:
	                error_log_file.write(full_url + ' Error ' + str(resp.status_code) + '\n')
	                print(full_url + ' Error ' + str(resp.status_code))
	                image_id -= 1
	                continue

	            # resp.url is the address the response actually came from; its
	            # last segment is used as the file name.
	            full_url = resp.url
	            image_name = full_url.split('/')[-1]
	            print("Downloading {file}...".format(file=image_name))
	            with open(path + image_name, 'wb') as image:
	                image.write(resp.content)
	            down_log_file.write('{id}\t{url}\n'.format(id=image_id, url=full_url))
	            image_id -= 1

	    print('Download Complete.')


	def check_directory(path):
	    """Ensure ``path`` exists, creating it (and missing parents) if needed.

	    :param path: Directory to create when nothing exists at that location.
	    """
	    if not os.path.exists(path):
	        # makedirs copes with nested targets such as "a/b/", which mkdir
	        # rejects; exist_ok avoids a crash if the directory appears between
	        # the existence check and the call.
	        os.makedirs(path, exist_ok=True)


	def parse_detail_page(path, url):
	    """Read the download ID from a detail page and start downloading.

	    The page is fetched with browser-like headers. The first anchor whose
	    ``href`` starts with ``/download/?desktop=`` supplies the image ID that
	    is handed to ``download_image``. When no such anchor exists the ID
	    stays 0.

	    :param path: Directory passed through to ``download_image``.
	    :param url: Site-relative URL of the detail page; it is appended to
	        ``http://simpledesktops.com``.
	    """
	    headers = {
	        # "*/*" and the plain "|" separators below were damaged in this
	        # copy ("/;q=0.8", "\|") and are restored here.
	        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
	        "Accept-Encoding": "gzip, deflate",
	        "Accept-Language": "en-US,en;q=0.5",
	        "Connection": "keep-alive",
	        "Cookie": "__utma=36407714.1844768811.1422784067.1422784067.1422784067.1; "
	                  "__utmb=36407714.1.10.1422784067; __utmc=36407714; "
	                  "__utmz=36407714.1422784067.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
	        "Host": "simpledesktops.com",
	        "Referer": "http://simpledesktops.com/browse/",
	        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) Gecko/20100101 Firefox/35.0"
	    }
	    full_url = "http://simpledesktops.com" + url
	    # Without a timeout a stalled server would block this call forever.
	    resp = requests.get(full_url, headers=headers, timeout=10)
	    if resp.status_code != requests.codes.ok:
	        # Same "<url> Error <status>" layout that download_image prints.
	        print(full_url + ' Error ' + str(resp.status_code))
	        return

	    download_prefix = '/download/?desktop='
	    image_id = 0
	    # Naming the parser keeps results identical across installations.
	    for anchor in BeautifulSoup(resp.text, 'html.parser').find_all('a'):
	        href = anchor.get('href')
	        # Anchors without an href attribute yield None; skip them.
	        if href and href.startswith(download_prefix):
	            # Slice the prefix off: str.strip() treats its argument as a set
	            # of characters, not as a prefix.
	            image_id = int(href[len(download_prefix):])
	            break
	    download_image(path, image_id)


	def parse_homepage(path, image_id):
	    """Start a download run, discovering the newest desktop when needed.

	    The target directory is created first. A truthy ``image_id`` is
	    downloaded from directly. Otherwise the browse page is fetched, and the
	    first anchor inside the first ``div`` with class ``desktop`` is followed
	    to its detail page.

	    :param path: Directory the images are saved in.
	    :param image_id: ID to start from, or a falsy value to look it up.
	    """
	    check_directory(path)
	    if image_id:
	        download_image(path, image_id)
	        return

	    homepage_url = 'http://simpledesktops.com/browse/'
	    # Without a timeout a stalled server would block this call forever.
	    resp = requests.get(homepage_url, timeout=10)
	    if resp.status_code != requests.codes.ok:
	        print(homepage_url + ' Error ' + str(resp.status_code))
	        return

	    homepage_code = BeautifulSoup(resp.text, 'html.parser')
	    latest_image_div = homepage_code.find('div', {'class': 'desktop'})
	    # Search inside the tag directly; re-parsing its text is unnecessary.
	    details_link = None
	    if latest_image_div is not None:
	        details_link = latest_image_div.find('a')
	    if details_link is None or not details_link.get('href'):
	        # Page layout changed or is empty: report it instead of crashing
	        # with an IndexError or KeyError.
	        print(homepage_url + ' Error no desktop link found')
	        return
	    parse_detail_page(path, details_link['href'])


	def main():
	    """Run the downloader with the default configuration."""
	    # init_config() accepts two optional arguments: the directory to save
	    # images in and the image ID to start from.
	    dir_to_save_images, image_id_to_start = init_config()
	    parse_homepage(dir_to_save_images, image_id_to_start)


	# Run the downloader only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == '__main__':
	    main()