Skip to content

Instantly share code, notes, and snippets.

@jeffreyroberts
Created November 11, 2018 19:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffreyroberts/0eed5c723be6deb0e8b01e8d2569b5cf to your computer and use it in GitHub Desktop.
Save jeffreyroberts/0eed5c723be6deb0e8b01e8d2569b5cf to your computer and use it in GitHub Desktop.
WallPaper Scraper
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
from urllib.request import urlopen
import urllib.request
import ssl
def simple_get(url, timeout=30):
    """
    Fetch `url` with an HTTP GET request and return the raw response body.

    The body is returned only when `is_good_response` accepts the response;
    otherwise None is returned. Any `RequestException` raised while making
    the request or reading the body is reported through `log_error` and
    also results in None.

    `timeout` is passed to requests as the request timeout in seconds, so
    a stalled server cannot block the caller forever. Pass None to wait
    indefinitely.
    """
    try:
        # closing() guarantees the streamed connection is released even
        # when the response is rejected or reading the body fails.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.

    A response qualifies when its status code is 200 and its Content-Type
    header contains 'html' (compared case-insensitively). A response that
    carries no Content-Type header is rejected rather than raising
    KeyError.
    """
    content_type = resp.headers.get('Content-Type')
    if content_type is None:
        return False
    return resp.status_code == 200 and 'html' in content_type.lower()
def log_error(e):
    """
    Report an error by printing it to standard output.

    `e` may be a message string or any other object; its string form is
    what gets printed. Swap the body for real logging if needed.
    """
    message = str(e)
    print(message)
# Crawl listing pages 0-69 of the "Fractal Wallpapers" sub-category on
# wall.alphacoders.com. For every thumbnail box on a listing page, follow the
# box's first link to the wallpaper's detail page and save the full-size image
# that page links to.
# NOTE(review): only the first link of each box is followed; this assumes the
# loop over a box's anchors was meant to stop after its first entry - confirm.

# SECURITY: this context performs no certificate verification, so the image
# host's identity is not checked. It is kept to preserve existing behavior;
# switch to ssl.create_default_context() if the host presents a valid
# certificate. Built once because it does not change between downloads.
context = ssl._create_unverified_context()

for x in range(70):
    raw_html = simple_get('https://wall.alphacoders.com/by_sub_category.php?id=170808&name=Fractal+Wallpapers&page=' + str(x))
    if raw_html is None:
        # simple_get returns None for failed requests and non-HTML responses.
        continue
    html = BeautifulSoup(raw_html, 'html.parser')
    for d in html.find_all('div', class_="boxgrid"):
        a = d.find('a')
        if a is None or not a.get('href'):
            continue
        wallpaper_raw = simple_get('https://wall.alphacoders.com/' + a['href'])
        if wallpaper_raw is None:
            continue
        wallpaper_html = BeautifulSoup(wallpaper_raw, 'html.parser')
        wallpaper_div = wallpaper_html.find('div', {'class': 'img-container-desktop'})
        if wallpaper_div is None:
            continue
        wallpaper_anchor = wallpaper_div.find('a')
        if wallpaper_anchor is None or not wallpaper_anchor.get('href'):
            continue
        wallpaper_url = wallpaper_anchor['href']
        # Use the last path segment as the file name instead of a fixed
        # character offset, which only works for one exact URL prefix length.
        wallpaper_filename = wallpaper_url.rsplit('/', 1)[-1]
        if not wallpaper_filename:
            continue
        print(wallpaper_filename)
        # The image request is sent with a desktop-browser User-Agent header.
        req = urllib.request.Request(wallpaper_url, data=None, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'})
        try:
            # Read the whole image before touching the disk so a failed
            # download does not leave an empty file behind.
            with urlopen(req, context=context) as res:
                image_bytes = res.read()
        except OSError as e:
            # URLError/HTTPError are OSError subclasses; skip this image and
            # keep crawling instead of aborting the whole run.
            log_error('Error during requests to {0} : {1}'.format(wallpaper_url, str(e)))
            continue
        with open('/Users/jlroberts/Projects/Python/Wallpapers/files/' + wallpaper_filename, 'wb') as out_file:
            out_file.write(image_bytes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment