Skip to content

Instantly share code, notes, and snippets.

@zzh8829
Created June 29, 2014 01:57
Show Gist options
  • Save zzh8829/35334f0b78d678b0ec81 to your computer and use it in GitHub Desktop.
Save zzh8829/35334f0b78d678b0ec81 to your computer and use it in GitHub Desktop.
Reddit EarthPorn automatic downloader
import feedparser
import re
from bs4 import BeautifulSoup
import pprint
import os
import urllib.request
import imghdr
import sys
from time import localtime, strftime
from PIL import Image
import io
# ---------------------------------------------------------------------------
# Reddit EarthPorn automatic downloader.
#
# Reads the subreddit RSS feed, and for every entry whose advertised
# resolution is large enough and whose thumbnail id is not already present in
# <root>/images/, downloads the linked picture.  One line per entry is
# appended to <root>/log.txt.
# ---------------------------------------------------------------------------

# Working directory: pictures are written to <root>/images/ and the run log
# is appended to <root>/log.txt.
root = "F:/project/python/autobg/"
# Browser-like User-Agent header sent with every image request.
user_agent = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36"
# RSS feed that is scanned for submissions.
earthporn = "http://www.reddit.com/r/EarthPorn/.rss"

# Size filter: an image is kept only when its pixel area is at least
# _MIN_AREA_RATIO times the area of a 1366x768 screen.
_SCREEN_WIDTH = 1366.0
_SCREEN_HEIGHT = 768
_MIN_AREA_RATIO = 0.8
# Seconds to wait on the network before giving up on a single image, so one
# stalled host cannot hang the whole run.
_DOWNLOAD_TIMEOUT = 30
# "<digits> x <digits>" (separator: spaces, X, x or the multiplication sign)
# as searched for in the entry title.  Raw string: "\d" is not a valid escape
# in a normal string literal.
_SIZE_PATTERN = re.compile(r"(\d+)[ Xx×]+(\d+)")


def _parse_size(title):
    """Return ``[width, height]`` taken from the first size found in *title*.

    Raises:
        ValueError: if *title* contains no ``<digits> x <digits>`` pair.
    """
    match = _SIZE_PATTERN.search(title)
    if match is None:
        raise ValueError("No WIDTHxHEIGHT resolution found in title")
    return [int(match.group(1)), int(match.group(2))]


def _process_entry(entry):
    """Download the picture linked from one feed *entry* unless it is skipped.

    Prints one "[Ignore]" or "[Success]" line to stdout.  Any failure
    (missing link or thumbnail, unparsable title, network error, data that
    PIL cannot identify) propagates to the caller as an exception.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    desc = BeautifulSoup(entry["description"], "html.parser")
    url = desc.find(text="[link]").parent["href"]
    if "/imgur.com/" in url:
        # Turn an imgur page link into a direct image link.
        url = url.replace("/imgur.com/", "/i.imgur.com/") + ".jpg"

    # The 16 characters that precede the last 4 characters of the thumbnail
    # URL are used as the local file stem.
    # NOTE(review): presumably this is the thumbnail's base name in front of
    # a 4-character extension such as ".jpg" -- confirm against the feed.
    thumbnail = entry.media_thumbnail[0]["url"][-20:-4]
    if thumbnail in images:
        print("[Ignore]: Already exists")
        return

    size = _parse_size(entry["title"])
    if size[0] * size[1] / _SCREEN_WIDTH / _SCREEN_HEIGHT < _MIN_AREA_RATIO:
        print("[Ignore]: Image too small " + str(size))
        return

    request = urllib.request.Request(url, headers={'User-Agent': user_agent})
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(request, timeout=_DOWNLOAD_TIMEOUT) as response:
        image = response.read()

    # Let PIL sniff the real format; the ".jpg" guessed above may be wrong.
    extension = Image.open(io.BytesIO(image)).format
    filename = thumbnail + "." + extension.lower()
    with open(root + "images/" + filename, "wb") as f:
        f.write(image)
    # Remember the new file so a repeated entry in this feed is not fetched
    # a second time.
    images.append(thumbnail)
    print("[Success]: " + filename + " " + str(size))


rss = feedparser.parse(earthporn)

# Make sure the target directory exists before listing / writing into it.
os.makedirs(root + "images", exist_ok=True)
# File stems (text before the first ".") of everything already downloaded.
images = [path.split(".")[0] for path in os.listdir(root + "images")]

# Send every print() of this run to the log file.  The file is closed and the
# previous stdout restored when the run ends, even on an unexpected error.
# errors="replace" keeps a non-encodable character in a message from
# aborting the run.
_previous_stdout = sys.stdout
with open(root + "log.txt", "a", errors="replace") as _log_file:
    sys.stdout = _log_file
    try:
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()))
        for entry in rss["entries"]:
            # Per-entry boundary: a bad entry is logged and the run goes on.
            try:
                _process_entry(entry)
            except Exception as e:
                print("[Error]: " + str(e))
    finally:
        sys.stdout = _previous_stdout
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment