Skip to content

Instantly share code, notes, and snippets.

@gavin19
Created July 1, 2019 13:46
Show Gist options
  • Save gavin19/e379303b0aee730dd4e331559bcc207a to your computer and use it in GitHub Desktop.
Save gavin19/e379303b0aee730dd4e331559bcc207a to your computer and use it in GitHub Desktop.
Download images from reddit by keyword/subreddit/time/limit
import re, praw, requests
from time import sleep
def my_robot(query, sub='all', time='all', limit=25):
r = praw.Reddit(...)
# Get `limit` no. of posts based on `sub`, `query` and `time`
posts = [p for p in r.subreddit(sub).search(query, time, limit=limit)]
# Compile query pattern for regex search
pattern = re.compile(r'\b{}\b'.format(query), re.I)
# Set header for requests downlaods
header = {'user-agent': 'python:img-downloader:0.1'}
def _filter(i):
exts = i.url.endswith(('jpg', 'jpeg', 'png', 'gif'))
domains = ("gfycat.com", "another.com")
domain = any(d in i.url for d in domains)
word = pattern.search(i.title)
return exts and not domain and word
# Filter for image links
iposts = filter(_filter, posts)
# Download images
for p in iposts:
fn = p.url.split('/')[-1]
try:
req = requests.get(p.url, headers=header)
with open(fn, 'wb') as f:
f.write(req.content)
# Insert half-second delay between downloads
sleep(0.5)
except Exception as e:
print("Problem downloading:" + fn)
print(e)
if __name__ == "__main__":
    # Example run: up to 20 "dog" images from r/pics posted in the last week.
    # Guarded so importing this module does not trigger network downloads.
    my_robot("dog", "pics", "week", 20)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment