Skip to content

Instantly share code, notes, and snippets.

Last active July 16, 2020 17:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save j-mcc1993/0880415bd92d52eddaac to your computer and use it in GitHub Desktop.
Save j-mcc1993/0880415bd92d52eddaac to your computer and use it in GitHub Desktop.
Reddit Scraper 2.0 with Imgur API
import datetime
import os
import praw
import pprint
import urllib2
from imgurpython import ImgurClient
from sys import argv
# Imgur API credentials.
# NOTE(review): REDACTED is a placeholder left in the published copy of this
# script; it is not defined anywhere in the file, so it must be replaced with
# real credential values before the script can run.
client_id = REDACTED
client_secret = REDACTED
# Reddit API credentials (same placeholder caveat as above).
r_client_id = REDACTED
r_client_secret = REDACTED
# Command-line arguments: <subreddit> <limit> <debug>
#   sub   - name of the subreddit to scrape
#   lim   - number of submissions to request (converted with int() when used)
#   debug - compared against the string '1' to enable album ID printing
# Check the count explicitly so a wrong invocation prints a usage line instead
# of an opaque tuple-unpacking ValueError.
if len(argv) != 4:
    raise SystemExit('usage: %s <subreddit> <limit> <debug: 0|1>' % argv[0])
script, sub, lim, debug = argv
# Downloads image to directory given imgur ID
def download_image(img_id, directory, author):
    """Download a single Imgur image and save it as ``<author>.png``.

    Args:
        img_id: Imgur image ID (string) used to build the download URL.
        directory: Destination directory path.
        author: Name used as the file stem; a later image saved for the same
            author overwrites the earlier file.

    Raises:
        urllib2.URLError: if the image cannot be fetched.
        IOError: if the destination file cannot be written.
    """
    print(img_id + ': downloading...')
    # NOTE(review): the URL template was blank in the recovered source (which
    # made the '%' formatting raise TypeError); this is Imgur's direct-image
    # URL form -- confirm against the original script.
    response = urllib2.urlopen('https://i.imgur.com/%s.png' % (img_id))
    try:
        html = response.read()
    finally:
        # Always release the HTTP connection, even if the read fails.
        response.close()
    # os.path.join yields the same path as plain concatenation when
    # ``directory`` ends with a separator, and also works when it does not.
    path = os.path.join(directory, author + '.png')
    # The context manager guarantees the file is flushed and closed.
    with open(path, 'wb') as group:
        group.write(html)
# Downloads an album of images given an imgur album ID
def download_album(album_id, directory, author):
    """Download every image of an Imgur album into ``directory``.

    Files are named ``<author> <index>.png`` where ``index`` counts the
    album's images from 0 in the order the Imgur client returns them.

    Args:
        album_id: Imgur album ID passed to ``client.get_album_images``.
        directory: Destination directory path.
        author: Name used as the file-name prefix.

    Relies on the module-level ``client`` (Imgur client) and ``debug`` flag.

    Raises:
        urllib2.URLError: if an image cannot be fetched.
        IOError: if a destination file cannot be written.
    """
    # create list of image objects
    image_obj_list = client.get_album_images(album_id)
    # collected image IDs, only used for the optional debug dump below
    id_list = []
    for i, img in enumerate(image_obj_list):
        # NOTE(review): the attribute access was stripped from the recovered
        # source; ``img.id`` is assumed from the "image id's" comment --
        # confirm against the imgurpython image object.
        id_list.append(img.id)
        print(img.id + ': downloading...')
        # NOTE(review): URL template was blank in the recovered source; this
        # is Imgur's direct-image URL form -- confirm against the original.
        response = urllib2.urlopen('https://i.imgur.com/%s.png' % (img.id))
        try:
            html = response.read()
        finally:
            # Always release the HTTP connection, even if the read fails.
            response.close()
        path = os.path.join(directory, author + ' ' + str(i) + '.png')
        # The context manager guarantees the file is flushed and closed.
        with open(path, 'wb') as group:
            group.write(html)
    # Optionally print list of ID's
    if debug == '1':
        pprint.pprint(id_list)
# Initialize Imgur client and PRAW object
client = ImgurClient(client_id, client_secret)
r = praw.Reddit(client_id=r_client_id,
                client_secret=r_client_secret,
                user_agent='RedditScraper2.0 by /u/I_Am_Treebeard')
# Generate list of urls
subreddit = r.subreddit(sub)
# NOTE(review): the listing call was stripped from the recovered source; only
# "= int(lim))" survived. ``hot`` is assumed -- confirm which listing
# (hot/new/top) the original used.
link_list = subreddit.hot(limit=int(lim))
# Make directory for photos
# NOTE(review): the second format argument was stripped from the recovered
# source; today's date is assumed because ``datetime`` is imported and not
# used anywhere else -- confirm.
directory = '/Users/j.mcc3093/Desktop/%s (%s)/' % (sub, datetime.date.today())
if not os.path.exists(directory):
    os.makedirs(directory)
# Main loop to scrape URLs
for link in link_list:
    # Get URL and author name
    # NOTE(review): the right-hand side was stripped from the recovered
    # source; ``link.author.name`` is assumed. PRAW reports a deleted account
    # as ``author is None``, so fall back to a fixed label instead of crashing.
    author = link.author.name if link.author is not None else '[deleted]'
    url = link.url
    # Don't download gifs
    if '.gif' in url:
        continue
    # Check whether PRAW inserted a question mark
    if '?' in url:
        url = url[:url.rfind('?')]
    # Check if link is an album
    if '/a/' in url:
        album_id = url[url.rfind('/a/') + 3:]
        download_album(album_id, directory, author)
    # Check if link is a single image ('imgur' also matches 'i.imgur' links)
    elif 'imgur' in url:
        # Drop whatever file extension the link carries (.jpg, .png, .jpeg...)
        # so that only the bare image ID is passed on.
        img_id = os.path.splitext(url[url.rfind('.com/') + 5:])[0]
        download_image(img_id, directory, author)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment