Skip to content

Instantly share code, notes, and snippets.

@IceCereal
Last active July 14, 2019 20:01
Show Gist options
  • Save IceCereal/60976aae6288b13173f17818ac702ce2 to your computer and use it in GitHub Desktop.
Save IceCereal/60976aae6288b13173f17818ac702ce2 to your computer and use it in GitHub Desktop.
Reddit (Subreddit) Data Downloader. This one is configured for images, so the file-extension filter matches common image formats. The Reddit (Subreddit) Data Saver is used to save data (Title, Content) to a particular directory.
"""
This is a downloader of data from a particular subreddit.
Change the `subredditName` to your desired subreddit and then
watch the things get downloaded. You need praw (at the time of
writing this, it's praw6) and wget to run this program.
Step 1: https://www.reddit.com/prefs/apps <- Make an Application
Get the client_id and client_secret
Step 2: Enter desired subreddit in subredditName
Step 3: Run the program
Notes: Writing the URLs to a file and then reading them back is a
precaution: if the instance gets disconnected midway, the URLs
collected so far are already on disk and can still be downloaded.
"""
import os
import subprocess

import praw

# Your Subreddit
subredditName = "animegirls"
# exist_ok=True so a re-run after a disconnect (the scenario the
# docstring promises to survive) doesn't crash with FileExistsError.
os.makedirs(subredditName, exist_ok=True)

client_id = "CLIENT_ID"
client_secret = "CLIENT_SECRET"

reddit = praw.Reddit(
    user_agent=subredditName + " Image Extractor",
    client_id=client_id,
    client_secret=client_secret,
)

# Change this depending on your extension
imageEnds = ('.jpg', '.png', '.svg', '.jpeg', '.tif', '.tiff')

count = 0   # URLs buffered since the last flush to disk
gCount = 0  # number of 100-URL batches flushed so far
uCount = 0  # total image URLs seen
urls = []

for submission in reddit.subreddit(subredditName).top(limit=1000):
    # str.endswith accepts a tuple, so one call tests every extension.
    if submission.url.endswith(imageEnds):
        urls.append(submission.url)
        count += 1
        uCount += 1
    # Flush every 100 URLs so a mid-run disconnect loses little work.
    if count > 99:
        with open(subredditName + ".txt", 'a') as Fobj:
            for url in urls:
                Fobj.write(str(url) + "\n")
        urls = []
        count = 0
        gCount += 1
    print(gCount, uCount)

# Remaining urls are written to the file
with open(subredditName + ".txt", 'a') as Fobj:
    for url in urls:
        Fobj.write(str(url) + "\n")

# Start downloading files
urls = []
with open(subredditName + ".txt", 'r') as Fobj:
    for line in Fobj:
        # rstrip("\n") instead of line[:-1]: a final line without a
        # trailing newline would otherwise lose its last character.
        urls.append(line.rstrip("\n"))

for i, url in enumerate(urls):
    print("Downloading:\t", i, "/", len(urls))
    subprocess.run(['wget', '-P', subredditName + '/', str(url)])
"""
This is a saver of data from a particular subreddit.
Change the `subredditName` to your desired subreddit and then
watch the things get downloaded. You need praw (at the time of
writing this, it's praw6) and wget to run this program.
Step 1: https://www.reddit.com/prefs/apps <- Make an Application
Get the client_id and client_secret
Step 2: Enter desired subreddit in subredditName
Step 3: Run the program
Author: IceCereal
"""
import os

import praw

# Your Subreddit
subredditName = "animegirls"
# exist_ok=True so re-running (e.g. after the downloader script already
# created this directory) doesn't crash with FileExistsError.
os.makedirs(subredditName, exist_ok=True)

client_id = "CLIENT_ID GOES HERE"
client_secret = "CLIENT_SECRET GOES HERE"

reddit = praw.Reddit(
    user_agent=subredditName + " Data Saver",
    client_id=client_id,
    client_secret=client_secret,
)

count = 0  # submissions saved so far; also names each output file
for submission in reddit.subreddit(subredditName).top(limit=1000):
    # Build the list literal directly (the original appended a single
    # item to an empty list). The on-disk format is preserved exactly:
    # str of a list containing one [title, selftext] pair.
    content = [[submission.title, submission.selftext]]
    with open(subredditName + "/" + str(count) + ".txt", 'w') as Fobj:
        Fobj.write(str(content))
    count += 1
    print(count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment