Last active: July 14, 2019 20:01
Reddit (Subreddit) Data Downloader. This one is configured for images, so the accepted file extensions are image extensions. The Reddit (Subreddit) Data Saver saves each submission's data (title, content) to a particular directory.
""" | |
This is a downloader of data from a particular subreddit. | |
Change the `subredditName` to your desired subreddit and then | |
watch the things get downloaded. You need praw (at the time of | |
writing this, it's praw6) and wget to run this program. | |
Step 1: https://www.reddit.com/prefs/apps <- Make an Application | |
Get the client_id and client_secret | |
Step 2: Enter desired subreddit in subredditName | |
Step 3: Run the program | |
Notes: Writing to a file and the reading it back is just a precaution | |
in the event that the instance gets disconnected midway and you'd | |
still like to download it nonetheless. | |
""" | |
import os | |
import praw | |
import subprocess | |
# Your Subreddit | |
subredditName = "animegirls" | |
os.mkdir(subredditName) | |
client_id = "CLIENT_ID" | |
client_secret = "CLIENT_SECRET" | |
reddit = praw.Reddit(user_agent=subredditName + " Image Extractor", | |
client_id = client_id, | |
client_secret = client_secret) | |
# Change this depending on your extension | |
imageEnds = ('.jpg', '.png', '.svg', '.jpeg', '.tif', '.tiff') | |
count = 0 # local count | |
gCount = 0 # global count | |
uCount = 0 # url count | |
urls = [] | |
for submission in reddit.subreddit(subredditName).top(limit=1000): | |
if submission.url.endswith(imageEnds): | |
urls.append(submission.url) | |
count += 1 | |
uCount += 1 | |
if count > 99: | |
with open(subredditName+".txt", 'a') as Fobj: | |
for url in urls: | |
Fobj.write(str(url) + "\n") | |
urls = [] | |
count = 0 | |
gCount += 1 | |
print (gCount, uCount) | |
# Remaining urls are written to the file | |
with open(subredditName+".txt", 'a') as Fobj: | |
for url in urls: | |
Fobj.write(str(url) + "\n") | |
# Start downloading files | |
urls = [] | |
with open(subredditName+".txt", 'r') as Fobj: | |
for line in Fobj: | |
urls.append(line[:-1]) | |
for i in range(len(urls)): | |
print ("Downloading:\t", i, "/", len(urls)) | |
subprocess.run(['wget', '-P', subredditName+'/', str(urls[i])]) |
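
If wget is not installed, the final download loop can be replaced with a pure-Python equivalent. The sketch below is not part of the original gist; it assumes the URL list file and the subredditName directory created by the script above, and it uses urllib from the standard library (some image hosts may reject the default urllib user agent).

# Pure-Python alternative to the wget loop above (sketch; assumes the
# URL list file and target directory created by the downloader script).
import os
import urllib.request

subredditName = "animegirls"

with open(subredditName + ".txt", 'r') as Fobj:
    urls = [line.strip() for line in Fobj if line.strip()]

for i, url in enumerate(urls):
    print("Downloading:\t", i, "/", len(urls))
    filename = os.path.join(subredditName, url.split("/")[-1])
    try:
        urllib.request.urlretrieve(url, filename)
    except Exception as exc:
        # Skip URLs that fail (deleted posts, hosts that block the
        # default urllib user agent, etc.)
        print("Failed:", url, exc)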
""" | |
This is a saver of data from a particular subreddit. | |
Change the `subredditName` to your desired subreddit and then | |
watch the things get downloaded. You need praw (at the time of | |
writing this, it's praw6) and wget to run this program. | |
Step 1: https://www.reddit.com/prefs/apps <- Make an Application | |
Get the client_id and client_secret | |
Step 2: Enter desired subreddit in subredditName | |
Step 3: Run the program | |
Author: IceCereal | |
""" | |
import os | |
import praw | |
# Your Subreddit | |
subredditName = "animegirls" | |
os.mkdir(subredditName) | |
client_id = "CLIENT_ID GOES HERE" | |
client_secret = "CLIENT_SECRET GOES HERE" | |
reddit = praw.Reddit(user_agent=subredditName + " Data Saver", | |
client_id = client_id, | |
client_secret = client_secret) | |
count = 0 # local count | |
for submission in reddit.subreddit(subredditName).top(limit=1000): | |
content = [] | |
content.append([submission.title, submission.selftext]) | |
with open(subredditName + "/" + str(count) +".txt", 'w') as Fobj: | |
Fobj.write(str(content)) | |
count += 1 | |
print (count) |
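
Since each saved file contains the str() of a Python list, reading the data back takes an ast.literal_eval rather than a plain read. A minimal sketch, not part of the original gist, assuming the directory layout produced by the saver above:

# Read the saved submissions back (sketch; assumes the files written by
# the saver above, each holding the str() of a list like [[title, selftext]]).
import ast
import os

subredditName = "animegirls"

for name in sorted(os.listdir(subredditName)):
    if not name.endswith(".txt"):
        continue
    with open(os.path.join(subredditName, name), 'r') as Fobj:
        content = ast.literal_eval(Fobj.read())
    title, selftext = content[0]
    print(name, "->", title)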