Last active: July 14, 2019 20:01
Reddit (Subreddit) Data Downloader. This one is configured for images, so the accepted file extensions are image extensions. The Reddit (Subreddit) Data Saver saves each submission's data (title, content) to a particular directory.
""" | |
This is a downloader of data from a particular subreddit. | |
Change the `subredditName` to your desired subreddit and then | |
watch the things get downloaded. You need praw (at the time of | |
writing this, it's praw6) and wget to run this program. | |
Step 1: https://www.reddit.com/prefs/apps <- Make an Application | |
Get the client_id and client_secret | |
Step 2: Enter desired subreddit in subredditName | |
Step 3: Run the program | |
Notes: Writing to a file and the reading it back is just a precaution | |
in the event that the instance gets disconnected midway and you'd | |
still like to download it nonetheless. | |
""" | |
import os | |
import praw | |
import subprocess | |
# Your Subreddit | |
subredditName = "animegirls" | |
os.mkdir(subredditName) | |
client_id = "CLIENT_ID" | |
client_secret = "CLIENT_SECRET" | |
reddit = praw.Reddit(user_agent=subredditName + " Image Extractor", | |
client_id = client_id, | |
client_secret = client_secret) | |
# Change this depending on your extension | |
imageEnds = ('.jpg', '.png', '.svg', '.jpeg', '.tif', '.tiff') | |
count = 0 # local count | |
gCount = 0 # global count | |
uCount = 0 # url count | |
urls = [] | |
for submission in reddit.subreddit(subredditName).top(limit=1000): | |
if submission.url.endswith(imageEnds): | |
urls.append(submission.url) | |
count += 1 | |
uCount += 1 | |
if count > 99: | |
with open(subredditName+".txt", 'a') as Fobj: | |
for url in urls: | |
Fobj.write(str(url) + "\n") | |
urls = [] | |
count = 0 | |
gCount += 1 | |
print (gCount, uCount) | |
# Remaining urls are written to the file | |
with open(subredditName+".txt", 'a') as Fobj: | |
for url in urls: | |
Fobj.write(str(url) + "\n") | |
# Start downloading files | |
urls = [] | |
with open(subredditName+".txt", 'r') as Fobj: | |
for line in Fobj: | |
urls.append(line[:-1]) | |
for i in range(len(urls)): | |
print ("Downloading:\t", i, "/", len(urls)) | |
subprocess.run(['wget', '-P', subredditName+'/', str(urls[i])]) |
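
If wget is not installed, the final download loop can be replaced with a pure-Python equivalent. The sketch below is not part of the original gist; it assumes the URL list file and the subredditName directory created by the script above, and it uses urllib from the standard library (some image hosts may reject the default urllib user agent).

# Pure-Python alternative to the wget loop above (sketch; assumes the
# URL list file and target directory created by the downloader script).
import os
import urllib.request

subredditName = "animegirls"

with open(subredditName + ".txt", 'r') as Fobj:
    urls = [line.strip() for line in Fobj if line.strip()]

for i, url in enumerate(urls):
    print("Downloading:\t", i, "/", len(urls))
    filename = os.path.join(subredditName, url.split("/")[-1])
    try:
        urllib.request.urlretrieve(url, filename)
    except Exception as exc:
        # Skip URLs that fail (deleted posts, hosts that block the
        # default urllib user agent, etc.)
        print("Failed:", url, exc)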
""" | |
This is a saver of data from a particular subreddit. | |
Change the `subredditName` to your desired subreddit and then | |
watch the things get downloaded. You need praw (at the time of | |
writing this, it's praw6) and wget to run this program. | |
Step 1: https://www.reddit.com/prefs/apps <- Make an Application | |
Get the client_id and client_secret | |
Step 2: Enter desired subreddit in subredditName | |
Step 3: Run the program | |
Author: IceCereal | |
""" | |
import os | |
import praw | |
# Your Subreddit | |
subredditName = "animegirls" | |
os.mkdir(subredditName) | |
client_id = "CLIENT_ID GOES HERE" | |
client_secret = "CLIENT_SECRET GOES HERE" | |
reddit = praw.Reddit(user_agent=subredditName + " Data Saver", | |
client_id = client_id, | |
client_secret = client_secret) | |
count = 0 # local count | |
for submission in reddit.subreddit(subredditName).top(limit=1000): | |
content = [] | |
content.append([submission.title, submission.selftext]) | |
with open(subredditName + "/" + str(count) +".txt", 'w') as Fobj: | |
Fobj.write(str(content)) | |
count += 1 | |
print (count) |
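
Since each saved file contains the str() of a Python list, reading the data back takes an ast.literal_eval rather than a plain read. A minimal sketch, not part of the original gist, assuming the directory layout produced by the saver above:

# Read the saved submissions back (sketch; assumes the files written by
# the saver above, each holding the str() of a list like [[title, selftext]]).
import ast
import os

subredditName = "animegirls"

for name in sorted(os.listdir(subredditName)):
    if not name.endswith(".txt"):
        continue
    with open(os.path.join(subredditName, name), 'r') as Fobj:
        content = ast.literal_eval(Fobj.read())
    title, selftext = content[0]
    print(name, "->", title)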