Created
November 7, 2022 22:02
-
-
Save JianLoong/e8a92c7352e3b3276e17a060231e4432 to your computer and use it in GitHub Desktop.
A simple Reddit crawler to obtain post titles and top-level comments.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from concurrent.futures import as_completed | |
import requests | |
from requests_futures.sessions import FuturesSession | |
REDDIT_URL: str = "https://www.reddit.com/r/programming.json?limit=1" | |
def process():
    """Fetch the newest r/programming listing and print each post's title
    together with its top-level comment bodies as pretty-printed JSON.

    Performs one blocking request for the listing, then fetches every
    post's comment page concurrently via a FuturesSession.

    Raises:
        SystemExit: with status 1 if the initial listing request fails.
    """
    # Browser-like headers so Reddit serves the JSON instead of blocking
    # the default requests user agent.
    header = {
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-User": "?1",
        "Sec-Fetch-Dest": "document",
        "Referer": "https://www.google.com/",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9"
    }

    session = requests.Session()
    session.headers = header

    try:
        response = session.get(REDDIT_URL)
    except requests.RequestException:
        # Only network/HTTP transport errors are expected here; anything
        # else (e.g. a typo-level bug) should surface as a traceback.
        raise SystemExit(1)

    # The listing JSON nests posts under data -> children.
    posts = response.json()['data']['children']

    # Build one comment-page URL per post; appending ".json" to a post's
    # permalink yields its comment tree as JSON.
    urls = []
    for index, post in enumerate(posts, start=1):
        print(f"{index} out of {len(posts)}")
        permalink = post['data']['permalink']
        urls.append("https://www.reddit.com" + permalink + ".json")

    submissions = []
    with FuturesSession(max_workers=30) as futures_session:
        futures_session.headers = header
        futures = [futures_session.get(url) for url in urls]
        for future in as_completed(futures):
            # Parse the body once; the payload is a two-element array:
            # [0] the post itself, [1] its top-level comment listing.
            payload = future.result().json()
            title = payload[0]["data"]["children"][0]["data"]["title"]
            print(title)

            replies = []
            for reply in payload[1]['data']['children']:
                # "more" stub entries have no 'body' key; skip them
                # instead of raising KeyError.
                body = reply['data'].get('body')
                if body is not None:
                    replies.append(body)

            submissions.append({
                "title": title,
                "reply": replies
            })

    print(json.dumps(submissions, indent=4))
# Script entry point: run the crawler, then exit with an explicit success
# status. `raise SystemExit(0)` is used instead of the `exit()` builtin,
# which is a site-module convenience not guaranteed to exist in every
# interpreter environment.
if __name__ == "__main__":
    process()
    raise SystemExit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment