Skip to content

Instantly share code, notes, and snippets.

@erickvneri
Created March 10, 2021 22:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erickvneri/6db898978709022488b99f42cd5341ed to your computer and use it in GitHub Desktop.
Save erickvneri/6db898978709022488b99f42cd5341ed to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import sys
import csv
import logging
import datetime
import argparse
import praw
from prawcore.exceptions import OAuthException
from praw.exceptions import (
MissingRequiredAttributeException,
PRAWException)
"""
@@@ PRAW Reddit Script Scrapper @@@
Dummy CLI implementation to scrape topics
from Reddit. Its core purpose is to write
the scraped topics of a subreddit to a CSV file.
To authenticate this script client, please
create a `praw.ini` file with the following
content:
```
[scrapper]
client_id=xxxx-xxxx-xxxxx
client_secret=xxxx-xxxx-xxxx
user_agent=name-of-script-app
```
Note: this information can be found at
https://www.reddit.com/prefs/apps
"""
# Name of the praw.ini section that holds the client credentials.
praw_env = 'scrapper'

# Root logger setup: INFO and above, each record prefixed with its
# timestamp and level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.INFO,
)
def enable_praw_cli(argv=None) -> argparse.Namespace:
    """
    Parse the command-line options of the scraper.

    Args:
        argv: optional list of argument strings to parse instead of
            the process arguments. When left as None, argparse falls
            back to sys.argv[1:], which is the original behaviour.

    Returns:
        argparse.Namespace with two attributes:
        write_file (str or None) and user_agent (str).

    Raises:
        SystemExit: raised by argparse when the arguments are invalid,
            e.g. when the required -u/--user-agent option is missing.
    """
    parser = argparse.ArgumentParser(description='PRAW Reddit Script Scrapper')
    parser.add_argument(
        '-w', '--write',
        dest='write_file',
        type=str,
        default=None,
        help='prefix of the CSV dump file to create')
    # NOTE(review): this option is required, yet main() as written in
    # this file never reads args.user_agent -- confirm whether it should
    # be forwarded to praw.Reddit or made optional.
    parser.add_argument(
        '-u', '--user-agent',
        dest='user_agent',
        type=str,
        required=True,
        help='user agent string identifying this script')
    return parser.parse_args(argv)
def auth_praw_client(praw_ini_keyword: str) -> praw.Reddit:
    """
    Build a praw.Reddit client from a section of the praw.ini file.

    Args:
        praw_ini_keyword: name of the praw.ini section that holds the
            client credentials (client_id, client_secret, user_agent).

    Returns:
        The praw.Reddit instance created for that section.

    Raises:
        MissingRequiredAttributeException, PRAWException, OAuthException:
            logged as a warning and then re-raised, so the caller sees
            the real cause instead of an UnboundLocalError on the
            never-assigned client variable.
    """
    try:
        # Use the section requested by the caller rather than the
        # module-level default, so the parameter is actually honoured.
        reddit = praw.Reddit(site_name=praw_ini_keyword)
    except (
        # Possible auth-related exceptions.
        MissingRequiredAttributeException,
        PRAWException,
        OAuthException,
    ) as e:
        logging.warning(e)
        # Without a client there is nothing to return; propagate.
        raise
    # NOTE(review): PRAW appears to defer the network authentication
    # until the first request, so this message may only confirm that
    # the configuration was loaded -- confirm.
    logging.info('client authorized correctly')
    return reddit
def _topic_to_row(topic) -> dict:
    """Map one submission onto the CSV columns written by main()."""
    return dict(
        # created_utc is the creation time as a Unix timestamp; emit it
        # as an explicit UTC datetime so the column does not depend on
        # the timezone of the machine running the script.
        created_date=datetime.datetime.fromtimestamp(
            topic.created_utc, tz=datetime.timezone.utc),
        title=topic.title,
        url=topic.url,
        author=topic.author,
        comments_count=topic.num_comments,
        # Percentage; rounded to hide binary float noise
        # (e.g. 0.57 * 100 == 56.99999999999999).
        upvote_ratio=round(topic.upvote_ratio * 100, 2))


def main() -> None:
    """
    Script entry point.

    Parses the CLI arguments, creates the Reddit client and, when a
    file prefix was given through -w/--write, dumps the newest
    submissions of the SmartThings subreddit into
    `<prefix>-<timestamp>.csv`.
    """
    # CLI Args collected through
    # args object.
    args = enable_praw_cli()

    ### Call praw.Reddit instance
    reddit = auth_praw_client(praw_env)

    if not args.write_file:
        # Make the no-op explicit instead of exiting silently.
        logging.warning('no output file given (-w/--write), nothing to write')
        return

    ### CSV File - Initialize
    # Setting up header of csv file
    csv_header = [
        'created_date', 'title', 'url', 'author', 'comments_count', 'upvote_ratio'
    ]

    # Set filename template. The timestamp avoids ':' and keeps date and
    # time visibly separated so the name is valid on every platform.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S')
    dump_file = f'{args.write_file}-{timestamp}.csv'
    logging.info('Creating %s CSV dump file', dump_file)

    # newline='' lets the csv module control line endings (no blank rows
    # on Windows); utf-8 keeps non-ASCII titles writable everywhere;
    # 'w' guarantees a single header row per file.
    with open(dump_file, 'w', newline='', encoding='utf-8') as csv_dump:
        writer = csv.DictWriter(csv_dump, csv_header)
        writer.writeheader()

        # scrap_raw = reddit.subreddit('SmartThings').top('all')
        scrap_raw = reddit.subreddit('SmartThings').new()
        for topic in scrap_raw:
            writer.writerow(_topic_to_row(topic))

    logging.info('CSV file created: %s', dump_file)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment