erickvneri/scrapper.py

## scrapper.py
#!/usr/bin/python3
import sys
import csv
import logging
import datetime
import argparse

import praw
from prawcore.exceptions import OAuthException
from praw.exceptions import (
    MissingRequiredAttributeException,
    PRAWException)
"""
@@@ PRAW Reddit Script Scrapper @@@

Simple CLI implementation to scrape
submissions from Reddit. Its core purpose
is to prepare a CSV file from the
submissions collected.


To authenticate this script client, please
create a `praw.ini` file with the following
content:

```
[scrapper]
client_id=xxxx-xxxx-xxxxx
client_secret=xxxx-xxxx-xxxx
user_agent=name-of-script-app
```

Note: this information can be found at
https://www.reddit.com/prefs/apps
"""
# Name of the praw.ini section that holds the auth credentials.
praw_env = 'scrapper'

# Root logger setup: timestamped records, INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.INFO,
)

def enable_praw_cli(argv=None) -> argparse.Namespace:
    """
    Build the command-line parser and parse the arguments.

    Parameters:
        argv: optional list of argument strings to parse.
            When None (the default) argparse falls back to
            `sys.argv[1:]`, which is what the script uses.

    Returns:
        argparse.Namespace exposing:
          - write_file: value of -w/--write, None when omitted
          - user_agent: value of -u/--user-agent (required)

    Raises:
        SystemExit: raised by argparse on invalid or missing
            arguments (and after printing --help).
    """
    parser = argparse.ArgumentParser(description='PRAW Reddit Script Scrapper')
    parser.add_argument(
        '-w', '--write', dest='write_file', type=str, default=None,
        help='prefix of the CSV dump file name')
    # NOTE(review): this option is required here, but main() in this
    # file never reads `args.user_agent` - confirm whether it is still needed.
    parser.add_argument(
        '-u', '--user-agent', dest='user_agent', type=str, required=True,
        help='user agent of the script application')
    return parser.parse_args(argv)

def auth_praw_client(praw_ini_keyword: str) -> "praw.Reddit | None":
    """
    Create the PRAW client from a praw.ini section.

    Parameters:
        praw_ini_keyword: name of the praw.ini section
            that holds the client credentials.

    Returns:
        The praw.Reddit instance, or None when creating it
        raised one of the handled exceptions (the exception
        is logged as a warning in that case).
    """
    try:
        # Honour the section requested by the caller instead of
        # always reading the module-level default.
        reddit = praw.Reddit(site_name=praw_ini_keyword)
    except (
        # Possible auth-related exceptions.
        MissingRequiredAttributeException,
        PRAWException,
        OAuthException,
    ) as e:
        logging.warning(e)
        # Explicit so callers can see that failure yields None.
        return None
    else:
        logging.info('client authorized correctly')
        return reddit

def main() -> None:
    """
    Run the scrapper: collect the newest submissions of
    r/SmartThings and dump them into a timestamped CSV file.

    The CSV file is only produced when `-w/--write` was given;
    its value becomes the prefix of the file name. Exits with
    status 1 when a dump was requested but the Reddit client
    could not be created.
    """
    # CLI Args collected through
    # args object.
    args = enable_praw_cli()

    ### Call praw.Reddit instance
    reddit = auth_praw_client(praw_env)

    if not args.write_file:
        # Without an output prefix there is nothing to dump.
        return

    if reddit is None:
        # auth_praw_client() hands back None (after logging the cause)
        # when the client cannot be built; stop cleanly instead of
        # failing below with an AttributeError on `None.subreddit`.
        logging.error('Reddit client unavailable, nothing was written')
        sys.exit(1)

    ### CSV File - Initialize
    # Setting up header of csv file
    csv_header = [
        'created_date', 'title', 'url', 'author', 'comments_count', 'upvote_ratio'
    ]

    # Set filename template. strftime keeps the name free of ':'
    # (not allowed in Windows file names) and keeps date and time
    # apart; microseconds keep consecutive runs in separate files.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S-%f')
    dump_file = f'{args.write_file}-{timestamp}.csv'

    # newline='' is what the csv module expects of files it writes to
    # (otherwise every row is followed by a blank line on Windows);
    # utf-8 keeps non-ASCII titles from failing under a narrower
    # locale encoding.
    with open(dump_file, 'a', newline='', encoding='utf-8') as csv_dump:
        writer = csv.DictWriter(csv_dump, csv_header)
        writer.writeheader()

        scrap_raw = reddit.subreddit('SmartThings').new()
        logging.info(f'Creating {dump_file} CSV dump file')
        for topic in scrap_raw:
            writer.writerow(dict(
                # NOTE(review): PRAW documents `created_utc`; confirm
                # whether `created` is the intended field here.
                created_date=datetime.datetime.fromtimestamp(topic.created),
                title=topic.title,
                url=topic.url,
                author=topic.author,
                comments_count=topic.num_comments,
                # Ratio -> percentage; rounding hides float noise
                # such as 56.99999999999999.
                upvote_ratio=round(topic.upvote_ratio * 100, 2)))
        logging.info(f'CSV file created: {dump_file}')


if __name__ == '__main__':
    main()
	#!/usr/bin/python3
	import sys
	import csv
	import logging
	import datetime
	import argparse

	import praw
	from prawcore.exceptions import OAuthException
	from praw.exceptions import (
	MissingRequiredAttributeException,
	PRAWException)
	"""
	@@@ PRAW Reddit Script Scrapper @@@

	Simple CLI implementation to scrape
	submissions from Reddit. Its core purpose
	is to prepare a CSV file from the
	submissions collected.


	To authenticate this script client, please
	create a `praw.ini` file with the following
	content:

	```
	[scrapper]
	client_id=xxxx-xxxx-xxxxx
	client_secret=xxxx-xxxx-xxxx
	user_agent=name-of-script-app
	```

	Note: this information can be found at
	https://www.reddit.com/prefs/apps
	"""
	# Name of the praw.ini section that holds the auth credentials.
	praw_env = 'scrapper'

	# Root logger setup: timestamped records, INFO and above.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s: %(message)s',
	    level=logging.INFO,
	)

	def enable_praw_cli(argv=None) -> argparse.Namespace:
	    """
	    Build the command-line parser and parse the arguments.

	    Parameters:
	        argv: optional list of argument strings to parse.
	            When None (the default) argparse falls back to
	            `sys.argv[1:]`, which is what the script uses.

	    Returns:
	        argparse.Namespace exposing:
	          - write_file: value of -w/--write, None when omitted
	          - user_agent: value of -u/--user-agent (required)

	    Raises:
	        SystemExit: raised by argparse on invalid or missing
	            arguments (and after printing --help).
	    """
	    parser = argparse.ArgumentParser(description='PRAW Reddit Script Scrapper')
	    parser.add_argument(
	        '-w', '--write', dest='write_file', type=str, default=None,
	        help='prefix of the CSV dump file name')
	    # NOTE(review): this option is required here, but main() in this
	    # file never reads `args.user_agent` - confirm whether it is still needed.
	    parser.add_argument(
	        '-u', '--user-agent', dest='user_agent', type=str, required=True,
	        help='user agent of the script application')
	    return parser.parse_args(argv)

	def auth_praw_client(praw_ini_keyword: str) -> "praw.Reddit | None":
	    """
	    Create the PRAW client from a praw.ini section.

	    Parameters:
	        praw_ini_keyword: name of the praw.ini section
	            that holds the client credentials.

	    Returns:
	        The praw.Reddit instance, or None when creating it
	        raised one of the handled exceptions (the exception
	        is logged as a warning in that case).
	    """
	    try:
	        # Honour the section requested by the caller instead of
	        # always reading the module-level default.
	        reddit = praw.Reddit(site_name=praw_ini_keyword)
	    except (
	        # Possible auth-related exceptions.
	        MissingRequiredAttributeException,
	        PRAWException,
	        OAuthException,
	    ) as e:
	        logging.warning(e)
	        # Explicit so callers can see that failure yields None.
	        return None
	    else:
	        logging.info('client authorized correctly')
	        return reddit

	def main() -> None:
	    """
	    Run the scrapper: collect the newest submissions of
	    r/SmartThings and dump them into a timestamped CSV file.

	    The CSV file is only produced when `-w/--write` was given;
	    its value becomes the prefix of the file name. Exits with
	    status 1 when a dump was requested but the Reddit client
	    could not be created.
	    """
	    # CLI Args collected through
	    # args object.
	    args = enable_praw_cli()

	    ### Call praw.Reddit instance
	    reddit = auth_praw_client(praw_env)

	    if not args.write_file:
	        # Without an output prefix there is nothing to dump.
	        return

	    if reddit is None:
	        # auth_praw_client() hands back None (after logging the cause)
	        # when the client cannot be built; stop cleanly instead of
	        # failing below with an AttributeError on `None.subreddit`.
	        logging.error('Reddit client unavailable, nothing was written')
	        sys.exit(1)

	    ### CSV File - Initialize
	    # Setting up header of csv file
	    csv_header = [
	        'created_date', 'title', 'url', 'author', 'comments_count', 'upvote_ratio'
	    ]

	    # Set filename template. strftime keeps the name free of ':'
	    # (not allowed in Windows file names) and keeps date and time
	    # apart; microseconds keep consecutive runs in separate files.
	    timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S-%f')
	    dump_file = f'{args.write_file}-{timestamp}.csv'

	    # newline='' is what the csv module expects of files it writes to
	    # (otherwise every row is followed by a blank line on Windows);
	    # utf-8 keeps non-ASCII titles from failing under a narrower
	    # locale encoding.
	    with open(dump_file, 'a', newline='', encoding='utf-8') as csv_dump:
	        writer = csv.DictWriter(csv_dump, csv_header)
	        writer.writeheader()

	        scrap_raw = reddit.subreddit('SmartThings').new()
	        logging.info(f'Creating {dump_file} CSV dump file')
	        for topic in scrap_raw:
	            writer.writerow(dict(
	                # NOTE(review): PRAW documents `created_utc`; confirm
	                # whether `created` is the intended field here.
	                created_date=datetime.datetime.fromtimestamp(topic.created),
	                title=topic.title,
	                url=topic.url,
	                author=topic.author,
	                comments_count=topic.num_comments,
	                # Ratio -> percentage; rounding hides float noise
	                # such as 56.99999999999999.
	                upvote_ratio=round(topic.upvote_ratio * 100, 2)))
	        logging.info(f'CSV file created: {dump_file}')


	if __name__ == '__main__':
	    main()