@errakeshpd
Last active January 16, 2017 06:44
code
"""
This is a command line application that allows you to scrape twitter!
"""
import collections
import json
from argparse import ArgumentParser
from datetime import datetime
from os.path import isfile
from json import dump
import logging
import sys
from twitterscraper import query_tweets
from twitterscraper.query import query_all_tweets


class JSONEncoder(json.JSONEncoder):
    """Encoder that can serialize tweet objects, datetimes, and other iterables."""

    def default(self, obj):
        if hasattr(obj, '__json__'):
            return obj.__json__()
        elif isinstance(obj, collections.abc.Iterable):
            return list(obj)
        elif isinstance(obj, datetime):
            return obj.isoformat()
        elif hasattr(obj, '__getitem__') and hasattr(obj, 'keys'):
            # Mapping-like objects become plain dicts.
            return dict(obj)
        elif hasattr(obj, '__dict__'):
            # Fall back to the object's public, non-callable attributes.
            return {member: getattr(obj, member)
                    for member in dir(obj)
                    if not member.startswith('_') and
                    not callable(getattr(obj, member))}
        return json.JSONEncoder.default(self, obj)


def main():
    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    try:
        parser = ArgumentParser(description=__doc__)
        parser.add_argument("query", type=str, help="Advanced Twitter query")
        # --output must be a plain file path, since it is passed to isfile()
        # and open() below; "tweets.json" is a convenient default.
        parser.add_argument("-o", "--output", type=str, default="tweets.json",
                            help="Path to a JSON file to store the gathered "
                                 "tweets in.")
        parser.add_argument("-l", "--limit", type=int, default=None,
                            help="Minimum number of tweets to gather.")
        parser.add_argument("-a", "--all", action='store_true',
                            help="Set this flag if you want to get all tweets "
                                 "in the history of Twitter. This may take a "
                                 "while, but it also activates parallel tweet "
                                 "gathering. The number of tweets, however, "
                                 "will be capped at around 100000 per 10 "
                                 "days.")
        args = parser.parse_args()

        if isfile(args.output):
            logging.error("Output file already exists! Aborting.")
            sys.exit(1)

        if args.all:
            tweets = query_all_tweets(args.query)
        else:
            tweets = query_tweets(args.query, args.limit)

        print("taco")
        for tweet in tweets:
            print(tweet)
        with open(args.output, "w") as output:
            dump(tweets, output, cls=JSONEncoder)
    except KeyboardInterrupt:
        logging.info("Program interrupted by user. Quitting...")
        print("taco", file=sys.stdout)


if __name__ == "__main__":
    main()