Skip to content

Instantly share code, notes, and snippets.

@harshavardhana
Created December 6, 2012 04:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save harshavardhana/4221876 to your computer and use it in GitHub Desktop.
Save harshavardhana/4221876 to your computer and use it in GitHub Desktop.
Get the Tweets, Following, and Followers counts from "https://twitter.com/<userid>/followers" and encode them as JSON
#!/usr/bin/env python
import sys
import logging
import chardet
import json
from optparse import OptionParser
from boilerpipy import (Extractor, isValidhtml,
compat_urllib_request)
def _extract_counts(fragments):
    """Pick the tweet/following/follower counts out of text fragments.

    A fragment is recognised by its trailing label ("Tweets", "Following"
    or "Followers").  Commas are removed and the first run of digits is
    stored, as a string, under the matching key.  Fragments with an
    unknown label, or with a known label but no digits, are skipped.
    """
    import re  # local import: ``re`` is not among the file-level imports

    labels = (
        ('Tweets', 'total_tweets'),
        ('Following', 'following'),
        ('Followers', 'followers'),
    )
    counts = {}
    for fragment in fragments:
        for suffix, key in labels:
            if fragment.endswith(suffix):
                match = re.search(r'\d+', fragment.replace(',', ''))
                # A label without a number used to raise IndexError; skip it.
                if match:
                    counts[key] = match.group(0)
                break
    return counts


def main():
    """Fetch a URL, run the tag query on it and print the counts as JSON.

    Command-line options:
        -u/--url    page to fetch (required)
        -q/--query  tag handed to ``Extractor`` (required)
        -d          enable debug logging

    Exits with status 1 when a required option is missing and with status
    255 when ``isValidhtml`` rejects the URL.  Any error raised while
    reading or parsing the page is reported on stdout rather than
    propagated, and the opened URL handle is always closed.
    """
    parser = OptionParser(usage="%prog: [options] [file]")
    parser.add_option('-u', '--url', help="use URL instead of a local file")
    parser.add_option('-q', '--query', help="query should be a string")
    parser.add_option('-d', help="enable debug", action="store_true",
                      default=False, dest="debug")
    options, _ = parser.parse_args()

    if not (options.url and options.query):
        parser.print_help()
        sys.exit(1)

    loglevel = logging.DEBUG if options.debug else logging.INFO

    # print(...) with a single argument parses under both Python 2 and 3.
    if not isValidhtml(options.url):
        print("Unrecognized URL, please provide a content-type of text/html")
        sys.exit(255)

    url = compat_urllib_request.urlopen(options.url)
    try:
        content = url.read()
        try:
            enc = chardet.detect(content)['encoding']
            content = content.decode(enc)
        except (LookupError, TypeError, UnicodeError):
            # Best effort: no/unknown encoding detected or undecodable
            # bytes -- hand the raw content to the extractor instead.
            pass

        out = Extractor(content, tag=options.query, loglevel=loglevel).query()
        if out is None:
            # A bare ``raise`` has no active exception to re-raise here, so
            # raise an explicit error with a useful message instead.
            raise ValueError("no content matched query %r" % options.query)

        # Only the first three fragments are inspected; the result is
        # emitted exactly once so stdout holds a single JSON document.
        print(json.dumps(_extract_counts(out[0:3])))
    except Exception as err:
        print("Error in printing the extracted html () %s" % err)
    finally:
        url.close()
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment