@psychemedia
Created January 22, 2012 18:35
Root around the Twitter Contributors/Contributees API to see if we can plot corporate comms networks
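Example invocation (a sketch: the script filename is arbitrary here, the flags are defined in the argparse setup below, and twitterapi/starbucks come from the example account list in the comments further down; examplecontributor is a placeholder name):

python contributions.py -contributeto twitterapi starbucks -depth 3
python contributions.py -contributeby examplecontributor -depth 2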
import simplejson,urllib2
import md5, tempfile, time
import argparse,os
import networkx as nx
parser = argparse.ArgumentParser(description='Mine Twitter account contributions')
parser.add_argument('-contributeto',nargs='*', help="A space-separated list of account names (without the @) for which you want to find the contributors.")
parser.add_argument('-contributeby',nargs='*', help="A space-separated list of account names (without the @) who you believe contribute to other accounts.")
parser.add_argument('-depth',default=3,type=int,metavar='N',help='Snowball search depth.')
args=parser.parse_args()
DG=nx.DiGraph()
def checkDir(dirpath):
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
#Note: getContributors and getContributees are early, uncached versions;
#the snowball crawl below actually uses rgetContributors/rgetContributees
def getContributors(user,userlist):
    #Look up the accounts that contribute to @user
    net=[]
    print 'Getting contributors to',user
    try:
        data= simplejson.load(urllib2.urlopen('https://api.twitter.com/1/users/contributors.json?screen_name='+user))
        print data
        for d in data:
            net.append(d['screen_name'])
            if d['screen_name'] not in userlist: userlist.append(d['screen_name'])
    except:
        print 'oops'
    return net,userlist
def getContributees(user,accountlist):
    #Look up the accounts that @user contributes to
    print 'Getting contributions of',user
    net=[]
    try:
        data= simplejson.load(urllib2.urlopen('https://api.twitter.com/1/users/contributees.json?screen_name='+user))
        for d in data:
            net.append(d['screen_name'])
            if d['screen_name'] not in accountlist: accountlist.append(d['screen_name'])
    except:
        pass
    return net,accountlist
#accountlist=['twitterapi']
accountlist=args.contributeto
userlist=args.contributeby
contributors={}
contributees={}
depth=args.depth
if args.contributeto and len(args.contributeto):
    print "finding contributors to..."
    fpath='/'.join(['reports','contributors','_'.join(args.contributeto)])
    typ='contributors'
    data={'accountlist':args.contributeto,'userlist':[],'contributors':{},'contributees':{},'graph':DG}
elif args.contributeby and len(args.contributeby):
    print "finding contributions by..."
    fpath='/'.join(['reports','contributees','_'.join(args.contributeby)])
    typ='contributees'
    data={'accountlist':[],'userlist':args.contributeby,'contributors':{},'contributees':{},'graph':DG}
else:
    exit(-1)
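#data is the shared state threaded through the crawl: the seed account/user
#lists (grown as new names turn up), the contributor/contributee maps built
#so far, and the networkx graph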
checkDir(fpath)
#==
#tweak of http://developer.yahoo.com/python/python-caching.html
class DiskCacheFetcherfname:
    def __init__(self, cache_dir=None):
        # If no cache directory specified, use system temp directory
        if cache_dir is None:
            cache_dir = tempfile.gettempdir()
        self.cache_dir = cache_dir
        # Make sure the cache directory exists, or the os.rename below will fail
        checkDir(self.cache_dir)
    def fetch(self, url, max_age=0):
        # Use MD5 hash of the URL as the filename
        filename = md5.new(url).hexdigest()
        filepath = os.path.join(self.cache_dir, filename)
        if os.path.exists(filepath):
            if int(time.time()) - os.path.getmtime(filepath) < max_age:
                #return open(filepath).read()
                print "using cached copy of fetched url: ",url
                return filepath
        print "fetching fresh copy of fetched url: ",url
        # Retrieve over HTTP and cache, using rename to avoid collisions
        tempdata = urllib2.urlopen(url).read()
        fd, temppath = tempfile.mkstemp()
        fp = os.fdopen(fd, 'w')
        fp.write(tempdata)
        fp.close()
        os.rename(temppath, filepath)
        return filepath
def getTwCachedData(url, cachetime=144000):
    fetcher=DiskCacheFetcherfname('cache')
    fn=fetcher.fetch(url, cachetime)
    f=open(fn)
    data=f.read()
    f.close()
    #print 'data----',data
    jdata=simplejson.loads(data)
    if 'error' in jdata:
        if jdata['error'].startswith('Rate limit exceeded'):
            #A rate-limited response is no use as a cached copy, so drop it
            #and let a later run refetch the URL
            os.remove(fn)
    return jdata
def rgetContributors(user,bigdata):
    #Fetch the contributors to @user, add them to the graph
    #(edge direction: contributor -> account), and queue any newly
    #seen names for a later contributees pass
    net=[]
    print 'Getting contributors to',user
    bigdata['graph'].add_node(user.lower(),label=user)
    try:
        url='https://api.twitter.com/1/users/contributors.json?screen_name='+user
        print 'trying',url
        #data= simplejson.load(urllib2.urlopen(url))
        data=getTwCachedData(url)
        #print data
        for d in data:
            if 'screen_name' in d:
                dsname=d['screen_name']
                net.append(dsname)
                if dsname not in bigdata['userlist']:
                    bigdata['userlist'].append(dsname)
                bigdata['graph'].add_node(dsname.lower(),label=dsname)
                bigdata['graph'].add_edge(dsname.lower(),user.lower())
    except:
        print 'oops'
    bigdata['contributors'][user]=net
    return bigdata
def rgetContributees(user,bigdata):
    #Fetch the accounts that @user contributes to, add them to the graph
    #(edge direction: contributor -> account), and queue any newly seen
    #accounts for a later contributors pass
    print 'Getting contributions of',user
    bigdata['graph'].add_node(user.lower(),label=user)
    net=[]
    try:
        url='https://api.twitter.com/1/users/contributees.json?screen_name='+user
        print 'trying',url
        #data= simplejson.load(urllib2.urlopen(url))
        data=getTwCachedData(url)
        for d in data:
            if 'screen_name' in d:
                dsname=d['screen_name']
                net.append(dsname)
                if dsname not in bigdata['accountlist']:
                    bigdata['accountlist'].append(dsname)
                bigdata['graph'].add_node(dsname.lower(),label=dsname)
                bigdata['graph'].add_edge(user.lower(),dsname.lower())
    except:
        print 'oops2'
    bigdata['contributees'][user]=net
    return bigdata
#via mhawksey - google: site:twitter.com "via web by"
#twitterapi, starbucks, HuffingtonPost,sportscenter,todayshow,reelseo,qualcomm,DefJamRecords,HornitosTequila,googletalks,salesforce,noh8campaign,chevron,mtv,jangomail,ESPNCFB,noh8campaign,playstation,mail
#Originally inspired by http://www.drewconway.com/zia/?p=345
def snowball_build(bigdata,rounds,typ='contributors'):
    print 'Starting...'
    #Alternate passes between contributors and contributees; the offset sets
    #which way round we start, e.g. typ='contributors' with rounds=3 runs
    #contributors, contributees, contributors
    if typ=='contributors':
        offset=0
    else:
        offset=1
    for r in range(0,rounds):
        print "STARTING PASS",str(r)
        if (r+offset) % 2:
            print "Finding contributees...",str(r)
            for user in bigdata['userlist']:
                if user not in bigdata['contributees']:
                    bigdata=rgetContributees(user,bigdata)
        else:
            #This includes the first pass
            print "Finding contributors...",str(r)
            for account in bigdata['accountlist']:
                if account not in bigdata['contributors']:
                    bigdata=rgetContributors(account,bigdata)
    return bigdata
data=snowball_build(data,depth,typ)
print data
print 'contributors',data['contributors']
print 'contributees',data['contributees']
print 'accountlist',data['accountlist']
print 'userlist',data['userlist']
nx.write_graphml(data['graph'], fpath+"/graph.graphml")
nx.write_edgelist(data['graph'], fpath+"/graph.txt",data=False)
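#The GraphML file can be opened in a graph tool such as Gephi, or reloaded
#into networkx for further analysis - a minimal sketch:
#  G=nx.read_graphml(fpath+'/graph.graphml')
#  print G.number_of_nodes(), G.number_of_edges()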