Skip to content

Instantly share code, notes, and snippets.

@rishi-raj-jain
Last active February 24, 2021 20:48
Show Gist options
  • Save rishi-raj-jain/8e3e807489e70a1e1b15d617f4f7acb5 to your computer and use it in GitHub Desktop.
Save rishi-raj-jain/8e3e807489e70a1e1b15d617f4f7acb5 to your computer and use it in GitHub Desktop.
import sys, csv, pprint, json
import networkx as nx
import matplotlib.pyplot as plt
import community as community_louvain
import matplotlib.cm as cm
from community import generate_dendrogram, best_partition, partition_at_level
from pymongo import MongoClient
from networkx.readwrite import json_graph
import threading
# Connecting to the db
# NOTE(review): `db` is not assigned anywhere in the visible source (MongoClient
# is imported above but never called here) -- the connection setup appears to
# be missing, so this line would raise NameError as written; confirm.
collection = db['tweets']
# Weighted graph of user mentions; edges are created/incremented by addToGraph.
G= nx.Graph() # User Mentions
# Weighted graph of retweet/quote relations; edges are created/incremented by
# addToRetweet.
G2= nx.Graph() # Retweets Mentions
# Keep track of tweets counted
# Keys are tweet ids; the value 1 is stored once a tweet's mentions have been
# handed to processTweet, so the same tweet is not counted twice.
mapOriginal= {}
def addToGraph(src, destinations):
    """Add edges to the user-mentions graph G.

    For every entry of `destinations` (each indexed by 'screen_name'), an
    edge between `src` and that screen name is created with weight 1, or has
    its existing weight increased by 1 if the edge is already present.
    """
    for mentioned in destinations:
        target = mentioned['screen_name']
        if not G.has_edge(src, target):
            # First time this pair is seen.
            G.add_edge(src, target, weight=1)
            continue
        G[src][target]['weight'] += 1
def processTweet(item):
    """Process a tweet to get its user mentions and add them to the graph.

    `item` is indexed as a mapping: the author comes from
    item['user']['screen_name'] and the mentions from the 'user_mentions'
    list of the tweet's entities. The result is passed to addToGraph.
    """
    author = item['user']['screen_name']
    # When 'truncated' is present and truthy the entities are read from
    # 'extended_tweet'; a missing or falsy flag uses the top-level 'entities'.
    if item.get('truncated'):
        entities = item['extended_tweet']['entities']
    else:
        entities = item['entities']
    addToGraph(author, entities['user_mentions'])
def addToRetweet(src, dest):
    """Add a retweet relation between `src` and `dest` to the G2 graph.

    A new edge starts with weight 1; an edge that already exists has its
    weight increased by 1.
    """
    if not G2.has_edge(src, dest):
        G2.add_edge(src, dest, weight=1)
        return
    G2[src][dest]['weight'] += 1
def UserMentionsCSV():
    """Write the user-mentions graph G to 'usermentions.csv'.

    The file is overwritten and contains a 'Source,Target,Weight' header
    followed by one row per edge of G.
    """
    header = ['Source', 'Target', 'Weight']
    # Mode 'w' already empties the file, so no explicit truncate is needed.
    with open('usermentions.csv', 'w', newline='') as out:
        rows = csv.writer(out)
        rows.writerow(header)
        rows.writerows([a, b, G[a][b]['weight']] for a, b in G.edges())
def RetweetsCSV():
    """Write the retweets graph G2 to 'retweets.csv'.

    The file is overwritten and contains a 'Source,Target,Weight' header
    followed by one row per edge of G2.
    """
    header = ['Source', 'Target', 'Weight']
    # Mode 'w' already empties the file, so no explicit truncate is needed.
    with open('retweets.csv', 'w', newline='') as out:
        rows = csv.writer(out)
        rows.writerow(header)
        rows.writerows([a, b, G2[a][b]['weight']] for a, b in G2.edges())
# Running count of records pulled from the cursor; used to cut the scan short.
J = 0


def _processUnseen(tweet):
    """Pass `tweet` to processTweet the first time its id is encountered."""
    if mapOriginal.get(tweet['id']) is None:
        mapOriginal[tweet['id']] = 1
        processTweet(tweet)


# Iterating over each record.
# NOTE(review): the nesting below was reconstructed from a paste whose
# indentation was lost. It assumes the retweet/quote edge is recorded for
# every retweet or quote, not only when the original tweet is first seen --
# confirm against the author's intent.
for J, item in enumerate(collection.find(), start=1):
    # The scan stops on reaching the 10,000th record, which itself is not
    # processed (so at most 9,999 records are handled).
    if J == 10000:
        break

    retweeted = item.get('retweeted_status')
    quoted = item.get('quoted_status')

    if quoted is not None:
        # Quote tweet. A record carrying both 'retweeted_status' and
        # 'quoted_status' also lands here.
        # Mentions of the quoting tweet, then of the quoted tweet.
        _processUnseen(item)
        _processUnseen(quoted)
        addToRetweet(item['user']['screen_name'], quoted['user']['screen_name'])
    elif retweeted is not None:
        # Plain retweet: only the retweeted tweet contributes mentions.
        _processUnseen(retweeted)
        addToRetweet(item['user']['screen_name'], retweeted['user']['screen_name'])
    else:
        # Original tweet (neither a retweet nor a quote).
        _processUnseen(item)

print("Done with iterations...")
print("Starting export to CSV...")

# Write both CSV files concurrently, one thread per file.
exporters = [
    threading.Thread(target=exportFn)
    for exportFn in (UserMentionsCSV, RetweetsCSV)
]
for worker in exporters:
    worker.start()
for worker in exporters:
    worker.join()

print("Completed!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment