Skip to content

Instantly share code, notes, and snippets.

@rishi-raj-jain
Last active February 24, 2021 20:49
Show Gist options
  • Save rishi-raj-jain/e44b84ebb4c6bb7fe09f3eeeac72f45a to your computer and use it in GitHub Desktop.
Save rishi-raj-jain/e44b84ebb4c6bb7fe09f3eeeac72f45a to your computer and use it in GitHub Desktop.
import sys
import networkx as nx
import matplotlib.pyplot as plt
import community as community_louvain
from community import generate_dendrogram, best_partition, partition_at_level
from pymongo import MongoClient
import matplotlib.cm as cm
import pprint, json
import matplotlib.colors as colors
import matplotlib.cm as cmx
'''
The graphs are produced with the approach suggested in these answers (plotting, adding colors and labels):
1. https://stackoverflow.com/questions/43541376/how-to-draw-communities-with-networkx
2. https://stackoverflow.com/questions/22992009/legend-in-python-networkx
'''
def community_layout(g, partition):
    """
    Lay out a modular graph so that each community forms its own cluster.

    A node's final position is the position assigned to its community plus
    the node's own position inside that community.

    Arguments:
    ----------
    g -- networkx.Graph or networkx.DiGraph instance
        graph to plot
    partition -- dict mapping node -> community
        graph partitions

    Returns:
    --------
    pos -- dict mapping node -> position
        node positions, one entry per node of g
    """
    community_offsets = _position_communities(g, partition, scale=3.)
    node_offsets = _position_nodes(g, partition, scale=1.)

    # Community position + position within the community, for every node.
    return {
        node: community_offsets[node] + node_offsets[node]
        for node in g.nodes()
    }
def _position_communities(g, partition, **kwargs):
    """
    Map every node in `partition` to the layout position of its community.

    A directed graph is built with one node per community and one edge per
    ordered pair of communities connected by edges of `g`; the edge weight is
    the number of such connecting edges.  That graph is laid out with
    `nx.spring_layout` (extra keyword arguments are forwarded to it), and
    each node receives the position of the community it belongs to.
    """
    cross_edges = _find_between_community_edges(g, partition)

    community_ids = set(partition.values())
    print("Number of communities ---> {}".format(len(community_ids)))

    # One node per community, weighted by the number of connecting edges.
    community_graph = nx.DiGraph()
    community_graph.add_nodes_from(community_ids)
    community_graph.add_weighted_edges_from(
        (ci, cj, len(edge_list)) for (ci, cj), edge_list in cross_edges.items()
    )

    # Lay out the communities, then hand each node its community's position.
    community_pos = nx.spring_layout(community_graph, **kwargs)
    return {
        node: community_pos[community]
        for node, community in partition.items()
    }
def _find_between_community_edges(g, partition):
edges = dict()
for (ni, nj) in g.edges():
ci = partition[ni]
cj = partition[nj]
if ci != cj:
try:
edges[(ci, cj)] += [(ni, nj)]
except KeyError:
edges[(ci, cj)] = [(ni, nj)]
return edges
def _position_nodes(g, partition, **kwargs):
"""
Positions nodes within communities.
"""
communities = dict()
for node, community in partition.items():
try:
communities[community] += [node]
except KeyError:
communities[community] = [node]
pos = dict()
for ci, nodes in communities.items():
subgraph = g.subgraph(nodes)
pos_subgraph = nx.spring_layout(subgraph, **kwargs)
pos.update(pos_subgraph)
return pos
# Connecting to the db
# NOTE(review): `db` is not defined anywhere in the visible source, so this
# line raises NameError as written. Presumably a pymongo database handle
# (MongoClient is imported above) has to be created first — confirm and
# restore the connection setup.
collection = db['tweets']
# Creating empty graph
# Undirected graph; addToGraph() keeps a 'weight' attribute on every edge.
G= nx.Graph()
# Keep track of tweets counted
# Maps tweet id -> 1 for every tweet already processed (used as a seen-set).
mapOriginal= {}
''' TODO: Add edges to the graph '''
def addToGraph(src, destinations):
    """
    Add one weighted edge per mention to the module-level graph G.

    src -- node the edges start from (the tweet author's screen name)
    destinations -- iterable of dicts, each with a 'screen_name' key

    An edge that already exists has its 'weight' incremented by one; a new
    edge is created with weight 1.
    """
    for mention in destinations:
        target = mention['screen_name']
        if not G.has_edge(src, target):
            G.add_edge(src, target, weight=1)
            continue
        G[src][target]['weight'] += 1
'''TODO: Process tweet to get user mentions'''
def processTweet(item):
    """
    Add the user mentions of one tweet to the graph as edges from its author.

    item -- tweet dict providing item['user']['screen_name'] and an
        ['entities']['user_mentions'] list.  When item['truncated'] is
        present and truthy, the mentions are read from
        item['extended_tweet']['entities']['user_mentions'] instead.
    """
    author = item['user']['screen_name']
    # For truncated tweets the mentions are read from 'extended_tweet'.
    source = item['extended_tweet'] if item.get('truncated') else item
    addToGraph(author, source['entities']['user_mentions'])
# Number of records read from the collection; matches the "100" in the
# output file name.
MAX_TWEETS = 100


def _process_once(tweet):
    """Pass `tweet` to processTweet unless its id has already been handled."""
    tweet_id = tweet['id']
    if tweet_id not in mapOriginal:
        mapOriginal[tweet_id] = 1
        processTweet(tweet)


# Iterating over the records. The cursor itself is limited, so exactly
# MAX_TWEETS records are processed (fewer only if the collection is smaller).
for item in collection.find().limit(MAX_TWEETS):
    is_retweet = item.get('retweeted_status') is not None
    is_quote = item.get('quoted_status') is not None

    if is_retweet and not is_quote:
        # A plain retweet: only the retweeted (original) tweet is counted.
        _process_once(item['retweeted_status'])
    else:
        # An original tweet, or a tweet carrying a quoted status: count the
        # tweet itself ...
        _process_once(item)
        if is_quote:
            # ... and, for quotes, also the tweet being quoted.
            _process_once(item['quoted_status'])

# Applying louvain community detection algorithm
partition = community_louvain.best_partition(G)
node_communities = [partition.get(node, 0) for node in G.nodes()]
# default=0 avoids the ValueError max() raises when the graph has no nodes.
max_community = max(node_communities, default=0)

jet = plt.get_cmap('jet')
pos = community_layout(G, partition)
color_norm = colors.Normalize(vmin=0, vmax=max_community)
scalar_map = cmx.ScalarMappable(norm=color_norm, cmap=jet)

# Create figure 1 a single time and draw everything on its only axes.
f = plt.figure(1, figsize=(192, 108))
ax = f.add_subplot(1, 1, 1)

# One-point dummy lines: they only provide a legend entry per node, coloured
# with the colour of the node's community.
for label in partition:
    ax.plot([0], [0], color=scalar_map.to_rgba(partition[label]), label=label)

nx.draw(
    G,
    pos,
    cmap=jet,
    vmin=0,
    vmax=max_community,
    node_color=node_communities,
    with_labels=False,
    ax=ax,
)
plt.axis('off')
f.set_facecolor('w')
plt.legend(loc='center')
f.tight_layout()
plt.savefig("mappedLouvain100.png")
sys.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment