Skip to content

Instantly share code, notes, and snippets.

@n-eq
Last active March 21, 2020 14:55
Show Gist options
  • Save n-eq/1e968934278b0f6691971aa7f6d6cccc to your computer and use it in GitHub Desktop.
Save n-eq/1e968934278b0f6691971aa7f6d6cccc to your computer and use it in GitHub Desktop.
Draw a graph of all the quote tweets (including children quote tweets) of a particular tweet (Depth First Search)
"""
@author: marrakchino
## Notes
* I used both tweepy and twitter because neither of the two libraries is extensive enough on its own to suit my needs:
1. Search for a specific tweet using its `id`: tweepy does this perfectly
2. Use a raw query (as in the web app), in particular to find the quote tweets of a tweet: twitter does this perfectly
* To my great disappointment, I learned that `The Search API is not complete index of all Tweets, but instead an index of recent Tweets.
The index includes between 6-9 days of Tweets.` This means this script would only work when applied on a 'recent' tweet.
* TODO:
- label edges
- (uni!)directional edges
- clickable nodes
- better node placing/highlighting
Heavily inspired by https://github.com/ugis22/analysing_twitter
"""
import time
import networkx as nx
import twitter
import tweepy
import matplotlib.pyplot as plt
# root tweet
# Id of the tweet the crawl starts from; used as the default `base_id` of
# loop(). Must be filled in before running the script.
BASE_ID = ""
# python-twitter client: used by quotes_of() for the raw search query.
# The four credentials are intentionally blank placeholders -- fill them in
# locally and avoid committing real secrets.
api = twitter.Api(consumer_key="",
consumer_secret="",
access_token_key="",
access_token_secret="")
# tweepy client: used to fetch a single tweet by id (get_status).
# Arguments are (consumer key, consumer secret) then (access token, secret).
auth = tweepy.OAuthHandler("", "")
auth.set_access_token("", "")
tweep = tweepy.API(auth)
# Undirected graph of tweet ids; quotes_of() adds one edge per
# (quote tweet, quoted tweet) pair and stores "user"/"text" on the quote node.
graph = nx.Graph()
# Tweets already handled by loop(), in the order they were processed.
l = [] # all processed tweets
# id: string representation (id_str field in the Tweet model)
def quotes_of(id):
    """Find the quote tweets of a tweet and record them in the module graph.

    Runs the raw search query ``-from:quotedreplies url:<id>`` through the
    python-twitter client (``api``), then re-fetches every hit with tweepy
    (``tweep.get_status``) so callers get tweepy status objects.

    For each hit other than the tweet itself, an edge ``hit -> id`` is added
    to the module-level ``graph`` and the hit's node gets the attributes
    ``user`` (author's screen name) and ``text`` (tweet text). Progress is
    printed to stdout.

    Args:
        id: Id of the quoted tweet, as a string (``id_str``). The name
            shadows the builtin ``id`` but is kept for caller compatibility.

    Returns:
        list: The tweepy status objects of the quote tweets found (possibly
        empty).
    """
    query = "q=-from%3Aquotedreplies%20url%3A" + id + "&f=live"
    results = api.GetSearch(raw_query=query)
    print("quotes of {} : {}".format(id, [res.id for res in results]))
    new_tweets = []
    for res in results:
        if res.id_str == id:
            # The search sometimes returns the quoted tweet itself; skip it
            # so we never create a self-loop.
            continue
        tweet = tweep.get_status(res.id_str)
        new_tweets.append(tweet)
        # add_node(n, **attrs) creates the node (or updates it) and sets its
        # attributes in one call. This replaces ``graph.node[...]``, which
        # no longer exists in networkx >= 2.4.
        graph.add_node(res.id_str, user=tweet.user.screen_name, text=tweet.text)
        graph.add_edge(res.id_str, id)
        print("Added node from {} to {}".format(res.id_str, id))
    print("{} new tweet(s) quoted from {}".format(len(new_tweets), id))
    return new_tweets
def loop(base_id=BASE_ID):
    """Crawl the quote-tweet tree rooted at ``base_id``, depth first.

    Starting from the root tweet, repeatedly pops the most recently
    discovered tweet (LIFO, hence depth-first), asks ``quotes_of`` for its
    quote tweets and schedules those for processing. Every processed tweet
    is appended to the module-level list ``l``; the graph itself is filled
    in by ``quotes_of``. Prints the number of processed tweets at the end.

    Args:
        base_id: Id of the root tweet, passed to ``tweep.get_status``.
            Defaults to the module-level ``BASE_ID``.
    """
    pending = [tweep.get_status(base_id)]
    # Track processed tweets by id string: cheap membership tests that do not
    # depend on how the status objects implement equality.
    processed_ids = {tweet.id_str for tweet in l}
    while pending:
        current = pending.pop()
        if current.id_str in processed_ids:
            # Already crawled: querying it again would only repeat API calls
            # and could loop forever if tweets reference each other.
            continue
        processed_ids.add(current.id_str)
        l.append(current)
        # quotes_of() never returns the queried tweet itself, so no extra
        # self-reference check is needed here.
        for quote in quotes_of(current.id_str):
            if quote.id_str not in processed_ids:
                pending.append(quote)
        time.sleep(0.5)  # avoid being blocked by the api
    print("Found {} tweets".format(len(l)))
def draw_graph():
    """Display the largest connected component of the module-level ``graph``.

    Lays the component out with a spring layout on a 20x20 inch figure and
    shows it in a matplotlib window. Nodes are coloured by their position in
    the component using the PiYG colormap. If the graph has no nodes, a
    message is printed and nothing is drawn.
    """
    if graph.number_of_nodes() == 0:
        # max() over zero components would raise ValueError.
        print("Nothing to draw: the graph is empty")
        return
    # connected_components + Graph.subgraph replaces
    # nx.connected_component_subgraphs, which was removed in networkx 2.4.
    largest_component = max(nx.connected_components(graph), key=len)
    largest_subgraph = graph.subgraph(largest_component)
    pos = nx.spring_layout(largest_subgraph, k=0.05)
    plt.figure(figsize=(20, 20))
    nx.draw(
        largest_subgraph,
        pos=pos,
        # One colour value per *drawn* node; sizing this from the full graph
        # breaks as soon as the graph has more than one component.
        node_color=list(range(len(largest_subgraph))),
        cmap=plt.cm.PiYG,
        edge_color="black",
        linewidths=0.8,
        node_size=60,
        with_labels=False,
    )
    plt.show()
def save(filename='graph.png'):
    """Write matplotlib's current figure to ``filename``.

    Args:
        filename: Destination path handed straight to ``plt.savefig``;
            defaults to ``graph.png`` in the working directory.
    """
    # pyplot operates on the "current" figure, so this saves whatever was
    # drawn last.
    plt.savefig(filename)
if __name__ == '__main__':
    # Crawl the quote tweets starting from BASE_ID, then render the result.
    # The crawl function is named `loop`; the previous call to `loopover`
    # referenced a name that is not defined anywhere in this script.
    loop()
    draw_graph()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment