Last active
August 29, 2015 14:20
-
-
Save rvanbruggen/1b7c85e02af67a2ce436 to your computer and use it in GitHub Desktop.
CyclingTweets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet Id using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet Id", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Username using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Username", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet time using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet time", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet time using expression value.toDate()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet time", | |
"expression": "value.toDate()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Is ReTweet using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Is ReTweet", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Favorite using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Favorite", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column ReTweet using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "ReTweet", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Is ReTweet using expression value.toNumber()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Is ReTweet", | |
"expression": "value.toNumber()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Favorite using expression value.toNumber()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Favorite", | |
"expression": "value.toNumber()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column ReTweet using expression value.toNumber()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "ReTweet", | |
"expression": "value.toNumber()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Twitter URL using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Twitter URL", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Tweet Id to TweetID", | |
"oldColumnName": "Tweet Id", | |
"newColumnName": "TweetID" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Tweet time to TweetTime", | |
"oldColumnName": "Tweet time", | |
"newColumnName": "TweetTime" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Is ReTweet to IsRetweet", | |
"oldColumnName": "Is ReTweet", | |
"newColumnName": "IsRetweet" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Twitter URL to TwitterURL", | |
"oldColumnName": "Twitter URL", | |
"newColumnName": "TwitterURL" | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet using expression grel:replaceChars(value,\",\",\" ,\")", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet", | |
"expression": "grel:replaceChars(value,\",\",\" ,\")", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet using expression grel:replacechars(value,\" \",\" \")", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet", | |
"expression": "grel:replacechars(value,\" \",\" \")", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
} | |
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Add this to the <your neo4j directory>/conf/neo4j.properties after adding | |
//graphaware-noderank-2.2.1.30.2.jar and | |
//graphaware-server-enterprise-all-2.2.1.30.jar | |
//to <your neo4j directory>/plugins directory | |
com.graphaware.runtime.enabled=true | |
#NR becomes the module ID: | |
com.graphaware.module.NR.1=com.graphaware.module.noderank.NodeRankModuleBootstrapper | |
#optional number of top ranked nodes to remember, the default is 10 | |
com.graphaware.module.NR.maxTopRankNodes=50 | |
#optional damping factor, which is a number p such that a random node will be selected at any step of the algorithm | |
#with the probability 1-p (as opposed to following a random relationship). The default is 0.85 | |
com.graphaware.module.NR.dampingFactor=0.85 | |
#optional key of the property that gets written to the ranked nodes, default is "nodeRank" | |
com.graphaware.module.NR.propertyKey=nodeRank | |
#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes | |
com.graphaware.module.NR.node=hasLabel('Handle') | |
#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships | |
com.graphaware.module.NR.relationship=isType('FOLLOWS') | |
#TR becomes the module ID: | |
com.graphaware.module.TR.2=com.graphaware.module.noderank.NodeRankModuleBootstrapper | |
#optional number of top ranked nodes to remember, the default is 10 | |
com.graphaware.module.TR.maxTopRankNodes=50 | |
#optional damping factor, which is a number p such that a random node will be selected at any step of the algorithm | |
#with the probability 1-p (as opposed to following a random relationship). The default is 0.85 | |
com.graphaware.module.TR.dampingFactor=0.85 | |
#optional key of the property that gets written to the ranked nodes, default is "nodeRank" | |
com.graphaware.module.TR.propertyKey=topicRank | |
#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes | |
com.graphaware.module.TR.node=hasLabel('Hashtag') | |
#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships | |
com.graphaware.module.TR.relationship=isType('MENTIONED_IN') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//add some metadata: load countries, teams, handles and riders from a
//shared Google spreadsheet, then create indexes and a constraint.
//NOTE(review): toint() is legacy pre-3.0 Cypher (toInteger() later).
//country info - one :Country node per row, with CQ ranking data
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1390098748" as csv
create (c:Country {code: csv.Country, name: csv.FullCountry, cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)});
//team info - each team is linked to its class and country
//(rows whose country has no :Country node are dropped by the match)
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1244447866" as csv
merge (tc:TeamClass {name: csv.Class})
with csv, tc
match (c:Country {code: csv.Country})
merge (tc)<-[:IN_CLASS]-(t:Team {code: trim(csv.Code), name: trim(csv.Name), cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)})-[:FROM_COUNTRY]->(c);
//twitter handle info
using periodic commit 500
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=0" as csv
match (c:Country {code: trim(csv.Country)})
merge (h:Handle {name: trim(csv.Handle), realname: trim(csv.Name)})-[:FROM_COUNTRY]->(c);
//rider info - enrich handles with rider stats and link them to teams
//(matched by exact realname; riders without a :Handle row are skipped)
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1885142986" as csv
match (h:Handle {realname: trim(csv.Name)}), (t:Team {code: trim(csv.Team)})
set h.Age=toint(csv.Age)
set h.CQ=toint(csv.CQ)
set h.UCIcode=csv.UCIcode
set h.rank=toint(csv.Rank)
set h.prevrank=toint(csv.Prev)
create (h)-[:RIDES_FOR_TEAM]->(t);
//add the indexes used by the load/extract queries, plus a uniqueness
//constraint on the twitterId set by the Python importer
create index on :Handle(name);
create index on :Hashtag(name);
create index on :Tweet(text);
create index on :Handle(nodeRank);
create constraint on (h:Handle) assert h.twitterId is unique;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//get the handles from the csv file
//this should not do anything - as the handles have already been loaded above
//NOTE(review): `csv.Username<>[]` compares a string field to an empty
//collection; this looks like it was meant as a null/empty check
//(e.g. csv.Username IS NOT NULL) - confirm it filters as intended.
using periodic commit 500
load csv with headers from "file:<yourpath>/20150401.csv" as csv
with csv
where csv.Username<>[]
merge (h:Handle {name: '@'+lower(csv.Username)})
//connect the tweets to the handles
//(handle names are normalised to '@' + lowercase to match the metadata load)
using periodic commit 500
load csv with headers from "file:<your path>/20150401.csv" as csv
with csv
where csv.Username<>[]
merge (h:Handle {name: '@'+lower(csv.Username)})
merge (t:Tweet {text: lower(csv.Tweet), id: toint(csv.TweetID), time: csv.TweetTime, isretweet: toint(csv.IsReTweet), favorite: toint(csv.Favorite), retweet: toint(csv.ReTweet), url: csv.`Twitter URL`})<-[:TWEETS]-(h);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//extract handles from tweet text and connect tweets to handles
//(tokenise on single spaces; tokens starting with '@' are treated as
//handles and lowercased to match the '@'+lower(...) handle names)
match (t:Tweet)
WITH t,split(t.text," ") as words
UNWIND words as handles
with t,handles
where left(handles,1)="@"
with t, handles
merge (h:Handle {name: lower(handles)})
merge (h)-[:MENTIONED_IN]->(t);
//extract hashtags from tweet text and connect tweets to hashtags
//(tokens starting with '#' become :Hashtag nodes, uppercased so the
//same tag in different cases collapses into one node)
match (t:Tweet)
WITH t,split(t.text," ") as words
UNWIND words as hashtags
with t,hashtags
where left(hashtags,1)="#"
with t, hashtags
merge (h:Hashtag {name: upper(hashtags)})
merge (h)-[:MENTIONED_IN]->(t);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import sys | |
import os | |
import tweepy | |
import csv | |
import json | |
import calendar | |
from collections import deque | |
from util import Users | |
from py2neo import Graph | |
from dateutil import parser | |
def seed(api, username):
    """Seed the user graph and write it to data/users.csv.

    Starting from `username`, collect screen names mentioned in recent
    tweets, breadth-first, until USERS_TO_PROCESS users are gathered or
    no candidates remain. Exits with status 1 if a seed file already
    exists.

    api      -- authenticated tweepy API client
    username -- Twitter screen name to start from
    """
    if os.path.exists("data/users.csv"):
        print("Twitter graph has already been seeded. Delete 'data/users.csv' if you want to seed it again")
        sys.exit(1)
    USERS_TO_PROCESS = 50
    users_to_process = deque()
    users_processed = set([username])
    # First wave of candidates: users mentioned in the seed's last 50 tweets.
    # (Original used `not len(...) > USERS_TO_PROCESS`, which let the queue
    # grow to USERS_TO_PROCESS + 1 - off by one.)
    for tweet in tweepy.Cursor(api.user_timeline, id=username).items(50):
        for user in tweet.entities["user_mentions"]:
            if len(users_to_process) < USERS_TO_PROCESS:
                users_to_process.append(user["screen_name"])
            else:
                break
    # Breadth-first walk: pull mentions from each queued user's timeline
    # until the processed set is full or the queue drains.
    while len(users_processed) < USERS_TO_PROCESS and len(users_to_process) > 0:
        next_user = users_to_process.popleft()
        print(next_user)
        if next_user not in users_processed:
            users_processed.add(next_user)
            for tweet in tweepy.Cursor(api.user_timeline, id=next_user).items(10):
                for user_mentioned in tweet.entities["user_mentions"]:
                    # Original hard-coded 50 here; use the constant so the
                    # cap stays consistent if it is ever changed.
                    if len(users_processed) <= USERS_TO_PROCESS:
                        users_to_process.append(user_mentioned["screen_name"])
                    else:
                        break
    with open("data/users.csv", "w") as usersfile:
        writer = csv.writer(usersfile, delimiter=",")
        for user in users_processed:
            writer.writerow([user, "PROCESSED", ""])
def read_user(username):
    """Print the locally cached profile and tweets for `username`.

    Reads data/profiles/<username>.json and data/tweets/<username>.json;
    either file may be absent (no profile output / zero tweets).
    """
    print username
    profile_file_path = "data/profiles/{0}.json".format(username)
    if os.path.exists(profile_file_path):
        with open(profile_file_path, "r") as file:
            profile = json.loads(file.read())
            print profile["name"]
            print profile["description"]
            print "Friends: {0}".format(len(profile["friends"]))
            print "Followers: {0}".format(len(profile["followers"]))
    # Tweets are cached in a separate file from the profile.
    file_path = "data/tweets/{0}.json".format(username)
    if not os.path.exists(file_path):
        tweets = []
    else:
        with open(file_path, "r") as file:
            tweets = json.loads(file.read())
    print "# of tweets: {0}".format(len(tweets))
    if len(tweets) > 0:
        print "latest tweets:"
        # NOTE(review): this prints every cached tweet, not just the latest.
        for tweet in tweets:
            print tweet["id"], tweet["text"]
def download_all_user_tweets(api, users):
    """Fetch tweets for every user known to the Users store."""
    for username in [entry[0] for entry in users.all().iteritems()]:
        download_user_tweets(api, users, username)
def download_new_user_tweets(api, users):
    """Fetch tweets only for users with no lastTweetRetrieved checkpoint yet."""
    pending = [name for name, state in users.all().iteritems()
               if not state["lastTweetRetrieved"]]
    for username in pending:
        download_user_tweets(api, users, username)
def download_all_user_profiles(api, users):
    """Download profiles for users whose profile JSON is not cached yet."""
    for username, _ in users.all().iteritems():
        if os.path.exists("data/profiles/{0}.json".format(username)):
            continue
        download_profile(api, username)
def download_all_user_friends(api, users):
    """Download friend lists for users whose friends JSON is not cached yet."""
    for username, _ in users.all().iteritems():
        if os.path.exists("data/friends/{0}.json".format(username)):
            continue
        download_friends(api, username)
def download_user_tweets(api, users, username):
    """Incrementally fetch tweets for `username` and cache them on disk.

    api      -- authenticated tweepy API client
    users    -- util.Users store; must support find()/save() and hold a
                "lastTweetRetrieved" checkpoint per user
    username -- Twitter screen name to fetch

    Appends up to 50 tweets newer than the checkpoint to
    data/tweets/<username>.json and advances the checkpoint.
    """
    print username
    value = users.find(username)
    file_path = "data/tweets/{0}.json".format(username)
    # Start from the existing cache so new tweets are appended, never lost.
    if os.path.exists(file_path):
        with open(file_path, "r") as file:
            tweets = json.loads(file.read())
    else:
        tweets = []
    first_tweet_done = False
    since_id = value["lastTweetRetrieved"]
    for tweet in tweepy.Cursor(api.user_timeline, id=username, since_id = since_id).items(50):
        # The first tweet yielded becomes the new checkpoint (assumes the
        # timeline is returned newest-first - TODO confirm with tweepy docs).
        if not first_tweet_done:
            value["lastTweetRetrieved"] = tweet.id
            first_tweet_done = True
        tweets.append(tweet._json)
    users.save(username, value)
    with open("data/tweets/{0}.json".format(username), "w") as file:
        file.write(json.dumps(tweets))
def download_profile(api, username): | |
print username | |
profile = api.get_user(username)._json | |
followers = list(tweepy.Cursor(api.followers_ids, username).items()) | |
friends = list(tweepy.Cursor(api.friends_ids, username).items()) | |
profile["followers"] = followers | |
profile["friends"] = friends | |
with open("data/profiles/{0}.json".format(username), "w") as file: | |
file.write(json.dumps(profile)) | |
def download_friends(api, username): | |
print username | |
profile = api.get_user(username)._json | |
friends = list(tweepy.Cursor(api.friends_ids, username).items()) | |
profile["friends"] = friends | |
with open("data/friends/{0}.json".format(username), "w") as file: | |
file.write(json.dumps(profile)) | |
def import_profiles_into_neo4j():
    """Load every cached profile JSON into Neo4j as (:Person) nodes.

    Creates FOLLOWS relationships for both follower and friend id lists.
    People known only by id get a :Shadow label, which is removed once
    their full profile is imported. All files share one transaction,
    committed at the end.
    """
    # Build the Cypher once - it is identical for every profile
    # (the original rebuilt this string on every loop iteration).
    statement = """
        MERGE (p:Person {twitterId: {twitterId}})
        REMOVE p:Shadow
        SET p.screenName = {screenName},
            p.description = {description},
            p.name = {name}
        WITH p
        FOREACH(followerId IN {followers} |
            MERGE (follower:Person {twitterId: followerId})
            ON CREATE SET follower:Shadow
            MERGE (follower)-[:FOLLOWS]->(p)
        )
        FOREACH(friendId IN {friends} |
            MERGE (friend:Person {twitterId: friendId})
            ON CREATE SET friend:Shadow
            MERGE (p)-[:FOLLOWS]->(friend)
        )
    """
    graph = Graph()
    tx = graph.cypher.begin()
    # Avoid the original's shadowing of the loop variable (and the py2
    # builtin) `file` by the open file handle.
    filenames = [f for f in os.listdir("data/profiles") if f.endswith("json")]
    for filename in filenames:
        with open("data/profiles/{0}".format(filename), "r") as infile:
            profile = json.loads(infile.read())
        print(profile["screen_name"])
        params = {
            "twitterId" : profile["id"],
            "screenName": profile["screen_name"],
            "name": profile["name"],
            "description": profile["description"],
            "followers" : profile["followers"],
            "friends" : profile["friends"]
        }
        tx.append(statement, params)
        tx.process()
    tx.commit()
def import_friends_into_neo4j():
    """Attach FOLLOWS relationships between already-imported :Handle nodes.

    For each cached data/friends/<user>.json: find the matching :Handle by
    screen name, record its twitterId, and connect it to every friend whose
    :Handle already carries a twitterId. One transaction per file.
    """
    graph = Graph()
    files = [file for file in os.listdir("data/friends") if file.endswith("json")]
    for file in files:
        tx = graph.cypher.begin()
        # NOTE(review): the open handle shadows the loop variable `file`
        # (and the py2 builtin of the same name).
        with open("data/friends/{0}".format(file), "r") as file:
            profile = json.loads(file.read())
            print profile["screen_name"]
            params = {
                "twitterId" : profile["id"],
                "screenName": profile["screen_name"],
                "friends" : profile["friends"]
            }
            statement = """
                MATCH (p:Handle {name: '@'+lower({screenName})})
                SET p.twitterId = {twitterId}
                WITH p
                WHERE p is not null
                UNWIND {friends} as friendId
                MATCH (friend:Handle {twitterId: friendId})
                MERGE (p)-[:FOLLOWS]->(friend)
            """
            tx.append(statement, params)
            tx.process()
            tx.commit()
def import_tweets_into_neo4j():
    """Load every cached tweet JSON file into Neo4j.

    Creates (:Tweet) nodes linked to their (:Person) author, mentioned
    users, mentioned URLs and - for replies - a possibly-:Shadow
    in-reply-to tweet. All files share one transaction, committed at
    the end.
    """
    # Build the Cypher once; the original rebuilt it for every tweet.
    statement = """
        MERGE (tweet:Tweet {id: {tweetId}})
        SET tweet.text = {text}, tweet.timestamp = {createdAt}
        REMOVE tweet:Shadow
        WITH tweet
        MATCH (person:Person {twitterId: {userId}})
        MERGE (person)-[:TWEETED]->(tweet)
        WITH tweet
        FOREACH(user in {userMentions} |
            MERGE (mentionedUser:Person {twitterId: user.id})
            SET mentionedUser.screenName = user.screen_name
            MERGE (tweet)-[:MENTIONED_USER]->(mentionedUser)
        )
        FOREACH(url in {urls} |
            MERGE (u:URL {value: url.expanded_url})
            MERGE (tweet)-[:MENTIONED_URL]->(u)
        )
        FOREACH(ignoreMe in CASE WHEN NOT {inReplyToTweetId} is null THEN [1] ELSE [] END |
            MERGE (inReplyToTweet:Tweet {id: {inReplyToTweetId}})
            ON CREATE SET inReplyToTweet:Shadow
            MERGE (tweet)-[:IN_REPLY_TO_TWEET]->(inReplyToTweet)
        )
    """
    graph = Graph()
    tx = graph.cypher.begin()
    # Dropped the original's unused `count = 0` local, and renamed the
    # loop/handle variables so they no longer shadow each other.
    filenames = [f for f in os.listdir("data/tweets") if f.endswith("json")]
    for filename in filenames:
        with open("data/tweets/{0}".format(filename), "r") as infile:
            tweets = json.loads(infile.read())
        for tweet in tweets:
            # Normalise Twitter's created_at string to a unix timestamp (UTC).
            created_at = calendar.timegm(parser.parse(tweet["created_at"]).timetuple())
            params = {
                "tweetId": tweet["id"],
                "createdAt": created_at,
                "text": tweet["text"],
                "userId": tweet["user"]["id"],
                "inReplyToTweetId": tweet["in_reply_to_status_id"],
                # Pass the JSON lists straight through; the original's
                # identity list-comprehensions added nothing.
                "userMentions": tweet["entities"]["user_mentions"],
                "urls": tweet["entities"]["urls"]
            }
            tx.append(statement, params)
            tx.process()
    tx.commit()
def add_new_users(users, count): | |
graph = Graph() | |
params = {"limit": count} | |
results = graph.cypher.execute(""" | |
match (p:Shadow:Person)<-[:MENTIONED_USER]-(user) | |
RETURN p.screenName AS user, COUNT(*) AS times | |
ORDER BY times DESC | |
LIMIT {limit} | |
""", params) | |
print results | |
for row in results: | |
users.add(row["user"]) | |
def main(argv=None):
    """Command-line entry point: parse flags and dispatch to one action.

    argv -- full argument vector (argv[0] is the program name); defaults
            to sys.argv when None.
    """
    # Renamed from `parser` to avoid shadowing the module-level
    # dateutil `parser` import.
    arg_parser = argparse.ArgumentParser(description='Query the Twitter API')
    # specific user
    arg_parser.add_argument('--seed')
    arg_parser.add_argument('--download-tweets')
    arg_parser.add_argument('--download-profile')
    arg_parser.add_argument('--read-user')
    arg_parser.add_argument('--add-new-users', type=int)
    # all users
    arg_parser.add_argument('--download-all-user-tweets', action='store_true')
    arg_parser.add_argument('--download-new-user-tweets', action='store_true')
    arg_parser.add_argument('--download-all-user-profiles', action='store_true')
    arg_parser.add_argument('--download-all-user-friends', action='store_true')
    # twitter auth
    arg_parser.add_argument('--check-auth', action='store_true')
    # import
    arg_parser.add_argument('--import-profiles-into-neo4j', action='store_true')
    arg_parser.add_argument('--import-friends-into-neo4j', action='store_true')
    arg_parser.add_argument('--import-tweets-into-neo4j', action='store_true')
    if argv is None:
        argv = sys.argv
    # BUG FIX: parse_args() used to ignore the argv parameter entirely and
    # always read sys.argv; pass the explicit arguments (minus argv[0]).
    args = arg_parser.parse_args(argv[1:])
    # --read-user works purely from the local cache; no API keys needed.
    if args.read_user:
        read_user(args.read_user)
        return
    # Options that require keys go below here
    consumer_key = os.environ.get('CONSUMER_KEY')
    consumer_secret = os.environ.get('CONSUMER_SECRET')
    access_token = os.environ.get('ACCESS_TOKEN')
    access_token_secret = os.environ.get('ACCESS_TOKEN_SECRET')
    if any(key is None for key in [consumer_key, consumer_secret, access_token, access_token_secret]):
        print("One of your twitter keys isn't set - don't forget to 'source credentials.local'")
        sys.exit(1)
    if args.check_auth:
        print("consumer_key: {0}".format(consumer_key))
        print("consumer_secret: {0}".format(consumer_secret))
        print("access_token: {0}".format(access_token))
        print("access_token_secret: {0}".format(access_token_secret))
        try:
            auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
            auth.set_access_token(access_token, access_token_secret)
            api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
            api.verify_credentials()
            print("Auth all working - we're good to go!")
        except tweepy.TweepError as e:
            print("Auth problem - " + str(e))
        return
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
    api.verify_credentials()
    if args.seed:
        seed(api, args.seed)
        return
    if args.download_tweets:
        users = Users()
        download_user_tweets(api, users, args.download_tweets)
        return
    if args.download_all_user_tweets:
        users = Users()
        download_all_user_tweets(api, users)
        return
    if args.download_new_user_tweets:
        users = Users()
        download_new_user_tweets(api, users)
        return
    if args.download_all_user_profiles:
        users = Users()
        download_all_user_profiles(api, users)
        return
    if args.download_all_user_friends:
        users = Users()
        download_all_user_friends(api, users)
        return
    # NOTE(review): a value of 0 is falsy, so `--add-new-users 0` is a no-op.
    if args.add_new_users:
        users = Users()
        add_new_users(users, args.add_new_users)
        return
    if args.import_profiles_into_neo4j:
        import_profiles_into_neo4j()
        return
    if args.import_friends_into_neo4j:
        import_friends_into_neo4j()
        return
    if args.import_tweets_into_neo4j:
        import_tweets_into_neo4j()
        return
if __name__ == "__main__": | |
sys.exit(main()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//--- exploratory queries over the tweet graph ---
//degree of handles (number of tweets per handle)
match (h:Handle)-[:TWEETS]->(t:Tweet)
return h.name, h.realname, count(t)
order by count(t) DESC
limit 10
//degree of hashtags (number of tweets each hashtag appears in)
match (h:Hashtag)-[:MENTIONED_IN]->(t:Tweet)
return h.name, count(t)
order by count(t) DESC
//most mentioned handles or hashtags (unlabeled match covers both)
match (h)-[:MENTIONED_IN]->(t:Tweet)
return h.name, labels(h), count(t)
order by count(t) DESC
limit 10
//querying the NodeRank (property written by the graphaware noderank module)
match (h:Handle)
where h.nodeRank is not null
return h.name, h.realname, h.nodeRank
order by h.nodeRank DESC
limit 10
//what is connected to the top NodeRanked handles
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)-[r*..2]-()
return h,r
limit 50
//what is connected to the top NodeRanked handles at depth 1
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)--(connected)
return labels(connected), count(connected)
limit 25
//what is connected to the top NodeRanked handles at depth 3
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)-[*..3]-(connected)
return labels(connected), count(connected)
order by count(connected) DESC
//betweenness centrality for the top ranked nodes - query using UNWIND
//first we create the subgraph that we want to analyse
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 50
//we store all the nodes of the subgraph in a collection, and pass it to the next query
WITH COLLECT(h) AS handles
//then we unwind this collection TWICE so that we get a product of rows (2500 in total)
UNWIND handles as source
UNWIND handles as target
//and then finally we calculate the betweenness on these rows
MATCH p=allShortestPaths((source)-[:TWEETS|MENTIONED_IN*]-(target))
WHERE id(source) < id(target) and length(p) > 1
UNWIND nodes(p)[1..-1] as n
WITH n.realname as Name, count(*) as betweenness
WHERE Name is not null
RETURN Name, betweenness
ORDER BY betweenness DESC;
//querying the TopicRank (property written by the second noderank module, TR)
match (h:Hashtag)
where h.topicRank is not null
return h.name, h.topicRank
order by h.topicRank DESC
limit 50
//the top TopicRanked Hashtag and what is connected to it
match (h:Hashtag)
where h.topicRank is not null
with h
order by h.topicRank DESC
limit 1
match (h)-[r*..2]-()
return h,r
limit 50
//the link between Boonen and Kristoff
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[*]-(h1))
return p
//the link between Boonen and Kristoff, filtering out FOLLOWS relationships
//BUG FIX: relationship types are stored upper-case (FOLLOWS - see the
//import script and module config); the original compared against
//"Follows", which never matches, so the filter had no effect.
//NOTE(review): the WHERE filters (path, relationship) rows, not whole
//paths - a path survives for each of its non-FOLLOWS relationships.
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[r*]-(h1))
unwind r as Rels
with p, Rels
where type(Rels)<>"FOLLOWS"
return p
//the link between Boonen and Kristoff and their teams
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[*]-(h1))
with nodes(p) as Nodes
unwind Nodes as Node
match (Node)--(t:Team)
return Node, t
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment