CyclingTweets
[ | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet Id using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet Id", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Username using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Username", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet time using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet time", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet time using expression value.toDate()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet time", | |
"expression": "value.toDate()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Is ReTweet using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Is ReTweet", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Favorite using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Favorite", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column ReTweet using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "ReTweet", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Is ReTweet using expression value.toNumber()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Is ReTweet", | |
"expression": "value.toNumber()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Favorite using expression value.toNumber()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Favorite", | |
"expression": "value.toNumber()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column ReTweet using expression value.toNumber()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "ReTweet", | |
"expression": "value.toNumber()", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Twitter URL using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Twitter URL", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet using expression grel:substring(value,1,length(value)-1)", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet", | |
"expression": "grel:substring(value,1,length(value)-1)", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Tweet Id to TweetID", | |
"oldColumnName": "Tweet Id", | |
"newColumnName": "TweetID" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Tweet time to TweetTime", | |
"oldColumnName": "Tweet time", | |
"newColumnName": "TweetTime" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Is ReTweet to IsRetweet", | |
"oldColumnName": "Is ReTweet", | |
"newColumnName": "IsRetweet" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Twitter URL to TwitterURL", | |
"oldColumnName": "Twitter URL", | |
"newColumnName": "TwitterURL" | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet using expression grel:replaceChars(value,\",\",\" ,\")", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet", | |
"expression": "grel:replaceChars(value,\",\",\" ,\")", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column Tweet using expression grel:replaceChars(value,\" \",\" \")",
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Tweet", | |
"expression": "grel:replaceChars(value,\" \",\" \")",
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
} | |
] |
#Add this to the <your neo4j directory>/conf/neo4j.properties after adding
#graphaware-noderank-2.2.1.30.2.jar and
#graphaware-server-enterprise-all-2.2.1.30.jar
#to <your neo4j directory>/plugins directory
com.graphaware.runtime.enabled=true | |
#NR becomes the module ID: | |
com.graphaware.module.NR.1=com.graphaware.module.noderank.NodeRankModuleBootstrapper | |
#optional number of top ranked nodes to remember, the default is 10 | |
com.graphaware.module.NR.maxTopRankNodes=50 | |
#optional damping factor, which is a number p such that a random node will be selected at any step of the algorithm
#with the probability 1-p (as opposed to following a random relationship). The default is 0.85 | |
com.graphaware.module.NR.dampingFactor=0.85 | |
#optional key of the property that gets written to the ranked nodes, default is "nodeRank" | |
com.graphaware.module.NR.propertyKey=nodeRank | |
#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes | |
com.graphaware.module.NR.node=hasLabel('Handle') | |
#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships | |
com.graphaware.module.NR.relationship=isType('FOLLOWS') | |
#TR becomes the module ID:
com.graphaware.module.TR.2=com.graphaware.module.noderank.NodeRankModuleBootstrapper | |
#optional number of top ranked nodes to remember, the default is 10 | |
com.graphaware.module.TR.maxTopRankNodes=50 | |
#optional damping factor, which is a number p such that a random node will be selected at any step of the algorithm
#with the probability 1-p (as opposed to following a random relationship). The default is 0.85 | |
com.graphaware.module.TR.dampingFactor=0.85 | |
#optional key of the property that gets written to the ranked nodes, default is "nodeRank" | |
com.graphaware.module.TR.propertyKey=topicRank | |
#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes | |
com.graphaware.module.TR.node=hasLabel('Hashtag') | |
#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships | |
com.graphaware.module.TR.relationship=isType('MENTIONED_IN') |
//add some metadata | |
//country info | |
load csv with headers from | |
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1390098748" as csv | |
create (c:Country {code: csv.Country, name: csv.FullCountry, cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)}); | |
//team info | |
load csv with headers from | |
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1244447866" as csv | |
merge (tc:TeamClass {name: csv.Class}) | |
with csv, tc | |
match (c:Country {code: csv.Country}) | |
merge (tc)<-[:IN_CLASS]-(t:Team {code: trim(csv.Code), name: trim(csv.Name), cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)})-[:FROM_COUNTRY]->(c); | |
//twitter handle info | |
using periodic commit 500 | |
load csv with headers from | |
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=0" as csv | |
match (c:Country {code: trim(csv.Country)}) | |
merge (h:Handle {name: trim(csv.Handle), realname: trim(csv.Name)})-[:FROM_COUNTRY]->(c); | |
//rider info | |
load csv with headers from | |
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1885142986" as csv | |
match (h:Handle {realname: trim(csv.Name)}), (t:Team {code: trim(csv.Team)}) | |
set h.Age=toint(csv.Age) | |
set h.CQ=toint(csv.CQ) | |
set h.UCIcode=csv.UCIcode | |
set h.rank=toint(csv.Rank) | |
set h.prevrank=toint(csv.Prev) | |
create (h)-[:RIDES_FOR_TEAM]->(t); | |
//add the index on Handle | |
create index on :Handle(name); | |
create index on :Hashtag(name); | |
create index on :Tweet(text); | |
create index on :Handle(nodeRank); | |
create constraint on (h:Handle) assert h.twitterId is unique; |
//get the handles from the csv file | |
//this should not do anything - as the handles have already been loaded above | |
using periodic commit 500 | |
load csv with headers from "file:<yourpath>/20150401.csv" as csv | |
with csv | |
where csv.Username<>[] | |
merge (h:Handle {name: '@'+lower(csv.Username)}); | |
//connect the tweets to the handles | |
using periodic commit 500 | |
load csv with headers from "file:<your path>/20150401.csv" as csv | |
with csv | |
where csv.Username<>[] | |
merge (h:Handle {name: '@'+lower(csv.Username)}) | |
merge (t:Tweet {text: lower(csv.Tweet), id: toint(csv.TweetID), time: csv.TweetTime, isretweet: toint(csv.IsReTweet), favorite: toint(csv.Favorite), retweet: toint(csv.ReTweet), url: csv.`Twitter URL`})<-[:TWEETS]-(h); |
//extract handles from tweet text and connect tweets to handles | |
match (t:Tweet) | |
WITH t,split(t.text," ") as words | |
UNWIND words as handles | |
with t,handles | |
where left(handles,1)="@" | |
with t, handles | |
merge (h:Handle {name: lower(handles)}) | |
merge (h)-[:MENTIONED_IN]->(t); | |
//extract hashtags from tweet text and connect tweets to hashtags | |
match (t:Tweet) | |
WITH t,split(t.text," ") as words | |
UNWIND words as hashtags | |
with t,hashtags | |
where left(hashtags,1)="#" | |
with t, hashtags | |
merge (h:Hashtag {name: upper(hashtags)}) | |
merge (h)-[:MENTIONED_IN]->(t); |
import argparse | |
import sys | |
import os | |
import tweepy | |
import csv | |
import json | |
import calendar | |
from collections import deque | |
from util import Users | |
from py2neo import Graph | |
from dateutil import parser | |
def seed(api, username): | |
if os.path.exists("data/users.csv"): | |
print "Twitter graph has already been seeded. Delete 'data/users.csv' if you want to seed it again" | |
sys.exit(1) | |
USERS_TO_PROCESS = 50 | |
users_to_process = deque() | |
users_processed = set([username]) | |
for tweet in tweepy.Cursor(api.user_timeline, id=username).items(50): | |
for user in tweet.entities["user_mentions"]: | |
if not len(users_to_process) > USERS_TO_PROCESS: | |
users_to_process.append(user["screen_name"]) | |
else: | |
break | |
users_processed = set([username]) | |
while True: | |
if len(users_processed) >= USERS_TO_PROCESS: | |
break | |
else: | |
if len(users_to_process) > 0: | |
next_user = users_to_process.popleft() | |
print next_user | |
if not next_user in users_processed: | |
users_processed.add(next_user) | |
for tweet in tweepy.Cursor(api.user_timeline, id=next_user).items(10): | |
for user_mentioned in tweet.entities["user_mentions"]: | |
if not len(users_processed) > 50: | |
users_to_process.append(user_mentioned["screen_name"]) | |
else: | |
break | |
else: | |
break | |
with open("data/users.csv", "w") as usersfile: | |
writer = csv.writer(usersfile, delimiter=",") | |
for user in users_processed: | |
writer.writerow([user, "PROCESSED", ""]) | |
def read_user(username): | |
print username | |
profile_file_path = "data/profiles/{0}.json".format(username) | |
if os.path.exists(profile_file_path): | |
with open(profile_file_path, "r") as file: | |
profile = json.loads(file.read()) | |
print profile["name"] | |
print profile["description"] | |
print "Friends: {0}".format(len(profile["friends"])) | |
print "Followers: {0}".format(len(profile["followers"])) | |
file_path = "data/tweets/{0}.json".format(username) | |
if not os.path.exists(file_path): | |
tweets = [] | |
else: | |
with open(file_path, "r") as file: | |
tweets = json.loads(file.read()) | |
print "# of tweets: {0}".format(len(tweets)) | |
if len(tweets) > 0: | |
print "latest tweets:" | |
for tweet in tweets: | |
print tweet["id"], tweet["text"] | |
def download_all_user_tweets(api, users):
    # Materialise the name list up front: download_user_tweets mutates the store.
    names = [name for name, _ in users.all().iteritems()]
    for name in names:
        download_user_tweets(api, users, name)
def download_new_user_tweets(api, users):
    # Only users whose tweets have never been retrieved (no high-water mark yet).
    pending = [name for name, state in users.all().iteritems()
               if not state["lastTweetRetrieved"]]
    for name in pending:
        download_user_tweets(api, users, name)
def download_all_user_profiles(api, users):
    # Skip any user whose profile JSON is already on disk.
    pending = [name for name, _ in users.all().iteritems()
               if not os.path.exists("data/profiles/{0}.json".format(name))]
    for name in pending:
        download_profile(api, name)
def download_all_user_friends(api, users):
    # Skip any user whose friends JSON is already on disk.
    pending = [name for name, _ in users.all().iteritems()
               if not os.path.exists("data/friends/{0}.json".format(name))]
    for name in pending:
        download_friends(api, name)
def download_user_tweets(api, users, username):
    """Fetch up to 50 tweets for `username` newer than the last one we saw,
    append them to data/tweets/<username>.json, and record the newest tweet
    id back into the users store."""
    print username
    value = users.find(username)
    file_path = "data/tweets/{0}.json".format(username)
    # start from any tweets already on disk so we only ever append
    if os.path.exists(file_path):
        with open(file_path, "r") as file:
            tweets = json.loads(file.read())
    else:
        tweets = []
    first_tweet_done = False
    # NOTE(review): assumes lastTweetRetrieved holds a tweet id (or is
    # empty/None on first run) and that tweepy accepts a falsy since_id - confirm
    since_id = value["lastTweetRetrieved"]
    for tweet in tweepy.Cursor(api.user_timeline, id=username, since_id = since_id).items(50):
        # the first tweet yielded is taken as the new high-water mark
        # (presumably the timeline is newest-first - verify against tweepy docs)
        if not first_tweet_done:
            value["lastTweetRetrieved"] = tweet.id
            first_tweet_done = True
        tweets.append(tweet._json)
    users.save(username, value)
    with open("data/tweets/{0}.json".format(username), "w") as file:
        file.write(json.dumps(tweets))
def download_profile(api, username): | |
print username | |
profile = api.get_user(username)._json | |
followers = list(tweepy.Cursor(api.followers_ids, username).items()) | |
friends = list(tweepy.Cursor(api.friends_ids, username).items()) | |
profile["followers"] = followers | |
profile["friends"] = friends | |
with open("data/profiles/{0}.json".format(username), "w") as file: | |
file.write(json.dumps(profile)) | |
def download_friends(api, username): | |
print username | |
profile = api.get_user(username)._json | |
friends = list(tweepy.Cursor(api.friends_ids, username).items()) | |
profile["friends"] = friends | |
with open("data/friends/{0}.json".format(username), "w") as file: | |
file.write(json.dumps(profile)) | |
def import_profiles_into_neo4j():
    """Load every downloaded profile JSON into Neo4j as (:Person) nodes.

    For each profile: merge the person by twitterId, drop its :Shadow marker,
    and link followers/friends via :FOLLOWS (creating :Shadow placeholder
    people where needed). All statements go into one transaction, processed
    per file and committed once at the end.
    """
    graph = Graph()
    tx = graph.cypher.begin()
    files = [file for file in os.listdir("data/profiles") if file.endswith("json")]
    for file in files:
        # NOTE(review): the loop variable `file` is shadowed by the open()
        # target below (and shadows the py2 builtin); harmless but confusing.
        with open("data/profiles/{0}".format(file), "r") as file:
            profile = json.loads(file.read())
            print profile["screen_name"]
            params = {
                "twitterId" : profile["id"],
                "screenName": profile["screen_name"],
                "name": profile["name"],
                "description": profile["description"],
                "followers" : profile["followers"],
                "friends" : profile["friends"]
            }
            statement = """
            MERGE (p:Person {twitterId: {twitterId}})
            REMOVE p:Shadow
            SET p.screenName = {screenName},
            p.description = {description},
            p.name = {name}
            WITH p
            FOREACH(followerId IN {followers} |
            MERGE (follower:Person {twitterId: followerId})
            ON CREATE SET follower:Shadow
            MERGE (follower)-[:FOLLOWS]->(p)
            )
            FOREACH(friendId IN {friends} |
            MERGE (friend:Person {twitterId: friendId})
            ON CREATE SET friend:Shadow
            MERGE (p)-[:FOLLOWS]->(friend)
            )
            """
            tx.append(statement, params)
            # flush the batch to the server after each profile
            tx.process()
    tx.commit()
def import_friends_into_neo4j():
    """Load every downloaded friends JSON into Neo4j.

    Matches the (:Handle) by screen name, records its twitterId, then creates
    a :FOLLOWS relationship to each friend handle already in the graph.
    Each file gets its own transaction (begin/process/commit per file).
    """
    graph = Graph()
    files = [file for file in os.listdir("data/friends") if file.endswith("json")]
    for file in files:
        tx = graph.cypher.begin()
        with open("data/friends/{0}".format(file), "r") as file:
            profile = json.loads(file.read())
            print profile["screen_name"]
            params = {
                "twitterId" : profile["id"],
                "screenName": profile["screen_name"],
                "friends" : profile["friends"]
            }
            statement = """
            MATCH (p:Handle {name: '@'+lower({screenName})})
            SET p.twitterId = {twitterId}
            WITH p
            WHERE p is not null
            UNWIND {friends} as friendId
            MATCH (friend:Handle {twitterId: friendId})
            MERGE (p)-[:FOLLOWS]->(friend)
            """
            tx.append(statement, params)
            tx.process()
            tx.commit()
def import_tweets_into_neo4j():
    """Load every downloaded tweet JSON file into Neo4j.

    Each tweet is merged as a (:Tweet), linked to its (:Person) author, to
    mentioned users, to mentioned URLs, and to the tweet it replies to
    (placeholders get the :Shadow label). All statements are appended to a
    single transaction, processed per tweet and committed once at the end.
    """
    graph = Graph()
    tx = graph.cypher.begin()
    # fix: removed an unused `count = 0` local
    files = [file for file in os.listdir("data/tweets") if file.endswith("json")]
    for file in files:
        with open("data/tweets/{0}".format(file), "r") as file:
            tweets = json.loads(file.read())
            for tweet in tweets:
                # store the creation time as a unix epoch timestamp (UTC)
                created_at = calendar.timegm(parser.parse(tweet["created_at"]).timetuple())
                params = {
                    "tweetId": tweet["id"],
                    "createdAt": created_at,
                    "text": tweet["text"],
                    "userId": tweet["user"]["id"],
                    "inReplyToTweetId": tweet["in_reply_to_status_id"],
                    "userMentions": [user for user in tweet["entities"]["user_mentions"]],
                    "urls": [url for url in tweet["entities"]["urls"]]
                }
                statement = """
                MERGE (tweet:Tweet {id: {tweetId}})
                SET tweet.text = {text}, tweet.timestamp = {createdAt}
                REMOVE tweet:Shadow
                WITH tweet
                MATCH (person:Person {twitterId: {userId}})
                MERGE (person)-[:TWEETED]->(tweet)
                WITH tweet
                FOREACH(user in {userMentions} |
                MERGE (mentionedUser:Person {twitterId: user.id})
                SET mentionedUser.screenName = user.screen_name
                MERGE (tweet)-[:MENTIONED_USER]->(mentionedUser)
                )
                FOREACH(url in {urls} |
                MERGE (u:URL {value: url.expanded_url})
                MERGE (tweet)-[:MENTIONED_URL]->(u)
                )
                FOREACH(ignoreMe in CASE WHEN NOT {inReplyToTweetId} is null THEN [1] ELSE [] END |
                MERGE (inReplyToTweet:Tweet {id: {inReplyToTweetId}})
                ON CREATE SET inReplyToTweet:Shadow
                MERGE (tweet)-[:IN_REPLY_TO_TWEET]->(inReplyToTweet)
                )
                """
                tx.append(statement, params)
                tx.process()
    tx.commit()
def add_new_users(users, count):
    """Find the `count` most-mentioned :Shadow people in the graph and add
    their screen names to the users store so their tweets get downloaded on
    the next run."""
    graph = Graph()
    params = {"limit": count}
    results = graph.cypher.execute("""
        match (p:Shadow:Person)<-[:MENTIONED_USER]-(user)
        RETURN p.screenName AS user, COUNT(*) AS times
        ORDER BY times DESC
        LIMIT {limit}
        """, params)
    print results
    for row in results:
        users.add(row["user"])
def main(argv=None):
    """Command-line entry point: parse flags, set up Twitter auth where
    needed, and dispatch to exactly one action (first matching flag wins)."""
    # NOTE: this local `parser` shadows the module-level dateutil `parser`
    # within main only; import_tweets_into_neo4j still sees the module one.
    parser = argparse.ArgumentParser(description='Query the Twitter API')
    # specific user
    parser.add_argument('--seed')
    parser.add_argument('--download-tweets')
    parser.add_argument('--download-profile')
    parser.add_argument('--read-user')
    parser.add_argument('--add-new-users', type=int)
    # all users
    parser.add_argument('--download-all-user-tweets', action='store_true')
    parser.add_argument('--download-new-user-tweets', action='store_true')
    parser.add_argument('--download-all-user-profiles', action='store_true')
    parser.add_argument('--download-all-user-friends', action='store_true')
    # twitter auth
    parser.add_argument('--check-auth', action='store_true')
    # import
    parser.add_argument('--import-profiles-into-neo4j', action='store_true')
    parser.add_argument('--import-friends-into-neo4j', action='store_true')
    parser.add_argument('--import-tweets-into-neo4j', action='store_true')
    if argv is None:
        argv = sys.argv
    # NOTE(review): `argv` is never forwarded - parse_args() always reads
    # sys.argv, so the argv parameter is effectively dead.
    args = parser.parse_args()
    # --read-user works purely from local files, so no API keys needed
    if args.read_user:
        read_user(args.read_user)
        return
    # Options that require keys go below here
    consumer_key = os.environ.get('CONSUMER_KEY')
    consumer_secret = os.environ.get('CONSUMER_SECRET')
    access_token = os.environ.get('ACCESS_TOKEN')
    access_token_secret = os.environ.get('ACCESS_TOKEN_SECRET')
    if any([key is None for key in [consumer_key, consumer_secret, access_token, access_token_secret]]):
        print "One of your twitter keys isn't set - don't forget to 'source credentials.local'"
        sys.exit(1)
    # --check-auth: print the keys and attempt one authenticated call
    if args.check_auth:
        print "consumer_key: {0}".format(consumer_key)
        print "consumer_secret: {0}".format(consumer_secret)
        print "access_token: {0}".format(access_token)
        print "access_token_secret: {0}".format(access_token_secret)
        try:
            auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
            auth.set_access_token(access_token, access_token_secret)
            api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
            api.verify_credentials()
            print "Auth all working - we're good to go!"
        except tweepy.TweepError as e:
            print "Auth problem - " + str(e)
        return
    # all remaining actions need an authenticated API client
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
    api.verify_credentials()
    if args.seed:
        seed(api, args.seed)
        return
    if args.download_tweets:
        users = Users()
        download_user_tweets(api, users, args.download_tweets)
        return
    if args.download_all_user_tweets:
        users = Users()
        download_all_user_tweets(api, users)
        return
    if args.download_new_user_tweets:
        users = Users()
        download_new_user_tweets(api, users)
        return
    if args.download_profile:
        users = Users()
        download_profile(api, args.download_profile)
        return
    if args.download_all_user_profiles:
        users = Users()
        download_all_user_profiles(api, users)
        return
    if args.download_all_user_friends:
        users = Users()
        download_all_user_friends(api, users)
        return
    if args.add_new_users:
        users = Users()
        add_new_users(users, args.add_new_users)
        return
    if args.import_profiles_into_neo4j:
        import_profiles_into_neo4j()
        return
    if args.import_friends_into_neo4j:
        import_friends_into_neo4j()
        return
    if args.import_tweets_into_neo4j:
        import_tweets_into_neo4j()
        return
# Script entry point: exit with main()'s return value (None -> status 0).
if __name__ == "__main__":
    sys.exit(main())
//degree of handles: who tweets the most
match (h:Handle)-[:TWEETS]->(t:Tweet)
return h.name, h.realname, count(t)
order by count(t) DESC
limit 10
//degree of hashtags: which tags appear in the most tweets
match (h:Hashtag)-[:MENTIONED_IN]->(t:Tweet)
return h.name, count(t)
order by count(t) DESC
//most mentioned handles or hashtags (no label filter, so both match)
match (h)-[:MENTIONED_IN]->(t:Tweet)
return h.name, labels(h), count(t)
order by count(t) DESC
limit 10
//querying the NodeRank (written by the graphaware NR module configured above)
match (h:Handle)
where h.nodeRank is not null
return h.name, h.realname, h.nodeRank
order by h.nodeRank DESC
limit 10
//what is connected to the top NodeRanked handle (depth 2 neighbourhood)
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)-[r*..2]-()
return h,r
limit 50
//what is connected to the top NodeRanked handle at depth 1
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)--(connected)
return labels(connected), count(connected)
limit 25
//what is connected to the top NodeRanked handle at depth 3
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)-[*..3]-(connected)
return labels(connected), count(connected)
order by count(connected) DESC
//betweenness centrality for the top ranked nodes - query using UNWIND
//first we create the subgraph that we want to analyse
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 50
//we store all the nodes of the subgraph in a collection, and pass it to the next query
WITH COLLECT(h) AS handles
//then we unwind this collection TWICE so that we get a product of rows (2500 in total)
UNWIND handles as source
UNWIND handles as target
//and then finally we calculate the betweenness on these rows
//(count how often each node sits strictly inside a shortest path)
MATCH p=allShortestPaths((source)-[:TWEETS|MENTIONED_IN*]-(target))
WHERE id(source) < id(target) and length(p) > 1
UNWIND nodes(p)[1..-1] as n
WITH n.realname as Name, count(*) as betweenness
WHERE Name is not null
RETURN Name, betweenness
ORDER BY betweenness DESC;
//querying the TopicRank (written by the graphaware TR module configured above)
match (h:Hashtag)
where h.topicRank is not null
return h.name, h.topicRank
order by h.topicRank DESC
limit 50
//the top TopicRanked Hashtag and its depth-2 neighbourhood
match (h:Hashtag)
where h.topicRank is not null
with h
order by h.topicRank DESC
limit 1
match (h)-[r*..2]-()
return h,r
limit 50
//the link between Boonen and Kristoff
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[*]-(h1))
return p
//the link between Boonen and Kristoff, excluding FOLLOWS relationships
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[r*]-(h1))
unwind r as Rels
with p, Rels
//fix: relationship types are upper-cased in this dataset ("Follows" never matched,
//so the filter was a no-op)
where type(Rels)<>"FOLLOWS"
return p
//the link between Boonen and Kristoff and their teams
//(take every node on the shortest paths and look up its team)
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[*]-(h1))
with nodes(p) as Nodes
unwind Nodes as Node
match (Node)--(t:Team)
return Node, t
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment