CyclingTweets
[
{
"op": "core/text-transform",
"description": "Text transform on cells in column Tweet Id using expression grel:substring(value,1,length(value)-1)",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Tweet Id",
"expression": "grel:substring(value,1,length(value)-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Username using expression grel:substring(value,1,length(value)-1)",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Username",
"expression": "grel:substring(value,1,length(value)-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Tweet time using expression grel:substring(value,1,length(value)-1)",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Tweet time",
"expression": "grel:substring(value,1,length(value)-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Tweet time using expression value.toDate()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Tweet time",
"expression": "value.toDate()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Is ReTweet using expression grel:substring(value,1,length(value)-1)",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Is ReTweet",
"expression": "grel:substring(value,1,length(value)-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Favorite using expression grel:substring(value,1,length(value)-1)",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Favorite",
"expression": "grel:substring(value,1,length(value)-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column ReTweet using expression grel:substring(value,1,length(value)-1)",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "ReTweet",
"expression": "grel:substring(value,1,length(value)-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Is ReTweet using expression value.toNumber()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Is ReTweet",
"expression": "value.toNumber()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Favorite using expression value.toNumber()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Favorite",
"expression": "value.toNumber()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column ReTweet using expression value.toNumber()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "ReTweet",
"expression": "value.toNumber()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Twitter URL using expression grel:substring(value,1,length(value)-1)",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Twitter URL",
"expression": "grel:substring(value,1,length(value)-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Tweet using expression grel:substring(value,1,length(value)-1)",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Tweet",
"expression": "grel:substring(value,1,length(value)-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-rename",
"description": "Rename column Tweet Id to TweetID",
"oldColumnName": "Tweet Id",
"newColumnName": "TweetID"
},
{
"op": "core/column-rename",
"description": "Rename column Tweet time to TweetTime",
"oldColumnName": "Tweet time",
"newColumnName": "TweetTime"
},
{
"op": "core/column-rename",
"description": "Rename column Is ReTweet to IsRetweet",
"oldColumnName": "Is ReTweet",
"newColumnName": "IsRetweet"
},
{
"op": "core/column-rename",
"description": "Rename column Twitter URL to TwitterURL",
"oldColumnName": "Twitter URL",
"newColumnName": "TwitterURL"
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Tweet using expression grel:replaceChars(value,\",\",\" ,\")",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Tweet",
"expression": "grel:replaceChars(value,\",\",\" ,\")",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Tweet using expression grel:replacechars(value,\" \",\" \")",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Tweet",
"expression": "grel:replacechars(value,\" \",\" \")",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]
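The recurring GREL expression substring(value,1,length(value)-1) drops the first and last character of each cell, stripping the stray quote characters that surround every field in the raw export; the later replaceChars steps massage commas and whitespace so the tweet text splits cleanly on spaces further down. A minimal Python sketch of what the substring transform does (the sample value is made up):

# hedged sketch: GREL substring(value,1,length(value)-1) is value[1:-1] in Python
def strip_outer_chars(value):
    # drop the first and last character, e.g. '"12345"' -> '12345'
    return value[1:-1] if value and len(value) >= 2 else value

assert strip_outer_chars('"577086851251572737"') == '577086851251572737'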
//Add this to the <your neo4j directory>/conf/neo4j.properties after adding
//graphaware-noderank-2.2.1.30.2.jar and
//graphaware-server-enterprise-all-2.2.1.30.jar
//to <your neo4j directory>/plugins directory
com.graphaware.runtime.enabled=true
#NR becomes the module ID:
com.graphaware.module.NR.1=com.graphaware.module.noderank.NodeRankModuleBootstrapper
#optional number of top ranked nodes to remember, the default is 10
com.graphaware.module.NR.maxTopRankNodes=50
#optional damping factor: a number p such that at any step of the algorithm a random node is selected
#with probability 1-p (as opposed to following a random relationship). The default is 0.85
com.graphaware.module.NR.dampingFactor=0.85
#optional key of the property that gets written to the ranked nodes, default is "nodeRank"
com.graphaware.module.NR.propertyKey=nodeRank
#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes
com.graphaware.module.NR.node=hasLabel('Handle')
#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships
com.graphaware.module.NR.relationship=isType('FOLLOWS')
#TR becomes the module ID:
com.graphaware.module.TR.2=com.graphaware.module.noderank.NodeRankModuleBootstrapper
#optional number of top ranked nodes to remember, the default is 10
com.graphaware.module.TR.maxTopRankNodes=50
#optional damping factor: a number p such that at any step of the algorithm a random node is selected
#with probability 1-p (as opposed to following a random relationship). The default is 0.85
com.graphaware.module.TR.dampingFactor=0.85
#optional key of the property that gets written to the ranked nodes, default is "nodeRank"
com.graphaware.module.TR.propertyKey=topicRank
#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes
com.graphaware.module.TR.node=hasLabel('Hashtag')
#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships
com.graphaware.module.TR.relationship=isType('MENTIONED_IN')
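#once neo4j restarts with these modules enabled, the configured property keys (nodeRank on
#Handle nodes, topicRank on Hashtag nodes) should start appearing. A quick, hedged sanity
#check with py2neo 2.x (the same client the import script below uses), assuming a default
#local server:
from py2neo import Graph

graph = Graph()  # connects to http://localhost:7474/db/data by default
for row in graph.cypher.execute(
        "match (h:Handle) where h.nodeRank is not null "
        "return h.name as name, h.nodeRank as rank "
        "order by rank desc limit 5"):
    print row["name"], row["rank"]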
//add some metadata
//country info
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1390098748" as csv
create (c:Country {code: csv.Country, name: csv.FullCountry, cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)});
//team info
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1244447866" as csv
merge (tc:TeamClass {name: csv.Class})
with csv, tc
match (c:Country {code: csv.Country})
merge (tc)<-[:IN_CLASS]-(t:Team {code: trim(csv.Code), name: trim(csv.Name), cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)})-[:FROM_COUNTRY]->(c);
//twitter handle info
using periodic commit 500
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=0" as csv
match (c:Country {code: trim(csv.Country)})
merge (h:Handle {name: trim(csv.Handle), realname: trim(csv.Name)})-[:FROM_COUNTRY]->(c);
//rider info
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1885142986" as csv
match (h:Handle {realname: trim(csv.Name)}), (t:Team {code: trim(csv.Team)})
set h.Age=toint(csv.Age)
set h.CQ=toint(csv.CQ)
set h.UCIcode=csv.UCIcode
set h.rank=toint(csv.Rank)
set h.prevrank=toint(csv.Prev)
create (h)-[:RIDES_FOR_TEAM]->(t);
//add the indexes and the uniqueness constraint
create index on :Handle(name);
create index on :Hashtag(name);
create index on :Tweet(text);
create index on :Handle(nodeRank);
create constraint on (h:Handle) assert h.twitterId is unique;
//get the handles from the csv file
//this should not create anything, as the handles have already been loaded above
using periodic commit 500
load csv with headers from "file:<yourpath>/20150401.csv" as csv
with csv
where csv.Username is not null
merge (h:Handle {name: '@'+lower(csv.Username)});
//connect the tweets to the handles
using periodic commit 500
load csv with headers from "file:<your path>/20150401.csv" as csv
with csv
where csv.Username is not null
merge (h:Handle {name: '@'+lower(csv.Username)})
merge (t:Tweet {text: lower(csv.Tweet), id: toint(csv.TweetID), time: csv.TweetTime, isretweet: toint(csv.IsRetweet), favorite: toint(csv.Favorite), retweet: toint(csv.ReTweet), url: csv.TwitterURL})<-[:TWEETS]-(h);
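//the column names used above (TweetID, TweetTime, IsRetweet, TwitterURL, ...) are the ones
//produced by the OpenRefine renames at the top of this gist. A hedged Python sketch of the
//header row this LOAD CSV expects (the file path and printed fields are illustrative only):
import csv
with open("20150401.csv") as f:
    # expected header: TweetID,Username,TweetTime,IsRetweet,Favorite,ReTweet,TwitterURL,Tweet
    for row in csv.DictReader(f):
        print row["Username"], row["TweetID"]
        break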
//extract handles from tweet text and connect tweets to handles
match (t:Tweet)
WITH t,split(t.text," ") as words
UNWIND words as handles
with t,handles
where left(handles,1)="@"
with t, handles
merge (h:Handle {name: lower(handles)})
merge (h)-[:MENTIONED_IN]->(t);
//extract hashtags from tweet text and connect tweets to hashtags
match (t:Tweet)
WITH t,split(t.text," ") as words
UNWIND words as hashtags
with t,hashtags
where left(hashtags,1)="#"
with t, hashtags
merge (h:Hashtag {name: upper(hashtags)})
merge (h)-[:MENTIONED_IN]->(t);
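//splitting on a single space is a deliberately crude tokenizer, which is why the OpenRefine
//steps above massaged commas and whitespace first. A hedged Python equivalent of the
//extraction logic (the example tweet text is made up):
def extract_tokens(text, prefix):
    # return whitespace-separated tokens that start with the given prefix
    return [w for w in text.split(" ") if w.startswith(prefix)]

tweet = "great ride by @kristoff87 at #rvv"
print extract_tokens(tweet.lower(), "@")  # ['@kristoff87'], lowercased like the Handle merge
print extract_tokens(tweet.upper(), "#")  # ['#RVV'], uppercased like the Hashtag merge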
import argparse
import sys
import os
import tweepy
import csv
import json
import calendar
from collections import deque
from util import Users
from py2neo import Graph
from dateutil import parser
def seed(api, username):
    if os.path.exists("data/users.csv"):
        print "Twitter graph has already been seeded. Delete 'data/users.csv' if you want to seed it again"
        sys.exit(1)
    USERS_TO_PROCESS = 50
    users_to_process = deque()
    users_processed = set([username])
    for tweet in tweepy.Cursor(api.user_timeline, id=username).items(50):
        for user in tweet.entities["user_mentions"]:
            if not len(users_to_process) > USERS_TO_PROCESS:
                users_to_process.append(user["screen_name"])
            else:
                break
    while True:
        if len(users_processed) >= USERS_TO_PROCESS:
            break
        else:
            if len(users_to_process) > 0:
                next_user = users_to_process.popleft()
                print next_user
                if next_user not in users_processed:
                    users_processed.add(next_user)
                    for tweet in tweepy.Cursor(api.user_timeline, id=next_user).items(10):
                        for user_mentioned in tweet.entities["user_mentions"]:
                            if not len(users_processed) > USERS_TO_PROCESS:
                                users_to_process.append(user_mentioned["screen_name"])
                            else:
                                break
            else:
                break
    with open("data/users.csv", "w") as usersfile:
        writer = csv.writer(usersfile, delimiter=",")
        for user in users_processed:
            writer.writerow([user, "PROCESSED", ""])

def read_user(username):
    print username
    profile_file_path = "data/profiles/{0}.json".format(username)
    if os.path.exists(profile_file_path):
        with open(profile_file_path, "r") as file:
            profile = json.loads(file.read())
        print profile["name"]
        print profile["description"]
        print "Friends: {0}".format(len(profile["friends"]))
        print "Followers: {0}".format(len(profile["followers"]))
    file_path = "data/tweets/{0}.json".format(username)
    if not os.path.exists(file_path):
        tweets = []
    else:
        with open(file_path, "r") as file:
            tweets = json.loads(file.read())
    print "# of tweets: {0}".format(len(tweets))
    if len(tweets) > 0:
        print "latest tweets:"
        for tweet in tweets:
            print tweet["id"], tweet["text"]

def download_all_user_tweets(api, users):
    unprocessed_users = [user[0] for user in users.all().iteritems()]
    for user in unprocessed_users:
        download_user_tweets(api, users, user)

def download_new_user_tweets(api, users):
    unprocessed_users = [user[0] for user in users.all().iteritems() if not user[1]["lastTweetRetrieved"]]
    for user in unprocessed_users:
        download_user_tweets(api, users, user)

def download_all_user_profiles(api, users):
    unprocessed_users = [user[0] for user in users.all().iteritems()
                         if not os.path.exists("data/profiles/{0}.json".format(user[0]))]
    for user in unprocessed_users:
        download_profile(api, user)

def download_all_user_friends(api, users):
    unprocessed_users = [user[0] for user in users.all().iteritems()
                         if not os.path.exists("data/friends/{0}.json".format(user[0]))]
    for user in unprocessed_users:
        download_friends(api, user)

def download_user_tweets(api, users, username):
    print username
    value = users.find(username)
    file_path = "data/tweets/{0}.json".format(username)
    if os.path.exists(file_path):
        with open(file_path, "r") as file:
            tweets = json.loads(file.read())
    else:
        tweets = []
    first_tweet_done = False
    since_id = value["lastTweetRetrieved"]
    for tweet in tweepy.Cursor(api.user_timeline, id=username, since_id=since_id).items(50):
        if not first_tweet_done:
            value["lastTweetRetrieved"] = tweet.id
            first_tweet_done = True
        tweets.append(tweet._json)
    users.save(username, value)
    with open("data/tweets/{0}.json".format(username), "w") as file:
        file.write(json.dumps(tweets))

def download_profile(api, username):
    print username
    profile = api.get_user(username)._json
    followers = list(tweepy.Cursor(api.followers_ids, username).items())
    friends = list(tweepy.Cursor(api.friends_ids, username).items())
    profile["followers"] = followers
    profile["friends"] = friends
    with open("data/profiles/{0}.json".format(username), "w") as file:
        file.write(json.dumps(profile))

def download_friends(api, username):
    print username
    profile = api.get_user(username)._json
    friends = list(tweepy.Cursor(api.friends_ids, username).items())
    profile["friends"] = friends
    with open("data/friends/{0}.json".format(username), "w") as file:
        file.write(json.dumps(profile))

def import_profiles_into_neo4j():
    graph = Graph()
    tx = graph.cypher.begin()
    files = [file for file in os.listdir("data/profiles") if file.endswith("json")]
    for file in files:
        with open("data/profiles/{0}".format(file), "r") as file:
            profile = json.loads(file.read())
        print profile["screen_name"]
        params = {
            "twitterId": profile["id"],
            "screenName": profile["screen_name"],
            "name": profile["name"],
            "description": profile["description"],
            "followers": profile["followers"],
            "friends": profile["friends"]
        }
        statement = """
            MERGE (p:Person {twitterId: {twitterId}})
            REMOVE p:Shadow
            SET p.screenName = {screenName},
                p.description = {description},
                p.name = {name}
            WITH p
            FOREACH(followerId IN {followers} |
                MERGE (follower:Person {twitterId: followerId})
                ON CREATE SET follower:Shadow
                MERGE (follower)-[:FOLLOWS]->(p)
            )
            FOREACH(friendId IN {friends} |
                MERGE (friend:Person {twitterId: friendId})
                ON CREATE SET friend:Shadow
                MERGE (p)-[:FOLLOWS]->(friend)
            )
        """
        tx.append(statement, params)
        tx.process()
    tx.commit()

def import_friends_into_neo4j():
    graph = Graph()
    files = [file for file in os.listdir("data/friends") if file.endswith("json")]
    for file in files:
        tx = graph.cypher.begin()
        with open("data/friends/{0}".format(file), "r") as file:
            profile = json.loads(file.read())
        print profile["screen_name"]
        params = {
            "twitterId": profile["id"],
            "screenName": profile["screen_name"],
            "friends": profile["friends"]
        }
        statement = """
            MATCH (p:Handle {name: '@'+lower({screenName})})
            SET p.twitterId = {twitterId}
            WITH p
            WHERE p is not null
            UNWIND {friends} as friendId
            MATCH (friend:Handle {twitterId: friendId})
            MERGE (p)-[:FOLLOWS]->(friend)
        """
        tx.append(statement, params)
        tx.process()
        tx.commit()

def import_tweets_into_neo4j():
    graph = Graph()
    tx = graph.cypher.begin()
    count = 0
    files = [file for file in os.listdir("data/tweets") if file.endswith("json")]
    for file in files:
        with open("data/tweets/{0}".format(file), "r") as file:
            tweets = json.loads(file.read())
        for tweet in tweets:
            created_at = calendar.timegm(parser.parse(tweet["created_at"]).timetuple())
            params = {
                "tweetId": tweet["id"],
                "createdAt": created_at,
                "text": tweet["text"],
                "userId": tweet["user"]["id"],
                "inReplyToTweetId": tweet["in_reply_to_status_id"],
                "userMentions": [user for user in tweet["entities"]["user_mentions"]],
                "urls": [url for url in tweet["entities"]["urls"]]
            }
            statement = """
                MERGE (tweet:Tweet {id: {tweetId}})
                SET tweet.text = {text}, tweet.timestamp = {createdAt}
                REMOVE tweet:Shadow
                WITH tweet
                MATCH (person:Person {twitterId: {userId}})
                MERGE (person)-[:TWEETED]->(tweet)
                WITH tweet
                FOREACH(user in {userMentions} |
                    MERGE (mentionedUser:Person {twitterId: user.id})
                    SET mentionedUser.screenName = user.screen_name
                    MERGE (tweet)-[:MENTIONED_USER]->(mentionedUser)
                )
                FOREACH(url in {urls} |
                    MERGE (u:URL {value: url.expanded_url})
                    MERGE (tweet)-[:MENTIONED_URL]->(u)
                )
                FOREACH(ignoreMe in CASE WHEN NOT {inReplyToTweetId} is null THEN [1] ELSE [] END |
                    MERGE (inReplyToTweet:Tweet {id: {inReplyToTweetId}})
                    ON CREATE SET inReplyToTweet:Shadow
                    MERGE (tweet)-[:IN_REPLY_TO_TWEET]->(inReplyToTweet)
                )
            """
            tx.append(statement, params)
        tx.process()
    tx.commit()

def add_new_users(users, count):
    graph = Graph()
    params = {"limit": count}
    results = graph.cypher.execute("""
        match (p:Shadow:Person)<-[:MENTIONED_USER]-(user)
        RETURN p.screenName AS user, COUNT(*) AS times
        ORDER BY times DESC
        LIMIT {limit}
    """, params)
    print results
    for row in results:
        users.add(row["user"])

def main(argv=None):
    parser = argparse.ArgumentParser(description='Query the Twitter API')
    # specific user
    parser.add_argument('--seed')
    parser.add_argument('--download-tweets')
    parser.add_argument('--download-profile')
    parser.add_argument('--read-user')
    parser.add_argument('--add-new-users', type=int)
    # all users
    parser.add_argument('--download-all-user-tweets', action='store_true')
    parser.add_argument('--download-new-user-tweets', action='store_true')
    parser.add_argument('--download-all-user-profiles', action='store_true')
    parser.add_argument('--download-all-user-friends', action='store_true')
    # twitter auth
    parser.add_argument('--check-auth', action='store_true')
    # import
    parser.add_argument('--import-profiles-into-neo4j', action='store_true')
    parser.add_argument('--import-friends-into-neo4j', action='store_true')
    parser.add_argument('--import-tweets-into-neo4j', action='store_true')

    if argv is None:
        argv = sys.argv
    args = parser.parse_args(argv[1:])

    if args.read_user:
        read_user(args.read_user)
        return

    # Options that require keys go below here
    consumer_key = os.environ.get('CONSUMER_KEY')
    consumer_secret = os.environ.get('CONSUMER_SECRET')
    access_token = os.environ.get('ACCESS_TOKEN')
    access_token_secret = os.environ.get('ACCESS_TOKEN_SECRET')
    if any([key is None for key in [consumer_key, consumer_secret, access_token, access_token_secret]]):
        print "One of your twitter keys isn't set - don't forget to 'source credentials.local'"
        sys.exit(1)

    if args.check_auth:
        print "consumer_key: {0}".format(consumer_key)
        print "consumer_secret: {0}".format(consumer_secret)
        print "access_token: {0}".format(access_token)
        print "access_token_secret: {0}".format(access_token_secret)
        try:
            auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
            auth.set_access_token(access_token, access_token_secret)
            api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
            api.verify_credentials()
            print "Auth all working - we're good to go!"
        except tweepy.TweepError as e:
            print "Auth problem - " + str(e)
        return

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    api.verify_credentials()

    if args.seed:
        seed(api, args.seed)
        return
    if args.download_tweets:
        users = Users()
        download_user_tweets(api, users, args.download_tweets)
        return
    if args.download_all_user_tweets:
        users = Users()
        download_all_user_tweets(api, users)
        return
    if args.download_new_user_tweets:
        users = Users()
        download_new_user_tweets(api, users)
        return
    if args.download_profile:
        users = Users()
        download_profile(api, args.download_profile)
        return
    if args.download_all_user_profiles:
        users = Users()
        download_all_user_profiles(api, users)
        return
    if args.download_all_user_friends:
        users = Users()
        download_all_user_friends(api, users)
        return
    if args.add_new_users:
        users = Users()
        add_new_users(users, args.add_new_users)
        return
    if args.import_profiles_into_neo4j:
        import_profiles_into_neo4j()
        return
    if args.import_friends_into_neo4j:
        import_friends_into_neo4j()
        return
    if args.import_tweets_into_neo4j:
        import_tweets_into_neo4j()
        return

if __name__ == "__main__":
    sys.exit(main())
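# hedged usage sketch for the script above; the flag names come from its argparse setup,
# while the file name "twitter.py" and the seed handle are hypothetical:
#
#   source credentials.local   # exports CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
#   python twitter.py --check-auth
#   python twitter.py --seed rvanbruggen
#   python twitter.py --download-all-user-profiles
#   python twitter.py --download-all-user-friends
#   python twitter.py --import-profiles-into-neo4j
#   python twitter.py --import-friends-into-neo4j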
//degree of handles
match (h:Handle)-[:TWEETS]->(t:Tweet)
return h.name, h.realname, count(t)
order by count(t) DESC
limit 10
//degree of hashtags
match (h:Hashtag)-[:MENTIONED_IN]->(t:Tweet)
return h.name, count(t)
order by count(t) DESC
//most mentioned handles or hashtags
match (h)-[:MENTIONED_IN]->(t:Tweet)
return h.name, labels(h), count(t)
order by count(t) DESC
limit 10
//querying the NodeRank
match (h:Handle)
where h.nodeRank is not null
return h.name, h.realname, h.nodeRank
order by h.nodeRank DESC
limit 10
//what is connected to the top NodeRanked handles
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)-[r*..2]-()
return h,r
limit 50
//what is connected to the top NodeRanked handles at depth 1
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)--(connected)
return labels(connected), count(connected)
limit 25
//what is connected to the top NodeRanked handles at depth 3
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)-[*..3]-(connected)
return labels(connected), count(connected)
order by count(connected) DESC
//betweenness centrality for the top ranked nodes - query using UNWIND
//first we create the subgraph that we want to analyse
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 50
//we store all the nodes of the subgraph in a collection, and pass it to the next query
WITH COLLECT(h) AS handles
//then we unwind this collection TWICE so that we get a product of rows (2500 in total)
UNWIND handles as source
UNWIND handles as target
//and then finally we calculate the betweenness on these rows
MATCH p=allShortestPaths((source)-[:TWEETS|MENTIONED_IN*]-(target))
WHERE id(source) < id(target) and length(p) > 1
UNWIND nodes(p)[1..-1] as n
WITH n.realname as Name, count(*) as betweenness
WHERE Name is not null
RETURN Name, betweenness
ORDER BY betweenness DESC;
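//a hedged Python illustration of the row blow-up described in the comments above: 50 collected
//handles unwound twice give 50*50 = 2500 rows, and the id(source) < id(target) filter keeps each
//unordered pair once, so only 50*49/2 = 1225 pairs are actually evaluated:
handles = range(50)  # stand-ins for the 50 top-ranked nodes
pairs = [(s, t) for s in handles for t in handles]
deduped = [(s, t) for (s, t) in pairs if s < t]
print len(pairs), len(deduped)  # 2500 1225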
//querying the TopicRank
match (h:Hashtag)
where h.topicRank is not null
return h.name, h.topicRank
order by h.topicRank DESC
limit 50
//what is connected to the top TopicRanked Hashtag
match (h:Hashtag)
where h.topicRank is not null
with h
order by h.topicRank DESC
limit 1
match (h)-[r*..2]-()
return h,r
limit 50
//the link between Boonen and Kristoff
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[*]-(h1))
return p
//the link between Boonen and Kristoff, excluding paths that use FOLLOWS relationships
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[r*]-(h1))
where none(rel in r where type(rel) = "FOLLOWS")
return p
//the link between Boonen and Kristoff and their teams
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[*]-(h1))
with nodes(p) as Nodes
unwind Nodes as Node
match (Node)--(t:Team)
return Node, t