rvanbruggen/cyclingtweets google refine.json

## cyclingtweets google refine.json
[
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Tweet Id using expression grel:substring(value,1,length(value)-1)",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Tweet Id",
    "expression": "grel:substring(value,1,length(value)-1)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Username using expression grel:substring(value,1,length(value)-1)",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Username",
    "expression": "grel:substring(value,1,length(value)-1)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Tweet time using expression grel:substring(value,1,length(value)-1)",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Tweet time",
    "expression": "grel:substring(value,1,length(value)-1)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Tweet time using expression value.toDate()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Tweet time",
    "expression": "value.toDate()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Is ReTweet using expression grel:substring(value,1,length(value)-1)",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Is ReTweet",
    "expression": "grel:substring(value,1,length(value)-1)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Favorite using expression grel:substring(value,1,length(value)-1)",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Favorite",
    "expression": "grel:substring(value,1,length(value)-1)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column ReTweet using expression grel:substring(value,1,length(value)-1)",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "ReTweet",
    "expression": "grel:substring(value,1,length(value)-1)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Is ReTweet using expression value.toNumber()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Is ReTweet",
    "expression": "value.toNumber()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Favorite using expression value.toNumber()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Favorite",
    "expression": "value.toNumber()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column ReTweet using expression value.toNumber()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "ReTweet",
    "expression": "value.toNumber()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Twitter URL using expression grel:substring(value,1,length(value)-1)",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Twitter URL",
    "expression": "grel:substring(value,1,length(value)-1)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Tweet using expression grel:substring(value,1,length(value)-1)",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Tweet",
    "expression": "grel:substring(value,1,length(value)-1)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/column-rename",
    "description": "Rename column Tweet Id to TweetID",
    "oldColumnName": "Tweet Id",
    "newColumnName": "TweetID"
  },
  {
    "op": "core/column-rename",
    "description": "Rename column Tweet time to TweetTime",
    "oldColumnName": "Tweet time",
    "newColumnName": "TweetTime"
  },
  {
    "op": "core/column-rename",
    "description": "Rename column Is ReTweet to IsRetweet",
    "oldColumnName": "Is ReTweet",
    "newColumnName": "IsRetweet"
  },
  {
    "op": "core/column-rename",
    "description": "Rename column Twitter URL to TwitterURL",
    "oldColumnName": "Twitter URL",
    "newColumnName": "TwitterURL"
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Tweet using expression grel:replaceChars(value,\",\",\" ,\")",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Tweet",
    "expression": "grel:replaceChars(value,\",\",\" ,\")",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Tweet using expression grel:replacechars(value,\"  \",\" \")",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Tweet",
    "expression": "grel:replacechars(value,\"  \",\" \")",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  }
]

## cyclingtweets graphaware configuration.txt
//Add this to the <your neo4j directory>/conf/neo4j.properties after adding
//graphaware-noderank-2.2.1.30.2.jar and
//graphaware-server-enterprise-all-2.2.1.30.jar
//to <your neo4j directory>/plugins directory

com.graphaware.runtime.enabled=true

#NR becomes the module ID:
com.graphaware.module.NR.1=com.graphaware.module.noderank.NodeRankModuleBootstrapper

#optional number of top ranked nodes to remember, the default is 10
com.graphaware.module.NR.maxTopRankNodes=50

#optional daming factor, which is a number p such that a random node will be selected at any step of the algorithm
#with the probability 1-p (as opposed to following a random relationship). The default is 0.85
com.graphaware.module.NR.dampingFactor=0.85

#optional key of the property that gets written to the ranked nodes, default is "nodeRank"
com.graphaware.module.NR.propertyKey=nodeRank

#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes
com.graphaware.module.NR.node=hasLabel('Handle')

#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships
com.graphaware.module.NR.relationship=isType('FOLLOWS')

#NR becomes the module ID:
com.graphaware.module.TR.2=com.graphaware.module.noderank.NodeRankModuleBootstrapper

#optional number of top ranked nodes to remember, the default is 10
com.graphaware.module.TR.maxTopRankNodes=50

#optional daming factor, which is a number p such that a random node will be selected at any step of the algorithm
#with the probability 1-p (as opposed to following a random relationship). The default is 0.85
com.graphaware.module.TR.dampingFactor=0.85

#optional key of the property that gets written to the ranked nodes, default is "nodeRank"
com.graphaware.module.TR.propertyKey=topicRank

#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes
com.graphaware.module.TR.node=hasLabel('Hashtag')

#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships
com.graphaware.module.TR.relationship=isType('MENTIONED_IN')

## cyclingtweets loading metadata.cql
//add some metadata
//country info
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1390098748" as csv
create (c:Country {code: csv.Country, name: csv.FullCountry, cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)});

//team info
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1244447866" as csv
merge (tc:TeamClass {name: csv.Class})
with csv, tc
match (c:Country {code: csv.Country})
merge (tc)<-[:IN_CLASS]-(t:Team {code: trim(csv.Code), name: trim(csv.Name), cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)})-[:FROM_COUNTRY]->(c);

//twitter handle info
using periodic commit 500
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=0" as csv
match (c:Country {code: trim(csv.Country)})
merge (h:Handle {name: trim(csv.Handle), realname: trim(csv.Name)})-[:FROM_COUNTRY]->(c);

//rider info
load csv with headers from
"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1885142986" as csv
match (h:Handle {realname: trim(csv.Name)}), (t:Team {code: trim(csv.Team)})
set h.Age=toint(csv.Age)
set h.CQ=toint(csv.CQ)
set h.UCIcode=csv.UCIcode
set h.rank=toint(csv.Rank)
set h.prevrank=toint(csv.Prev)
create (h)-[:RIDES_FOR_TEAM]->(t);

//add the index on Handle
create index on :Handle(name);
create index on :Hashtag(name);
create index on :Tweet(text);
create index on :Handle(nodeRank);
create constraint on (h:Handle) assert h.twitterId is unique;

## cyclingtweets loading tweets.cql
//get the handles from the csv file
//this should not do anything - as the handles have already been loaded above
using periodic commit 500
load csv with headers from "file:<yourpath>/20150401.csv" as csv
with csv
where csv.Username<>[]
merge (h:Handle {name: '@'+lower(csv.Username)});


//connect the tweets to the handles
using periodic commit 500
load csv with headers from "file:<your path>/20150401.csv" as csv
with csv
where csv.Username<>[]
merge (h:Handle {name: '@'+lower(csv.Username)})
merge (t:Tweet {text: lower(csv.Tweet), id: toint(csv.TweetID), time: csv.TweetTime, isretweet: toint(csv.IsReTweet), favorite: toint(csv.Favorite), retweet: toint(csv.ReTweet), url: csv.`Twitter URL`})<-[:TWEETS]-(h);

## cyclingtweets processing.cql
//extract handles from tweet text and connect tweets to handles
match (t:Tweet)
WITH t,split(t.text," ") as words
UNWIND words as handles
with t,handles
where left(handles,1)="@"
with t, handles
merge (h:Handle {name: lower(handles)})
merge (h)-[:MENTIONED_IN]->(t);

//extract hashtags from tweet text and connect tweets to hashtags
match (t:Tweet)
WITH t,split(t.text," ") as words
UNWIND words as hashtags
with t,hashtags
where left(hashtags,1)="#"
with t, hashtags
merge (h:Hashtag {name: upper(hashtags)})
merge (h)-[:MENTIONED_IN]->(t);

## cyclingtweets upload friends.py
import argparse
import sys
import os
import tweepy
import csv
import json
import calendar

from collections import  deque
from util import Users
from py2neo import Graph
from dateutil import parser

def seed(api, username):
    if os.path.exists("data/users.csv"):
        print "Twitter graph has already been seeded. Delete 'data/users.csv' if you want to seed it again"
        sys.exit(1)

    USERS_TO_PROCESS = 50
    users_to_process = deque()
    users_processed = set([username])

    for tweet in tweepy.Cursor(api.user_timeline, id=username).items(50):
        for user in tweet.entities["user_mentions"]:
            if not len(users_to_process) > USERS_TO_PROCESS:
                users_to_process.append(user["screen_name"])
            else:
                break
    users_processed = set([username])
    while True:
        if len(users_processed) >= USERS_TO_PROCESS:
            break
        else:
            if len(users_to_process) > 0:
                next_user = users_to_process.popleft()
                print next_user
                if not next_user in users_processed:
                    users_processed.add(next_user)
                    for tweet in tweepy.Cursor(api.user_timeline, id=next_user).items(10):
                        for user_mentioned in tweet.entities["user_mentions"]:
                            if not len(users_processed) > 50:
                                users_to_process.append(user_mentioned["screen_name"])
                            else:
                                break
            else:
                break
    with open("data/users.csv", "w") as usersfile:
        writer = csv.writer(usersfile, delimiter=",")
        for user in users_processed:
            writer.writerow([user, "PROCESSED", ""])

def read_user(username):
    print username
    profile_file_path = "data/profiles/{0}.json".format(username)
    if os.path.exists(profile_file_path):
        with open(profile_file_path, "r") as file:
            profile = json.loads(file.read())
            print profile["name"]
            print profile["description"]
            print "Friends: {0}".format(len(profile["friends"]))
            print "Followers: {0}".format(len(profile["followers"]))

    file_path = "data/tweets/{0}.json".format(username)

    if not os.path.exists(file_path):
        tweets = []
    else:
        with open(file_path, "r") as file:
            tweets = json.loads(file.read())

    print "# of tweets: {0}".format(len(tweets))
    if len(tweets) > 0:
        print "latest tweets:"
        for tweet in tweets:
            print tweet["id"], tweet["text"]

def download_all_user_tweets(api, users):
    unprocessed_users =  [user[0] for user in users.all().iteritems()]
    for user in unprocessed_users:
        download_user_tweets(api, users, user)

def download_new_user_tweets(api, users):
    unprocessed_users =  [user[0] for user in users.all().iteritems() if not user[1]["lastTweetRetrieved"]]
    for user in unprocessed_users:
        download_user_tweets(api, users, user)

def download_all_user_profiles(api, users):
    unprocessed_users =  [user[0] for user in users.all().iteritems()
                          if not os.path.exists("data/profiles/{0}.json".format(user[0]))]

    for user in unprocessed_users:
        download_profile(api, user)

def download_all_user_friends(api, users):
    unprocessed_users =  [user[0] for user in users.all().iteritems()
                          if not os.path.exists("data/friends/{0}.json".format(user[0]))]

    for user in unprocessed_users:
        download_friends(api, user)

def download_user_tweets(api, users, username):
    print username
    value = users.find(username)

    file_path = "data/tweets/{0}.json".format(username)
    if os.path.exists(file_path):
        with open(file_path, "r") as file:
            tweets =  json.loads(file.read())
    else:
        tweets = []

    first_tweet_done = False
    since_id = value["lastTweetRetrieved"]
    for tweet in tweepy.Cursor(api.user_timeline, id=username, since_id = since_id).items(50):
        if not first_tweet_done:
            value["lastTweetRetrieved"] = tweet.id
            first_tweet_done = True
        tweets.append(tweet._json)

    users.save(username, value)

    with open("data/tweets/{0}.json".format(username), "w") as file:
        file.write(json.dumps(tweets))

def download_profile(api, username):
    print username

    profile = api.get_user(username)._json
    followers = list(tweepy.Cursor(api.followers_ids, username).items())
    friends = list(tweepy.Cursor(api.friends_ids, username).items())

    profile["followers"] =  followers
    profile["friends"] =  friends

    with open("data/profiles/{0}.json".format(username), "w") as file:
        file.write(json.dumps(profile))

def download_friends(api, username):
    print username
    profile = api.get_user(username)._json
    friends = list(tweepy.Cursor(api.friends_ids, username).items())

    profile["friends"] =  friends

    with open("data/friends/{0}.json".format(username), "w") as file:
        file.write(json.dumps(profile))

def import_profiles_into_neo4j():
    graph = Graph()

    tx = graph.cypher.begin()
    files = [file for file in os.listdir("data/profiles") if file.endswith("json")]
    for file in files:
        with open("data/profiles/{0}".format(file), "r") as file:
            profile = json.loads(file.read())
            print profile["screen_name"]

            params = {
                "twitterId" : profile["id"],
                "screenName": profile["screen_name"],
                "name": profile["name"],
                "description": profile["description"],
                "followers" : profile["followers"],
                "friends" : profile["friends"]
            }
            statement = """
                        MERGE (p:Person {twitterId: {twitterId}})
                        REMOVE p:Shadow
                        SET p.screenName = {screenName},
                            p.description = {description},
                            p.name = {name}
                        WITH p

                        FOREACH(followerId IN {followers} |
                          MERGE (follower:Person {twitterId: followerId})
                          ON CREATE SET follower:Shadow
                          MERGE (follower)-[:FOLLOWS]->(p)
                        )

                        FOREACH(friendId IN {friends} |
                          MERGE (friend:Person {twitterId: friendId})
                          ON CREATE SET friend:Shadow
                          MERGE (p)-[:FOLLOWS]->(friend)
                        )
                        """
            tx.append(statement, params)

            tx.process()
    tx.commit()

def import_friends_into_neo4j():
    graph = Graph()

    files = [file for file in os.listdir("data/friends") if file.endswith("json")]
    for file in files:
        tx = graph.cypher.begin()

        with open("data/friends/{0}".format(file), "r") as file:
            profile = json.loads(file.read())
            print profile["screen_name"]

            params = {
                "twitterId" : profile["id"],
                "screenName": profile["screen_name"],
                "friends" : profile["friends"]
            }
            statement = """
                        MATCH (p:Handle {name: '@'+lower({screenName})})
                            SET p.twitterId = {twitterId}
                        WITH p
                        WHERE p is not null
                        UNWIND {friends} as friendId
                        MATCH (friend:Handle {twitterId: friendId})
                        MERGE (p)-[:FOLLOWS]->(friend)
                        """
            tx.append(statement, params)
            tx.process()
        tx.commit()

def import_tweets_into_neo4j():
    graph = Graph()

    tx = graph.cypher.begin()
    count = 0

    files = [file for file in os.listdir("data/tweets") if file.endswith("json")]
    for file in files:
        with open("data/tweets/{0}".format(file), "r") as file:
            tweets = json.loads(file.read())

            for tweet in tweets:
                created_at = calendar.timegm(parser.parse(tweet["created_at"]).timetuple())

                params = {
                    "tweetId": tweet["id"],
                    "createdAt": created_at,
                    "text": tweet["text"],
                    "userId": tweet["user"]["id"],
                    "inReplyToTweetId": tweet["in_reply_to_status_id"],
                    "userMentions": [user for user in tweet["entities"]["user_mentions"]],
                    "urls": [url for url in tweet["entities"]["urls"]]
                }

                statement = """
                            MERGE (tweet:Tweet {id: {tweetId}})
                            SET tweet.text = {text}, tweet.timestamp = {createdAt}
                            REMOVE tweet:Shadow
                            WITH tweet
                            MATCH (person:Person {twitterId: {userId}})
                            MERGE (person)-[:TWEETED]->(tweet)
                            WITH tweet

                            FOREACH(user in {userMentions} |
                                MERGE (mentionedUser:Person {twitterId: user.id})
                                SET mentionedUser.screenName = user.screen_name
                                MERGE (tweet)-[:MENTIONED_USER]->(mentionedUser)
                            )

                            FOREACH(url in {urls} |
                                MERGE (u:URL {value: url.expanded_url})
                                MERGE (tweet)-[:MENTIONED_URL]->(u)
                            )

                            FOREACH(ignoreMe in CASE WHEN NOT {inReplyToTweetId} is null THEN [1] ELSE [] END |
                                MERGE (inReplyToTweet:Tweet {id: {inReplyToTweetId}})
                                ON CREATE SET inReplyToTweet:Shadow
                                MERGE (tweet)-[:IN_REPLY_TO_TWEET]->(inReplyToTweet)
                            )
                            """
                tx.append(statement, params)
                tx.process()
    tx.commit()

def add_new_users(users, count):
    graph = Graph()
    params = {"limit": count}
    results = graph.cypher.execute("""
                                  match (p:Shadow:Person)<-[:MENTIONED_USER]-(user)
                                  RETURN p.screenName AS user, COUNT(*) AS times
                                  ORDER BY times DESC
                                  LIMIT {limit}
                                  """, params)
    print results
    for row in results:
        users.add(row["user"])

def main(argv=None):
    parser = argparse.ArgumentParser(description='Query the Twitter API')

    # specific user
    parser.add_argument('--seed')
    parser.add_argument('--download-tweets')
    parser.add_argument('--download-profile')
    parser.add_argument('--read-user')

    parser.add_argument('--add-new-users', type=int)

    # all users
    parser.add_argument('--download-all-user-tweets', action='store_true')
    parser.add_argument('--download-new-user-tweets', action='store_true')
    parser.add_argument('--download-all-user-profiles', action='store_true')
    parser.add_argument('--download-all-user-friends', action='store_true')

    # twitter auth
    parser.add_argument('--check-auth', action='store_true')

    # import
    parser.add_argument('--import-profiles-into-neo4j', action='store_true')
    parser.add_argument('--import-friends-into-neo4j', action='store_true')
    parser.add_argument('--import-tweets-into-neo4j', action='store_true')

    if argv is None:
        argv = sys.argv

    args = parser.parse_args()

    if args.read_user:
        read_user(args.read_user)
        return

    # Options that require keys go below here
    consumer_key =  os.environ.get('CONSUMER_KEY')
    consumer_secret =  os.environ.get('CONSUMER_SECRET')
    access_token =  os.environ.get('ACCESS_TOKEN')
    access_token_secret =  os.environ.get('ACCESS_TOKEN_SECRET')

    if any([key is None for key in [consumer_key, consumer_secret, access_token, access_token_secret]]):
        print "One of your twitter keys isn't set - don't forget to 'source credentials.local'"
        sys.exit(1)

    if args.check_auth:
        print "consumer_key: {0}".format(consumer_key)
        print "consumer_secret: {0}".format(consumer_secret)
        print "access_token: {0}".format(access_token)
        print "access_token_secret: {0}".format(access_token_secret)

        try:
            auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
            auth.set_access_token(access_token, access_token_secret)
            api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
            api.verify_credentials()
            print "Auth all working - we're good to go!"
        except tweepy.TweepError as e:
            print "Auth problem - " + str(e)

        return

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
    api.verify_credentials()

    if args.seed:
        seed(api, args.seed)
        return

    if args.download_tweets:
        users = Users()
        download_user_tweets(api, users,  args.download_tweets)
        return

    if args.download_all_user_tweets:
        users = Users()
        download_all_user_tweets(api, users)
        return

    if args.download_new_user_tweets:
        users = Users()
        download_new_user_tweets(api, users)
        return

    if args.download_profile:
        users = Users()
        download_profile(api, args.download_profile)
        return

    if args.download_all_user_profiles:
        users = Users()
        download_all_user_profiles(api, users)
        return

    if args.download_all_user_friends:
        users = Users()
        download_all_user_friends(api, users)
        return

    if args.add_new_users:
        users = Users()
        add_new_users(users, args.add_new_users)
        return

    if args.import_profiles_into_neo4j:
        import_profiles_into_neo4j()
        return

    if args.import_friends_into_neo4j:
        import_friends_into_neo4j()
        return

    if args.import_tweets_into_neo4j:
        import_tweets_into_neo4j()
        return

if __name__ == "__main__":
    sys.exit(main())

## querying cyclingtweets.cql
//degree of handles
match (h:Handle)-[:TWEETS]->(t:Tweet)
return h.name, h.realname, count(t)
order by count(t) DESC
limit 10

//degree of hashtags
match (h:Hashtag)-[:MENTIONED_IN]->(t:Tweet)
return h.name, count(t)
order by count(t) DESC

//most mentioned handles or hashtags
match (h)-[:MENTIONED_IN]->(t:Tweet)
return h.name, labels(h), count(t)
order by count(t) DESC
limit 10

//querying the NodeRank
match (h:Handle)
where h.nodeRank is not null
return h.name, h.realname, h.nodeRank
order by h.nodeRank DESC
limit 10

//what is connected to the top NodeRanked handles
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)-[r*..2]-()
return h,r
limit 50

//what is connected to the top NodeRanked handles at depth 1
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)--(connected)
return labels(connected), count(connected)
limit 25

//what is connected to the top NodeRanked handles at depth 3
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 1
match (h)-[*..3]-(connected)
return labels(connected), count(connected)
order by count(connected) DESC


//betweenness centrality for the top ranked nodes - query using UNWIND
//first we create the subgraph that we want to analyse
match (h:Handle)
where h.nodeRank is not null
with h
order by h.nodeRank DESC
limit 50
//we store all the nodes of the subgraph in a collection, and pass it to the next query
WITH COLLECT(h) AS handles
//then we unwind this collection TWICE so that we get a product of rows (2500 in total)
UNWIND handles as source
UNWIND handles as target
//and then finally we calculate the betweenness on these rows
MATCH p=allShortestPaths((source)-[:TWEETS|MENTIONED_IN*]-(target))
WHERE id(source) < id(target) and length(p) > 1
UNWIND nodes(p)[1..-1] as n
WITH n.realname as Name, count(*) as betweenness
WHERE Name is not null
RETURN Name, betweenness
ORDER BY betweenness DESC;


//querying the TopicRank
match (h:Hashtag)
where h.topicRank is not null
return h.name, h.topicRank
order by h.topicRank DESC
limit 50

//to top TopicRanked Hashtag
match (h:Hashtag)
where h.topicRank is not null
with h
order by h.topicRank DESC
limit 1
match (h)-[r*..2]-()
return h,r
limit 50

//the link between Boonen and Kristoff
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[*]-(h1))
return p

//the link between Boonen and Kristoff
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[r*]-(h1))
unwind r as Rels
with p, Rels
where type(Rels)<>"Follows"
return p

//the link between Boonen and Kristoff and their teams
match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
p = allshortestpaths ((h2)-[*]-(h1))
with nodes(p) as Nodes
unwind Nodes as Node
match (Node)--(t:Team)
return Node, t
	[
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Tweet Id using expression grel:substring(value,1,length(value)-1)",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Tweet Id",
	"expression": "grel:substring(value,1,length(value)-1)",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Username using expression grel:substring(value,1,length(value)-1)",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Username",
	"expression": "grel:substring(value,1,length(value)-1)",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Tweet time using expression grel:substring(value,1,length(value)-1)",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Tweet time",
	"expression": "grel:substring(value,1,length(value)-1)",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Tweet time using expression value.toDate()",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Tweet time",
	"expression": "value.toDate()",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Is ReTweet using expression grel:substring(value,1,length(value)-1)",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Is ReTweet",
	"expression": "grel:substring(value,1,length(value)-1)",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Favorite using expression grel:substring(value,1,length(value)-1)",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Favorite",
	"expression": "grel:substring(value,1,length(value)-1)",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column ReTweet using expression grel:substring(value,1,length(value)-1)",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "ReTweet",
	"expression": "grel:substring(value,1,length(value)-1)",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Is ReTweet using expression value.toNumber()",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Is ReTweet",
	"expression": "value.toNumber()",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Favorite using expression value.toNumber()",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Favorite",
	"expression": "value.toNumber()",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column ReTweet using expression value.toNumber()",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "ReTweet",
	"expression": "value.toNumber()",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Twitter URL using expression grel:substring(value,1,length(value)-1)",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Twitter URL",
	"expression": "grel:substring(value,1,length(value)-1)",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Tweet using expression grel:substring(value,1,length(value)-1)",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Tweet",
	"expression": "grel:substring(value,1,length(value)-1)",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/column-rename",
	"description": "Rename column Tweet Id to TweetID",
	"oldColumnName": "Tweet Id",
	"newColumnName": "TweetID"
	},
	{
	"op": "core/column-rename",
	"description": "Rename column Tweet time to TweetTime",
	"oldColumnName": "Tweet time",
	"newColumnName": "TweetTime"
	},
	{
	"op": "core/column-rename",
	"description": "Rename column Is ReTweet to IsRetweet",
	"oldColumnName": "Is ReTweet",
	"newColumnName": "IsRetweet"
	},
	{
	"op": "core/column-rename",
	"description": "Rename column Twitter URL to TwitterURL",
	"oldColumnName": "Twitter URL",
	"newColumnName": "TwitterURL"
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Tweet using expression grel:replaceChars(value,\",\",\" ,\")",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Tweet",
	"expression": "grel:replaceChars(value,\",\",\" ,\")",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	},
	{
	"op": "core/text-transform",
	"description": "Text transform on cells in column Tweet using expression grel:replacechars(value,\" \",\" \")",
	"engineConfig": {
	"facets": [],
	"mode": "row-based"
	},
	"columnName": "Tweet",
	"expression": "grel:replacechars(value,\" \",\" \")",
	"onError": "keep-original",
	"repeat": false,
	"repeatCount": 10
	}
	]
	//Add this to the <your neo4j directory>/conf/neo4j.properties after adding
	//graphaware-noderank-2.2.1.30.2.jar and
	//graphaware-server-enterprise-all-2.2.1.30.jar
	//to <your neo4j directory>/plugins directory

	com.graphaware.runtime.enabled=true

	#NR becomes the module ID:
	com.graphaware.module.NR.1=com.graphaware.module.noderank.NodeRankModuleBootstrapper

	#optional number of top ranked nodes to remember, the default is 10
	com.graphaware.module.NR.maxTopRankNodes=50

	#optional daming factor, which is a number p such that a random node will be selected at any step of the algorithm
	#with the probability 1-p (as opposed to following a random relationship). The default is 0.85
	com.graphaware.module.NR.dampingFactor=0.85

	#optional key of the property that gets written to the ranked nodes, default is "nodeRank"
	com.graphaware.module.NR.propertyKey=nodeRank

	#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes
	com.graphaware.module.NR.node=hasLabel('Handle')

	#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships
	com.graphaware.module.NR.relationship=isType('FOLLOWS')

	#NR becomes the module ID:
	com.graphaware.module.TR.2=com.graphaware.module.noderank.NodeRankModuleBootstrapper

	#optional number of top ranked nodes to remember, the default is 10
	com.graphaware.module.TR.maxTopRankNodes=50

	#optional daming factor, which is a number p such that a random node will be selected at any step of the algorithm
	#with the probability 1-p (as opposed to following a random relationship). The default is 0.85
	com.graphaware.module.TR.dampingFactor=0.85

	#optional key of the property that gets written to the ranked nodes, default is "nodeRank"
	com.graphaware.module.TR.propertyKey=topicRank

	#optionally specify nodes to rank using an expression-based node inclusion policy, default is all business (i.e. non-framework-internal) nodes
	com.graphaware.module.TR.node=hasLabel('Hashtag')

	#optionally specify relationships to follow using an expression-based relationship inclusion policy, default is all business (i.e. non-framework-internal) relationships
	com.graphaware.module.TR.relationship=isType('MENTIONED_IN')
	//add some metadata
	//country info
	load csv with headers from
	"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1390098748" as csv
	create (c:Country {code: csv.Country, name: csv.FullCountry, cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)});

	//team info
	load csv with headers from
	"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1244447866" as csv
	merge (tc:TeamClass {name: csv.Class})
	with csv, tc
	match (c:Country {code: csv.Country})
	merge (tc)<-[:IN_CLASS]-(t:Team {code: trim(csv.Code), name: trim(csv.Name), cq: toint(csv.CQ), rank: toint(csv.Rank), prevrank: toint(csv.Prev)})-[:FROM_COUNTRY]->(c);

	//twitter handle info
	using periodic commit 500
	load csv with headers from
	"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=0" as csv
	match (c:Country {code: trim(csv.Country)})
	merge (h:Handle {name: trim(csv.Handle), realname: trim(csv.Name)})-[:FROM_COUNTRY]->(c);

	//rider info
	load csv with headers from
	"https://docs.google.com/a/neotechnology.com/spreadsheets/d/1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0/export?format=csv&id=1lLD2I_czto1iA1OjCMAZZxnYLAVsngBgjT5c0xuvpJ0&gid=1885142986" as csv
	match (h:Handle {realname: trim(csv.Name)}), (t:Team {code: trim(csv.Team)})
	set h.Age=toint(csv.Age)
	set h.CQ=toint(csv.CQ)
	set h.UCIcode=csv.UCIcode
	set h.rank=toint(csv.Rank)
	set h.prevrank=toint(csv.Prev)
	create (h)-[:RIDES_FOR_TEAM]->(t);

	//add the index on Handle
	create index on :Handle(name);
	create index on :Hashtag(name);
	create index on :Tweet(text);
	create index on :Handle(nodeRank);
	create constraint on (h:Handle) assert h.twitterId is unique;
	//get the handles from the csv file
	//this should not do anything - as the handles have already been loaded above
	using periodic commit 500
	load csv with headers from "file:<yourpath>/20150401.csv" as csv
	with csv
	where csv.Username<>[]
	merge (h:Handle {name: '@'+lower(csv.Username)});


	//connect the tweets to the handles
	using periodic commit 500
	load csv with headers from "file:<your path>/20150401.csv" as csv
	with csv
	where csv.Username<>[]
	merge (h:Handle {name: '@'+lower(csv.Username)})
	merge (t:Tweet {text: lower(csv.Tweet), id: toint(csv.TweetID), time: csv.TweetTime, isretweet: toint(csv.IsReTweet), favorite: toint(csv.Favorite), retweet: toint(csv.ReTweet), url: csv.`Twitter URL`})<-[:TWEETS]-(h);
	//extract handles from tweet text and connect tweets to handles
	match (t:Tweet)
	WITH t,split(t.text," ") as words
	UNWIND words as handles
	with t,handles
	where left(handles,1)="@"
	with t, handles
	merge (h:Handle {name: lower(handles)})
	merge (h)-[:MENTIONED_IN]->(t);

	//extract hashtags from tweet text and connect tweets to hashtags
	match (t:Tweet)
	WITH t,split(t.text," ") as words
	UNWIND words as hashtags
	with t,hashtags
	where left(hashtags,1)="#"
	with t, hashtags
	merge (h:Hashtag {name: upper(hashtags)})
	merge (h)-[:MENTIONED_IN]->(t);
	import argparse
	import sys
	import os
	import tweepy
	import csv
	import json
	import calendar

	from collections import deque
	from util import Users
	from py2neo import Graph
	from dateutil import parser

	def seed(api, username):
	if os.path.exists("data/users.csv"):
	print "Twitter graph has already been seeded. Delete 'data/users.csv' if you want to seed it again"
	sys.exit(1)

	USERS_TO_PROCESS = 50
	users_to_process = deque()
	users_processed = set([username])

	for tweet in tweepy.Cursor(api.user_timeline, id=username).items(50):
	for user in tweet.entities["user_mentions"]:
	if not len(users_to_process) > USERS_TO_PROCESS:
	users_to_process.append(user["screen_name"])
	else:
	break
	users_processed = set([username])
	while True:
	if len(users_processed) >= USERS_TO_PROCESS:
	break
	else:
	if len(users_to_process) > 0:
	next_user = users_to_process.popleft()
	print next_user
	if not next_user in users_processed:
	users_processed.add(next_user)
	for tweet in tweepy.Cursor(api.user_timeline, id=next_user).items(10):
	for user_mentioned in tweet.entities["user_mentions"]:
	if not len(users_processed) > 50:
	users_to_process.append(user_mentioned["screen_name"])
	else:
	break
	else:
	break
	with open("data/users.csv", "w") as usersfile:
	writer = csv.writer(usersfile, delimiter=",")
	for user in users_processed:
	writer.writerow([user, "PROCESSED", ""])

	def read_user(username):
	print username
	profile_file_path = "data/profiles/{0}.json".format(username)
	if os.path.exists(profile_file_path):
	with open(profile_file_path, "r") as file:
	profile = json.loads(file.read())
	print profile["name"]
	print profile["description"]
	print "Friends: {0}".format(len(profile["friends"]))
	print "Followers: {0}".format(len(profile["followers"]))

	file_path = "data/tweets/{0}.json".format(username)

	if not os.path.exists(file_path):
	tweets = []
	else:
	with open(file_path, "r") as file:
	tweets = json.loads(file.read())

	print "# of tweets: {0}".format(len(tweets))
	if len(tweets) > 0:
	print "latest tweets:"
	for tweet in tweets:
	print tweet["id"], tweet["text"]

	def download_all_user_tweets(api, users):
	unprocessed_users = [user[0] for user in users.all().iteritems()]
	for user in unprocessed_users:
	download_user_tweets(api, users, user)

	def download_new_user_tweets(api, users):
	unprocessed_users = [user[0] for user in users.all().iteritems() if not user[1]["lastTweetRetrieved"]]
	for user in unprocessed_users:
	download_user_tweets(api, users, user)

	def download_all_user_profiles(api, users):
	unprocessed_users = [user[0] for user in users.all().iteritems()
	if not os.path.exists("data/profiles/{0}.json".format(user[0]))]

	for user in unprocessed_users:
	download_profile(api, user)

	def download_all_user_friends(api, users):
	unprocessed_users = [user[0] for user in users.all().iteritems()
	if not os.path.exists("data/friends/{0}.json".format(user[0]))]

	for user in unprocessed_users:
	download_friends(api, user)

	def download_user_tweets(api, users, username):
	print username
	value = users.find(username)

	file_path = "data/tweets/{0}.json".format(username)
	if os.path.exists(file_path):
	with open(file_path, "r") as file:
	tweets = json.loads(file.read())
	else:
	tweets = []

	first_tweet_done = False
	since_id = value["lastTweetRetrieved"]
	for tweet in tweepy.Cursor(api.user_timeline, id=username, since_id = since_id).items(50):
	if not first_tweet_done:
	value["lastTweetRetrieved"] = tweet.id
	first_tweet_done = True
	tweets.append(tweet._json)

	users.save(username, value)

	with open("data/tweets/{0}.json".format(username), "w") as file:
	file.write(json.dumps(tweets))

	def download_profile(api, username):
	print username

	profile = api.get_user(username)._json
	followers = list(tweepy.Cursor(api.followers_ids, username).items())
	friends = list(tweepy.Cursor(api.friends_ids, username).items())

	profile["followers"] = followers
	profile["friends"] = friends

	with open("data/profiles/{0}.json".format(username), "w") as file:
	file.write(json.dumps(profile))

	def download_friends(api, username):
	print username
	profile = api.get_user(username)._json
	friends = list(tweepy.Cursor(api.friends_ids, username).items())

	profile["friends"] = friends

	with open("data/friends/{0}.json".format(username), "w") as file:
	file.write(json.dumps(profile))

	def import_profiles_into_neo4j():
	graph = Graph()

	tx = graph.cypher.begin()
	files = [file for file in os.listdir("data/profiles") if file.endswith("json")]
	for file in files:
	with open("data/profiles/{0}".format(file), "r") as file:
	profile = json.loads(file.read())
	print profile["screen_name"]

	params = {
	"twitterId" : profile["id"],
	"screenName": profile["screen_name"],
	"name": profile["name"],
	"description": profile["description"],
	"followers" : profile["followers"],
	"friends" : profile["friends"]
	}
	statement = """
	MERGE (p:Person {twitterId: {twitterId}})
	REMOVE p:Shadow
	SET p.screenName = {screenName},
	p.description = {description},
	p.name = {name}
	WITH p

	FOREACH(followerId IN {followers} \|
	MERGE (follower:Person {twitterId: followerId})
	ON CREATE SET follower:Shadow
	MERGE (follower)-[:FOLLOWS]->(p)
	)

	FOREACH(friendId IN {friends} \|
	MERGE (friend:Person {twitterId: friendId})
	ON CREATE SET friend:Shadow
	MERGE (p)-[:FOLLOWS]->(friend)
	)
	"""
	tx.append(statement, params)

	tx.process()
	tx.commit()

	def import_friends_into_neo4j():
	graph = Graph()

	files = [file for file in os.listdir("data/friends") if file.endswith("json")]
	for file in files:
	tx = graph.cypher.begin()

	with open("data/friends/{0}".format(file), "r") as file:
	profile = json.loads(file.read())
	print profile["screen_name"]

	params = {
	"twitterId" : profile["id"],
	"screenName": profile["screen_name"],
	"friends" : profile["friends"]
	}
	statement = """
	MATCH (p:Handle {name: '@'+lower({screenName})})
	SET p.twitterId = {twitterId}
	WITH p
	WHERE p is not null
	UNWIND {friends} as friendId
	MATCH (friend:Handle {twitterId: friendId})
	MERGE (p)-[:FOLLOWS]->(friend)
	"""
	tx.append(statement, params)
	tx.process()
	tx.commit()

	def import_tweets_into_neo4j():
	graph = Graph()

	tx = graph.cypher.begin()
	count = 0

	files = [file for file in os.listdir("data/tweets") if file.endswith("json")]
	for file in files:
	with open("data/tweets/{0}".format(file), "r") as file:
	tweets = json.loads(file.read())

	for tweet in tweets:
	created_at = calendar.timegm(parser.parse(tweet["created_at"]).timetuple())

	params = {
	"tweetId": tweet["id"],
	"createdAt": created_at,
	"text": tweet["text"],
	"userId": tweet["user"]["id"],
	"inReplyToTweetId": tweet["in_reply_to_status_id"],
	"userMentions": [user for user in tweet["entities"]["user_mentions"]],
	"urls": [url for url in tweet["entities"]["urls"]]
	}

	statement = """
	MERGE (tweet:Tweet {id: {tweetId}})
	SET tweet.text = {text}, tweet.timestamp = {createdAt}
	REMOVE tweet:Shadow
	WITH tweet
	MATCH (person:Person {twitterId: {userId}})
	MERGE (person)-[:TWEETED]->(tweet)
	WITH tweet

	FOREACH(user in {userMentions} \|
	MERGE (mentionedUser:Person {twitterId: user.id})
	SET mentionedUser.screenName = user.screen_name
	MERGE (tweet)-[:MENTIONED_USER]->(mentionedUser)
	)

	FOREACH(url in {urls} \|
	MERGE (u:URL {value: url.expanded_url})
	MERGE (tweet)-[:MENTIONED_URL]->(u)
	)

	FOREACH(ignoreMe in CASE WHEN NOT {inReplyToTweetId} is null THEN [1] ELSE [] END \|
	MERGE (inReplyToTweet:Tweet {id: {inReplyToTweetId}})
	ON CREATE SET inReplyToTweet:Shadow
	MERGE (tweet)-[:IN_REPLY_TO_TWEET]->(inReplyToTweet)
	)
	"""
	tx.append(statement, params)
	tx.process()
	tx.commit()

	def add_new_users(users, count):
	graph = Graph()
	params = {"limit": count}
	results = graph.cypher.execute("""
	match (p:Shadow:Person)<-[:MENTIONED_USER]-(user)
	RETURN p.screenName AS user, COUNT(*) AS times
	ORDER BY times DESC
	LIMIT {limit}
	""", params)
	print results
	for row in results:
	users.add(row["user"])

	def main(argv=None):
	parser = argparse.ArgumentParser(description='Query the Twitter API')

	# specific user
	parser.add_argument('--seed')
	parser.add_argument('--download-tweets')
	parser.add_argument('--download-profile')
	parser.add_argument('--read-user')

	parser.add_argument('--add-new-users', type=int)

	# all users
	parser.add_argument('--download-all-user-tweets', action='store_true')
	parser.add_argument('--download-new-user-tweets', action='store_true')
	parser.add_argument('--download-all-user-profiles', action='store_true')
	parser.add_argument('--download-all-user-friends', action='store_true')

	# twitter auth
	parser.add_argument('--check-auth', action='store_true')

	# import
	parser.add_argument('--import-profiles-into-neo4j', action='store_true')
	parser.add_argument('--import-friends-into-neo4j', action='store_true')
	parser.add_argument('--import-tweets-into-neo4j', action='store_true')

	if argv is None:
	argv = sys.argv

	args = parser.parse_args()

	if args.read_user:
	read_user(args.read_user)
	return

	# Options that require keys go below here
	consumer_key = os.environ.get('CONSUMER_KEY')
	consumer_secret = os.environ.get('CONSUMER_SECRET')
	access_token = os.environ.get('ACCESS_TOKEN')
	access_token_secret = os.environ.get('ACCESS_TOKEN_SECRET')

	if any([key is None for key in [consumer_key, consumer_secret, access_token, access_token_secret]]):
	print "One of your twitter keys isn't set - don't forget to 'source credentials.local'"
	sys.exit(1)

	if args.check_auth:
	print "consumer_key: {0}".format(consumer_key)
	print "consumer_secret: {0}".format(consumer_secret)
	print "access_token: {0}".format(access_token)
	print "access_token_secret: {0}".format(access_token_secret)

	try:
	auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
	auth.set_access_token(access_token, access_token_secret)
	api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
	api.verify_credentials()
	print "Auth all working - we're good to go!"
	except tweepy.TweepError as e:
	print "Auth problem - " + str(e)

	return

	auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
	auth.set_access_token(access_token, access_token_secret)
	api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
	api.verify_credentials()

	if args.seed:
	seed(api, args.seed)
	return

	if args.download_tweets:
	users = Users()
	download_user_tweets(api, users, args.download_tweets)
	return

	if args.download_all_user_tweets:
	users = Users()
	download_all_user_tweets(api, users)
	return

	if args.download_new_user_tweets:
	users = Users()
	download_new_user_tweets(api, users)
	return

	if args.download_profile:
	users = Users()
	download_profile(api, args.download_profile)
	return

	if args.download_all_user_profiles:
	users = Users()
	download_all_user_profiles(api, users)
	return

	if args.download_all_user_friends:
	users = Users()
	download_all_user_friends(api, users)
	return

	if args.add_new_users:
	users = Users()
	add_new_users(users, args.add_new_users)
	return

	if args.import_profiles_into_neo4j:
	import_profiles_into_neo4j()
	return

	if args.import_friends_into_neo4j:
	import_friends_into_neo4j()
	return

	if args.import_tweets_into_neo4j:
	import_tweets_into_neo4j()
	return

	if __name__ == "__main__":
	sys.exit(main())
	//degree of handles
	match (h:Handle)-[:TWEETS]->(t:Tweet)
	return h.name, h.realname, count(t)
	order by count(t) DESC
	limit 10

	//degree of hashtags
	match (h:Hashtag)-[:MENTIONED_IN]->(t:Tweet)
	return h.name, count(t)
	order by count(t) DESC

	//most mentioned handles or hashtags
	match (h)-[:MENTIONED_IN]->(t:Tweet)
	return h.name, labels(h), count(t)
	order by count(t) DESC
	limit 10

	//querying the NodeRank
	match (h:Handle)
	where h.nodeRank is not null
	return h.name, h.realname, h.nodeRank
	order by h.nodeRank DESC
	limit 10

	//what is connected to the top NodeRanked handles
	match (h:Handle)
	where h.nodeRank is not null
	with h
	order by h.nodeRank DESC
	limit 1
	match (h)-[r*..2]-()
	return h,r
	limit 50

	//what is connected to the top NodeRanked handles at depth 1
	match (h:Handle)
	where h.nodeRank is not null
	with h
	order by h.nodeRank DESC
	limit 1
	match (h)--(connected)
	return labels(connected), count(connected)
	limit 25

	//what is connected to the top NodeRanked handles at depth 3
	match (h:Handle)
	where h.nodeRank is not null
	with h
	order by h.nodeRank DESC
	limit 1
	match (h)-[*..3]-(connected)
	return labels(connected), count(connected)
	order by count(connected) DESC


	//betweenness centrality for the top ranked nodes - query using UNWIND
	//first we create the subgraph that we want to analyse
	match (h:Handle)
	where h.nodeRank is not null
	with h
	order by h.nodeRank DESC
	limit 50
	//we store all the nodes of the subgraph in a collection, and pass it to the next query
	WITH COLLECT(h) AS handles
	//then we unwind this collection TWICE so that we get a product of rows (2500 in total)
	UNWIND handles as source
	UNWIND handles as target
	//and then finally we calculate the betweenness on these rows
	MATCH p=allShortestPaths((source)-[:TWEETS\|MENTIONED_IN*]-(target))
	WHERE id(source) < id(target) and length(p) > 1
	UNWIND nodes(p)[1..-1] as n
	WITH n.realname as Name, count(*) as betweenness
	WHERE Name is not null
	RETURN Name, betweenness
	ORDER BY betweenness DESC;


	//querying the TopicRank
	match (h:Hashtag)
	where h.topicRank is not null
	return h.name, h.topicRank
	order by h.topicRank DESC
	limit 50

	//to top TopicRanked Hashtag
	match (h:Hashtag)
	where h.topicRank is not null
	with h
	order by h.topicRank DESC
	limit 1
	match (h)-[r*..2]-()
	return h,r
	limit 50

	//the link between Boonen and Kristoff
	match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
	p = allshortestpaths ((h2)-[*]-(h1))
	return p

	//the link between Boonen and Kristoff
	match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
	p = allshortestpaths ((h2)-[r*]-(h1))
	unwind r as Rels
	with p, Rels
	where type(Rels)<>"Follows"
	return p

	//the link between Boonen and Kristoff and their teams
	match (h1:Handle {name:"@kristoff87"}), (h2:Handle {realname:"BOONEN Tom"}),
	p = allshortestpaths ((h2)-[*]-(h1))
	with nodes(p) as Nodes
	unwind Nodes as Node
	match (Node)--(t:Team)
	return Node, t