Skip to content

Instantly share code, notes, and snippets.

@nicolewhite
Last active August 29, 2015 14:04
Show Gist options
  • Save nicolewhite/fb41e76844ce10183849 to your computer and use it in GitHub Desktop.
Save nicolewhite/fb41e76844ce10183849 to your computer and use it in GitHub Desktop.
import requests
import os
import time
from py2neo import neo4j
# Connect to graph and add constraints.
url = os.environ['NEO4J_URL']
# url = "http://localhost:7474/db/data/"
graph = neo4j.GraphDatabaseService(url)
# Add uniqueness constraints.
neo4j.CypherQuery(graph, "CREATE CONSTRAINT ON (t:Tweet) ASSERT t.id IS UNIQUE;").run()
neo4j.CypherQuery(graph, "CREATE CONSTRAINT ON (u:User) ASSERT u.screen_name IS UNIQUE;").run()
neo4j.CypherQuery(graph, "CREATE CONSTRAINT ON (h:Hashtag) ASSERT h.name IS UNIQUE;").run()
neo4j.CypherQuery(graph, "CREATE CONSTRAINT ON (l:Link) ASSERT l.url IS UNIQUE;").run()
neo4j.CypherQuery(graph, "CREATE CONSTRAINT ON (s:Source) ASSERT s.name IS UNIQUE;").run()
# Get Twitter bearer to pass to header.
TWITTER_BEARER = os.environ["TWITTER_BEARER"]
# URL parameters.
q = "oscon OR neo4j"
count = 100
result_type = "recent"
lang = "en"
since_id = -1
while True:
try:
# Build URL.
url = "https://api.twitter.com/1.1/search/tweets.json?q=%s&count=%s&result_type=%s&lang=%s&since_id=%s" % (q, count, result_type, lang, since_id)
# Send GET request.
r = requests.get(url, headers = {"accept":"application/json","Authorization":"Bearer " + TWITTER_BEARER})
# Keep status objects.
tweets = r.json()["statuses"]
if tweets:
plural = "s." if len(tweets) > 1 else "."
print("Found " + str(len(tweets)) + " tweet" + plural)
else:
print("No tweets found.\n")
time.sleep(65)
continue
# Update since_id so we do not capture tweets that were captured in the last API call.
since_id = tweets[0].get('id')
# Pass dict to Cypher and build query.
query = """
UNWIND {tweets} AS t
WITH t
ORDER BY t.id
WITH t,
t.entities AS e,
t.user AS u,
t.retweeted_status AS retweet
MERGE (tweet:Tweet {id:t.id})
SET tweet.text = t.text,
tweet.created_at = t.created_at,
tweet.favorites = t.favorite_count
MERGE (user:User {screen_name:u.screen_name})
SET user.name = u.name,
user.location = u.location,
user.followers = u.followers_count,
user.following = u.friends_count,
user.statuses = u.statusus_count,
user.profile_image_url = u.profile_image_url
MERGE (user)-[:POSTS]->(tweet)
MERGE (source:Source {name:t.source})
MERGE (tweet)-[:USING]->(source)
FOREACH (h IN e.hashtags |
MERGE (tag:Hashtag {name:LOWER(h.text)})
MERGE (tag)-[:TAGS]->(tweet)
)
FOREACH (u IN e.urls |
MERGE (url:Link {url:u.expanded_url})
MERGE (tweet)-[:CONTAINS]->(url)
)
FOREACH (m IN e.user_mentions |
MERGE (mentioned:User {screen_name:m.screen_name})
ON CREATE SET mentioned.name = m.name
MERGE (tweet)-[:MENTIONS]->(mentioned)
)
FOREACH (r IN [r IN [t.in_reply_to_status_id] WHERE r IS NOT NULL] |
MERGE (reply_tweet:Tweet {id:r})
MERGE (tweet)-[:REPLY_TO]->(reply_tweet)
)
FOREACH (retweet_id IN [x IN [retweet.id] WHERE x IS NOT NULL] |
MERGE (retweet_tweet:Tweet {id:retweet_id})
MERGE (tweet)-[:RETWEETS]->(retweet_tweet)
)
"""
# Send Cypher query to db.
neo4j.CypherQuery(graph, query).run(tweets=tweets)
print("Tweets added to graph!\n")
time.sleep(65)
except Exception as e:
print(e)
time.sleep(65)
continue
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment