# hillary_prismatic
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
import requests | |
import time | |
import json | |
from py2neo import Graph | |
from py2neo import neo4j | |
from py2neo.packages.httpstream import http | |
from py2neo.packages.httpstream import SocketError | |
def RateLimited(maxPerSecond):
    """Build a decorator that throttles a function to ``maxPerSecond`` calls per second.

    The decorated function sleeps, when necessary, so that consecutive calls
    are spaced at least ``1.0 / maxPerSecond`` seconds apart. The very first
    call is never delayed.

    Args:
        maxPerSecond: Maximum allowed calls per second; must be non-zero
            (a zero value raises ``ZeroDivisionError``).

    Returns:
        A decorator that wraps a callable with the throttling behaviour.
    """
    import functools

    minInterval = 1.0 / float(maxPerSecond)
    # Use a wall-clock timer: time.clock() measures CPU time on Unix (so time
    # spent sleeping is not counted) and no longer exists on Python 3.8+.
    # Fall back to time.time() on interpreters without time.monotonic().
    now = getattr(time, 'monotonic', time.time)

    def decorate(func):
        # One-element list acts as a mutable cell shared with the closure
        # (works on interpreters without ``nonlocal``). None = never called.
        lastTimeCalled = [None]

        @functools.wraps(func)
        def rateLimitedFunction(*args, **kargs):
            if lastTimeCalled[0] is not None:
                leftToWait = minInterval - (now() - lastTimeCalled[0])
                if leftToWait > 0:
                    time.sleep(leftToWait)
            try:
                return func(*args, **kargs)
            finally:
                # Record the timestamp even when func raises, so a failed
                # call still counts against the rate limit.
                lastTimeCalled[0] = now()

        return rateLimitedFunction

    return decorate
@RateLimited(1)
def get_topics(title, body, api_token="YOUR API TOKEN HERE", timeout=30):
    """POST a document to the Prismatic interest-graph topic endpoint.

    Calls are throttled to one per second by the ``RateLimited`` decorator.

    Args:
        title: Document title sent as the ``title`` form field.
        body: Document text sent as the ``body`` form field.
        api_token: Value sent as the ``api-token`` form field.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error; without it a stalled connection would block
            the whole run indefinitely.

    Returns:
        The ``requests`` response object for the POST.
    """
    payload = {
        'title': title,
        'body': body,
        'api-token': api_token,
    }
    return requests.post(
        "http://interest-graph.getprismatic.com/text/topic",
        data=payload,
        timeout=timeout,
    )
def get_graph(hostname='54.209.65.47', port=7474, password='hillary', user='neo4j'):
    """Authenticate against a Neo4j server and return a graph handle.

    The defaults reproduce the previously hard-coded connection settings, so
    existing zero-argument callers are unaffected.

    NOTE(review): credentials are embedded in source as defaults; consider
    supplying them from configuration instead.

    Args:
        hostname: Host name or IP address of the Neo4j server.
        port: HTTP port of the Neo4j server.
        password: Password used for authentication.
        user: User name used for authentication.

    Returns:
        A ``neo4j.Graph`` bound to ``http://<hostname>:<port>/db/data/``.
    """
    host_port = '%s:%s' % (hostname, port)
    # Register credentials for this host:port before opening the graph.
    neo4j.authenticate(host_port, user, password)
    return neo4j.Graph("http://%s/db/data/" % host_port)
def add_topics(topics_json, email_id):
    """Attach a list of topics to an email node in the graph.

    For every entry in ``topics_json`` the query merges the ``Email`` node
    identified by ``email_id``, merges a ``Topic`` node named after the
    entry's ``topic`` value, and merges a ``REFERENCES`` relationship between
    them carrying the entry's ``score`` value.

    Args:
        topics_json: Collection of topic entries; the query reads ``topic``
            and ``score`` from each one.
        email_id: Value matched against the ``id`` property of the Email node.
    """
    merge_topics = """
    UNWIND {topics} AS t
    WITH t,
    t.score AS s,
    t.topic AS n
    MERGE (e:Email {id: {email_id} })
    MERGE (top:Topic {name: n})
    MERGE (e)-[:REFERENCES {score: s}]->(top)"""
    # A fresh connection is obtained for each invocation.
    connection = get_graph()
    connection.cypher.execute(merge_topics, topics=topics_json, email_id=email_id)
def process_text():
    """Tag every email that has no topic references yet.

    Selects ``Email`` nodes without an outgoing ``REFERENCES`` relationship,
    sends each one's subject and text to ``get_topics``, stores any returned
    topics through ``add_topics``, and prints a per-email summary.

    Returns:
        A list with one dict per processed email. Each dict has an ``id`` key
        and, when the service response contained topics, a ``topics`` key.

    Raises:
        Exception: If a ``SocketError`` occurs while talking to the graph.
    """
    response_array = []
    try:
        graph = get_graph()
        cyph = ('MATCH (u:Email) WHERE NOT EXISTS((u)-[:REFERENCES]->()) '
                'RETURN u.subject AS subject, u.text AS text, u.id AS id')
        for record in graph.cypher.execute(cyph):
            response_dict = {'id': record.id}
            topics_json = get_topics(record.subject, record.text).json()
            if 'topics' in topics_json:
                topics = topics_json['topics']
                response_dict['topics'] = topics
                add_topics(topics_json=topics, email_id=record.id)
            # Single-argument call form prints identically on Python 2 and 3.
            print(response_dict)
            print("\n")
            response_array.append(response_dict)
    except SocketError:
        raise Exception("SocketError trying to connect")
    # Hand the collected results back instead of discarding them.
    return response_array
# Script entry point: run the topic-tagging pass. The call is unguarded, so it
# also runs whenever this module is imported.
process_text()
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment