Skip to content

Instantly share code, notes, and snippets.

@federicoemartinez
Created January 31, 2015 14:23
Show Gist options
  • Save federicoemartinez/5aafba331a319f821d94 to your computer and use it in GitHub Desktop.
Save federicoemartinez/5aafba331a319f821d94 to your computer and use it in GitHub Desktop.
import csv
import uuid
# Neo4j dependencies
from py2neo import Graph, Node, Relationship
import json
import requests
# ArangoDB dependencies
from arango import Arango
class DataLoader(object):
    """Reads airline, airport and route CSV files and feeds a graph loader."""

    def load_data(self, airport_file, airline_file, route_file, graph_loader):
        """Forward every CSV record to the matching ``graph_loader`` method.

        Airlines are loaded first, then airports, then routes. Each argument
        ending in ``_file`` is an iterable of comma-separated text lines (for
        example an open file object).

        :param airport_file: rows with 12 columns; latitude and longitude
            (columns 7 and 8) are converted to ``float`` before being passed
            on, every other column is passed through as a string.
        :param airline_file: rows with 8 columns, passed through as strings.
        :param route_file: rows with 9 columns. Rows whose airline id, source
            airport id or destination airport id is the null marker
            (a backslash followed by ``N``) are skipped.
        :param graph_loader: object exposing ``create_airline``,
            ``create_airport`` and ``create_route``; all arguments are passed
            positionally.
        :raises ValueError: if a row does not have the expected number of
            columns, or a latitude/longitude cannot be parsed as a float.
        """
        null_marker = "\\N"

        for record in csv.reader(airline_file, delimiter=","):
            (airline_id, name, alias, iata, icao,
             callsign, country, active) = record
            graph_loader.create_airline(
                airline_id, name, alias, iata, icao, callsign, country, active)

        for record in csv.reader(airport_file, delimiter=","):
            (airport_id, name, city, country, iata, icao,
             latitude, longitude, altitude, timezone, dst, tz) = record
            graph_loader.create_airport(
                airport_id, name, city, country, iata, icao,
                float(latitude), float(longitude),
                altitude, timezone, dst, tz)

        for record in csv.reader(route_file, delimiter=","):
            # The airline code and the airport codes are read but not
            # forwarded; only the numeric ids identify the endpoints.
            (airline, airline_id, source_airport, source_airport_id,
             destination_airport, destination_airport_id,
             codeshare, stops, equipement) = record
            endpoint_ids = (
                source_airport_id, airline_id, destination_airport_id)
            if null_marker in endpoint_ids:
                # A route with an unknown endpoint cannot be linked.
                continue
            graph_loader.create_route(
                airline_id, source_airport_id, destination_airport_id,
                codeshare, stops, equipement)
class Neo4JGraphLoader(object):
    """Graph loader that stores airlines, airports and routes in Neo4j.

    Nodes and relationships are created through py2neo's ``Graph``; the
    spatial index and layer are driven with direct REST calls against the
    same base URL. Created airport and airline nodes are cached by id so that
    ``create_route`` can link to them.
    """

    def __init__(self, url="http://localhost:7474/db/data/"):
        """Connect to the server and request creation of the "geom" index.

        :param url: base URL of the Neo4j REST API. A missing trailing slash
            is added, because the REST endpoints used by this class are built
            by appending relative paths to it.
        """
        self._url = url if url.endswith("/") else url + "/"
        self._graph = Graph(self._url)
        self._headers = {'content-type': 'application/json'}
        # Parameters required to create a spatial (point) index.
        payload = {
            "name": "geom",
            "config": {
                "provider": "spatial",
                "geometry_type": "point",
                "lat": "lat",
                "lon": "long"
            }
        }
        self._post_json("index/node/", payload)
        # Caches of created nodes, keyed by the ids found in the CSV data.
        self._airports = {}
        self._airlines = {}

    def _post_json(self, path, payload):
        """POST ``payload`` as JSON to ``path`` relative to the base URL.

        NOTE(review): callers ignore the returned response, so a failed
        request goes unnoticed -- confirm this best-effort behaviour is
        intended.
        """
        return requests.post(
            self._url + path, data=json.dumps(payload), headers=self._headers)

    def create_airport(self, airport_id, name, city, country, iata, icao, lat, long, altitude, timezone, dst, tz):
        """Create an "Airport" node and register it with the spatial index.

        :returns: the node returned by ``Graph.create``; it is also cached
            under ``airport_id`` for later use by ``create_route``.
        """
        n = Node("Airport", airport_id=airport_id, name=name, city=city, country=country, iata=iata,
                 icao=icao, lat=lat, long=long, altitude=altitude, timezone=timezone, dst=dst, tz=tz)
        n, = self._graph.create(n)
        self._airports[airport_id] = n
        node_uri = str(n.uri)
        # Add the node to the index. The key/value pair is a placeholder;
        # presumably the spatial provider indexes by the node's lat/long
        # properties instead -- TODO confirm.
        self._post_json(
            "index/node/geom",
            {'value': 'dummy', 'key': 'dummy', 'uri': node_uri})
        # ...and to the spatial layer.
        self._post_json(
            "ext/SpatialPlugin/graphdb/addNodeToLayer",
            {'layer': 'geom', 'node': node_uri})
        return n

    def create_airline(self, airline_id, name, alias, iata, icao, callsign, country, active):
        """Create an "Airline" node and cache it under ``airline_id``.

        :returns: the node returned by ``Graph.create``.
        """
        n = Node("Airline", airline_id=airline_id, name=name, alias=alias,
                 iata=iata, icao=icao, callsign=callsign, country=country, active=active)
        n, = self._graph.create(n)
        self._airlines[airline_id] = n
        return n

    def create_route(self, airline_id, source_airport_id, destination_airport_id, codeshare, stops, equipement):
        """Create a "Route" node linked from its two airports and its airline.

        Three relationships are created, each pointing at the route node:
        ``source_airport`` and ``destination_airport`` from the airports and
        ``airline_responsible`` from the airline.

        :raises KeyError: if any of the ids was not previously registered
            through ``create_airport`` / ``create_airline``. All ids are
            resolved before anything is written, so a missing id never leaves
            a partially linked route in the graph.
        """
        source = self._airports[source_airport_id]
        destination = self._airports[destination_airport_id]
        airline = self._airlines[airline_id]

        # The route node is never passed to Graph.create on its own;
        # presumably py2neo creates it together with the first relationship
        # -- TODO confirm.
        route = Node("Route", route_id=str(uuid.uuid4()),
                     codeshare=codeshare, stops=stops, equipement=equipement)
        links = (
            (source, "source_airport"),
            (destination, "destination_airport"),
            (airline, "airline_responsible"),
        )
        for start_node, rel_type in links:
            self._graph.create(Relationship(start_node, rel_type, route))
class ArangoGraphLoader(object):
    """Graph loader that stores airlines, airports and routes in ArangoDB.

    Data lives in the "openflights" database, in the graph
    "openflights_graph". Vertices are kept in the "airports", "airlines" and
    "routes" collections; every edge starts at a route document.
    """

    def __init__(self, host="localhost", port=8529):
        """Connect to ArangoDB and open, or create, the openflights graph.

        The schema is only created when the "openflights" database is not
        listed among the user databases. An already existing database is
        opened as is, so it keeps whatever indexes it was created with.

        :param host: ArangoDB server host.
        :param port: ArangoDB server port.
        """
        self._arango = Arango(host=host, port=port)
        if "openflights" not in self._arango.databases["user"]:
            db = self._arango.add_database("openflights")
            db.add_collection("airports")
            # A two-attribute geo index expects latitude first, then
            # longitude; the reverse order makes the index treat longitudes
            # as latitudes.
            db.collection("airports").add_geo_index(fields=["lat", "long"])
            db.add_collection("airlines")
            db.add_collection("routes")
            graph = db.add_graph("openflights_graph")
            for vertex_collection in ("airports", "airlines", "routes"):
                graph.add_vertex_collection(vertex_collection)
            # Edge collection name -> vertex collection the edge points to.
            edge_targets = (
                ("source_airport", "airports"),
                ("destination_airport", "airports"),
                ("airline_responsible", "airlines"),
            )
            for edge_collection, _ in edge_targets:
                db.add_collection(edge_collection, is_edge=True)
            for edge_collection, target in edge_targets:
                graph.add_edge_definition(
                    edge_collection=edge_collection,
                    from_vertex_collections=["routes"],
                    to_vertex_collections=[target])
        else:
            db = self._arango.db("openflights")
            graph = db.graph("openflights_graph")
        self._db = db
        self._graph = graph

    def create_airport(self, airport_id, name, city, country, iata, icao, lat, long, altitude, timezone, dst, tz):
        """Add an airport vertex keyed by ``airport_id``.

        :returns: whatever the graph's ``add_vertex`` returns.
        """
        return self._graph.add_vertex("airports", {"_key": airport_id,
                                                   "lat": lat,
                                                   "long": long,
                                                   "name": name,
                                                   "city": city,
                                                   "country": country,
                                                   "iata": iata,
                                                   "icao": icao,
                                                   "altitude": altitude,
                                                   "timezone": timezone,
                                                   "dst": dst,
                                                   "tz": tz})

    def create_airline(self, airline_id, name, alias, iata, icao, callsign, country, active):
        """Add an airline vertex keyed by ``airline_id``.

        :returns: whatever the graph's ``add_vertex`` returns.
        """
        return self._graph.add_vertex("airlines", {"_key": airline_id,
                                                   "name": name,
                                                   "alias": alias,
                                                   "iata": iata,
                                                   "icao": icao,
                                                   "callsign": callsign,
                                                   "country": country,
                                                   "active": active})

    def create_route(self, airline_id, source_airport_id, destination_airport_id, codeshare, stops, equipement):
        """Add a route vertex and the three edges that describe it.

        The route gets a random UUID as its key. Edges run from the route to
        its source airport, its destination airport and its airline; the
        target documents are referenced by key and are not looked up here.
        """
        ruuid = str(uuid.uuid4())
        self._graph.add_vertex("routes", {
            "_key": ruuid, "codeshare": codeshare, "stops": stops, "equipement": equipement})
        route_handle = "routes/" + ruuid
        edges = (
            ("source_airport", "airports/" + source_airport_id),
            ("destination_airport", "airports/" + destination_airport_id),
            ("airline_responsible", "airlines/" + airline_id),
        )
        for edge_collection, target_handle in edges:
            self._graph.add_edge(
                edge_collection,
                {
                    "_from": route_handle,
                    "_to": target_handle,
                })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment