Skip to content

Instantly share code, notes, and snippets.

@bosborne
Last active January 28, 2022 14:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bosborne/7fbbe8d6668b9c10f9e87fc1df3f3aca to your computer and use it in GitHub Desktop.
Save bosborne/7fbbe8d6668b9c10f9e87fc1df3f3aca to your computer and use it in GitHub Desktop.
Make a Neo4J database for a collection of wines using py2neo
#!/usr/bin/env python3
'''
Use py2neo to create a Neo4J Wine database.
Data is stored in YAML files like this:
region: Cotes du Rhone
country: France
comment: Mostly Grenache. Thin, sour, unpleasant.
winemaker: Joseph Sabon
year: 2006
pk: 331
score: 1
price: 15.00
grapes: Cinsault,Mourvedre,Syrah,Grenache
creation_date: 2008-12-31
name: Montueil La Levade
winetype: red
'''
import argparse
import glob
import re
import os
import yaml
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher
def main():
    '''
    Command-line entry point: parse the options, then connect to Neo4J,
    read the YAML wine cards, and load them into the graph.
    '''
    # (short flag, long flag, extra argparse keywords), in help-output order
    option_table = (
        ("-p", "--port", dict(default='7687', help="Neo4J port")),
        ("-s", "--server", dict(default='localhost', help="Neo4J host")),
        ("-d", "--directory", dict(required=True,
                                   help="Directory with YAML files")),
        ("-v", "--verbose", dict(action="store_true", help="Verbose mode")),
    )
    cli = argparse.ArgumentParser()
    for short_flag, long_flag, extra in option_table:
        cli.add_argument(short_flag, long_flag, **extra)
    opts = cli.parse_args()

    loader = NeoLoader(opts.port, opts.server, opts.directory, opts.verbose)
    # Order matters: setup() connects and wipes the graph, readYaml() fills
    # the wine list, load() writes that list to the graph.
    loader.setup()
    loader.readYaml()
    loader.load()
class NeoLoader:
    '''
    Load a directory of YAML wine cards into a Neo4J graph using py2neo.

    Intended call order: setup() connects and empties the graph, readYaml()
    parses the cards into self.wines, load() creates the nodes and edges.

    Graph shape written by load():
        (Wine)-[IsFrom]->(Region)-[IsIn]->(Country)
        (Wine)-[MadeFrom]->(Grape)
    '''

    def __init__(self, port, server, dir, verbose) -> None:
        '''
        port:    Neo4J Bolt port (string or int)
        server:  Neo4J host name
        dir:     directory containing the *.yaml wine cards
        verbose: print progress messages when true
        '''
        self.port = port
        self.server = server
        self.dir = dir
        self.verbose = verbose
        # Filled by readYaml(); starts empty so load() is a no-op before that
        self.wines = []

    def setup(self):
        '''
        Connect to Neo4J over Bolt, delete everything in the graph, and
        create the matcher objects used by load().

        Credentials come from the NEO4J_USERNAME and NEO4J_PASSWORD
        environment variables; a KeyError is raised if either is unset.

        WARNING: every existing node and edge in the database is deleted.

        Connecting to Neo4J in EC2:
        - Add Custom TCP Security Group rules that open 7474, 7473, and 7687
          to 0.0.0.0
        - Configure in neo4j.conf: dbms.connectors.default_listen_address=0.0.0.0
        By default Neo4J is only listening to localhost.
        '''
        # format() rather than '+' so an integer port also works
        url = "bolt://{0}:{1}".format(self.server, self.port)
        self.graph = Graph(url, auth=(
            os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]))
        if self.verbose:
            print("Connected: {0}".format(url))
        # Delete all nodes and edges, make query objects
        self.graph.run('MATCH (n) DETACH DELETE n')
        self.nodeMatcher = NodeMatcher(self.graph)
        self.relationshipMatcher = RelationshipMatcher(self.graph)

    def load(self):
        '''
        Create the nodes and edges for every wine in self.wines.

        Country, Region and Grape nodes are shared: each is looked up by name
        and only created when missing. A new Wine node is always created.
        Raises KeyError if a wine lacks one of the required fields
        (pk, year, score, winetype, winemaker, country, region).
        '''
        for winenum, wine in enumerate(self.wines, start=1):
            if self.verbose:
                print("Wine: {0}".format(wine['pk']))
            country = self._findOrMakeNode("Country", wine['country'])
            region = self._findOrMakeNode("Region", wine['region'])
            # Connect Country and Region with edge "IsIn" if they're not connected
            isIn = self.relationshipMatcher.match(
                (region, country), r_type='IsIn').first()
            if isIn is None:
                self.graph.create(Relationship(region, "IsIn", country))
            # Make Wine node
            w = Node("Wine", pk=int(wine['pk']),
                     year=int(wine['year']),
                     score=int(wine['score']),
                     winetype=wine['winetype'],
                     winemaker=wine['winemaker'],
                     creation_date=wine['creation_date'])
            # Wine may not have a name
            if wine.get('name'):
                w['name'] = wine['name']
            self.graph.create(w)
            if self.verbose:
                print("Made Wine, pk: {0}".format(wine['pk']))
            # Connect Wine and Region with edge "IsFrom"
            self.graph.create(Relationship(w, "IsFrom", region))
            for grapename in wine['grapes']:
                grape = self._findOrMakeNode("Grape", grapename)
                # Connect Wine and Grape with edge "MadeFrom"
                self.graph.create(Relationship(w, "MadeFrom", grape))
            if self.verbose:
                print("Wine number {}".format(winenum))

    def _findOrMakeNode(self, label, name):
        '''
        Return the node with this label and name, creating it if it does
        not exist yet.
        '''
        node = self.nodeMatcher.match(label).where(name=name).first()
        if node is None:
            node = self.makeNode(label, name)
        return node

    def makeNode(self, label, name):
        '''
        Create a node with the given label and a "name" property, and
        return it.
        '''
        n = Node(label, name=name)
        self.graph.create(n)
        if self.verbose:
            print("Created {0}: {1}".format(label, name))
        return n

    def readYaml(self):
        '''
        Read YAML files with wine details.

        Every *.yaml file in self.dir is parsed (in sorted order, so runs are
        repeatable) and the result is stored in self.wines. A card that is not
        valid YAML, or that does not contain a mapping, is reported and
        skipped instead of aborting the run.
        '''
        self.wines = []
        for card in sorted(glob.glob(os.path.join(self.dir, '*.yaml'))):
            try:
                with open(card, encoding='utf-8') as f:
                    y = yaml.safe_load(f)
            except yaml.YAMLError as err:
                print("Could not read card '{0}': {1}".format(card, err))
                # Nothing was parsed, so there is nothing to append
                continue
            if self.verbose:
                print("Wine YAML is '{}'".format(y))
            wine = self._prepareWine(y)
            if wine is None:
                print("Skipping card '{0}': not a YAML mapping".format(card))
                continue
            self.wines.append(wine)

    @staticmethod
    def _prepareWine(card):
        '''
        Normalize one parsed card in place for load() and return it, or
        return None when the card is not a mapping (e.g. an empty file).

        - 'grapes' becomes a list of stripped, non-empty, unique names;
          a comma-separated string or a YAML list is accepted.
        - 'creation_date' defaults to December 22nd, four years after
          'year' (KeyError if 'year' is missing too).
        '''
        if not isinstance(card, dict):
            return None
        grapes = card.get('grapes') or ''
        if isinstance(grapes, str):
            grapes = grapes.split(',')
        names = (str(g).strip() for g in grapes)
        # dict.fromkeys drops duplicates while keeping the original order
        card['grapes'] = list(dict.fromkeys(n for n in names if n))
        # Add creation_date if it's missing
        # NOTE(review): a date present in the YAML is presumably parsed into
        # a date object while this default is a string - confirm that both
        # are acceptable as the Wine node's creation_date.
        if card.get('creation_date') is None:
            card['creation_date'] = str(int(card['year']) + 4) + '-12-22'
        return card
# Run the loader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment