Skip to content

Instantly share code, notes, and snippets.

@coolgarifTech
Last active December 17, 2015 09:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save coolgarifTech/5591442 to your computer and use it in GitHub Desktop.
Save coolgarifTech/5591442 to your computer and use it in GitHub Desktop.
A simple script that creates the hydraGraph database for Coolgarif Tech using the Neo4j embedded Python bindings.
# Script written by Richie Barter of Coolgarif Tech
# Date: April 2013
from neo4j import GraphDatabase
# Builds the hydraGraph database: one company node, four primary concept
# nodes, ten expansion ("sub") concept nodes, and the relationships between
# them. Every node is also registered in a named node index so the indexes
# can serve as entry points into the graph.
#
# NOTE(review): the script is not idempotent -- each run adds a fresh copy of
# every node and relationship to whatever the store already contains.

# Names of the node indexes used as entry points into the graph.
COMPANY_NAME_INDEX = "COMPANY_NAME_INDEX"
PRIMARY_CONCEPT_INDEX = "PRIMARY_CONCEPT_INDEX"
EXPANSION_CONCEPT_INDEX = "EXPANSION_CONCEPT_INDEX"


def _get_or_create_node_index(graph_db, index_name):
    """Return the node index called *index_name*, creating it if absent.

    The existence check is needed because the script may be run against a
    store in which the index was already created by an earlier run.
    """
    node_indexes = graph_db.node.indexes
    if node_indexes.exists(index_name):
        return node_indexes.get(index_name)
    return node_indexes.create(index_name)


def _create_indexed_node(graph_db, index, **properties):
    """Create a node with *properties* and index it under its 'name' value.

    Indexing by the node's own 'name' property (rather than a separately
    typed string) guarantees the index key always matches the stored name.
    """
    node = graph_db.node(**properties)
    index['name'][properties['name']] = node
    return node


# Path of the embedded store; 'userNAME' is a placeholder in the original.
db = GraphDatabase('/home/userNAME/neo4j/hydraGraph/data/graph.db')

try:
    with db.transaction:
        # --- Indexes -------------------------------------------------------
        companyNameIndex = _get_or_create_node_index(db, COMPANY_NAME_INDEX)
        primaryConceptIndex = _get_or_create_node_index(db, PRIMARY_CONCEPT_INDEX)
        # The original called create() here even when the index already
        # existed, unlike the two sibling branches; it now uses get().
        expansionConceptIndex = _get_or_create_node_index(db, EXPANSION_CONCEPT_INDEX)

        # --- Company node --------------------------------------------------
        coolgarifTech = _create_indexed_node(
            db, companyNameIndex,
            name='CoolgarifTech',
            description='Digital Agency focused on Data',
            established="Jan 2013",
            founders="Richie Barter & James Billot")
        # The company is additionally searchable by its description.
        companyNameIndex['description']['Digital Agency focused on Data'] = coolgarifTech

        # --- Primary concepts ----------------------------------------------
        dataStorage = _create_indexed_node(
            db, primaryConceptIndex,
            name='Data Storage',
            description='Focusing on how your organisation captures data')
        dataProcessing = _create_indexed_node(
            db, primaryConceptIndex,
            name='Data Processing',
            description='Developing tools and workflows for efficiently processing data')
        dataVisualisation = _create_indexed_node(
            db, primaryConceptIndex,
            name='Data Visualisation',
            description='Telling stories with data')
        dataPublishing = _create_indexed_node(
            db, primaryConceptIndex,
            name='Data Publication',
            description='Exposing data to internal and external audiences')

        # --- Expansion concepts (sub nodes of the primary concepts) --------
        dataMining = _create_indexed_node(
            db, expansionConceptIndex,
            name='Data Mining',
            description="Statistical analysis of data sets within an organisation")
        dataAnalysis = _create_indexed_node(
            db, expansionConceptIndex,
            name='Data Analysis',
            description="Delivering actionable insights from data")
        graphDB = _create_indexed_node(
            db, expansionConceptIndex,
            name='Graph Databases',
            description="Using network and graph theory to expose greater detail within data")
        dataStore = _create_indexed_node(
            db, expansionConceptIndex,
            name='Data Store',
            description="Aggregrating data from multiple sources into a single relational schema")
        nlp = _create_indexed_node(
            db, expansionConceptIndex,
            name='Natural Language Processing',
            description="Analysing and categorising unstructured text using natural language processing techniques")
        linkedData = _create_indexed_node(
            db, expansionConceptIndex,
            name='Linked Data',
            description="Bringing siloed data from different systems and processes together in a meaningful way")
        # The original indexed this node under the misspelt key
        # 'Product Lauch', so a lookup by its real name could not find it.
        productLaunch = _create_indexed_node(
            db, expansionConceptIndex,
            name='Product Launch',
            description="Using Data Visualisation to explain more about a product and its story")
        researchVisualisation = _create_indexed_node(
            db, expansionConceptIndex,
            name='Research Visualisation',
            description="Enhance internal or external research with bespoke data visualisation")
        rdf = _create_indexed_node(
            db, expansionConceptIndex,
            name='Semantic Data Markup',
            description="Linking data to formal ontologies to enable publication via API or integration with external data sets")
        api = _create_indexed_node(
            db, expansionConceptIndex,
            name='Public API Creation',
            description="Creating a public API to publish data externally in multiple formats")

        # --- Relationships -------------------------------------------------
        # No link to a reference node is created; the indexes above provide
        # the entry points into the graph instead.

        # Company -> primary concepts
        coolgarifTech.relationships.create('implements_solutions_in', dataStorage)
        coolgarifTech.relationships.create('implements_solutions_in', dataProcessing)
        coolgarifTech.relationships.create('implements_solutions_in', dataVisualisation)
        coolgarifTech.relationships.create('implements_solutions_in', dataPublishing)

        # Data Storage -> sub nodes
        dataStorage.relationships.create('includes', graphDB)
        dataStorage.relationships.create('includes', dataStore)

        # Data Processing -> sub nodes
        dataProcessing.relationships.create('includes', graphDB)
        dataProcessing.relationships.create('includes', dataMining)
        dataProcessing.relationships.create('includes', dataAnalysis)
        dataProcessing.relationships.create('includes', nlp)
        dataProcessing.relationships.create('includes', rdf)

        # Data Visualisation -> sub nodes (some carry an explanatory comment
        # property on the relationship itself)
        dataVisualisation.relationships.create('includes', productLaunch)
        dataVisualisation.relationships.create('includes', researchVisualisation)
        dataVisualisation.relationships.create('includes', graphDB, comment="Our Visualisation stack can be built upon graph databases (like this visualisation, for example :-)")
        dataVisualisation.relationships.create('includes', dataStore, comment="Our visualisation stack can sit on any relational data store")
        dataVisualisation.relationships.create('includes', dataAnalysis, comment="visualisations usually require analysis of internal data")
        dataVisualisation.relationships.create('includes', linkedData)

        # Data Publication -> sub nodes
        dataPublishing.relationships.create('includes', rdf)
        dataPublishing.relationships.create('includes', api)
        dataPublishing.relationships.create('includes', linkedData)

        # Totals for the whole store (not just this run), read while the
        # transaction is still open.
        number_of_nodes = len(db.nodes)
        number_of_relationships = len(db.relationships)

    # Report only after the transaction block has exited without error.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Number of nodes created")
    print(number_of_nodes)
    print("Number of relationships created")
    print(number_of_relationships)
finally:
    # Always shut the database down, even if building the graph failed;
    # per the original author, skipping this blocks the thread.
    db.shutdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment