Last active
December 17, 2015 09:59
-
-
Save coolgarifTech/5591442 to your computer and use it in GitHub Desktop.
Simple script for creating the hydraGraph DB for Coolgarif Tech using the Neo4j Python embedded bindings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script written by Richie Barter of Coolgarif Tech
# Date: April 2013
#
# Builds the hydraGraph database: one company node, four primary concept
# nodes and ten expansion (sub) concept nodes, each registered in a node
# index, plus the relationships that link them together.
from neo4j import GraphDatabase

# Names of the node indexes used as entry points into the graph.
COMPANY_NAME_INDEX = "COMPANY_NAME_INDEX"
PRIMARY_CONCEPT_INDEX = "PRIMARY_CONCEPT_INDEX"
EXPANSION_CONCEPT_INDEX = "EXPANSION_CONCEPT_INDEX"


def _get_or_create_node_index(db, index_name):
    """Return the node index called *index_name*, creating it if absent.

    Creating an index that already exists is not wanted, so existence is
    checked first and the existing index is fetched instead.
    """
    if db.node.indexes.exists(index_name):
        return db.node.indexes.get(index_name)
    return db.node.indexes.create(index_name)


def _create_concept(db, index, name, description):
    """Create a node with *name* and *description* and index it by name.

    The index key is taken from *name* itself so the indexed value can never
    drift from the node's ``name`` property.

    Returns the newly created node.
    """
    node = db.node(name=name, description=description)
    index['name'][name] = node
    return node


db = GraphDatabase('/home/userNAME/neo4j/hydraGraph/data/graph.db')
try:
    # NOTE(review): the original indentation was lost; the transaction is
    # assumed to cover index/node/relationship creation only -- confirm.
    with db.transaction:
        # Fetch (or create on first run) the indexes for each kind of node.
        companyNameIndex = _get_or_create_node_index(db, COMPANY_NAME_INDEX)
        primaryConceptIndex = _get_or_create_node_index(db, PRIMARY_CONCEPT_INDEX)
        expansionConceptIndex = _get_or_create_node_index(db, EXPANSION_CONCEPT_INDEX)

        # Company node, indexed by both its name and its description.
        coolgarifTech = db.node(
            name='CoolgarifTech',
            description='Digital Agency focused on Data',
            established="Jan 2013",
            founders="Richie Barter & James Billot",
        )
        companyNameIndex['name']['CoolgarifTech'] = coolgarifTech
        companyNameIndex['description']['Digital Agency focused on Data'] = coolgarifTech

        # Primary concepts
        dataStorage = _create_concept(
            db, primaryConceptIndex, 'Data Storage',
            'Focusing on how your organisation captures data')
        dataProcessing = _create_concept(
            db, primaryConceptIndex, 'Data Processing',
            'Developing tools and workflows for efficiently processing data')
        dataVisualisation = _create_concept(
            db, primaryConceptIndex, 'Data Visualisation',
            'Telling stories with data')
        dataPublishing = _create_concept(
            db, primaryConceptIndex, 'Data Publication',
            'Exposing data to internal and external audiences')

        # Sub nodes (expansion concepts) for each of the primary concepts
        dataMining = _create_concept(
            db, expansionConceptIndex, 'Data Mining',
            "Statistical analysis of data sets within an organisation")
        dataAnalysis = _create_concept(
            db, expansionConceptIndex, 'Data Analysis',
            "Delivering actionable insights from data")
        graphDB = _create_concept(
            db, expansionConceptIndex, 'Graph Databases',
            "Using network and graph theory to expose greater detail within data")
        dataStore = _create_concept(
            db, expansionConceptIndex, 'Data Store',
            "Aggregrating data from multiple sources into a single relational schema")
        nlp = _create_concept(
            db, expansionConceptIndex, 'Natural Language Processing',
            "Analysing and categorising unstructured text using natural language processing techniques")
        linkedData = _create_concept(
            db, expansionConceptIndex, 'Linked Data',
            "Bringing siloed data from different systems and processes together in a meaningful way")
        productLaunch = _create_concept(
            db, expansionConceptIndex, 'Product Launch',
            "Using Data Visualisation to explain more about a product and its story")
        researchVisualisation = _create_concept(
            db, expansionConceptIndex, 'Research Visualisation',
            "Enhance internal or external research with bespoke data visualisation")
        rdf = _create_concept(
            db, expansionConceptIndex, 'Semantic Data Markup',
            "Linking data to formal ontologies to enable publication via API or integration with external data sets")
        api = _create_concept(
            db, expansionConceptIndex, 'Public API Creation',
            "Creating a public API to publish data externally in multiple formats")

        # Relationships.
        # No link to the reference node is created for the moment: the
        # indexes should provide the alternative entry points into the graph.

        # Primary relationships: company -> primary concepts
        for primaryConcept in (dataStorage, dataProcessing, dataVisualisation, dataPublishing):
            coolgarifTech.relationships.create('implements_solutions_in', primaryConcept)

        # Secondary relationships between primary concepts & sub nodes
        # dataStorage
        dataStorage.relationships.create('includes', graphDB)
        dataStorage.relationships.create('includes', dataStore)

        # dataProcessing
        dataProcessing.relationships.create('includes', graphDB)
        dataProcessing.relationships.create('includes', dataMining)
        dataProcessing.relationships.create('includes', dataAnalysis)
        dataProcessing.relationships.create('includes', nlp)
        dataProcessing.relationships.create('includes', rdf)

        # dataVisualisation
        dataVisualisation.relationships.create('includes', productLaunch)
        dataVisualisation.relationships.create('includes', researchVisualisation)
        dataVisualisation.relationships.create('includes', graphDB, comment="Our Visualisation stack can be built upon graph databases (like this visualisation, for example :-)")
        dataVisualisation.relationships.create('includes', dataStore, comment="Our visualisation stack can sit on any relational data store")
        dataVisualisation.relationships.create('includes', dataAnalysis, comment="visualisations usually require analysis of internal data")
        dataVisualisation.relationships.create('includes', linkedData)

        # dataPublishing
        dataPublishing.relationships.create('includes', rdf)
        dataPublishing.relationships.create('includes', api)
        dataPublishing.relationships.create('includes', linkedData)

    # End of script housekeeping: report totals on the command line.
    # These are the totals for the whole database, not only what this run
    # added.
    number_of_nodes = len(db.nodes)
    number_of_relationships = len(db.relationships)
    print("Number of nodes created")
    print(number_of_nodes)
    print("Number of relationships created")
    print(number_of_relationships)
finally:
    # Always shut down, even after a failure, or the thread will block!
    db.shutdown()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment