# coolgarifTech/hydraGraph_build.py

## hydraGraph_build.py
# Script written by Richie Barter of Coolgarif Tech
# Date: April 2013

from neo4j import GraphDatabase

# Names of the node indexes that act as entry points into the graph
COMPANY_NAME_INDEX = "COMPANY_NAME_INDEX"
PRIMARY_CONCEPT_INDEX = "PRIMARY_CONCEPT_INDEX"
EXPANSION_CONCEPT_INDEX = "EXPANSION_CONCEPT_INDEX"


def _get_or_create_index(graph, index_name):
    """Return the node index called *index_name*, creating it if it is missing."""
    if graph.node.indexes.exists(index_name):
        return graph.node.indexes.get(index_name)
    return graph.node.indexes.create(index_name)


def _add_indexed_node(graph, index, **properties):
    """Create a node holding *properties* and index it under its 'name' value.

    Deriving the index key from the node's own name keeps the two in sync
    (the hand-typed key for 'Product Launch' used to be misspelt).
    """
    node = graph.node(**properties)
    index['name'][properties['name']] = node
    return node


db = GraphDatabase('/home/userNAME/neo4j/hydraGraph/data/graph.db')

try:
    # Every write below happens inside this one transaction
    with db.transaction:

        # Look up the indexes, creating each only when it does not exist yet
        companyNameIndex = _get_or_create_index(db, COMPANY_NAME_INDEX)
        primaryConceptIndex = _get_or_create_index(db, PRIMARY_CONCEPT_INDEX)
        # Fixed: the "already exists" branch used to call create() again here
        expansionConceptIndex = _get_or_create_index(db, EXPANSION_CONCEPT_INDEX)

        # Creating all the nodes
        # Add each one to a node index
        # Company Node! (indexed by name and by description)
        coolgarifTech = _add_indexed_node(
            db, companyNameIndex,
            name='CoolgarifTech',
            description='Digital Agency focused on Data',
            established="Jan 2013",
            founders="Richie Barter & James Billot")
        companyNameIndex['description']['Digital Agency focused on Data'] = coolgarifTech

        # Primary Concepts
        dataStorage = _add_indexed_node(
            db, primaryConceptIndex,
            name='Data Storage',
            description='Focusing on how your organisation captures data')
        dataProcessing = _add_indexed_node(
            db, primaryConceptIndex,
            name='Data Processing',
            description='Developing tools and workflows for efficiently processing data')
        dataVisualisation = _add_indexed_node(
            db, primaryConceptIndex,
            name='Data Visualisation',
            description='Telling stories with data')
        dataPublishing = _add_indexed_node(
            db, primaryConceptIndex,
            name='Data Publication',
            description='Exposing data to internal and external audiences')

        # Sub Nodes for each of the primary Concepts
        dataMining = _add_indexed_node(
            db, expansionConceptIndex,
            name='Data Mining',
            description="Statistical analysis of data sets within an organisation")
        dataAnalysis = _add_indexed_node(
            db, expansionConceptIndex,
            name='Data Analysis',
            description="Delivering actionable insights from data")
        graphDB = _add_indexed_node(
            db, expansionConceptIndex,
            name='Graph Databases',
            description="Using network and graph theory to expose greater detail within data")
        dataStore = _add_indexed_node(
            db, expansionConceptIndex,
            name='Data Store',
            description="Aggregrating data from multiple sources into a single relational schema")
        nlp = _add_indexed_node(
            db, expansionConceptIndex,
            name='Natural Language Processing',
            description="Analysing and categorising unstructured text using natural language processing techniques")
        linkedData = _add_indexed_node(
            db, expansionConceptIndex,
            name='Linked Data',
            description="Bringing siloed data from different systems and processes together in a meaningful way")
        productLaunch = _add_indexed_node(
            db, expansionConceptIndex,
            name='Product Launch',
            description="Using Data Visualisation to explain more about a product and its story")
        researchVisualisation = _add_indexed_node(
            db, expansionConceptIndex,
            name='Research Visualisation',
            description="Enhance internal or external research with bespoke data visualisation")
        rdf = _add_indexed_node(
            db, expansionConceptIndex,
            name='Semantic Data Markup',
            description="Linking data to formal ontologies to enable publication via API or integration with external data sets")
        api = _add_indexed_node(
            db, expansionConceptIndex,
            name='Public API Creation',
            description="Creating a public API to publish data externally in multiple formats")

        # Create all the relationships
        # No 'subreference' link to the reference node for the moment - the
        # indexes should provide the alternative entry points into the graph
        # Primary Relationships: company -> each primary concept
        for concept in (dataStorage, dataProcessing, dataVisualisation, dataPublishing):
            coolgarifTech.relationships.create('implements_solutions_in', concept)

        # Secondary Relationships between primary concepts & sub nodes
        # dataStorage
        for sub in (graphDB, dataStore):
            dataStorage.relationships.create('includes', sub)

        # dataProcessing
        for sub in (graphDB, dataMining, dataAnalysis, nlp, rdf):
            dataProcessing.relationships.create('includes', sub)

        # dataVisualisation (some links carry an explanatory 'comment' property)
        dataVisualisation.relationships.create('includes', productLaunch)
        dataVisualisation.relationships.create('includes', researchVisualisation)
        dataVisualisation.relationships.create('includes', graphDB, comment="Our Visualisation stack can be built upon graph databases (like this visualisation, for example :-)")
        dataVisualisation.relationships.create('includes', dataStore, comment="Our visualisation stack can sit on any relational data store")
        dataVisualisation.relationships.create('includes', dataAnalysis, comment="visualisations usually require analysis of internal data")
        dataVisualisation.relationships.create('includes', linkedData)

        # dataPublishing
        for sub in (rdf, api, linkedData):
            dataPublishing.relationships.create('includes', sub)

        # End of Script housekeeping
        # Print the node / relationship counts to the cmd line.
        # NOTE: these are the totals reported by the database, not only what
        # this run added.
        # Might add some logic to count / print no of nodes/rels added :-)
        number_of_nodes = len(db.nodes)
        number_of_relationships = len(db.relationships)
        # Single-argument print(...) gives the same output on Python 2
        print("Number of nodes created")
        print(number_of_nodes)
        print("Number of relationships created")
        print(number_of_relationships)
finally:
    # Make sure to include or you will block the thread!
    # Running it in 'finally' guarantees the shutdown even when the build fails.
    db.shutdown()
	# Script written by Richie Barter of Coolgarif Tech
	# Date: April 2013

from neo4j import GraphDatabase

# Names of the node indexes that act as entry points into the graph
COMPANY_NAME_INDEX = "COMPANY_NAME_INDEX"
PRIMARY_CONCEPT_INDEX = "PRIMARY_CONCEPT_INDEX"
EXPANSION_CONCEPT_INDEX = "EXPANSION_CONCEPT_INDEX"


def _get_or_create_index(graph, index_name):
    """Return the node index called *index_name*, creating it if it is missing."""
    if graph.node.indexes.exists(index_name):
        return graph.node.indexes.get(index_name)
    return graph.node.indexes.create(index_name)


def _add_indexed_node(graph, index, **properties):
    """Create a node holding *properties* and index it under its 'name' value.

    Deriving the index key from the node's own name keeps the two in sync
    (the hand-typed key for 'Product Launch' used to be misspelt).
    """
    node = graph.node(**properties)
    index['name'][properties['name']] = node
    return node


db = GraphDatabase('/home/userNAME/neo4j/hydraGraph/data/graph.db')

try:
    # Every write below happens inside this one transaction
    with db.transaction:

        # Look up the indexes, creating each only when it does not exist yet
        companyNameIndex = _get_or_create_index(db, COMPANY_NAME_INDEX)
        primaryConceptIndex = _get_or_create_index(db, PRIMARY_CONCEPT_INDEX)
        # Fixed: the "already exists" branch used to call create() again here
        expansionConceptIndex = _get_or_create_index(db, EXPANSION_CONCEPT_INDEX)

        # Creating all the nodes
        # Add each one to a node index
        # Company Node! (indexed by name and by description)
        coolgarifTech = _add_indexed_node(
            db, companyNameIndex,
            name='CoolgarifTech',
            description='Digital Agency focused on Data',
            established="Jan 2013",
            founders="Richie Barter & James Billot")
        companyNameIndex['description']['Digital Agency focused on Data'] = coolgarifTech

        # Primary Concepts
        dataStorage = _add_indexed_node(
            db, primaryConceptIndex,
            name='Data Storage',
            description='Focusing on how your organisation captures data')
        dataProcessing = _add_indexed_node(
            db, primaryConceptIndex,
            name='Data Processing',
            description='Developing tools and workflows for efficiently processing data')
        dataVisualisation = _add_indexed_node(
            db, primaryConceptIndex,
            name='Data Visualisation',
            description='Telling stories with data')
        dataPublishing = _add_indexed_node(
            db, primaryConceptIndex,
            name='Data Publication',
            description='Exposing data to internal and external audiences')

        # Sub Nodes for each of the primary Concepts
        dataMining = _add_indexed_node(
            db, expansionConceptIndex,
            name='Data Mining',
            description="Statistical analysis of data sets within an organisation")
        dataAnalysis = _add_indexed_node(
            db, expansionConceptIndex,
            name='Data Analysis',
            description="Delivering actionable insights from data")
        graphDB = _add_indexed_node(
            db, expansionConceptIndex,
            name='Graph Databases',
            description="Using network and graph theory to expose greater detail within data")
        dataStore = _add_indexed_node(
            db, expansionConceptIndex,
            name='Data Store',
            description="Aggregrating data from multiple sources into a single relational schema")
        nlp = _add_indexed_node(
            db, expansionConceptIndex,
            name='Natural Language Processing',
            description="Analysing and categorising unstructured text using natural language processing techniques")
        linkedData = _add_indexed_node(
            db, expansionConceptIndex,
            name='Linked Data',
            description="Bringing siloed data from different systems and processes together in a meaningful way")
        productLaunch = _add_indexed_node(
            db, expansionConceptIndex,
            name='Product Launch',
            description="Using Data Visualisation to explain more about a product and its story")
        researchVisualisation = _add_indexed_node(
            db, expansionConceptIndex,
            name='Research Visualisation',
            description="Enhance internal or external research with bespoke data visualisation")
        rdf = _add_indexed_node(
            db, expansionConceptIndex,
            name='Semantic Data Markup',
            description="Linking data to formal ontologies to enable publication via API or integration with external data sets")
        api = _add_indexed_node(
            db, expansionConceptIndex,
            name='Public API Creation',
            description="Creating a public API to publish data externally in multiple formats")

        # Create all the relationships
        # No 'subreference' link to the reference node for the moment - the
        # indexes should provide the alternative entry points into the graph
        # Primary Relationships: company -> each primary concept
        for concept in (dataStorage, dataProcessing, dataVisualisation, dataPublishing):
            coolgarifTech.relationships.create('implements_solutions_in', concept)

        # Secondary Relationships between primary concepts & sub nodes
        # dataStorage
        for sub in (graphDB, dataStore):
            dataStorage.relationships.create('includes', sub)

        # dataProcessing
        for sub in (graphDB, dataMining, dataAnalysis, nlp, rdf):
            dataProcessing.relationships.create('includes', sub)

        # dataVisualisation (some links carry an explanatory 'comment' property)
        dataVisualisation.relationships.create('includes', productLaunch)
        dataVisualisation.relationships.create('includes', researchVisualisation)
        dataVisualisation.relationships.create('includes', graphDB, comment="Our Visualisation stack can be built upon graph databases (like this visualisation, for example :-)")
        dataVisualisation.relationships.create('includes', dataStore, comment="Our visualisation stack can sit on any relational data store")
        dataVisualisation.relationships.create('includes', dataAnalysis, comment="visualisations usually require analysis of internal data")
        dataVisualisation.relationships.create('includes', linkedData)

        # dataPublishing
        for sub in (rdf, api, linkedData):
            dataPublishing.relationships.create('includes', sub)

        # End of Script housekeeping
        # Print the node / relationship counts to the cmd line.
        # NOTE: these are the totals reported by the database, not only what
        # this run added.
        # Might add some logic to count / print no of nodes/rels added :-)
        number_of_nodes = len(db.nodes)
        number_of_relationships = len(db.relationships)
        # Single-argument print(...) gives the same output on Python 2
        print("Number of nodes created")
        print(number_of_nodes)
        print("Number of relationships created")
        print(number_of_relationships)
finally:
    # Make sure to include or you will block the thread!
    # Running it in 'finally' guarantees the shutdown even when the build fails.
    db.shutdown()