Skip to content

Instantly share code, notes, and snippets.

@ryanpstauffer
Last active April 2, 2019 19:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryanpstauffer/b5a667fe05011e1a2c437429efa5a0fc to your computer and use it in GitHub Desktop.
Save ryanpstauffer/b5a667fe05011e1a2c437429efa5a0fc to your computer and use it in GitHub Desktop.
Evolving Data Models with JanusGraph
// SymphonyGraph Initial Schema
// Every schema element below is created through one management transaction.
mgmt = graph.openManagement()

// Small factories: each call creates one label through the open transaction.
newVertexLabel = { String labelName -> mgmt.makeVertexLabel(labelName).make() }
newEdgeLabel = { String labelName, mult ->
    mgmt.makeEdgeLabel(labelName).multiplicity(mult).make()
}

// Vertex labels
Orchestra = newVertexLabel('Orchestra')
Artist    = newVertexLabel('Artist')
Work      = newVertexLabel('Work')
Concert   = newVertexLabel('Concert')

// Edge labels - the relationships between Vertices
COMPOSER  = newEdgeLabel('COMPOSER', MANY2ONE)
SOLOIST   = newEdgeLabel('SOLOIST', SIMPLE)
CONDUCTOR = newEdgeLabel('CONDUCTOR', SIMPLE)
ORCHESTRA = newEdgeLabel('ORCHESTRA', SIMPLE)
INCLUDES  = newEdgeLabel('INCLUDES', SIMPLE)
// Define Vertex Property Keys
// Every key is declared with Cardinality.SINGLE; only the data type varies.
// After each group of keys, addProperties() attaches them to a vertex label.

// Orchestra
name = mgmt.makePropertyKey('name').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Orchestra, name)

// Artist
lastName = mgmt.makePropertyKey('lastName').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
firstName = mgmt.makePropertyKey('firstName').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
gender = mgmt.makePropertyKey('gender').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
nationality = mgmt.makePropertyKey('nationality').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
deceased = mgmt.makePropertyKey('deceased').
    dataType(Boolean.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Artist, lastName, firstName, gender, nationality, deceased)

// Work
title = mgmt.makePropertyKey('title').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
// The variable is named after the key it holds ('compositionYear', an Integer).
compositionYear = mgmt.makePropertyKey('compositionYear').
    dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
soloInstrument = mgmt.makePropertyKey('soloInstrument').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Work, title, compositionYear, soloInstrument)

// Concert (reuses the 'name' key created for Orchestra above)
// 'firstDate' is declared as a String key, not a date type.
firstDate = mgmt.makePropertyKey('firstDate').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
numShows = mgmt.makePropertyKey('numShows').
    dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Concert, name, firstDate, numShows)
// Allowed connections, one row each: [EdgeLabel, out VertexLabel, in VertexLabel]
[
    [COMPOSER,  Work,    Artist],
    [SOLOIST,   Work,    Artist],
    [CONDUCTOR, Work,    Artist],
    [ORCHESTRA, Concert, Orchestra],
    [INCLUDES,  Concert, Work]
].each { edge, outLabel, inLabel -> mgmt.addConnection(edge, outLabel, inLabel) }

// Persist everything defined in this management transaction.
mgmt.commit()
// No indices are built on the graph properties:
// the sample dataset is very small,
// and indices are outside of the scope of this discussion
// Step 1 - Initial Schema Example - Setup
//
// Run with a Gremlin Console from the command line:
// $ bin/gremlin -i InitialSetup.groovy
//
// All testing uses an in-memory graph. It supports the data modelling
// graph operations used here and keeps setup simple, because no external
// storage backend is required.
graph = JanusGraphFactory.build().set('storage.backend', 'inmemory').open()
//-----------------------
// Load the Initial Schema
//-----------------------
// Every schema element below is created through one management transaction.
mgmt = graph.openManagement()

// Small factories: each call creates one label through the open transaction.
newVertexLabel = { String labelName -> mgmt.makeVertexLabel(labelName).make() }
newEdgeLabel = { String labelName, mult ->
    mgmt.makeEdgeLabel(labelName).multiplicity(mult).make()
}

// Vertex labels
Orchestra = newVertexLabel('Orchestra')
Artist    = newVertexLabel('Artist')
Work      = newVertexLabel('Work')
Concert   = newVertexLabel('Concert')

// Edge labels - the relationships between Vertices
COMPOSER  = newEdgeLabel('COMPOSER', MANY2ONE)
SOLOIST   = newEdgeLabel('SOLOIST', SIMPLE)
CONDUCTOR = newEdgeLabel('CONDUCTOR', SIMPLE)
ORCHESTRA = newEdgeLabel('ORCHESTRA', SIMPLE)
INCLUDES  = newEdgeLabel('INCLUDES', SIMPLE)
// Define Vertex Property Keys
// Every key is declared with Cardinality.SINGLE; only the data type varies.
// After each group of keys, addProperties() attaches them to a vertex label.

// Orchestra
name = mgmt.makePropertyKey('name').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Orchestra, name)

// Artist
lastName = mgmt.makePropertyKey('lastName').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
firstName = mgmt.makePropertyKey('firstName').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
gender = mgmt.makePropertyKey('gender').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
nationality = mgmt.makePropertyKey('nationality').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
deceased = mgmt.makePropertyKey('deceased').
    dataType(Boolean.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Artist, lastName, firstName, gender, nationality, deceased)

// Work
title = mgmt.makePropertyKey('title').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
// The variable is named after the key it holds ('compositionYear', an Integer).
compositionYear = mgmt.makePropertyKey('compositionYear').
    dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
soloInstrument = mgmt.makePropertyKey('soloInstrument').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Work, title, compositionYear, soloInstrument)

// Concert (reuses the 'name' key created for Orchestra above)
// 'firstDate' is declared as a String key, not a date type.
firstDate = mgmt.makePropertyKey('firstDate').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
numShows = mgmt.makePropertyKey('numShows').
    dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Concert, name, firstDate, numShows)
// Allowed connections, one row each: [EdgeLabel, out VertexLabel, in VertexLabel]
[
    [COMPOSER,  Work,    Artist],
    [SOLOIST,   Work,    Artist],
    [CONDUCTOR, Work,    Artist],
    [ORCHESTRA, Concert, Orchestra],
    [INCLUDES,  Concert, Work]
].each { edge, outLabel, inLabel -> mgmt.addConnection(edge, outLabel, inLabel) }

// Persist everything defined in this management transaction.
mgmt.commit()
//-----------------------
// Add Sample Data
//-----------------------
g = graph.traversal()

// Adds one vertex with the given label, applies the properties in map order,
// and returns the created vertex.
addVertex = { String vertexLabel, Map props ->
    props.inject(g.addV(vertexLabel)) { traversal, entry ->
        traversal.property(entry.key, entry.value)
    }.next()
}

// Two Orchestras
nyPhil = addVertex('Orchestra', [name: 'New York Philharmonic'])
cso    = addVertex('Orchestra', [name: 'Chicago Symphony Orchestra'])

// Four Artists (not every Artist carries every property)
salonen = addVertex('Artist', [
    lastName: 'Salonen', firstName: 'Esa-Pekka',
    gender: 'Male', nationality: 'Finnish'])
strauss = addVertex('Artist', [
    lastName: 'Strauss', firstName: 'Richard',
    gender: 'Male', nationality: 'German', deceased: true])
gilbert = addVertex('Artist', [lastName: 'Gilbert', firstName: 'Alan'])
ma      = addVertex('Artist', [lastName: 'Ma', firstName: 'Yo-Yo'])

// Three Works
alsoSprach    = addVertex('Work', [title: 'Also sprach Zarathustra', compositionYear: 1896])
wing          = addVertex('Work', [title: 'Wing on Wing', compositionYear: 2004])
celloConcerto = addVertex('Work', [
    title: 'Cello Concerto', compositionYear: 2017, soloInstrument: 'cello'])

// Three Concerts
concert1 = addVertex('Concert', [
    name: 'Esa-Pekka Salonen Conducts US Premiere by Tansy Davies',
    firstDate: '4/27/2017', numShows: 3])
concert2 = addVertex('Concert', [
    name: 'Premieres by Esa-Pekka Salonen and Anna Thorvaldsdottir',
    firstDate: '5/19/2017', numShows: 3])
concert3 = addVertex('Concert', [
    name: 'Salonen & Yo-Yo Ma',
    firstDate: '3/9/2017', numShows: 3])
// Add relationships
// Each traversal is run only for its side effect (creating one edge), so all
// of them end with iterate(); none of the created edges is used afterwards.

// INCLUDES: Concert -> Work
g.addE('INCLUDES').from(concert1).to(alsoSprach).iterate()
g.addE('INCLUDES').from(concert2).to(wing).iterate()
g.addE('INCLUDES').from(concert3).to(celloConcerto).iterate()

// ORCHESTRA: Concert -> Orchestra
g.addE('ORCHESTRA').from(concert1).to(nyPhil).iterate()
g.addE('ORCHESTRA').from(concert2).to(nyPhil).iterate()
g.addE('ORCHESTRA').from(concert3).to(cso).iterate()

// COMPOSER: Work -> Artist
g.addE('COMPOSER').from(celloConcerto).to(salonen).iterate()
g.addE('COMPOSER').from(alsoSprach).to(strauss).iterate()
g.addE('COMPOSER').from(wing).to(salonen).iterate()

// CONDUCTOR: Work -> Artist
g.addE('CONDUCTOR').from(celloConcerto).to(salonen).iterate()
g.addE('CONDUCTOR').from(alsoSprach).to(salonen).iterate()
g.addE('CONDUCTOR').from(wing).to(gilbert).iterate()

// SOLOIST: Work -> Artist
g.addE('SOLOIST').from(celloConcerto).to(ma).iterate()

// Commit the transaction holding the sample data.
g.tx().commit()
// A few simple asserts sanity-check the loaded data model.
assert g.V().hasLabel('Artist').count().next() == 4
assert g.V().hasLabel('Concert').count().next() == 3
// Walk Orchestra <- Concert -> Work -> soloist Artist and check the soloist.
assert g.V().
    has('Orchestra', 'name', 'Chicago Symphony Orchestra').
    in('ORCHESTRA').
    has('Concert', 'name', 'Salonen & Yo-Yo Ma').
    out('INCLUDES').
    has('Work', 'title', 'Cello Concerto').
    out('SOLOIST').
    hasLabel('Artist').
    values('lastName').
    next() == 'Ma'
// Step 3 - Modify the data to fit the new schema
//
// Demonstrates ability to modify data in the graph
// rather than dropping and starting from scratch
// Start by creating a single Performance for each Concert.
//
// The traversal below walks from every Work ('w') to each Concert ('c')
// that INCLUDES it and, for every such (Work, Concert) pair, adds:
//   - a new Performance vertex ('p') with a 'performanceDate' property
//     whose value comes from values('firstDate'),
//   - a PERFORMED edge from the Work to the Performance, and
//   - an INCLUDES edge from the Concert to the Performance.
// NOTE(review): values('firstDate') presumably resolves against the Concert
// traverser entering addV() rather than the new vertex - confirm against the
// TinkerPop version in use.
// This statement does not drop the existing Concert -> Work INCLUDES edges.
g.V().hasLabel('Work').as('w').in('INCLUDES').
hasLabel('Concert').as('c').
map(addV('Performance').as('p').
property('performanceDate', values('firstDate')).
addE('PERFORMED').from('w').
select('p').addE('INCLUDES').from('c')).iterate()
// Re-home the CONDUCTOR and SOLOIST relationships: for every Performance,
// follow PERFORMED back to its Work, copy each of the Work's outgoing edges
// of that label onto the Performance, then drop the Work-level edge.
g.V().hasLabel('Performance').as('perf').
    in('PERFORMED').
    outE('CONDUCTOR').as('workEdge').
    inV().as('conductor').
    addE('CONDUCTOR').from('perf').
    select('workEdge').drop().
    iterate()
g.V().hasLabel('Performance').as('perf').
    in('PERFORMED').
    outE('SOLOIST').as('workEdge').
    inV().as('solo').
    addE('SOLOIST').from('perf').
    select('workEdge').drop().
    iterate()
// Last step: link each Performance to its Orchestra. The Orchestra is found
// via Performance <- Work <- Concert -> Orchestra. The existing
// Concert -> Orchestra edge is left in place for this model.
g.V().hasLabel('Performance').as('perf').
    in('PERFORMED').
    in('INCLUDES').
    out('ORCHESTRA').
    addE('ORCHESTRA').from('perf').
    iterate()
//-----------------------
// Check the Data & Model
//-----------------------
// Simple assert statements confirm the migration.
// Three Performances exist, with three conductor links, one soloist link
// and three orchestra links between them.
assert g.V().hasLabel('Performance').count().next() == 3
assert g.V().hasLabel('Performance').
    out('CONDUCTOR').hasLabel('Artist').
    count().next() == 3
assert g.V().hasLabel('Performance').
    out('SOLOIST').hasLabel('Artist').
    count().next() == 1
assert g.V().hasLabel('Performance').
    out('ORCHESTRA').hasLabel('Orchestra').
    count().next() == 3
// Works no longer have outgoing CONDUCTOR, SOLOIST or ORCHESTRA edges.
assert g.V().hasLabel('Work').outE('CONDUCTOR').count().next() == 0
assert g.V().hasLabel('Work').outE('SOLOIST').count().next() == 0
assert g.V().hasLabel('Work').outE('ORCHESTRA').count().next() == 0
//-----------------------
// Sample Queries
//-----------------------
// Each Performance should now reach its Conductor, Orchestra and Soloist
// vertices through edges with the matching labels:
g.V().hasLabel('Performance').
    outE().inV().
    path().by(label)
// ==>[Performance,CONDUCTOR,Artist]
// ==>[Performance,ORCHESTRA,Orchestra]
// ==>[Performance,SOLOIST,Artist]
// ==>[Performance,CONDUCTOR,Artist]
// ==>[Performance,ORCHESTRA,Orchestra]
// ==>[Performance,CONDUCTOR,Artist]
// ==>[Performance,ORCHESTRA,Orchestra]
// A Work, by contrast, should link out only to its composing Artist
// and to the specific Performances of that Work.
g.V().hasLabel('Work').
    outE().inV().
    path().by(label)
// ==>[Work,COMPOSER,Artist]
// ==>[Work,PERFORMED,Performance]
// ==>[Work,COMPOSER,Artist]
// ==>[Work,PERFORMED,Performance]
// ==>[Work,COMPOSER,Artist]
// ==>[Work,PERFORMED,Performance]
// Find composers who have conducted a performance of their own work.
g.V().hasLabel('Artist').as('a').
    in('COMPOSER').
    out('PERFORMED').
    out('CONDUCTOR').
    where(eq('a')).
    values('lastName')
// ==>Salonen
// The same question, walked edge by edge so the full path
// (including the performance details) can be shown.
g.V().hasLabel('Artist').as('a').
    inE('COMPOSER').outV().
    outE('PERFORMED').inV().
    outE('CONDUCTOR').inV().
    where(eq('a')).
    path().
        by('lastName').by(label).
        by('title').by(label).
        by('performanceDate').by(label).
        by('lastName')
// ==>[Salonen,COMPOSER,Cello Concerto,
// PERFORMED,3/9/2017,CONDUCTOR,Salonen]
// We can also home in even more closely on what Esa-Pekka Salonen has been doing
// - for example, which orchestras has he conducted?
g.V().has('Artist', 'lastName', 'Salonen').
    in('CONDUCTOR').
    out('ORCHESTRA').
    values('name')
// ==>New York Philharmonic
// ==>Chicago Symphony Orchestra
// Step 2 - Modify the schema
//
// Modify the existing schema to add the Performance vertex
// and associated edges & connections.
// The Step 3 data migration uses the Performance label, the performanceDate
// key and the PERFORMED edge label defined here.
//
// Schema changes need a new Management API transaction.
// We assume we are working with our existing graph instance,
// i.e. the one the initial schema was already committed to.
mgmt = graph.openManagement()

// New vertex label and its property
Performance = mgmt.makeVertexLabel('Performance').make()
performanceDate = mgmt.makePropertyKey('performanceDate').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Performance, performanceDate)

// New edge label: Work -> Performance
PERFORMED = mgmt.makeEdgeLabel('PERFORMED').multiplicity(SIMPLE).make()

// Labels from the initial schema are looked up again through this new
// management transaction before they are used in addConnection().
Orchestra = mgmt.getVertexLabel('Orchestra')
Artist = mgmt.getVertexLabel('Artist')
Work = mgmt.getVertexLabel('Work')
Concert = mgmt.getVertexLabel('Concert')
SOLOIST = mgmt.getEdgeLabel('SOLOIST')
CONDUCTOR = mgmt.getEdgeLabel('CONDUCTOR')
ORCHESTRA = mgmt.getEdgeLabel('ORCHESTRA')
INCLUDES = mgmt.getEdgeLabel('INCLUDES')

// Connections for the new model, as (EdgeLabel, VertexLabel out, VertexLabel in)
mgmt.addConnection(SOLOIST, Performance, Artist)
mgmt.addConnection(CONDUCTOR, Performance, Artist)
mgmt.addConnection(ORCHESTRA, Performance, Orchestra)
// ORCHESTRA: Concert -> Orchestra stays part of the model, but the initial
// schema already declared that connection, so it is not added a second time.
mgmt.addConnection(INCLUDES, Concert, Performance)
mgmt.addConnection(PERFORMED, Work, Performance)
// NOTE(review): the Work-level SOLOIST/CONDUCTOR connections from the initial
// schema are not touched here - confirm whether they should remain declared.
mgmt.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment