Last active
April 2, 2019 19:06
-
-
Save ryanpstauffer/b5a667fe05011e1a2c437429efa5a0fc to your computer and use it in GitHub Desktop.
Evolving Data Models with JanusGraph
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SymphonyGraph Initial Schema
//
// Declares the vertex labels, edge labels, property keys and the allowed
// property/connection combinations of the starting data model.
// Expects an already opened JanusGraph instance bound to `graph`.
mgmt = graph.openManagement()

// ---- Vertex labels ----
Orchestra = mgmt.makeVertexLabel('Orchestra').make()
Artist = mgmt.makeVertexLabel('Artist').make()
Work = mgmt.makeVertexLabel('Work').make()
Concert = mgmt.makeVertexLabel('Concert').make()

// ---- Edge labels: the relationships between vertices ----
// COMPOSER is MANY2ONE; every other relationship is SIMPLE.
COMPOSER = mgmt.makeEdgeLabel('COMPOSER').multiplicity(MANY2ONE).make()
SOLOIST = mgmt.makeEdgeLabel('SOLOIST').multiplicity(SIMPLE).make()
CONDUCTOR = mgmt.makeEdgeLabel('CONDUCTOR').multiplicity(SIMPLE).make()
ORCHESTRA = mgmt.makeEdgeLabel('ORCHESTRA').multiplicity(SIMPLE).make()
INCLUDES = mgmt.makeEdgeLabel('INCLUDES').multiplicity(SIMPLE).make()

// ---- Vertex property keys (all single-valued) ----
// Orchestra
name = mgmt.makePropertyKey('name').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Orchestra, name)

// Artist
lastName = mgmt.makePropertyKey('lastName').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
firstName = mgmt.makePropertyKey('firstName').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
gender = mgmt.makePropertyKey('gender').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
nationality = mgmt.makePropertyKey('nationality').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
deceased = mgmt.makePropertyKey('deceased').
    dataType(Boolean.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Artist, lastName, firstName, gender, nationality, deceased)

// Work
title = mgmt.makePropertyKey('title').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
// The variable is named after the key it holds ('compositionYear', an Integer).
compositionYear = mgmt.makePropertyKey('compositionYear').
    dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
soloInstrument = mgmt.makePropertyKey('soloInstrument').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Work, title, compositionYear, soloInstrument)

// Concert (shares the 'name' key with Orchestra)
firstDate = mgmt.makePropertyKey('firstDate').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
numShows = mgmt.makePropertyKey('numShows').
    dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Concert, name, firstDate, numShows)

// ---- Connections, given as (EdgeLabel, outgoing VertexLabel, incoming VertexLabel) ----
// NOTE(review): these property/connection declarations are presumably only
// enforced when the graph is opened with schema constraints enabled - confirm.
mgmt.addConnection(COMPOSER, Work, Artist)
mgmt.addConnection(SOLOIST, Work, Artist)
mgmt.addConnection(CONDUCTOR, Work, Artist)
mgmt.addConnection(ORCHESTRA, Concert, Orchestra)
mgmt.addConnection(INCLUDES, Concert, Work)

mgmt.commit()

// No indices are built on the graph properties: the sample dataset is
// tiny, and indexing is outside the scope of this discussion.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Step 1 - Initial Schema Example - Setup
//
// Launch from the command line with a Gremlin Console:
//   $ bin/gremlin -i InitialSetup.groovy
//
// All testing runs against an in-memory graph. That is enough for the
// data modelling operations used here and keeps the setup simple,
// because no external storage backend is required.
graph = JanusGraphFactory.build().set('storage.backend', 'inmemory').open()
//-----------------------
// Load the Initial Schema
//-----------------------
// Declares the vertex labels, edge labels, property keys and the allowed
// property/connection combinations on the instance bound to `graph`.
mgmt = graph.openManagement()

// ---- Vertex labels ----
Orchestra = mgmt.makeVertexLabel('Orchestra').make()
Artist = mgmt.makeVertexLabel('Artist').make()
Work = mgmt.makeVertexLabel('Work').make()
Concert = mgmt.makeVertexLabel('Concert').make()

// ---- Edge labels: the relationships between vertices ----
// COMPOSER is MANY2ONE; every other relationship is SIMPLE.
COMPOSER = mgmt.makeEdgeLabel('COMPOSER').multiplicity(MANY2ONE).make()
SOLOIST = mgmt.makeEdgeLabel('SOLOIST').multiplicity(SIMPLE).make()
CONDUCTOR = mgmt.makeEdgeLabel('CONDUCTOR').multiplicity(SIMPLE).make()
ORCHESTRA = mgmt.makeEdgeLabel('ORCHESTRA').multiplicity(SIMPLE).make()
INCLUDES = mgmt.makeEdgeLabel('INCLUDES').multiplicity(SIMPLE).make()

// ---- Vertex property keys (all single-valued) ----
// Orchestra
name = mgmt.makePropertyKey('name').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Orchestra, name)

// Artist
lastName = mgmt.makePropertyKey('lastName').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
firstName = mgmt.makePropertyKey('firstName').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
gender = mgmt.makePropertyKey('gender').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
nationality = mgmt.makePropertyKey('nationality').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
deceased = mgmt.makePropertyKey('deceased').
    dataType(Boolean.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Artist, lastName, firstName, gender, nationality, deceased)

// Work
title = mgmt.makePropertyKey('title').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
// The variable is named after the key it holds ('compositionYear', an Integer).
compositionYear = mgmt.makePropertyKey('compositionYear').
    dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
soloInstrument = mgmt.makePropertyKey('soloInstrument').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Work, title, compositionYear, soloInstrument)

// Concert (shares the 'name' key with Orchestra)
firstDate = mgmt.makePropertyKey('firstDate').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
numShows = mgmt.makePropertyKey('numShows').
    dataType(Integer.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Concert, name, firstDate, numShows)

// ---- Connections, given as (EdgeLabel, outgoing VertexLabel, incoming VertexLabel) ----
// NOTE(review): these property/connection declarations are presumably only
// enforced when the graph is opened with schema constraints enabled - confirm.
mgmt.addConnection(COMPOSER, Work, Artist)
mgmt.addConnection(SOLOIST, Work, Artist)
mgmt.addConnection(CONDUCTOR, Work, Artist)
mgmt.addConnection(ORCHESTRA, Concert, Orchestra)
mgmt.addConnection(INCLUDES, Concert, Work)

mgmt.commit()
//-----------------------
// Add Sample Data
//-----------------------
// Populates the graph with a small, hand-written dataset. Vertex-creating
// traversals end in next() because the returned vertex is kept in a variable
// for wiring up edges; edge-creating traversals end in iterate() because only
// their side effect is needed.
g = graph.traversal()

// Make 2 Orchestras
nyPhil = g.addV('Orchestra').property('name', 'New York Philharmonic').next()
cso = g.addV('Orchestra').property('name', 'Chicago Symphony Orchestra').next()

// Make 4 Artists (optional properties are simply left unset)
salonen = g.addV('Artist').
    property('lastName', 'Salonen').
    property('firstName', 'Esa-Pekka').
    property('gender', 'Male').
    property('nationality', 'Finnish').next()
strauss = g.addV('Artist').
    property('lastName', 'Strauss').
    property('firstName', 'Richard').
    property('gender', 'Male').
    property('nationality', 'German').
    property('deceased', true).next()
gilbert = g.addV('Artist').
    property('lastName', 'Gilbert').
    property('firstName', 'Alan').next()
ma = g.addV('Artist').
    property('lastName', 'Ma').
    property('firstName', 'Yo-Yo').next()

// Make 3 Works
alsoSprach = g.addV('Work').
    property('title', 'Also sprach Zarathustra').
    property('compositionYear', 1896).next()
wing = g.addV('Work').
    property('title', 'Wing on Wing').
    property('compositionYear', 2004).next()
celloConcerto = g.addV('Work').
    property('title', 'Cello Concerto').
    property('compositionYear', 2017).
    property('soloInstrument', 'cello').next()

// Make 3 Concerts
concert1 = g.addV('Concert').
    property('name', 'Esa-Pekka Salonen Conducts US Premiere by Tansy Davies').
    property('firstDate', '4/27/2017').
    property('numShows', 3).next()
concert2 = g.addV('Concert').
    property('name', 'Premieres by Esa-Pekka Salonen and Anna Thorvaldsdottir').
    property('firstDate', '5/19/2017').
    property('numShows', 3).next()
concert3 = g.addV('Concert').
    property('name', 'Salonen & Yo-Yo Ma').
    property('firstDate', '3/9/2017').
    property('numShows', 3).next()

// Add relationships
// INCLUDES: Concert -> Work
g.addE('INCLUDES').from(concert1).to(alsoSprach).iterate()
g.addE('INCLUDES').from(concert2).to(wing).iterate()
g.addE('INCLUDES').from(concert3).to(celloConcerto).iterate()

// ORCHESTRA: Concert -> Orchestra
g.addE('ORCHESTRA').from(concert1).to(nyPhil).iterate()
g.addE('ORCHESTRA').from(concert2).to(nyPhil).iterate()
g.addE('ORCHESTRA').from(concert3).to(cso).iterate()

// COMPOSER: Work -> Artist
g.addE('COMPOSER').from(celloConcerto).to(salonen).iterate()
g.addE('COMPOSER').from(alsoSprach).to(strauss).iterate()
g.addE('COMPOSER').from(wing).to(salonen).iterate()

// CONDUCTOR: Work -> Artist
g.addE('CONDUCTOR').from(celloConcerto).to(salonen).iterate()
g.addE('CONDUCTOR').from(alsoSprach).to(salonen).iterate()
g.addE('CONDUCTOR').from(wing).to(gilbert).iterate()

// SOLOIST: Work -> Artist
g.addE('SOLOIST').from(celloConcerto).to(ma).iterate()

// Persist everything added above.
g.tx().commit()
// A few simple asserts sanity-check the loaded data model:
// the vertex counts per label, and a walk from the Chicago Symphony
// Orchestra through its concert and work to the soloist.
assert g.V().hasLabel('Artist').count().next() == 4
assert g.V().hasLabel('Concert').count().next() == 3
assert g.V().has('Orchestra', 'name', 'Chicago Symphony Orchestra').
    in('ORCHESTRA').has('Concert', 'name', 'Salonen & Yo-Yo Ma').
    out('INCLUDES').has('Work', 'title', 'Cello Concerto').
    out('SOLOIST').hasLabel('Artist').values('lastName').next() == 'Ma'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Step 3 - Modify the data to fit the new schema
//
// Demonstrates the ability to modify data in the graph in place
// rather than dropping it and starting from scratch.
// Expects the traversal source `g` from the setup script and the
// Performance schema additions from Step 2.

// Start by creating a single Performance for each Concert.
// For every (Work, Concert) pair joined by INCLUDES, a Performance vertex is
// added whose performanceDate is taken from the Concert's firstDate; it is
// then linked as Work -PERFORMED-> Performance and Concert -INCLUDES-> Performance.
g.V().hasLabel('Work').as('w').in('INCLUDES').
    hasLabel('Concert').as('c').
    map(addV('Performance').as('p').
        property('performanceDate', values('firstDate')).
        addE('PERFORMED').from('w').
        select('p').addE('INCLUDES').from('c')).iterate()

// Connect the conductor and soloist Artists to each Performance
// and remove their connections from each Work.
// The old Work -> Artist edge is labelled 'OLD' so it can be dropped once the
// replacement Performance -> Artist edge has been added.
g.V().hasLabel('Performance').as('p').in('PERFORMED').
    outE('CONDUCTOR').as('OLD').inV().as('cond').
    addE('CONDUCTOR').from('p').
    select('OLD').drop().iterate()
g.V().hasLabel('Performance').as('p').in('PERFORMED').
    outE('SOLOIST').as('OLD').inV().as('soloist').
    addE('SOLOIST').from('p').
    select('OLD').drop().iterate()

// Finally, connect the Orchestra to each individual Performance.
// For this model, we're also keeping the existing connection
// between the Orchestra and the Concert.
g.V().hasLabel('Performance').as('p').in('PERFORMED').
    in('INCLUDES').out('ORCHESTRA').
    addE('ORCHESTRA').from('p').iterate()

// Commit the migration, mirroring the commit that follows the initial data
// load; without it the changes above stay in an open transaction.
g.tx().commit()
//-----------------------
// Check the Data & Model
//-----------------------
// A handful of assert statements confirm the migration.

// Exactly 3 Performances exist; between them they reach
// 3 conductors, 1 soloist and 3 orchestras.
assert g.V().hasLabel('Performance').count().next() == 3
assert g.V().hasLabel('Performance').
    out('CONDUCTOR').hasLabel('Artist').count().next() == 3
assert g.V().hasLabel('Performance').
    out('SOLOIST').hasLabel('Artist').count().next() == 1
assert g.V().hasLabel('Performance').
    out('ORCHESTRA').hasLabel('Orchestra').count().next() == 3

// Works no longer carry any direct CONDUCTOR, SOLOIST or ORCHESTRA edges.
assert g.V().hasLabel('Work').outE('CONDUCTOR').count().next() == 0
assert g.V().hasLabel('Work').outE('SOLOIST').count().next() == 0
assert g.V().hasLabel('Work').outE('ORCHESTRA').count().next() == 0
//-----------------------
// Sample Queries
//-----------------------

// Every Performance should now reach its Conductor, Orchestra and
// (where present) Soloist through the correspondingly labelled edges:
g.V().hasLabel('Performance').
    outE().inV().
    path().by(label)
// ==>[Performance,CONDUCTOR,Artist]
// ==>[Performance,ORCHESTRA,Orchestra]
// ==>[Performance,SOLOIST,Artist]
// ==>[Performance,CONDUCTOR,Artist]
// ==>[Performance,ORCHESTRA,Orchestra]
// ==>[Performance,CONDUCTOR,Artist]
// ==>[Performance,ORCHESTRA,Orchestra]

// Works, by contrast, should only point at their composing Artist
// and at the specific Performances of the Work.
g.V().hasLabel('Work').
    outE().inV().
    path().by(label)
// ==>[Work,COMPOSER,Artist]
// ==>[Work,PERFORMED,Performance]
// ==>[Work,COMPOSER,Artist]
// ==>[Work,PERFORMED,Performance]
// ==>[Work,COMPOSER,Artist]
// ==>[Work,PERFORMED,Performance]

// Finding composers who have conducted their own works is now easy:
// walk Artist <- Work -> Performance -> Artist and keep the loops.
g.V().hasLabel('Artist').as('a').
    in('COMPOSER').
    out('PERFORMED').
    out('CONDUCTOR').where(eq('a')).
    values('lastName')
// ==>Salonen

// The same walk, spelled out edge by edge so the full path
// (including the performance details) can be displayed.
g.V().hasLabel('Artist').as('a').
    inE('COMPOSER').outV().
    outE('PERFORMED').inV().
    outE('CONDUCTOR').inV().where(eq('a')).
    path().
        by('lastName').by(label).
        by('title').by(label).
        by('performanceDate').by(label).
        by('lastName')
// ==>[Salonen,COMPOSER,Cello Concerto,
//     PERFORMED,3/9/2017,CONDUCTOR,Salonen]

// We can also home in even more closely on what Esa-Pekka Salonen has
// been doing - for example, which orchestras has he conducted?
g.V().has('Artist', 'lastName', 'Salonen').
    in('CONDUCTOR').
    out('ORCHESTRA').
    values('name')
// ==>New York Philharmonic
// ==>Chicago Symphony Orchestra
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Step 2 - Modify the schema
//
// Extends the existing schema with the Performance vertex label
// and its associated property, edge label and connections.
// A new Management API transaction is required; the existing graph
// instance (bound to `graph`) is assumed to still be open.
mgmt = graph.openManagement()

// Look up the schema elements created by the initial schema; they have to be
// fetched again through this management transaction before they can be used.
Orchestra = mgmt.getVertexLabel('Orchestra')
Artist = mgmt.getVertexLabel('Artist')
Work = mgmt.getVertexLabel('Work')
Concert = mgmt.getVertexLabel('Concert')
SOLOIST = mgmt.getEdgeLabel('SOLOIST')
CONDUCTOR = mgmt.getEdgeLabel('CONDUCTOR')
ORCHESTRA = mgmt.getEdgeLabel('ORCHESTRA')
INCLUDES = mgmt.getEdgeLabel('INCLUDES')

// New vertex label
Performance = mgmt.makeVertexLabel('Performance').make()

// New property (a String, matching the Concert 'firstDate' it is copied from)
performanceDate = mgmt.makePropertyKey('performanceDate').
    dataType(String.class).cardinality(Cardinality.SINGLE).make()
mgmt.addProperties(Performance, performanceDate)

// New edge label
PERFORMED = mgmt.makeEdgeLabel('PERFORMED').multiplicity(SIMPLE).make()

// New connections, given as (EdgeLabel, outgoing VertexLabel, incoming VertexLabel).
// The ORCHESTRA Concert -> Orchestra connection is kept in this model, but it
// was already registered by the initial schema, so it is not added again here.
mgmt.addConnection(SOLOIST, Performance, Artist)
mgmt.addConnection(CONDUCTOR, Performance, Artist)
mgmt.addConnection(ORCHESTRA, Performance, Orchestra)
mgmt.addConnection(INCLUDES, Concert, Performance)
mgmt.addConnection(PERFORMED, Work, Performance)

mgmt.commit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment