Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NicMcPhee/9acc7e841130be22c6cc2254d8bf964e to your computer and use it in GitHub Desktop.
Save NicMcPhee/9acc7e841130be22c6cc2254d8bf964e to your computer and use it in GitHub Desktop.
// Clear the DB for a clean start: delete every node together with its relationships.
MATCH (n) DETACH DELETE n;
// Each Individual is identified by its uuid.
CREATE CONSTRAINT ON (i:Individual) ASSERT i.uuid IS UNIQUE;
// One Errors node per distinct error vector. Property keys are case-sensitive and
// the loader stores the vector under `errors_vector`, so the constraint has to
// name exactly that key (it previously named `Errors_vector`, which is never set).
// NOTE(review): with the constraint now effective, the load will fail if two CSV
// rows share an error vector but differ in total-error -- confirm the data never does.
CREATE CONSTRAINT ON (e:Errors) ASSERT e.errors_vector IS UNIQUE;
// Indexes for the generation and total_error lookups used by the queries.
CREATE INDEX ON :Individual(generation);
CREATE INDEX ON :Errors(total_error);
// Pass 1 over the CSV: create one Individual per row and attach it to an Errors
// node. MERGE reuses an existing Errors node when both the error vector and the
// total error match, so individuals with identical errors share a node.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS
FROM 'http://facultypages.morris.umn.edu/~mcphee/Research/GECCO2016_tutorial/push_regression_run_2.csv' AS row
WITH row,
     [
       toInt(row.TC0), toInt(row.TC1), toInt(row.TC2), toInt(row.TC3), toInt(row.TC4),
       toInt(row.TC5), toInt(row.TC6), toInt(row.TC7), toInt(row.TC8), toInt(row.TC9)
     ] AS test_case_errors
CREATE (ind:Individual {uuid: row.uuid})
SET ind.generation        = toInt(row.generation),
    ind.location          = toInt(row.location),
    ind.plush_genome_size = toInt(row.`plush-genome-size`),
    ind.push_program_size = toInt(row.`push-program-size`),
    ind.plush_genome      = row.`plush-genome`
MERGE (err:Errors {errors_vector: test_case_errors, total_error: toInt(row.`total-error`)})
CREATE (ind)-[:HAS]->(err);
// Pass 2 over the same CSV: connect every individual to its parents. The
// `parent-uuids` column is split on single spaces; each resulting uuid that
// matches an existing Individual gets a PARENT_OF edge to the row's individual,
// tagged with the row's `genetic-operators` value.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS
FROM 'http://facultypages.morris.umn.edu/~mcphee/Research/GECCO2016_tutorial/push_regression_run_2.csv' AS row
MATCH (kid:Individual {uuid: row.uuid})
UNWIND split(row.`parent-uuids`, ' ') AS pid
MATCH (progenitor:Individual {uuid: pid})
CREATE (progenitor)-[link:PARENT_OF]->(kid)
SET link.genetic_operator = row.`genetic-operators`;
// Step 1: every individual starts with a selection count of zero, so that
// individuals that never became a parent still carry the property.
MATCH (ind:Individual)
SET ind.num_selections = 0;
// Step 2: for individuals that do have children, overwrite the count with the
// number of outgoing PARENT_OF relationships (one per time it was used as a parent).
MATCH (p:Individual)-[:PARENT_OF]->(:Individual)
WITH p, count(*) AS times_selected
SET p.num_selections = times_selected;
////////////////////////////
// Done setting up the DB //
////////////////////////////
// Now some queries! //
////////////////////////////
// Open with the five generation graph nicely laid out.
// Then find out how many nodes and edges there are.
// Then show the Schema diagram
// Were there winners? A winner is an individual whose Errors node has total_error 0.
MATCH (winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
RETURN DISTINCT winner;
// Click open a few nodes to show how we can explore in the GUI
// What do the last two generations look like?
// Returns each winner (total_error 0) paired with each of its direct parents.
MATCH (parent:Individual)-[:PARENT_OF]->(winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
RETURN DISTINCT winner, parent;
// What do the errors look like in the last five generations?
// Follows 0 to 4 PARENT_OF hops back from each winner (0 hops = the winner
// itself) and returns every individual reached together with its Errors node.
MATCH (winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
MATCH (winner)<-[:PARENT_OF*0..4]-(ancestor:Individual)-[:HAS]->(errors:Errors)
RETURN DISTINCT ancestor, errors;
// How many distinct ancestors were there in the first generation?
// Walks exactly 39 PARENT_OF hops back from each winner; that lands in
// generation 0 only when the winner sits in generation 39 -- TODO confirm
// before reusing this on a run of a different length.
MATCH (winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
MATCH (ancestor:Individual)-[:PARENT_OF*39..39]->(winner)
// Collapse the (many) paths down to the distinct ancestors before looking up errors.
WITH DISTINCT ancestor
MATCH (ancestor)-[:HAS]->(errors:Errors)
RETURN DISTINCT ancestor.uuid, ancestor.num_selections, errors
ORDER BY ancestor.num_selections DESC;
// How many selections were there in the first generation?
// Adds up num_selections over all individuals with generation 0.
MATCH (n:Individual)
WHERE n.generation = 0
RETURN SUM(n.num_selections);
// What was the average number of selections?
// Generation 39 (the last one) is left out since no selections were made there.
MATCH (n:Individual)
WHERE 39 > n.generation
RETURN AVG(n.num_selections);
// How many 10% hyperselections were there?
// Uses 14 as the cutoff: lists every individual selected more than 14 times,
// with its generation and Errors node, most-selected first.
MATCH (n:Individual)-[:HAS]->(errors:Errors)
WHERE n.num_selections > 14
RETURN n.uuid, n.num_selections, n.generation, errors
ORDER BY n.num_selections DESC;
// How often is there no change in errors from parent to child?
// Counts distinct (parent, child) uuid pairs where both individuals point at
// the very same Errors node.
MATCH (parent:Individual)-[:PARENT_OF]->(child:Individual)
MATCH (child)-[:HAS]->(e:Errors)
WHERE (parent)-[:HAS]->(e)
RETURN COUNT(DISTINCT [parent.uuid, child.uuid]);
///////////////////////////////////////////////////
// How many distinct errors were there?
// Errors nodes are shared between individuals, so counting nodes counts
// distinct (errors_vector, total_error) combinations.
MATCH (errors:Errors)
RETURN count(*);
// What errors had total error < 100?
// Listed from lowest to highest total error.
MATCH (errors:Errors)
WHERE 100 > errors.total_error
RETURN errors
ORDER BY errors.total_error ASC;
// How many individuals had those errors & when were they introduced?
// For each Errors node with total error below 100: the earliest generation of
// any individual that has it, and the number of individuals that have it.
MATCH (n:Individual)-[:HAS]->(errors:Errors)
WHERE errors.total_error < 100
RETURN errors, MIN(n.generation), count(n)
ORDER BY errors.total_error;
// What's the maximum number of selections in each generation?
// Covers generations 0 through 38. For each one, first find the highest
// num_selections, then fetch the Errors of every individual in that generation
// that reached it (ties produce several rows for the same generation).
UNWIND range(0, 38) AS gen
MATCH (n:Individual)
WHERE n.generation = gen
WITH gen, max(n.num_selections) AS max_selections
MATCH (most_selected:Individual)-[:HAS]->(errors:Errors)
WHERE most_selected.generation = gen
  AND most_selected.num_selections = max_selections
RETURN gen, max_selections, errors
ORDER BY gen;
// How often do things get worse before (immediately) getting better?
// Looks at grandparent -> parent -> child chains where the parent's total error
// is higher than the grandparent's, yet the child's is lower than the grandparent's.
MATCH (grandparent:Individual)-[:PARENT_OF]->(parent:Individual)
MATCH (parent)-[:PARENT_OF]->(child:Individual)
MATCH (grandparent)-[:HAS]->(gpe:Errors)
MATCH (parent)-[:HAS]->(pe:Errors)
MATCH (child)-[:HAS]->(ce:Errors)
WHERE gpe.total_error < pe.total_error
  AND ce.total_error < gpe.total_error
RETURN DISTINCT grandparent.generation, gpe.total_error, pe.total_error, ce.total_error
ORDER BY grandparent.generation;
// Find the errors that eventually led to a success.
// Starts from each winner (total_error 0), follows 0 to 40 PARENT_OF hops back
// (0 hops = the winner itself), and reports each ancestor generation together
// with the Errors nodes seen in it.
// HAS is matched in its stored direction (Individual -> Errors), as in the
// other queries of this script, instead of as an undirected pattern.
MATCH (w:Individual)-[:HAS]->(:Errors {total_error: 0})
MATCH (n:Individual)-[:PARENT_OF*0..40]->(w)
MATCH (n)-[:HAS]->(e:Errors)
RETURN DISTINCT n.generation, e
ORDER BY n.generation;
// Count how many times each errors appeared in a winner ancestry.
// For every Errors node reached from a winner's ancestry (0 to 40 PARENT_OF
// hops back, winner included), counts the distinct ancestors carrying it.
// HAS is matched in its stored direction (Individual -> Errors), as in the
// other queries of this script. The aggregate already groups by `e`, so no
// extra DISTINCT is needed on the returned rows.
MATCH (w:Individual)-[:HAS]->(:Errors {total_error: 0})
MATCH (n:Individual)-[:PARENT_OF*0..40]->(w)
MATCH (n)-[:HAS]->(e:Errors)
RETURN e, COUNT(DISTINCT n)
ORDER BY COUNT(DISTINCT n) DESC;
// How many distinct ancestors did the winner(s) have?
// Counts the distinct individuals within 0 to 40 PARENT_OF hops back from any
// winner; the 0-hop case means the winners themselves are included in the count.
// HAS is matched in its stored direction (Individual -> Errors), as in the
// other queries of this script.
MATCH (w:Individual)-[:HAS]->(:Errors {total_error: 0})
MATCH (n:Individual)-[:PARENT_OF*0..40]->(w)
RETURN COUNT(DISTINCT n);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment