Skip to content

Instantly share code, notes, and snippets.

@andry1
Created June 26, 2015 17:12
Show Gist options
  • Save andry1/deda62de41f5dd6d1e66 to your computer and use it in GitHub Desktop.
Save andry1/deda62de41f5dd6d1e66 to your computer and use it in GitHub Desktop.
#!/usr/bin/env groovy
//@Grab('com.tinkerpop.blueprints:blueprints-core:2.6.0')
//import com.tinkerpop.blueprints.impls.tg.TinkerGraph
@Grab('com.thinkaurelius.titan:titan-all:0.5.4')
import com.thinkaurelius.titan.core.TitanFactory
import com.tinkerpop.blueprints.Vertex
@Grab('com.xlson.groovycsv:groovycsv:1.0')
import static com.xlson.groovycsv.CsvParser.parseCsv
import com.tinkerpop.blueprints.util.io.graphson.GraphSONWriter
import com.tinkerpop.blueprints.util.io.graphson.GraphSONMode
import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter
import groovy.json.JsonSlurper
// Shared JSON parser; used further down to decode the JSON-valued CSV columns.
def jsonSlurper = new JsonSlurper()

// Open the graph described by the local properties file and start a schema
// management transaction. `g` is intentionally left undeclared so that it
// lives in the script binding and stays reachable for the rest of the script.
g = TitanFactory.open('./titan-cassandra-es.properties')
mgmt = g.getManagementSystem()

// Small factories so that each schema element reads as a single declaration.
def declareVertexLabel = { String labelName ->
    mgmt.makeVertexLabel(labelName).make()
}
def declarePropertyKey = { String keyName, Class valueType ->
    mgmt.makePropertyKey(keyName).dataType(valueType).make()
}

// Vertex labels.
id = declareVertexLabel('ID')
segment = declareVertexLabel('Segment')

// Property keys used on vertices (idType, identifier, name) and on
// linkedBy edges (confidence, provider).
type = declarePropertyKey('idType', String)
identifier = declarePropertyKey('identifier', String)
name = declarePropertyKey('name', String)
confidence = declarePropertyKey('confidence', Double)
provider = declarePropertyKey('provider', String)

// Edge labels; linkedBy declares provider and confidence in its signature.
memberOf = mgmt.makeEdgeLabel('memberOf').make()
linkedBy = mgmt.makeEdgeLabel('linkedBy')
        .signature(provider, confidence)
        .make()
belongsTo = mgmt.makeEdgeLabel('belongsTo').make()

// Composite indexes. The two identifier-based ones are unique and each index
// is restricted to a single vertex label.
mgmt.buildIndex('segmentsByName', Vertex.class)
        .addKey(name)
        .indexOnly(segment)
        .buildCompositeIndex()
mgmt.buildIndex('segmentsByIdentifier', Vertex.class)
        .addKey(identifier)
        .unique()
        .indexOnly(segment)
        .buildCompositeIndex()
mgmt.buildIndex('idsByTypeAndIdentifier', Vertex.class)
        .addKey(type)
        .addKey(identifier)
        .unique()
        .indexOnly(id)
        .buildCompositeIndex()
//mgmt.buildEdgeIndex(linkedBy, 'linkedByConfidenceAndProvider', Direction.OUT, Order.DESC, confidence, provider)

// Persist the schema before any data is loaded.
mgmt.commit()
// Segment vertices created so far, indexed by numeric segment id.
def segmentVertices = []
// Segment names indexed by numeric segment id; filled from segments.csv below.
def segments = []
// Next value handed out as the identifier of a 'Rosetta' ID vertex.
def rosetta_id = 1

// Read segments.csv and remember each row's name under its integer id.
new File("segments.csv").withReader { csvReader ->
    parseCsv(csvReader).each { row ->
        segments[row.id.toInteger()] = row.name
    }
}
// Load query_result.csv and build, for every row:
//   * an 'ID' vertex for the cookie (idType 'CollectiveCookie'),
//   * a fresh 'ID' vertex of idType 'Rosetta' that the cookie belongsTo,
//   * one 'ID' vertex per entry of association_ids, linked from the cookie by
//     a linkedBy edge carrying provider and confidence,
//   * a memberOf edge from the cookie to each referenced 'Segment' vertex
//     (segment vertices are created on first use and cached in segmentVertices).
//
// The 'identifier' property key is declared as String in the schema, so every
// identifier is coerced to String explicitly instead of handing Integers or
// raw JSON values to the graph.
//
// NOTE(review): the 'idsByTypeAndIdentifier' index is unique; if the same
// (idType, identifier) pair can appear on more than one row the insert will
// presumably be rejected - confirm the input is de-duplicated.
new File("query_result.csv").withReader { reader ->
    for (line in parseCsv(reader)) {
        // Both columns hold JSON. association_ids is iterated as key/value
        // pairs below; the id column is iterated as a list of segment ids and
        // used as list indexes (assumed to be integers - TODO confirm).
        def segmentIds = jsonSlurper.parseText(line.id)
        def assocIds = jsonSlurper.parseText(line.association_ids)

        def cookieVertex = g.addVertexWithLabel('ID')
        cookieVertex.setProperty('idType', 'CollectiveCookie')
        cookieVertex.setProperty('identifier', line.cookie_id)

        def rosettaVertex = g.addVertexWithLabel('ID')
        rosettaVertex.setProperty('idType', 'Rosetta')
        // rosetta_id is an Integer counter; the property key is a String.
        rosettaVertex.setProperty('identifier', rosetta_id as String)
        rosetta_id += 1

        println("Linking $cookieVertex -> $rosettaVertex")
        cookieVertex.addEdge('belongsTo', rosettaVertex)

        assocIds.each { net, assocId ->
            def assocVertex = g.addVertexWithLabel('ID')
            assocVertex.setProperty('idType', net)
            assocVertex.setProperty('identifier', assocId as String)

            def link = cookieVertex.addEdge('linkedBy', assocVertex)
            link.setProperty('provider', net)
            // Placeholder score: a random value in [0, 1).
            link.setProperty('confidence', Math.random())
        }

        for (segmentId in segmentIds) {
            if (!segmentVertices[segmentId]) {
                def created = g.addVertexWithLabel('Segment')
                created.setProperty('identifier', segmentId as String)
                // A segment id that is missing from segments.csv has no name;
                // skip the property rather than passing null to the graph.
                def segmentName = segments[segmentId]
                if (segmentName != null) {
                    created.setProperty('name', segmentName)
                }
                segmentVertices[segmentId] = created
            }
            cookieVertex.addEdge('memberOf', segmentVertices[segmentId])
        }
    }
}
// Commit the loaded data, run the sample "bk.nc" segment query, and only then
// close the graph. The graph was previously shut down before the query ran,
// so the query was issued against a closed graph; shutdown now happens last,
// in a finally block so the graph is released even if a step above fails.
try {
    g.commit()
    //GraphSONWriter.outputGraph(g,'idgraph.json', GraphSONMode.COMPACT)
    //GraphMLWriter.outputGraph(g, 'idgraph.graphml',
    // [idType: 'String', identifier: 'String', name: 'String'], [confidence: 'double', provider: 'string'])

    // Accumulator passed to groupCount below (segment name -> count).
    m = [:]
    // First 'Segment' vertex whose name is 'bk.nc'.
    bk_nc = g.query().has('name','bk.nc').has('label','Segment').vertices().get(0)
    // NOTE(review): as/in/out/back/groupCount/cap are Gremlin-Groovy pipeline
    // steps; confirm they are available when this runs as a plain Groovy
    // script rather than inside the Gremlin console.
    // Cookies that are members of bk.nc and have a linkedBy edge to a 'ddp_us' ID.
    bk_nc_members = bk_nc.as('x').in('memberOf').has('idType','CollectiveCookie').as('c').out('linkedBy').has('idType','ddp_us').back('c')
    // Count, per segment name, the segments those cookies are members of.
    bk_nc_members.query().out('memberOf').has('label','Segment').groupCount(m) { it.name }.cap
    //m.sort{a,b -> b.value <=> a.value}[0..10]
} finally {
    g.shutdown()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment