Skip to content

Instantly share code, notes, and snippets.

@rjurney
Created September 9, 2023 01:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rjurney/8bdd16741d7ec5864eb09386c03ee0c3 to your computer and use it in GitHub Desktop.
Save rjurney/8bdd16741d7ec5864eb09386c03ee0c3 to your computer and use it in GitHub Desktop.
Feature engineering using JanusGraph for Relato's business graph - customer recommender system in 2014
// Use this to start up a session
// Builds a configuration, opens the graph through TitanFactory, and creates the
// traversal source `g` that the queries in this script start from.
conf = new BaseConfiguration()
// Directory for the graph's storage (machine-specific absolute path)
conf.setProperty("storage.directory", "/Users/rjurney/Software/marketing/titan/data")
// Storage backend identifier; NOTE(review): "berkeleyje" presumably selects the
// embedded BerkeleyDB Java Edition store - confirm against the Titan version in use
conf.setProperty("storage.backend", "berkeleyje")
graph = TitanFactory.open(conf)
// Get a graph traverser
g = graph.traversal()
// Various centralities to use as features - JSONize and save
//
// Every traversal below starts from all vertices, walks the listed outgoing edge
// labels in order, and counts how many traversers arrive at each 'domain' value.
// The resulting map is serialized with JsonBuilder.toString() and written to its own
// file under the data directory. The variable naming convention is
// <first hop><second hop>Centrality, i.e. label order == hop order.

// partnership: arrivals over a single outgoing 'partnership' edge
partnershipCentrality = g.V().out('partnership').groupCount().by('domain').next()
partnerPrettyJson = new JsonBuilder(partnershipCentrality).toString()
new File("/Users/rjurney/Software/marketing/titan/data/partner_centrality.json").write(partnerPrettyJson)

// partnership -> partnership
doublePartnershipCentrality = g.V().out('partnership').out('partnership').groupCount().by('domain').next()
doublePartnerPrettyJson = new JsonBuilder(doublePartnershipCentrality).toString()
new File("/Users/rjurney/Software/marketing/titan/data/partner_partner_centrality.json").write(doublePartnerPrettyJson)

// partnership -> competitor
partnerCompetitorCentrality = g.V().out('partnership').out('competitor').groupCount().by('domain').next()
partnerCompetitorPrettyJson = new JsonBuilder(partnerCompetitorCentrality).toString()
new File("/Users/rjurney/Software/marketing/titan/data/partner_competitor_centrality.json").write(partnerCompetitorPrettyJson)

// competitor: arrivals over a single outgoing 'competitor' edge
competitorCentrality = g.V().out('competitor').groupCount().by('domain').next()
competitorPrettyJson = new JsonBuilder(competitorCentrality).toString()
new File("/Users/rjurney/Software/marketing/titan/data/competitor_centrality.json").write(competitorPrettyJson)

// competitor -> customer
competitorCustomerCentrality = g.V().out('competitor').out('customer').groupCount().by('domain').next()
competitorCustomerPrettyJson = new JsonBuilder(competitorCustomerCentrality).toString()
new File("/Users/rjurney/Software/marketing/titan/data/competitor_customer_centrality.json").write(competitorCustomerPrettyJson)

// competitor -> partnership. This is the mirror image of the partnership -> competitor
// feature; walking partnership first here would just reproduce
// partner_competitor_centrality.json under a second file name.
competitorPartnerCentrality = g.V().out('competitor').out('partnership').groupCount().by('domain').next()
competitorPartnerPrettyJson = new JsonBuilder(competitorPartnerCentrality).toString()
new File("/Users/rjurney/Software/marketing/titan/data/competitor_partner_centrality.json").write(competitorPartnerPrettyJson)

// customer -> competitor
customerCompetitorCentrality = g.V().out('customer').out('competitor').groupCount().by('domain').next()
customerCompetitorPrettyJson = new JsonBuilder(customerCompetitorCentrality).toString()
new File("/Users/rjurney/Software/marketing/titan/data/customer_competitor_centrality.json").write(customerCompetitorPrettyJson)
// Simple degree features that were added late; some of them may turn out to be
// equivalent to one another or to features computed elsewhere in this script.
//
// Each query groups all vertices by 'domain' and evaluates the given counting
// traversal for the group; the result is saved as JSON in the data directory.

// Serializes `counts` with JsonBuilder, writes the text to `path`, and returns the
// JSON text so the caller can keep it in its own variable. Assigned without `def`,
// like every other variable in this script.
saveDegreeJson = { counts, String path ->
    def json = new JsonBuilder(counts).toString()
    new File(path).write(json)
    json
}

// All edge labels, both directions
degree = g.V().group().by('domain').by(both().count()).next()
degreeJson = saveDegreeJson(degree, "/Users/rjurney/Software/marketing/titan/data/degree.json")

// All edge labels, incoming edges
inDegree = g.V().group().by('domain').by(inE().count()).next()
inDegreeJson = saveDegreeJson(inDegree, "/Users/rjurney/Software/marketing/titan/data/in_degree.json")

// All edge labels, outgoing direction
outDegree = g.V().group().by('domain').by(out().count()).next()
outDegreeJson = saveDegreeJson(outDegree, "/Users/rjurney/Software/marketing/titan/data/out_degree.json")

// 'partnership' edges, outgoing then incoming
outDegreePartnership = g.V().group().by('domain').by(out('partnership').count()).next()
outDegreePartnershipJson = saveDegreeJson(outDegreePartnership, "/Users/rjurney/Software/marketing/titan/data/out_degree_partnership.json")

inDegreePartnership = g.V().group().by('domain').by(inE('partnership').count()).next()
inDegreePartnershipJson = saveDegreeJson(inDegreePartnership, "/Users/rjurney/Software/marketing/titan/data/in_degree_partnership.json")

// 'customer' edges, outgoing then incoming
outDegreeCustomer = g.V().group().by('domain').by(out('customer').count()).next()
outDegreeCustomerJson = saveDegreeJson(outDegreeCustomer, "/Users/rjurney/Software/marketing/titan/data/out_degree_customer.json")

inDegreeCustomer = g.V().group().by('domain').by(inE('customer').count()).next()
inDegreeCustomerJson = saveDegreeJson(inDegreeCustomer, "/Users/rjurney/Software/marketing/titan/data/in_degree_customer.json")

// 'investment' edges, incoming then outgoing
inDegreeInvestment = g.V().group().by('domain').by(inE('investment').count()).next()
inDegreeInvestmentJson = saveDegreeJson(inDegreeInvestment, "/Users/rjurney/Software/marketing/titan/data/in_degree_investment.json")

outDegreeInvestment = g.V().group().by('domain').by(out('investment').count()).next()
outDegreeInvestmentJson = saveDegreeJson(outDegreeInvestment, "/Users/rjurney/Software/marketing/titan/data/out_degree_investment.json")

// 'competitor' edges, both directions
competitorDegree = g.V().group().by('domain').by(both('competitor').count()).next()
competitorDegreeJson = saveDegreeJson(competitorDegree, "/Users/rjurney/Software/marketing/titan/data/degree_competitor.json")
// end global centralities
// Features particular to one company at a time.
//
// Every query starts at the vertex whose 'domain' matches the company, labels it
// 'x', walks the listed edges, drops any traverser that ended up back on 'x', and
// counts the remaining arrivals per 'domain'. The result variables are reused from
// company to company, so after this section they hold the values of the last
// company processed.

// Returns a fresh traversal positioned on the company with the given domain and
// labelled 'x'. A new traversal is built on every call.
fromCompany = { String domain -> g.V().has('domain', domain).as('x') }

// Serializes `counts` with JsonBuilder, writes the text to `path`, and returns the
// JSON text so the caller can store it.
saveConnectednessJson = { counts, String path ->
    def json = new JsonBuilder(counts).toString()
    new File(path).write(json)
    json
}

// --- customers -> partners -------------------------------------------------------
customerConnectedness = fromCompany('oreilly.com').out('customer').in('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectednessJson = saveConnectednessJson(customerConnectedness, "/Users/rjurney/Software/marketing/titan/data/oreilly_customer_connectedness.json")

customerConnectedness = fromCompany('gainsight.com').out('customer').in('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectednessJson = saveConnectednessJson(customerConnectedness, "/Users/rjurney/Software/marketing/titan/data/gainsight_customer_connectedness.json")

// relato.io goes through its competitors' customers rather than its own
customerConnectedness = fromCompany('relato.io').out('competitor').out('customer').both('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectednessJson = saveConnectednessJson(customerConnectedness, "/Users/rjurney/Software/marketing/titan/data/relato_customer_connectedness.json")

// --- same walks, extended by one more 'partnership' hop in either direction -------
customerConnectedness2ndBi = fromCompany('oreilly.com').out('customer').in('partnership').both('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectedness2ndBiJson = saveConnectednessJson(customerConnectedness2ndBi, "/Users/rjurney/Software/marketing/titan/data/oreilly_customer_connectedness_2nd_bi.json")

customerConnectedness2ndBi = fromCompany('gainsight.com').out('customer').in('partnership').both('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectedness2ndBiJson = saveConnectednessJson(customerConnectedness2ndBi, "/Users/rjurney/Software/marketing/titan/data/gainsight_customer_connectedness_2nd_bi.json")

customerConnectedness2ndBi = fromCompany('relato.io').out('competitor').out('customer').both('partnership').both('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectedness2ndBiJson = saveConnectednessJson(customerConnectedness2ndBi, "/Users/rjurney/Software/marketing/titan/data/relato_customer_connectedness_2nd_bi.json")

// --- totango.com -----------------------------------------------------------------
customerConnectedness = fromCompany('totango.com').out('customer').both('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectednessJson = saveConnectednessJson(customerConnectedness, "/Users/rjurney/Software/marketing/titan/data/totango.com_customer_connectedness.json")

customerConnectedness2ndBi = fromCompany('totango.com').out('customer').both('partnership').both('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectedness2ndBiJson = saveConnectednessJson(customerConnectedness2ndBi, "/Users/rjurney/Software/marketing/titan/data/totango.com_customer_connectedness_2nd_bi.json")

competitorCustomerConnectedness2ndBi = fromCompany('totango.com').out('competitor').out('customer').both('partnership').both('partnership').where(neq('x')).groupCount().by('domain').next()
competitorCustomerConnectedness2ndBiJson = saveConnectednessJson(competitorCustomerConnectedness2ndBi, "/Users/rjurney/Software/marketing/titan/data/totango.com_competitor_customer_connectedness_2nd_bi.json")

// --- welink.com ------------------------------------------------------------------
customerConnectedness = fromCompany('welink.com').out('customer').both('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectednessJson = saveConnectednessJson(customerConnectedness, "/Users/rjurney/Software/marketing/titan/data/welink.com_customer_connectedness.json")

customerConnectedness2ndBi = fromCompany('welink.com').out('customer').both('partnership').both('partnership').where(neq('x')).groupCount().by('domain').next()
customerConnectedness2ndBiJson = saveConnectednessJson(customerConnectedness2ndBi, "/Users/rjurney/Software/marketing/titan/data/welink.com_customer_connectedness_2nd_bi.json")

competitorCustomerConnectedness2ndBi = fromCompany('welink.com').out('competitor').out('customer').both('partnership').both('partnership').where(neq('x')).groupCount().by('domain').next()
competitorCustomerConnectedness2ndBiJson = saveConnectednessJson(competitorCustomerConnectedness2ndBi, "/Users/rjurney/Software/marketing/titan/data/welink.com_competitor_customer_connectedness_2nd_bi.json")
// When redoing things, skip this block and run it last.
// NOTE(review): presumably because the repeated walk is slow - confirm.
//
// From every vertex, follow outgoing 'partnership' edges for three iterations. On each
// iteration the arrivals are tallied by 'domain' into the side-effect map 'm', so the
// map accumulates counts across all three hops; cap('m') then emits that map.
// Lines end with a trailing dot so the statement also parses when pasted line by line.
partnershipLoopCentrality = g.V().
    repeat(out('partnership').groupCount('m').by('domain')).
    times(3).
    cap('m').
    next()
partnershipLoopCentralityJson = new JsonBuilder(partnershipLoopCentrality).toString()
new File("/Users/rjurney/Software/marketing/titan/data/partner_loop_centrality.json").write(partnershipLoopCentralityJson)
// Closeness Centrality
// closenessCentrality = g.V().as("a").repeat(both('partnership').simplePath()).emit().as("b").dedup().by(select("a","b")).path().group().by(limit(local, 1)).by(count(local).map {1/it.get()}.sum())
// subGraph = g.E().hasLabel('partnership').subgraph('subGraph').cap('subGraph').next()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment