-
-
Save amcp/15ad10ff1157ea7ab11f2232732af96f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Gremlin console session; `graph` is not assigned here, so it must already be open.
// Load the MovieLensParser class definition from ml.groovy.
:load ml.groovy
// Import the data files found in the ml-1m directory into the graph.
MovieLensParser.load(graph, "ml-1m")
g = graph.traversal()
// Run each count once through clockWithResult (presumably reports elapsed time and the count).
clockWithResult(1){ g.E().count().tryNext().get() }
clockWithResult(1){ g.V().count().tryNext().get() }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Imports MovieLens data files (movies.dat, users.dat, ratings.dat) into a TinkerPop graph.
 *
 * Vertices created: genre, occupation, movie, person.
 * Edges created: hasGenre (movie to genre), hasOccupation (person to occupation),
 * rated (person to movie, with stars and time properties).
 *
 * Entry points are {@link #load}, which prepares schema or index and then imports,
 * and {@link #parse}, which only imports.
 */
class MovieLensParser {
    /** Occupation id (0..20) mapped to its display name. */
    static Map occupations
    /** Names of all genres that get a genre vertex. */
    static List genres

    /**
     * Splits a raw title field into title text (group 1) and a numeric year (group 2).
     * Compiled once instead of once per line of movies.dat.
     * NOTE(review): because of the word boundary, a title whose text ends in a non-word
     * character directly before the year does not match, so that movie is skipped.
     * Confirm this is intended before relying on complete movie coverage.
     */
    private static final TITLE_YEAR = ~/(.*\b)\s*\((\d+)\)/

    static {
        occupations = [0: "other", 1: "academic/educator", 2: "artist",
                       3: "clerical/admin", 4: "college/grad student", 5: "customer service",
                       6: "doctor/health care", 7: "executive/managerial", 8: "farmer",
                       9: "homemaker", 10: "K-12 student", 11: "lawyer", 12: "programmer",
                       13: "retired", 14: "sales/marketing", 15: "scientist", 16: "self-employed",
                       17: "technician/engineer", 18: "tradesman/craftsman", 19: "unemployed", 20: "writer"]
        //iconv -f ISO-8859-1 -t UTF-8 movies.dat | sed 's/.*://' | tr "|" "\n" | sort | uniq | sed -e 's/^\(.*\)/\"\1\"/' | tr "\n" "@" | sed 's/@/,\ /g'
        genres = ["Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary",
                  "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller",
                  "War", "Western"]
    }

    /**
     * Reads the three data files from dataDirectory and adds all vertices and edges to graph.
     * Nothing is committed here; the caller owns any transaction.
     *
     * @param graph         graph that receives the vertices and edges
     * @param dataDirectory directory containing movies.dat, users.dat and ratings.dat
     */
    public static void parse(final org.apache.tinkerpop.gremlin.structure.Graph graph, final String dataDirectory) {
        def genreVertices = addGenres(graph)
        def occupationVertices = addOccupations(graph)
        def movieVertices = addMovies(graph, dataDirectory, genreVertices)
        def userVertices = addUsers(graph, dataDirectory, occupationVertices)
        addRatings(dataDirectory, userVertices, movieVertices)
    }

    /** Adds one genre vertex per entry of genres and returns them keyed by genre name. */
    private static Map addGenres(graph) {
        def genreVertices = [:]
        genres.each { genreName ->
            genreVertices.put(genreName, graph.addVertex(T.label, 'genre', 'uid', 'g' + genreName, 'name', genreName))
        }
        return genreVertices
    }

    /** Adds one occupation vertex for each id 0..20 and returns them keyed by id. */
    private static Map addOccupations(graph) {
        def occupationVertices = [:]
        (0..20).each { jobId ->
            occupationVertices.put(jobId, graph.addVertex(T.label, 'occupation', 'uid', 'o' + jobId, 'jobId', jobId, 'name', occupations.get(jobId)))
        }
        return occupationVertices
    }

    /**
     * Adds a movie vertex and its hasGenre edges for each usable line of movies.dat.
     * Line format: MovieID::Title::Genres, with genres separated by a pipe character.
     * Lines that are blank or whose title does not match TITLE_YEAR are skipped.
     *
     * @return movie vertices keyed by integer movie id
     */
    private static Map addMovies(graph, String dataDirectory, Map genreVertices) {
        def movieVertices = [:]
        new File(dataDirectory + '/movies.dat').eachLine { final String line ->
            // A blank line would otherwise fail on the field index below.
            if (line.trim().isEmpty()) return
            def fields = line.split("::")
            def titleYear = TITLE_YEAR.matcher(fields[1])
            if (!titleYear.find()) return
            def movieId = fields[0].toInteger()
            def movieTitle = titleYear.group(1)
            def movieYear = titleYear.group(2).toInteger()
            def movieVertex = graph.addVertex(T.label, 'movie', 'uid', 'm' + movieId, 'movieId', movieId, 'name', movieTitle, 'year', movieYear)
            movieVertices.put(movieId, movieVertex)
            fields[2].split('\\|').each { genreName ->
                movieVertex.addEdge('hasGenre', genreVertices[genreName])
            }
        }
        return movieVertices
    }

    /**
     * Adds a person vertex and its hasOccupation edge for each non-blank line of users.dat.
     * Line format: UserID::Gender::Age::Occupation::Zip-code
     *
     * @return person vertices keyed by integer user id
     */
    private static Map addUsers(graph, String dataDirectory, Map occupationVertices) {
        def userVertices = [:]
        new File(dataDirectory + '/users.dat').eachLine { final String line ->
            if (line.trim().isEmpty()) return
            def fields = line.split("::")
            def userId = fields[0].toInteger()
            def userGender = fields[1]
            def userAge = fields[2].toInteger()
            def occupationId = fields[3].toInteger()
            def userZipcode = fields[4]
            def userVertex = graph.addVertex(T.label, 'person', 'uid', 'u' + userId, 'userId', userId, 'gender', userGender, 'age', userAge, 'zipcode', userZipcode)
            userVertices.put(userId, userVertex)
            userVertex.addEdge('hasOccupation', occupationVertices[occupationId])
        }
        return userVertices
    }

    /**
     * Adds a rated edge for each line of ratings.dat whose user and movie were both imported.
     * Line format: UserID::MovieID::Rating::Timestamp
     * The lines are processed through a parallel stream, so addEdge is invoked from several
     * threads at once; the two lookup maps are only read at this point.
     * NOTE(review): assumes the graph implementation tolerates concurrent addEdge calls.
     */
    private static void addRatings(String dataDirectory, Map userVertices, Map movieVertices) {
        new File(dataDirectory + '/ratings.dat').readLines().parallelStream().forEach({ final String line ->
            if (line.trim().isEmpty()) return
            def fields = line.split("::")
            // Movies skipped during import have no vertex, so their ratings are dropped.
            def movieVertex = movieVertices[fields[1].toInteger()]
            if (movieVertex == null) return
            // Same treatment for an unknown user instead of a null dereference mid-import.
            def userVertex = userVertices[fields[0].toInteger()]
            if (userVertex == null) return
            def stars = fields[2].toInteger()
            def time = fields[3].toLong()
            userVertex.addEdge('rated', movieVertex, 'stars', stars, 'time', time)
        })
    }

    /**
     * Prepares the graph, imports the data set and prints how long import and commit took.
     * For a StandardTitanGraph the schema is defined first and the import runs inside a
     * threaded transaction that is committed afterwards. For any other graph an index on
     * uid is created through createIndex and the import writes to the graph directly.
     *
     * @param graph         target graph
     * @param dataDirectory directory containing movies.dat, users.dat and ratings.dat
     */
    public static void load(final org.apache.tinkerpop.gremlin.structure.Graph graph, final String dataDirectory) {
        final boolean titan = graph instanceof com.thinkaurelius.titan.graphdb.database.StandardTitanGraph
        if (titan) {
            defineTitanSchema(((com.thinkaurelius.titan.graphdb.database.StandardTitanGraph) graph).openManagement())
            graph.tx().commit()
        } else {
            graph.createIndex('uid', org.apache.tinkerpop.gremlin.structure.Vertex.class)
        }
        def start = System.currentTimeMillis()
        def actualGraph = titan ? graph.tx().createThreadedTx() : graph
        parse(actualGraph, dataDirectory)
        def creating = System.currentTimeMillis()
        println "Creating objects took (ms): " + (creating - start)
        if (actualGraph instanceof com.thinkaurelius.titan.graphdb.transaction.StandardTitanTx) {
            actualGraph.tx().commit()
        }
        println "Committing took (ms): " + (System.currentTimeMillis() - creating)
    }

    /** Declares property keys, the unique byUid vertex index, edge labels and vertex labels, then commits. */
    private static void defineTitanSchema(mgmt) {
        ["movieId", "year", "stars", "userId", "age", "jobId"].each { keyName ->
            mgmt.makePropertyKey(keyName).dataType(Integer.class).make()
        }
        ["name", "gender", "zipcode"].each { keyName ->
            mgmt.makePropertyKey(keyName).dataType(String.class).make()
        }
        mgmt.makePropertyKey("time").dataType(Long.class).make()
        def uidKey = mgmt.makePropertyKey('uid').dataType(String.class).make()
        mgmt.buildIndex('byUid', org.apache.tinkerpop.gremlin.structure.Vertex.class).addKey(uidKey).unique().buildCompositeIndex()
        ["rated", "hasOccupation", "hasGenre"].each { labelName ->
            mgmt.makeEdgeLabel(labelName).make()
        }
        ["person", "genre", "occupation", "movie"].each { labelName ->
            mgmt.makeVertexLabel(labelName).make()
        }
        mgmt.commit()
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
graph = TinkerGraph.open()
# Followed common-usage.txt to get the following results:
Creating objects took (ms): 3233
Committing took (ms): 8
Time to traverse 969719 edges: 111 ms
Time to traverse 9625 vertices: 2 ms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
graph = com.thinkaurelius.titan.core.TitanFactory.build().set("storage.backend", "berkeleyje").
set("storage.directory", "bdb").set("storage.buffer-size", Integer.MAX_VALUE).open()
# Followed common-usage.txt to get the following results:
Creating objects took (ms): 14789
Committing took (ms): 15790
Time to traverse 969719 edges: 2742 ms
Time to traverse 9625 vertices: 1594 ms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
graph = com.thinkaurelius.titan.core.TitanFactory.build().set("storage.backend", "inmemory").open()
# Followed common-usage.txt to get the following results:
Creating objects took (ms): 13136
Committing took (ms): 6180
Time to traverse 969719 edges: 1051 ms
Time to traverse 9625 vertices: 17 ms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
graph = com.thinkaurelius.titan.core.TitanFactory.build().
set("storage.backend", "jp.classmethod.titan.diskstorage.tupl.TuplStoreManager").
set("storage.tupl.min-cache-size", "1000000000").
set("storage.tupl.map-data-files", "true").set("storage.tupl.direct-page-access", "true").
set("storage.buffer-size", Integer.MAX_VALUE).open()
# Followed common-usage.txt to get the following results:
Creating objects took (ms): 13614
Committing took (ms): 8692
Time to traverse 969719 edges: 1155 ms
Time to traverse 9625 vertices: 191 ms
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment