amcp/common-usage.txt

## common-usage.txt
// Load the MovieLensParser class defined in ml.groovy into the console session.
:load ml.groovy
// Import the data files under the "ml-1m" directory into `graph`
// (`graph` must already be opened; see the usage-*.txt sections for each backend).
MovieLensParser.load(graph, "ml-1m")

g = graph.traversal()
// Time a full edge count and a full vertex count.
// NOTE(review): the argument 1 is presumably the number of timed runs — confirm against the console docs.
clockWithResult(1){ g.E().count().tryNext().get() }
clockWithResult(1){ g.V().count().tryNext().get() }

## ml.groovy
/**
 * Imports MovieLens-style data files (movies.dat, users.dat, ratings.dat) into a TinkerPop graph.
 *
 * Vertices created: 'genre', 'occupation', 'movie' and 'person'.
 * Edges created: 'hasGenre' (movie -> genre), 'hasOccupation' (person -> occupation)
 * and 'rated' (person -> movie, with 'stars' and 'time' properties).
 * Every vertex carries a 'uid' property made of a one-letter prefix (g/o/m/u) plus its id or name.
 */
class MovieLensParser {

    /** Occupation id -> occupation name; ids are what the Occupation column of users.dat is looked up against. */
    static Map occupations
    /** Genre names that movies.dat genre lists are resolved against. */
    static List genres
    static {
        occupations = [0: "other", 1: "academic/educator", 2: "artist",
                3: "clerical/admin", 4: "college/grad student", 5: "customer service",
                6: "doctor/health care", 7: "executive/managerial", 8: "farmer",
                9: "homemaker", 10: "K-12 student", 11: "lawyer", 12: "programmer",
                13: "retired", 14: "sales/marketing", 15: "scientist", 16: "self-employed",
                17: "technician/engineer", 18: "tradesman/craftsman", 19: "unemployed", 20: "writer"]
        //iconv -f ISO-8859-1 -t UTF-8 movies.dat | sed 's/.*://' | tr "|" "\n" | sort | uniq | sed -e 's/^\(.*\)/\"\1\"/' | tr "\n" "@" | sed 's/@/,\ /g'
        genres = ["Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary",
                  "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller",
                  "War", "Western"]
    }

    /**
     * Reads movies.dat, users.dat and ratings.dat from {@code dataDirectory} and adds the
     * corresponding vertices and edges to {@code graph}. Nothing is committed here.
     *
     * @param graph         graph (or transaction) that receives the vertices and edges
     * @param dataDirectory directory containing the three '::'-separated data files
     */
    public static void parse(final org.apache.tinkerpop.gremlin.structure.Graph graph, final String dataDirectory) {

        // One 'genre' vertex per known genre name, keyed by that name.
        def genremap = [:]
        genres.each {
            genremap.put(it, graph.addVertex(T.label, 'genre', 'uid', 'g' + it, 'name', it))
        }

        // One 'occupation' vertex per entry of the occupations table, keyed by its integer id.
        def occupationmap = [:]
        occupations.each { jobId, jobName ->
            occupationmap.put(jobId, graph.addVertex(T.label, 'occupation', 'uid', 'o' + jobId, 'jobId', jobId, 'name', jobName))
        }

        // MovieID::Title::Genres
        // NOTE(review): the iconv command in the static initializer treats movies.dat as ISO-8859-1,
        // but the file is read here without naming a charset — confirm non-ASCII titles decode as intended.
        def moviemap = [:]
        new File(dataDirectory + '/movies.dat').eachLine { final String line ->

            def components = line.split("::")
            // Split "Some Title (1995)" into the title and the year.
            def movieTitleYear = components[1] =~ /(.*\b)\s*\((\d+)\)/
            // Lines whose title column does not match the pattern above are skipped entirely.
            if (!movieTitleYear.find()) return

            def movieId = components[0].toInteger()
            def movieTitle = movieTitleYear.group(1)
            def movieYear = movieTitleYear.group(2).toInteger()
            // Named movieGenres so it does not shadow the static 'genres' list.
            def movieGenres = components[2]
            def movieVertex = graph.addVertex(T.label, 'movie', 'uid', 'm' + movieId, 'movieId', movieId, 'name', movieTitle, 'year', movieYear)
            moviemap.put(movieId, movieVertex)
            // The genre column is a '|'-separated list of genre names.
            movieGenres.split('\\|').each { def genre ->
                movieVertex.addEdge('hasGenre', genremap[genre])
            }
        }

        def usermap = [:]
        // UserID::Gender::Age::Occupation::Zip-code
        new File(dataDirectory + '/users.dat').eachLine { final String line ->

            def components = line.split("::")
            def userId = components[0].toInteger()
            def userGender = components[1]
            def userAge = components[2].toInteger()
            def occupationId = components[3].toInteger()
            def userZipcode = components[4]
            def userVertex = graph.addVertex(T.label, 'person', 'uid', 'u' + userId, 'userId', userId, 'gender', userGender, 'age', userAge, 'zipcode', userZipcode)
            usermap.put(userId, userVertex)
            userVertex.addEdge('hasOccupation', occupationmap[occupationId])
        }

        // UserID::MovieID::Rating::Timestamp
        // The whole file is read into memory and the 'rated' edges are added from a parallel stream;
        // moviemap and usermap are only read at this point.
        new File(dataDirectory + '/ratings.dat').readLines().parallelStream().forEach( { final String line ->

            def components = line.split("::")
            def movieId = components[1].toInteger()
            // Ratings for movies that were skipped while reading movies.dat are dropped.
            if(!moviemap.containsKey(movieId)) {
                return
            }
            def userId = components[0].toInteger()
            def stars = components[2].toInteger()
            def time = components[3].toLong()
            usermap[userId].addEdge('rated', moviemap[movieId], 'stars', stars, 'time', time)
        })
    }

    /**
     * Prepares {@code graph} (schema and index), runs {@link #parse} and prints how long object
     * creation and the final commit took, in milliseconds.
     *
     * For a StandardTitanGraph the property keys, the unique composite index 'byUid', the edge
     * labels and the vertex labels are created through the management API, and the data is
     * written through a threaded transaction that is committed after parsing. For any other
     * graph, {@code createIndex('uid', Vertex.class)} is called and the graph is used directly.
     *
     * @param graph         target graph
     * @param dataDirectory directory containing movies.dat, users.dat and ratings.dat
     */
    public static void load(final org.apache.tinkerpop.gremlin.structure.Graph graph, final String dataDirectory) {
        if(graph instanceof com.thinkaurelius.titan.graphdb.database.StandardTitanGraph) {
            def mgmt = ((com.thinkaurelius.titan.graphdb.database.StandardTitanGraph) graph).openManagement()
            ["movieId", "year", "stars", "userId", "age", "jobId"].each {
                mgmt.makePropertyKey(it).dataType(Integer.class).make()
            }
            ["name", "gender", "zipcode"].each {
                mgmt.makePropertyKey(it).dataType(String.class).make()
            }
            mgmt.makePropertyKey("time").dataType(Long.class).make()
            def uidKey = mgmt.makePropertyKey('uid').dataType(String.class).make()
            mgmt.buildIndex('byUid', org.apache.tinkerpop.gremlin.structure.Vertex.class).addKey(uidKey).unique().buildCompositeIndex()
            ["rated", "hasOccupation", "hasGenre"].each {
                mgmt.makeEdgeLabel(it).make()
            }
            ["person", "genre", "occupation", "movie"].each {
                mgmt.makeVertexLabel(it).make()
            }
            mgmt.commit()
            graph.tx().commit()
        } else {
            graph.createIndex('uid', org.apache.tinkerpop.gremlin.structure.Vertex.class)
        }
        def start = System.currentTimeMillis()
        // Titan: write through a threaded transaction, since parse() adds edges from several threads.
        def actualGraph = graph instanceof com.thinkaurelius.titan.graphdb.database.StandardTitanGraph ? graph.tx().createThreadedTx() : graph
        parse(actualGraph, dataDirectory)
        def creating = System.currentTimeMillis()
        println "Creating objects took (ms): " + (creating - start)
        // Only the Titan threaded transaction is committed here; other graphs are left as they are.
        if(actualGraph instanceof com.thinkaurelius.titan.graphdb.transaction.StandardTitanTx) actualGraph.tx().commit()
        println "Committing took (ms): " + (System.currentTimeMillis() - creating)
    }
}

## usage-tinkergraph.txt
graph = TinkerGraph.open()

# Followed common-usage.txt to get the following results:
Creating objects took (ms): 3233
Committing took (ms): 8
Time to traverse 969719 edges: 111 ms
Time to traverse 9625 vertices:  2 ms

## usage-titan-berkeleyje.txt
graph = com.thinkaurelius.titan.core.TitanFactory.build().set("storage.backend", "berkeleyje").
set("storage.directory", "bdb").set("storage.buffer-size", Integer.MAX_VALUE).open()

# Followed common-usage.txt to get the following results:
Creating objects took (ms): 14789
Committing took (ms): 15790
Time to traverse 969719 edges:  2742 ms
Time to traverse 9625 vertices: 1594 ms

## usage-titan-inmemory.txt
graph = com.thinkaurelius.titan.core.TitanFactory.build().set("storage.backend", "inmemory").open()

# Followed common-usage.txt to get the following results:
Creating objects took (ms): 13136
Committing took (ms): 6180
Time to traverse 969719 edges: 1051 ms
Time to traverse 9625 vertices:  17 ms

## usage-titan-tupl.txt
graph = com.thinkaurelius.titan.core.TitanFactory.build().
set("storage.backend", "jp.classmethod.titan.diskstorage.tupl.TuplStoreManager").
set("storage.tupl.min-cache-size", "1000000000").
set("storage.tupl.map-data-files", "true").set("storage.tupl.direct-page-access", "true").
set("storage.buffer-size", Integer.MAX_VALUE).open()

# Followed common-usage.txt to get the following results:
Creating objects took (ms): 13614
Committing took (ms): 8692
Time to traverse 969719 edges: 1155 ms
Time to traverse 9625 vertices: 191 ms
	// Load the MovieLensParser class defined in ml.groovy into the console session.
	:load ml.groovy
	// Import the data files under the "ml-1m" directory into `graph` (`graph` must already be opened).
	MovieLensParser.load(graph, "ml-1m")

	g = graph.traversal()
	// Time a full edge count and a full vertex count.
	// NOTE(review): the argument 1 is presumably the number of timed runs — confirm against the console docs.
	clockWithResult(1){ g.E().count().tryNext().get() }
	clockWithResult(1){ g.V().count().tryNext().get() }
	/**
	 * Imports MovieLens-style data files (movies.dat, users.dat, ratings.dat) into a TinkerPop graph.
	 *
	 * Vertices created: 'genre', 'occupation', 'movie' and 'person'.
	 * Edges created: 'hasGenre' (movie -> genre), 'hasOccupation' (person -> occupation)
	 * and 'rated' (person -> movie, with 'stars' and 'time' properties).
	 * Every vertex carries a 'uid' property made of a one-letter prefix (g/o/m/u) plus its id or name.
	 */
	class MovieLensParser {

	    /** Occupation id -> occupation name; ids are what the Occupation column of users.dat is looked up against. */
	    static Map occupations
	    /** Genre names that movies.dat genre lists are resolved against. */
	    static List genres
	    static {
	        occupations = [0: "other", 1: "academic/educator", 2: "artist",
	                3: "clerical/admin", 4: "college/grad student", 5: "customer service",
	                6: "doctor/health care", 7: "executive/managerial", 8: "farmer",
	                9: "homemaker", 10: "K-12 student", 11: "lawyer", 12: "programmer",
	                13: "retired", 14: "sales/marketing", 15: "scientist", 16: "self-employed",
	                17: "technician/engineer", 18: "tradesman/craftsman", 19: "unemployed", 20: "writer"]
	        //iconv -f ISO-8859-1 -t UTF-8 movies.dat | sed 's/.*://' | tr "|" "\n" | sort | uniq | sed -e 's/^\(.*\)/\"\1\"/' | tr "\n" "@" | sed 's/@/,\ /g'
	        genres = ["Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary",
	                  "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller",
	                  "War", "Western"]
	    }

	    /**
	     * Reads movies.dat, users.dat and ratings.dat from {@code dataDirectory} and adds the
	     * corresponding vertices and edges to {@code graph}. Nothing is committed here.
	     *
	     * @param graph         graph (or transaction) that receives the vertices and edges
	     * @param dataDirectory directory containing the three '::'-separated data files
	     */
	    public static void parse(final org.apache.tinkerpop.gremlin.structure.Graph graph, final String dataDirectory) {

	        // One 'genre' vertex per known genre name, keyed by that name.
	        def genremap = [:]
	        genres.each {
	            genremap.put(it, graph.addVertex(T.label, 'genre', 'uid', 'g' + it, 'name', it))
	        }

	        // One 'occupation' vertex per entry of the occupations table, keyed by its integer id.
	        def occupationmap = [:]
	        occupations.each { jobId, jobName ->
	            occupationmap.put(jobId, graph.addVertex(T.label, 'occupation', 'uid', 'o' + jobId, 'jobId', jobId, 'name', jobName))
	        }

	        // MovieID::Title::Genres
	        // NOTE(review): the iconv command in the static initializer treats movies.dat as ISO-8859-1,
	        // but the file is read here without naming a charset — confirm non-ASCII titles decode as intended.
	        def moviemap = [:]
	        new File(dataDirectory + '/movies.dat').eachLine { final String line ->

	            def components = line.split("::")
	            // Split "Some Title (1995)" into the title and the year.
	            def movieTitleYear = components[1] =~ /(.*\b)\s*\((\d+)\)/
	            // Lines whose title column does not match the pattern above are skipped entirely.
	            if (!movieTitleYear.find()) return

	            def movieId = components[0].toInteger()
	            def movieTitle = movieTitleYear.group(1)
	            def movieYear = movieTitleYear.group(2).toInteger()
	            // Named movieGenres so it does not shadow the static 'genres' list.
	            def movieGenres = components[2]
	            def movieVertex = graph.addVertex(T.label, 'movie', 'uid', 'm' + movieId, 'movieId', movieId, 'name', movieTitle, 'year', movieYear)
	            moviemap.put(movieId, movieVertex)
	            // The genre column is a '|'-separated list of genre names.
	            movieGenres.split('\\|').each { def genre ->
	                movieVertex.addEdge('hasGenre', genremap[genre])
	            }
	        }

	        def usermap = [:]
	        // UserID::Gender::Age::Occupation::Zip-code
	        new File(dataDirectory + '/users.dat').eachLine { final String line ->

	            def components = line.split("::")
	            def userId = components[0].toInteger()
	            def userGender = components[1]
	            def userAge = components[2].toInteger()
	            def occupationId = components[3].toInteger()
	            def userZipcode = components[4]
	            def userVertex = graph.addVertex(T.label, 'person', 'uid', 'u' + userId, 'userId', userId, 'gender', userGender, 'age', userAge, 'zipcode', userZipcode)
	            usermap.put(userId, userVertex)
	            userVertex.addEdge('hasOccupation', occupationmap[occupationId])
	        }

	        // UserID::MovieID::Rating::Timestamp
	        // The whole file is read into memory and the 'rated' edges are added from a parallel stream;
	        // moviemap and usermap are only read at this point.
	        new File(dataDirectory + '/ratings.dat').readLines().parallelStream().forEach( { final String line ->

	            def components = line.split("::")
	            def movieId = components[1].toInteger()
	            // Ratings for movies that were skipped while reading movies.dat are dropped.
	            if(!moviemap.containsKey(movieId)) {
	                return
	            }
	            def userId = components[0].toInteger()
	            def stars = components[2].toInteger()
	            def time = components[3].toLong()
	            usermap[userId].addEdge('rated', moviemap[movieId], 'stars', stars, 'time', time)
	        })
	    }

	    /**
	     * Prepares {@code graph} (schema and index), runs {@link #parse} and prints how long object
	     * creation and the final commit took, in milliseconds.
	     *
	     * For a StandardTitanGraph the property keys, the unique composite index 'byUid', the edge
	     * labels and the vertex labels are created through the management API, and the data is
	     * written through a threaded transaction that is committed after parsing. For any other
	     * graph, {@code createIndex('uid', Vertex.class)} is called and the graph is used directly.
	     *
	     * @param graph         target graph
	     * @param dataDirectory directory containing movies.dat, users.dat and ratings.dat
	     */
	    public static void load(final org.apache.tinkerpop.gremlin.structure.Graph graph, final String dataDirectory) {
	        if(graph instanceof com.thinkaurelius.titan.graphdb.database.StandardTitanGraph) {
	            def mgmt = ((com.thinkaurelius.titan.graphdb.database.StandardTitanGraph) graph).openManagement()
	            ["movieId", "year", "stars", "userId", "age", "jobId"].each {
	                mgmt.makePropertyKey(it).dataType(Integer.class).make()
	            }
	            ["name", "gender", "zipcode"].each {
	                mgmt.makePropertyKey(it).dataType(String.class).make()
	            }
	            mgmt.makePropertyKey("time").dataType(Long.class).make()
	            def uidKey = mgmt.makePropertyKey('uid').dataType(String.class).make()
	            mgmt.buildIndex('byUid', org.apache.tinkerpop.gremlin.structure.Vertex.class).addKey(uidKey).unique().buildCompositeIndex()
	            ["rated", "hasOccupation", "hasGenre"].each {
	                mgmt.makeEdgeLabel(it).make()
	            }
	            ["person", "genre", "occupation", "movie"].each {
	                mgmt.makeVertexLabel(it).make()
	            }
	            mgmt.commit()
	            graph.tx().commit()
	        } else {
	            graph.createIndex('uid', org.apache.tinkerpop.gremlin.structure.Vertex.class)
	        }
	        def start = System.currentTimeMillis()
	        // Titan: write through a threaded transaction, since parse() adds edges from several threads.
	        def actualGraph = graph instanceof com.thinkaurelius.titan.graphdb.database.StandardTitanGraph ? graph.tx().createThreadedTx() : graph
	        parse(actualGraph, dataDirectory)
	        def creating = System.currentTimeMillis()
	        println "Creating objects took (ms): " + (creating - start)
	        // Only the Titan threaded transaction is committed here; other graphs are left as they are.
	        if(actualGraph instanceof com.thinkaurelius.titan.graphdb.transaction.StandardTitanTx) actualGraph.tx().commit()
	        println "Committing took (ms): " + (System.currentTimeMillis() - creating)
	    }
	}
	graph = TinkerGraph.open()

	# Followed common-usage.txt to get the following results:
	Creating objects took (ms): 3233
	Committing took (ms): 8
	Time to traverse 969719 edges: 111 ms
	Time to traverse 9625 vertices: 2 ms
	graph = com.thinkaurelius.titan.core.TitanFactory.build().set("storage.backend", "berkeleyje").
	set("storage.directory", "bdb").set("storage.buffer-size", Integer.MAX_VALUE).open()

	# Followed common-usage.txt to get the following results:
	Creating objects took (ms): 14789
	Committing took (ms): 15790
	Time to traverse 969719 edges: 2742 ms
	Time to traverse 9625 vertices: 1594 ms
	graph = com.thinkaurelius.titan.core.TitanFactory.build().set("storage.backend", "inmemory").open()

	# Followed common-usage.txt to get the following results:
	Creating objects took (ms): 13136
	Committing took (ms): 6180
	Time to traverse 969719 edges: 1051 ms
	Time to traverse 9625 vertices: 17 ms
	graph = com.thinkaurelius.titan.core.TitanFactory.build().
	set("storage.backend", "jp.classmethod.titan.diskstorage.tupl.TuplStoreManager").
	set("storage.tupl.min-cache-size", "1000000000").
	set("storage.tupl.map-data-files", "true").set("storage.tupl.direct-page-access", "true").
	set("storage.buffer-size", Integer.MAX_VALUE).open()

	# Followed common-usage.txt to get the following results:
	Creating objects took (ms): 13614
	Committing took (ms): 8692
	Time to traverse 969719 edges: 1155 ms
	Time to traverse 9625 vertices: 191 ms