Skip to content

Instantly share code, notes, and snippets.

@okram
Created November 18, 2015 19:02
Show Gist options
  • Save okram/d9f158dee789689759da to your computer and use it in GitHub Desktop.
Save okram/d9f158dee789689759da to your computer and use it in GitHub Desktop.
// Data set: http://grouplens.org/datasets/movielens/1m/
// Open a TinkerGraph plus the traversal source used by every loader below.
graph = TinkerGraph.open()
g = graph.traversal()
dir = '/Users/marko/Desktop/ml-1m/'

// First pass over movies.dat: gather the distinct category names found in the
// third "::"-separated field, which is itself a "|"-separated list.
categories = [] as Set
new File(dir + 'movies.dat').eachLine { row ->
    categories.addAll(row.split("::")[2].split("\\|"))
}

// Create one 'category' vertex per distinct name.
for (categoryName in categories) {
    g.addV(label, 'category', 'name', categoryName).next()
}
// Second pass over movies.dat: create one 'movie' vertex per line and link it
// to each of its 'category' vertices.
// Each line is split on "::" into: movie id, "Title (year)", "cat1|cat2|...".
// NOTE(review): the ml-1m movies.dat is widely reported to be ISO-8859-1
// (Latin-1) rather than UTF-8, so the charset is given explicitly to keep
// accented titles intact -- confirm against your copy of the data set.
new File(dir + 'movies.dat').eachLine('ISO-8859-1') { line ->
    // 'def' keeps these per-line temporaries out of the shared script binding.
    def fields = line.split("::")
    def movieId = fields[0] as Integer

    // Separate the title from the trailing "(year)".
    def titleYear = fields[1] =~ /(.*)\s*\((\d+)\)/
    if (!titleYear.find()) {
        // Fail with the offending line instead of the opaque "No match found"
        // that Matcher.group() would raise further down.
        throw new IllegalStateException("No '(year)' suffix found in movie title: " + line)
    }

    // Movie vertex ids are the numeric id prefixed with 'm'.
    def movie = g.addV(id, 'm' + movieId, label, 'movie',
            'name', titleYear.group(1).trim(),
            'year', titleYear.group(2) as Integer).next()

    // One 'category' edge per "|"-separated category name on the line.
    fields[2].split("\\|").each { categoryName ->
        movie.addEdge('category', g.V().hasLabel('category').has('name', categoryName).next())
    }
}
// Occupation code -> display name, as used by the fourth field of users.dat.
occupations = [
     0: 'other',
     1: 'academic/educator',
     2: 'artist',
     3: 'clerical/admin',
     4: 'college/grad student',
     5: 'customer service',
     6: 'doctor/health care',
     7: 'executive/managerial',
     8: 'farmer',
     9: 'homemaker',
    10: 'K-12 student',
    11: 'lawyer',
    12: 'programmer',
    13: 'retired',
    14: 'sales/marketing',
    15: 'scientist',
    16: 'self-employed',
    17: 'technician/engineer',
    18: 'tradesman/craftsman',
    19: 'unemployed',
    20: 'writer'
]

// Create one 'occupation' vertex per entry; its id is the code prefixed with 'o'.
occupations.each { entry ->
    g.addV(id, 'o' + entry.key, label, 'occupation', 'name', entry.value).iterate()
}
// Load users.dat: create one 'user' vertex per line and link it to its
// 'occupation' vertex.
// Fields used after splitting on "::": [0] user id, [1] gender, [2] age,
// [3] occupation code.
new File(dir + 'users.dat').eachLine { line ->
    // Split once per line instead of once per field access.
    def fields = line.split("::")

    // User vertex ids are the numeric id prefixed with 'u'.
    // 'def' keeps the temporary out of the shared script binding.
    def user = g.addV(id, 'u' + (fields[0] as Integer), label, 'user',
            'gender', fields[1],
            'age', fields[2] as Integer).next()

    // Fetch the occupation vertex directly by id (the same way the ratings
    // loader resolves users and movies) rather than scanning every vertex;
    // the label check is kept so only an 'occupation' vertex can match.
    def occupation = g.V('o' + (fields[3] as Integer)).hasLabel('occupation').next()
    user.addEdge('occupation', occupation)
}
// Load ratings.dat: add one 'rated' edge from user to movie per line.
// Fields used after splitting on "::": [0] user id, [1] movie id, [2] stars.
new File(dir + 'ratings.dat').eachLine { String row ->
    components = row.split("::")
    // Resolve both endpoints by the prefixed ids assigned when they were created.
    user = g.V('u' + components[0].toInteger()).next()
    movie = g.V('m' + components[1].toInteger()).next()
    user.addEdge('rated', movie, 'stars', components[2].toInteger())
}

// Write the finished graph next to the input files in Gryo format.
graph.io(gryo()).writeGraph(dir + 'movie-lens.kryo')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment