Skip to content

Instantly share code, notes, and snippets.

@invkrh
Last active May 6, 2016 16:14
Show Gist options
  • Save invkrh/d4b0360060c4f0d84515c86b412d63a6 to your computer and use it in GitHub Desktop.
Save invkrh/d4b0360060c4f0d84515c86b412d63a6 to your computer and use it in GitHub Desktop.
import org.apache.spark._
import org.apache.spark.mllib.feature.{Word2Vec, Word2VecModel}
/**
 * Rough end-to-end timing of Spark MLlib's Word2Vec on the local `text8` file.
 *
 * The measured wall-clock span covers SparkContext start-up, model training,
 * a synonym lookup, and a save/load round trip of the model; the elapsed
 * time is printed in whole seconds.
 *
 * An explicit `main` is used instead of `extends App`: the Spark documentation
 * advises against `scala.App` because its delayed initialization may not work
 * correctly with Spark applications.
 */
object Word2VecPerfTest {

  /**
   * Runs the benchmark once.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val start = System.currentTimeMillis()

    val conf = new SparkConf().setMaster("local[*]").setAppName("word2vec")
    val sc = new SparkContext(conf)

    try {
      // Each input line becomes one "sentence": a sequence of space-separated tokens.
      val input = sc.textFile("text8").map(_.split(" ").toSeq)

      val word2vec = new Word2Vec().setNumPartitions(20)
      val model = word2vec.fit(input)

      // Print the 40 nearest words to "china" together with their similarity score.
      model.findSynonyms("china", 40).foreach { case (synonym, cosineSimilarity) =>
        println(s"$synonym $cosineSimilarity")
      }

      // Save and load model. The loaded model is not used further; the call is
      // kept only so that load time is included in the measurement.
      // NOTE(review): save presumably fails if "result" already exists from a
      // previous run — confirm and clean the directory between runs if so.
      model.save(sc, "result")
      Word2VecModel.load(sc, "result")

      val end = System.currentTimeMillis()
      // Integer division: elapsed time is reported in whole seconds, truncated.
      val time = (end - start) / 1000
      println(s"$time seconds")
    } finally {
      // Always release the SparkContext, even when training or I/O throws.
      sc.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment