How to implement LDA in Spark and get the topic distributions of new documents
import org.apache.spark.rdd._
import org.apache.spark.mllib.clustering.{LDA, DistributedLDAModel, LocalLDAModel}
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import scala.collection.mutable
// Read the stop-word list (one word per line) as an RDD of lines.
// NOTE(review): assumes "stopwords.csv" is reachable from every executor — confirm deployment.
val stopWordsInput: RDD[String] = sc.textFile("stopwords.csv")
// Materialise the full list on the driver as a plain Array[String] for local filtering.
val stopWords: Array[String] = stopWordsInput.collect()