View mongospark
def main(args: Array[String]): Unit = { | |
// Set up configurations | |
val sc = getSparkContext() | |
val sqlContext = SQLContext.getOrCreate(sc) | |
val readConfig = ReadConfig(Map("uri" -> "mongodb://127.0.0.1/movies.movie_ratings?readPreference=primaryPreferred")) | |
val writeConfig = WriteConfig(Map("uri" -> "mongodb://127.0.0.1/movies.user_recommendations")) | |
val userId = 0 |
View mongospark
/** | |
* Gets or creates the Spark Context | |
*/ | |
def getSparkContext(): SparkContext = { | |
val conf = new SparkConf() | |
.setMaster("local[*]") | |
.setAppName("MovieRatings") | |
val sc = SparkContext.getOrCreate(conf) | |
sc.setCheckpointDir("/tmp/checkpoint/") |
View mongospark
package example | |
import org.apache.log4j.{Level, Logger} | |
import org.apache.spark.ml.evaluation.RegressionEvaluator | |
import org.apache.spark.ml.recommendation.ALS | |
import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit} | |
import org.apache.spark.sql.SQLContext | |
import org.apache.spark.{SparkConf, SparkContext} | |
import com.mongodb.spark.MongoSpark |
View randomdocs
number_of_documents = 100 | |
load_data(collection, number_of_documents) | |
winner = [ d for d in collection.aggregate([{'$sample': {'size': 1 }}])][0] | |
print "AND THE WINNER IS ..... " + winner['name'] |
View randomdocs
# Choose a random mark value in 0..10 (inclusive), fetch every document whose
# `i` field equals it, then pick one of the matches on the client side.
number_of_documents = 100
load_data(collection, number_of_documents)
query = {'i': random.randint(0, 10)}
docs = list(collection.find(query))
winner = random.sample(docs, 1)[0]
View randomdocs
def load_data(collection, n=100):
    """Insert the documents produced by ``load_data_file(n)`` into ``collection``.

    Before insertion each document receives an ``i`` field holding a random
    integer between 0 and ``max_i`` (both inclusive).
    """
    # fixed number of marks
    max_i = 10
    for _unused, doc in load_data_file(n):
        mark = random.randint(0, max_i)
        doc['i'] = mark
        collection.insert(doc)
View randomdocs
def load_data(collection, n=100):
    """Insert the documents produced by ``load_data_file(n)``, leaving gaps.

    Each document gets its position as the ``i`` field; documents whose
    position appears in ``skiplist`` are tagged but not inserted.
    """
    # let's skip some elements
    skiplist = [10, 12, 231, 2, 4]
    for position, doc in load_data_file(n):
        doc['i'] = position
        if position not in skiplist:
            collection.insert(doc)


load_data(collection, 100)
View randomdocs
# Use the collection's own document count rather than a hard-coded total.
number_of_documents = collection.count()
View randomdocs
# Open a client with the default MongoClient() settings and work on the
# `names` collection of the `simplerandom` database.
mc = MongoClient()
db = mc.simplerandom
collection = db.names
number_of_documents = 100
load_data(collection, number_of_documents)
# random.randint includes BOTH endpoints. Assuming load_data numbers the
# documents 0 .. number_of_documents - 1, the upper bound must be n - 1;
# otherwise the query can ask for an `i` that no document has.
query = {'i': random.randint(0, number_of_documents - 1)}
View randomdocs
def load_data(collection, n=100): | |
#for each element we will insert the `i` value | |
for i in xrange(n): | |
name = ''.join(random.sample( string.letters, 20)) | |
collection.insert( {'name': name, 'i': i}) |