Skip to content

Instantly share code, notes, and snippets.

Avatar

Dana Groce danared

  • MongoDB
View GitHub Profile
View mongospark
def main(args: Array[String]): Unit = {
// Set up configurations
val sc = getSparkContext()
val sqlContext = SQLContext.getOrCreate(sc)
val readConfig = ReadConfig(Map("uri" -> "mongodb://127.0.0.1/movies.movie_ratings?readPreference=primaryPreferred"))
val writeConfig = WriteConfig(Map("uri" -> "mongodb://127.0.0.1/movies.user_recommendations"))
val userId = 0
View mongospark
/**
* Gets or creates the Spark Context
*/
def getSparkContext(): SparkContext = {
val conf = new SparkConf()
.setMaster("local[*]")
.setAppName("MovieRatings")
val sc = SparkContext.getOrCreate(conf)
sc.setCheckpointDir("/tmp/checkpoint/")
View mongospark
package example
import org.apache.log4j.{Level, Logger}
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit}
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import com.mongodb.spark.MongoSpark
View randomdocs
# Load the sample documents, then let the server pick the winner.
number_of_documents = 100
load_data(collection, number_of_documents)
# A `$sample` stage with size 1 asks the aggregation pipeline for one randomly
# selected document; the result is materialised so the first element can be taken.
winner = list(collection.aggregate([{'$sample': {'size': 1}}]))[0]
# Single-argument call form: prints the same text under the Python 2 `print`
# statement and also works as the Python 3 `print()` function.
print("AND THE WINNER IS ..... " + winner['name'])
View randomdocs
# Load the sample documents, then pick one among those sharing a random mark.
number_of_documents = 100
load_data(collection, number_of_documents)
# Choose a mark between 0 and 10 (both ends inclusive) and query on it.
query = {'i': random.randint(0, 10)}
# Materialise every matching document so one can be sampled client-side.
docs = list(collection.find(query))
winner = random.sample(docs, 1)[0]
View randomdocs
def load_data(collection, n=100):
    """Insert the records produced by ``load_data_file(n)``, each tagged with a random mark.

    Every record gets an ``'i'`` key holding a random integer between 0 and 10
    (both inclusive) before it is passed to ``collection.insert``.

    :param collection: object exposing ``insert(document)``.
    :param n: value forwarded to ``load_data_file`` (default 100).
    """
    # fixed number of marks
    highest_mark = 10
    # load_data_file yields pairs; only the second element (the record) is used here.
    for _, record in load_data_file(n):
        record['i'] = random.randint(0, highest_mark)
        collection.insert(record)
View randomdocs
def load_data(collection, n=100):
    """Insert the records produced by ``load_data_file(n)``, leaving out a few of them.

    The first element of each yielded pair is stored under the record's ``'i'``
    key; records whose value appears in the skip list are not inserted.

    :param collection: object exposing ``insert(document)``.
    :param n: value forwarded to ``load_data_file`` (default 100).
    """
    # let's skip some elements
    skipped = [10, 12, 231, 2, 4]
    for index, record in load_data_file(n):
        # The index is written before the skip check, so skipped records are tagged too.
        record['i'] = index
        if index not in skipped:
            collection.insert(record)


load_data(collection, 100)
View randomdocs
# Keep the value returned by collection.count() (presumably the number of
# documents in the collection).
# NOTE(review): if `collection` is a PyMongo Collection, count() is deprecated
# since PyMongo 3.7 and removed in 4.0; count_documents({}) is the documented
# replacement — confirm the driver version before changing it.
number_of_documents = collection.count()
View randomdocs
# Connect with the driver defaults and select the `names` collection of the
# `simplerandom` database.
mc = MongoClient()
db = mc.simplerandom
collection = db.names
number_of_documents = 100
load_data(collection, number_of_documents)
# random.randint includes BOTH bounds, so the upper bound must be n - 1.
# Assumes load_data stores `i` values 0 .. number_of_documents - 1 (as the
# index-based loader does) — confirm against the load_data in use. With the
# previous upper bound of number_of_documents the query could ask for an `i`
# that no document has.
query = {'i': random.randint(0, number_of_documents - 1)}
View randomdocs
def load_data(collection, n=100):
    """Insert ``n`` documents carrying a random name and a sequential ``i`` value.

    Each document has the form ``{'name': <name>, 'i': <index>}`` where ``name``
    is 20 distinct ASCII letters drawn at random and ``index`` runs from 0 to
    ``n - 1``. Nothing is inserted when ``n`` is 0.

    :param collection: object exposing ``insert(document)``.
    :param n: number of documents to insert (default 100).
    """
    # NOTE(review): if `collection` is a PyMongo Collection, insert() is
    # deprecated in PyMongo 3 and removed in 4 (insert_one / insert_many replace
    # it) — kept here so existing callers and driver versions keep working.
    # for each element we will insert the `i` value
    # `range` and `string.ascii_letters` exist on both Python 2 and 3, unlike
    # `xrange` / `string.letters`; ascii_letters is also locale-independent.
    for i in range(n):
        name = ''.join(random.sample(string.ascii_letters, 20))
        collection.insert({'name': name, 'i': i})
You can’t perform that action at this time.