Skip to content

Instantly share code, notes, and snippets.

Dana Groce danared

  • MongoDB
Block or report user

Report or block danared

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View mongospark
def main(args: Array[String]): Unit = {
// Set up configurations
val sc = getSparkContext()
val sqlContext = SQLContext.getOrCreate(sc)
val readConfig = ReadConfig(Map("uri" -> "mongodb://127.0.0.1/movies.movie_ratings?readPreference=primaryPreferred"))
val writeConfig = WriteConfig(Map("uri" -> "mongodb://127.0.0.1/movies.user_recommendations"))
val userId = 0
View mongospark
/**
* Gets or creates the Spark Context
*/
def getSparkContext(): SparkContext = {
val conf = new SparkConf()
.setMaster("local[*]")
.setAppName("MovieRatings")
val sc = SparkContext.getOrCreate(conf)
sc.setCheckpointDir("/tmp/checkpoint/")
View mongospark
package example
import org.apache.log4j.{Level, Logger}
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit}
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import com.mongodb.spark.MongoSpark
View randomdocs
# Load the sample data, then let the aggregation pipeline pick the winner.
number_of_documents = 100
load_data(collection, number_of_documents)
# The pipeline holds a single $sample stage with size 1; materialize the
# result and take its first element.
winner = list(collection.aggregate([{'$sample': {'size': 1}}]))[0]
# A parenthesized single-argument print produces the same output under the
# Python 2 print statement and the Python 3 print function.
print("AND THE WINNER IS ..... " + winner['name'])
View randomdocs
# Load the sample data, then choose one document at random among those
# whose 'i' field equals a randomly drawn integer.
number_of_documents = 100
load_data(collection, number_of_documents)
# The candidate value is drawn from 0..10, both ends included.
query = {'i': random.randint(0, 10)}
docs = list(collection.find(query))
# random.sample with k=1 returns a one-element list; unwrap it.
# NOTE(review): random.sample raises ValueError when docs is empty, i.e.
# when no stored document carries the drawn value -- confirm that is acceptable.
winner = random.sample(docs, 1)[0]
View randomdocs
def load_data(collection, n=100):
    """Insert every record produced by load_data_file(n) into `collection`.

    Before insertion each record gets an 'i' key holding a random integer
    between 0 and 10 inclusive. The first element of each pair yielded by
    load_data_file is not used here.
    """
    # fixed number of marks
    highest_mark = 10
    for _, record in load_data_file(n):
        record['i'] = random.randint(0, highest_mark)
        collection.insert(record)
View randomdocs
def load_data(collection, n=100):
    """Insert the records from load_data_file(n), leaving gaps in 'i'.

    Each record's 'i' key is set to the first element of the pair yielded
    by load_data_file; records whose value appears in the skip list are not
    inserted.
    """
    # let's skip some elements
    omitted = [10, 12, 231, 2, 4]
    for index, record in load_data_file(n):
        record['i'] = index
        if index not in omitted:
            collection.insert(record)


# Run the loader against the collection with n=100.
load_data(collection, 100)
View randomdocs
# Take the document count from the collection itself instead of a
# hard-coded number.
number_of_documents = collection.count()
View randomdocs
# Connect using MongoClient's defaults and select the `names` collection of
# the `simplerandom` database.
mc = MongoClient()
db = mc.simplerandom
collection = db.names
number_of_documents = 100
load_data(collection, number_of_documents)
# random.randint includes both endpoints. Assuming load_data numbers the
# documents 0 .. number_of_documents - 1 (TODO confirm against the loader in
# use), the upper bound must be number_of_documents - 1; otherwise the query
# can ask for an 'i' value that no document has.
query = {'i': random.randint(0, number_of_documents - 1)}
View randomdocs
def load_data(collection, n=100):
    """Insert `n` documents with random names into `collection`.

    Each document has the form {'name': <str>, 'i': <int>}, where 'i' runs
    from 0 to n - 1 in insertion order and 'name' is 20 distinct ASCII
    letters drawn at random.

    :param collection: object exposing an ``insert(document)`` method.
    :param n: number of documents to insert; nothing is inserted when n <= 0.
    """
    # for each element we will insert the `i` value
    # range and string.ascii_letters exist on both Python 2 and Python 3,
    # unlike xrange and the locale-dependent string.letters.
    for i in range(n):
        # random.sample draws without replacement, so the 20 letters differ.
        name = ''.join(random.sample(string.ascii_letters, 20))
        collection.insert({'name': name, 'i': i})
You can’t perform that action at this time.