Skip to content

Instantly share code, notes, and snippets.

@helxsz
Last active August 29, 2015 14:05
Show Gist options
  • Save helxsz/8a4882ed949e9a30c860 to your computer and use it in GitHub Desktop.
Save helxsz/8a4882ed949e9a30c860 to your computer and use it in GitHub Desktop.
{
"isActive": true,
"balance": "$2,504.16",
"age": 31,
"eyeColor": "green",
"name": "Mendez Gilmore",
"gender": "male",
"registered": "2014-07-20T04:47:48 -01:00",
"latitude": 79.452285,
"longitude": -163.151555
},
{
"isActive": true,
"balance": "$3,791.11",
"age": 20,
"eyeColor": "blue",
"name": "Lynn Santana",
"gender": "male",
"registered": "2014-06-22T23:00:44 -01:00",
"latitude": -3.60444,
"longitude": -147.956637
},
{
"isActive": true,
"balance": "$2,450.66",
"age": 31,
"eyeColor": "blue",
"name": "Kelley Shaffer",
"gender": "female",
"registered": "2014-05-17T08:36:55 -01:00",
"latitude": 29.1312,
"longitude": -36.645241
},
{
"isActive": false,
"balance": "$1,794.35",
"age": 23,
"eyeColor": "brown",
"name": "Gardner Lamb",
"gender": "male",
"registered": "2014-03-21T09:29:33 -00:00",
"latitude": 18.80767,
"longitude": 153.42069
},
{
"isActive": false,
"balance": "$1,913.56",
"age": 22,
"eyeColor": "green",
"name": "Lambert Daugherty",
"gender": "male",
"registered": "2014-06-12T12:22:10 -01:00",
"latitude": -48.750177,
"longitude": -172.932591
},
{
"isActive": true,
"balance": "$3,338.16",
"age": 32,
"eyeColor": "green",
"name": "Pickett Bradshaw",
"gender": "male",
"registered": "2014-04-14T09:02:57 -01:00",
"latitude": -19.16996,
"longitude": 121.900084
},
{
"isActive": false,
"balance": "$1,613.38",
"age": 24,
"eyeColor": "green",
"name": "Blair Mcconnell",
"gender": "male",
"registered": "2014-03-24T05:34:30 -00:00",
"latitude": -5.684265,
"longitude": -104.292739
}
1 1 1 1
2 2 2 2
3 3 3 3
4 4 4 4
5 5 5 5
6 6 6 6
7 7 7 7
8 8 8 8
9 9 9 9
10 10 10 10
11 11 11 11
12 12 12 12
{
"facebook": {
"application": "Coupons",
"author": {
"avatar": "https://graph.facebook.com/100004343800786/picture",
"id": "100004343800786",
"link": "http://www.facebook.com/profile.php?id=100004343800786",
"name": "Flint Beastwood",
"type": "user"
},
"caption": "woobox.com",
"created_at": "Fri, 01 Aug 2014 09:36:31 +0000",
"description": "I've got my FREE Steam key for GTR Evolution from Bundle Stars and PC Gamer. Go get yours at http://www.pcgamer.com!",
"id": "100004343800786_341086752712782",
"is_share": false,
"link": "http://woobox.com/328djz",
"name": "FREE GTR Evolution Steam key from Bundle Stars and PC Gamer",
"picture": "https://fbexternal-a.akamaihd.net/app_full_proxy.php?app=174961479209942&v=1&size=z&cksum=fb17e9efd3aa14b116341c354f32f4f6&src=http%3A%2F%2Fwoobox.com%2Foffers%2Fshareimage%2F328djz%3F53ce7b0149ef2-Week5-PCGamer-Campaign.jpg",
"source": "Coupons (174961479209942)",
"type": "link"
},
"interaction": {
"author": {
"avatar": "https://graph.facebook.com/100004343800786/picture",
"id": "100004343800786",
"link": "http://www.facebook.com/profile.php?id=100004343800786",
"name": "Flint Beastwood",
"type": "user"
},
"content": "I've got my FREE Steam key for GTR Evolution from Bundle Stars and PC Gamer. Go get yours at http://www.pcgamer.com!",
"created_at": "Fri, 01 Aug 2014 09:36:31 +0000",
"id": "1e4195f51a03a980e0666e90d199ebba",
"link": "http://www.facebook.com/100004343800786_341086752712782",
"received_at": 1406885879.6349001,
"schema": {
"version": 3
},
"source": "Coupons (174961479209942)",
"subtype": "link",
"title": "FREE GTR Evolution Steam key from Bundle Stars and PC Gamer",
"type": "facebook"
},
"salience": {
"content": {
"sentiment": 0
},
"title": {
"sentiment": 0
}
}
}
// http://www.json-generator.com
// lab 1
// Read the CSV and pair column 0 with the sum of columns 1 and 3.
val rdd = sc.textFile("/root/project/exampleCsv.csv")
val lines = rdd.map { row =>
  val fields = row.split(",")
  val key = fields(0).toInt
  val total = fields(1).toInt + fields(3).toInt
  (key, total)
}
// print (not println): the pairs are written back to back on a single line.
lines.collect().foreach(print)
// lab 2
// Key each row by column 0 and keep columns 1 and 3 as a pair, then scale the pair (x2, x3).
val lines1 = rdd.map(line => {val array = line.split(",");(array(0).toInt, ( array(1).toInt,array(3).toInt) )})
// mapValues has to run on lines1, whose values are pairs; the values of `lines` from lab 1 are
// plain Ints, so v._1 / v._2 would not compile there.
lines1.mapValues(v => (v._1*2, v._2*3)).collect().foreach(print)
// lab 2.2
// One parsed row of the input: the code below reads three space-separated numeric fields per line.
case class Record(v1:Double, v2:Double, v3:Double)
val data = sc.textFile("/root/data/kmeans_data.txt")
// The three variants below build the same Records with progressively terser syntax.
// Each is run only for its printed output; foreach returns Unit, so nothing is bound to a val here.
// 1 - index into the split array
data.map(line =>{val items = line.split(" ");Record(items(0).toDouble,items(1).toDouble,items(2).toDouble)}).collect().foreach(println)
// 2 - destructure the split array with a pattern match (a line without exactly three fields throws MatchError)
data.map(line =>{line.split(" ") match{ case Array(v1, v2, v3) => Record(v1.toDouble, v2.toDouble, v3.toDouble) }}).collect().foreach(println)
// 3 - same match, written with placeholder syntax
data.map(_.split(" ") match{ case Array(v1, v2, v3) => Record(v1.toDouble, v2.toDouble, v3.toDouble) }).collect().foreach(println)
// 4
// Register the parsed records as a SQL table and query the rows whose v2 exceeds 1.
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
// Implicit conversion from an RDD of case classes to a SchemaRDD; registerAsTable relies on it.
import sqlContext.createSchemaRDD
val person = data.map { line =>
  line.split(" ") match {
    case Array(a, b, c) => Record(a.toDouble, b.toDouble, c.toDouble)
  }
}
person.registerAsTable("person")
val teens = sqlContext.sql("SELECT v1, v2, v3 from person where v2 > 1")
// 4.1
// Print every row returned by the query.
for (row <- teens.collect()) println(row)
// 4.2
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.regression.LabeledPoint
// Turn each SQL result row into a LabeledPoint (label = column 0, features = columns 0, 1, 2).
// The first two attempts are kept as comments only, because they do not compile.
// attempt 1 - error : value v1 is not a member of org.apache.spark.sql.Row (a Row has no named fields)
// val training = teens.map(record => { val features = Vectors.dense(record.v1, record.v2, record.v3); val label = record.v1; LabeledPoint(label, features)} )
// attempt 2 - error : type mismatch (row(i) is typed Any, while Vectors.dense and LabeledPoint need Double)
// val training = teens.map(row=> { val features = Vectors.dense( row(0), row(1), row(2)); val label = row(0); LabeledPoint(label, features)} )
// solution - read each column through the typed accessor getDouble
val training = teens.map(row=> { val features = Vectors.dense( row.getDouble(0), row.getDouble(1), row.getDouble(2)); val label = row.getDouble(0); LabeledPoint(label, features)} )
training.collect().foreach(println)
// lab 3
// Spread the numbers 1..9 over three partitions and print every element, partition by partition.
val b = sc.parallelize((1 to 9).toList, 3)
b.foreachPartition { partition =>
  for (value <- partition) println("value :"+value)
}
// lab
// Drop duplicate (key, value) pairs, then double each remaining value.
val a = sc.parallelize(Seq((1,1),(1,1),(1,3),(3,5),(3,5),(3,7)),2)
val doubled = a.distinct().mapValues(_ * 2)
for (pair <- doubled.collect()) println(pair)
// https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
import org.json4s.jackson.JsonMethods
import org.json4s.jackson.JsonMethods._
import org.json4s.JsonAST._
import org.json4s.DefaultFormats
// problem lab 4
// Parse each line of a.json and emit (gender, (name, age)) as JSON nodes.
// NOTE: sc.textFile yields one record per line, so every line of the file must be one complete JSON object.
val a = sc.textFile("/root/project/a.json")
// The tuple needs its own closing parenthesis before map(...) is closed; without it
// .collect() ends up inside the lambda and the map call is left unterminated.
a.map(line => parse(line)).map(json => (json \ "gender", (json \ "name", json \ "age"))).collect().foreach(println)
// problem lab 5
// Same projection as lab 4, on in-memory data.
// parse expects exactly one JSON document per RDD element, so the three records are separate
// list elements instead of one comma-joined string with a trailing comma.
val a = sc.parallelize(List(
  """{"name":"Yin","age":25,"gender":"male"}""",
  """{"name":"Kin","age":23,"gender":"male"}""",
  """{"name":"xin","age":29,"gender":"female"}"""
))
// The tuple is closed before map(...) so that collect() runs on the RDD, not inside the lambda.
a.map(line => parse(line)).map(json => (json \ "gender", (json \ "name", json \ "age"))).collect().foreach(println)
// problem lab 6
// Keep only the records of facebook.json whose facebook.application is "Coupons".
// NOTE: sc.textFile yields one record per line, so every line of the file must be one complete JSON document.
val lines = sc.textFile("/root/data/facebook.json")
// Compare the JSON node directly instead of calling extract[String]: extract needs an implicit
// Formats, and shipping DefaultFormats inside the closure raised the exception quoted below.
// A record without the field yields JNothing, which simply fails the comparison.
lines.map(line => parse(line)).filter(json => (json \ "facebook" \ "application") == JString("Coupons")).collect().foreach(println)
// https://gist.github.com/cotdp/fda64b4248e43a3c8f46
// Error raised by the earlier extract[String] version:
// org.apache.spark.SparkException: Job aborted due to stage failure: Task not serializable: java.io.NotSerializableException: org.json4s.DefaultFormats$
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment