Skip to content

Instantly share code, notes, and snippets.

# Word count over one part file, printing the first 100 (word, count) pairs.
# `sc` is the SparkContext that the pyspark shell provides.
word_counts = (
    sc.textFile("/public/randomtextwriter/part-m-00000")
    # Split every line on single spaces to get individual tokens.
    .flatMap(lambda rec: rec.split(" "))
    # Pair each token with an initial count of 1.
    .map(lambda rec: (rec, 1))
    # Sum the counts per token.
    .reduceByKey(lambda total, value: total + value)
)

# take(100) brings at most 100 pairs back to the driver; the loop body must be
# indented for the snippet to be valid Python.
for i in word_counts.take(100):
    print(i)
// Spark configuration for the word-count job: application name "Word Count",
// run against a local master. Trailing-dot chaining keeps the statement
// paste-able into the Scala REPL.
val conf = new SparkConf().
  setAppName("Word Count").
  setMaster("local")
package wordcount
/**
* Created by itversity on 25/03/17.
* spark-submit
spark-submit \
--class wordcount.WordCount \
/Users/itversity/IdeaProjects/sands/target/scala-2.10/sands_2.10-1.0.jar \
dev /Users/itversity/Research/data/wordcount.txt /Users/itversity/Research/data/wc
*/
// sbt build definition for the demo-spark-scala project.
name := "demo-spark-scala"

version := "1.0"

scalaVersion := "2.11.8"

// Spark core (artifact built for Scala 2.11) and Typesafe Config.
libraryDependencies ++= Seq(
  "org.apache.spark" % "spark-core_2.11" % "1.6.2",
  "com.typesafe" % "config" % "1.3.0"
)
package wordcount
import com.typesafe.config._
import org.apache.spark.SparkContext, org.apache.spark.SparkConf
import org.apache.hadoop.fs._
object WordCount {
def main(args: Array[String]) {
val appConf = ConfigFactory.load()
/**
* Created by itversity on 21/02/17.
*/
import java.sql.DriverManager
import com.typesafe.config._
case class EmployeesCommission(first_name: String,
last_name: String,
salary: Double,
// sbt build definition for the wlabs project.
name := "wlabs"

version := "1.0"

scalaVersion := "2.11.8"

// MySQL JDBC driver and Typesafe Config.
libraryDependencies ++= Seq(
  "mysql" % "mysql-connector-java" % "5.1.36",
  "com.typesafe" % "config" % "1.3.1"
)
# Connection settings grouped under the "dev" prefix: host, port, database name,
# user and password.
# NOTE(review): presumably loaded through Typesafe Config (ConfigFactory.load())
# and selected by an environment argument such as "dev" - confirm against the caller.
dev.host = nn01.itversity.com
dev.port = 3306
dev.db = hr
dev.user = hr_ro
# NOTE(review): the password is stored in plain text; consider supplying it from
# outside version control.
dev.pw = itversity
//Here is the exercise - http://discuss.itversity.com/t/exercise-09-scala-and-spark-political-analysis-for-the-state-of-up/2907
// Read the tab-separated 2014 election results file as an RDD of lines.
// `sc` is the SparkContext supplied by the surrounding session.
val fileContents = sc.textFile("/Users/itversity/Research/data/elections/ls2014.tsv")

// Drop the first record of partition 0 (presumably the header row) and pass
// every other partition through untouched.
val data = fileContents.mapPartitionsWithIndex { (partitionId, records) =>
  if (partitionId == 0) records.drop(1) else records
}

// Keep only the records whose first tab-separated field is "Uttar Pradesh".
val upData = data.filter { record =>
  val firstField = record.split("\t")(0)
  firstField == "Uttar Pradesh"
}
val upDataMap = upData.
map(rec =>
{
# base directory of retail_db and output path are passed as arguments
# spark-submit --master local daily_revenue.py /Users/itversity/Research/data/retail_db /Users/itversity/Research/revenue_per_day
from pyspark import SparkContext, SparkConf
import sys
# Configure the job step by step: application name first, then a local master.
conf = SparkConf()
conf.setAppName("Daily Revenue")
conf.setMaster("local")

# Create the context from that configuration.
sc = SparkContext(conf=conf)

# The first command-line argument is the base directory; the orders data set
# is read from its "orders" sub-directory.
orders = sc.textFile("%s/orders" % sys.argv[1])