#!/bin/bash
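# Benchmark harness for Spark checkpointing variants.
# For each variant below, the script regenerates the ScalaTest suite
# CheckpointingTest.scala (ten identical word-count jobs over
# /mnt/ceph/thwikipedia.data, each cached and checkpointed before counting),
# runs it on the cluster for 5 rounds via `sbt testOnCluster`, and archives
# the sbt logs, Spark event logs, and checkpoint storage usage under
# /home/aorjoa/Desktop/TestReport/NEW_DATA/ before wiping both checkpoint dirs.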
echo "rdd-WordCount-sparktest-sha256-java-checkpoint-with-cache-in-one-test"
testname="rdd-WordCount-sparktest-sha256-java-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
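# Each round: run the suite once, archive its sbt log and the Spark event logs,
# record checkpoint storage usage, then wipe both checkpoint directories.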
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=java -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "rdd-WordCount-sparktest-sha256-kryo-checkpoint-with-cache-in-one-test"
testname="rdd-WordCount-sparktest-sha256-kryo-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data")
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupBy(_.toLowerCase)
.map(w => (w._1, w._2.size)).cache()
counts.sparktestCheckpoint()
assert(counts.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=kryo -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "dataset-WordCount-original-checkpoint-with-cache-in-one-test"
testname="dataset-WordCount-original-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.checkpoint()
assert(ds.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=parquet -Dhash=md5 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "dataset-WordCount-sparktest-md5-parquet-checkpoint-with-cache-in-one-test"
testname="dataset-WordCount-sparktest-md5-parquet-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=parquet -Dhash=md5 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "dataset-WordCount-sparktest-sha1-parquet-checkpoint-with-cache-in-one-test"
testname="dataset-WordCount-sparktest-sha1-parquet-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=parquet -Dhash=sha1 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "dataset-WordCount-sparktest-sha256-parquet-checkpoint-with-cache-in-one-test"
testname="dataset-WordCount-sparktest-sha256-parquet-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=parquet -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "dataset-WordCount-sparktest-md5-avro-checkpoint-with-cache-in-one-test"
testname="dataset-WordCount-sparktest-md5-avro-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=avro -Dhash=md5 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "dataset-WordCount-sparktest-sha1-avro-checkpoint-with-cache-in-one-test"
testname="dataset-WordCount-sparktest-sha1-avro-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=avro -Dhash=sha1 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "dataset-WordCount-sparktest-sha256-avro-checkpoint-with-cache-in-one-test"
testname="dataset-WordCount-sparktest-sha256-avro-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=avro -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done
echo "dataset-WordCount-sparkflow-checkpoint-with-cache-in-one-test"
testname="dataset-WordCount-sparkflow-checkpoint-with-cache-in-one-test"
echo -e '''
package th.ac.sut.aiyara.sparktest
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint}
import com.bloomberg.sparkflow
import org.scalatest.FunSuite
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc}
/**
* Created by Bhuridech Sudsee.
*/
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{
// Checkpoint directory
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/")
test("Test spark MapReduce wordcount job#1") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#2") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#3") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#4") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#5") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#6") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#7") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#8") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#9") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
test("Test spark MapReduce wordcount job#10") {
import spark.implicits._
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String]
val counts = textFile.flatMap(_.split(" "))
.filter(_ != "")
.groupByKey(_.toLowerCase).keys.cache()
val ds = counts.sparktestCheckpoint(counts.getEncoder)
assert(ds.count() === 12670265)
}
}
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala
for round in {1..5};do
time for testno in {1..1};do
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno
sbt testOnCluster -Dserializer=avro -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/.
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/.
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round
rm -rf /mnt/ceph/sparktestCheckpoint/*
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done