Start a new tmux session:
tmux
Start a new tmux session with a name:
tmux new -s myname
import javax.crypto.Cipher; | |
import javax.crypto.spec.IvParameterSpec; | |
import javax.crypto.spec.SecretKeySpec; | |
import java.security.SecureRandom; | |
import java.util.Base64; | |
// NOTE(review): truncated snippet — only the constants of AESUtil are visible
// here; the cipher methods and the closing brace are not in this excerpt.
// The trailing "| |" tokens look like table-extraction residue, not Java.
public class AESUtil { | |
// Cipher family name used when building the SecretKeySpec.
private static final String ALGORITHM = "AES"; | |
// Full transformation string: AES in CFB mode with PKCS5 padding.
private static final String CIPHER_ALGORITHM = "AES/CFB/PKCS5Padding"; | |
// AES key size in bits.
private static final int KEY_SIZE = 128; |
// Apply the same aggregate function ("sum") to a list of columns.
// https://stackoverflow.com/questions/33882894/spark-sql-apply-aggregate-functions-to-a-list-of-columns
val Claim1 = StructType(Seq(
  StructField("pid", StringType, true),
  StructField("diag1", StringType, true),
  StructField("diag2", StringType, true),
  StructField("allowed", IntegerType, true),
  StructField("allowed1", IntegerType, true)
))
// Sample claim rows: (pid, diag1, diag2, allowed, allowed1).
val claimsData1 = Seq(
  ("PID1", "diag1", "diag2", 100, 200),
  ("PID1", "diag2", "diag3", 300, 600),
  ("PID1", "diag1", "diag5", 340, 680),
  ("PID2", "diag3", "diag4", 245, 490),
  ("PID2", "diag2", "diag1", 124, 248)
)
val claimRDD1 = sc.parallelize(claimsData1)
// Convert each tuple into a Row so it can be paired with the explicit schema.
val claimRDDRow1 = claimRDD1.map(t => Row(t._1, t._2, t._3, t._4, t._5))
val claimRDD2DF1 = sqlContext.createDataFrame(claimRDDRow1, Claim1)
val l = List("allowed", "allowed1")
// Column-name -> aggregate-function map, e.g. Map("allowed" -> "sum", ...),
// in the shape expected by DataFrame.agg(exprs).
val exprs = l.map(c => c -> "sum").toMap
// Print only the numeric cells of every collected row of `df`.
df.collect().foreach { row =>
  row.toSeq.collect { case n: Number => n }.foreach(println)
}
// Walk every cell of the collected DataFrame and print numeric values only.
for {
  row  <- df.collect()
  cell <- row.toSeq
} cell match {
  case n: Number => println(n)
  case _         => ()
}
import org.apache.spark.ml._ | |
import org.apache.spark.ml.util.Identifiable | |
import org.apache.spark.sql.types._ | |
// Toy labelled dataset: six (label, category) pairs over categories a/b/c.
val df = Seq(0 -> "a", 1 -> "b", 2 -> "c", 3 -> "a", 4 -> "a", 5 -> "c")
  .toDF("label", "category")
/** Runs `block` `numIter` times and prints the average wall-clock time per
  * iteration, in whole milliseconds.
  *
  * @param block   expression to benchmark (by-name: re-evaluated on every run)
  * @param numIter number of repetitions; must be positive
  * @throws IllegalArgumentException if numIter is not positive
  */
def averageTime[R](block: => R, numIter: Int = 10): Unit = {
  // Guard: numIter == 0 previously caused a divide-by-zero below.
  require(numIter > 0, "numIter must be positive")
  val start = System.nanoTime()
  (1 to numIter).foreach(_ => block)
  val elapsedNanos = System.nanoTime() - start
  // Integer nanosecond arithmetic; sub-millisecond averages print as 0ms.
  val avgMs = (elapsedNanos / numIter) / 1000000
  println(s"Elapsed time: ${avgMs}ms")
}
// 10M-row single-column DataFrame, cached — fixture for timing comparisons
// with averageTime above. NOTE(review): cache is lazy; the data is only
// materialized on the first action — confirm a warm-up action runs first.
val testDf = spark.range(10000000).toDF.cache |
// Function-literal flavour: converts a boxed numeric value to Double.
// Generalized with a Number fallback so Long/Short/Byte/BigDecimal etc. also
// convert (previously anything but Int/Float/Double threw a MatchError).
// The original three cases come first, so their behavior is unchanged.
def toDouble: (Any) => Double = {
  case i: Int    => i
  case f: Float  => f
  case d: Double => d
  case n: Number => n.doubleValue
}
// Explicit-object flavour: the same Int/Float/Double-to-Double conversion,
// but as a named Function1 value you can pass around or compose.
object any2Double extends Function[Any, Double] {
  def apply(any: Any): Double = any match {
    case i: Int    => i.toDouble
    case f: Float  => f.toDouble
    case d: Double => d
  }
}
// like when you can invoke any2Double from another similar conversion... |