Örjan Angré (Lundberg) oluies

  • Sweden
  • X @oluies
echo "15;23;35#18;14;89" | tr '#' '\012' | awk -F';' '{for (i=1;i<=NF;i++){print NR, i, $(i)} }'
import java.net.InetAddress

// Convert a dotted-quad IPv4 address (e.g. "192.168.0.1") into its unsigned 32-bit value as a Long.
def IPv4ToLong(dottedIP: String): Long = {
  val addrArray: Array[String] = dottedIP.split("\\.")
  var num: Long = 0
  var i: Int = 0
  while (i < addrArray.length) {
    val power: Int = 3 - i
    num = num + ((addrArray(i).toInt % 256) * Math.pow(256, power)).toLong
    i += 1
  }
  num
}
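A quick sanity check, not part of the gist; the java.net.InetAddress import above also allows deriving the same value from the parsed address bytes.

// Hypothetical usage check:
val asLong = IPv4ToLong("192.168.0.1")                  // 3232235521L

// Same value via InetAddress: fold the four address bytes into one unsigned 32-bit number.
val viaInet = InetAddress.getByName("192.168.0.1").getAddress
  .foldLeft(0L)((acc, b) => (acc << 8) | (b & 0xffL))   // 3232235521L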
import org.apache.spark.sql.types.{BooleanType, DoubleType, IntegerType, LongType, NumericType, ShortType, StringType, StructField, StructType, TimestampType}
import org.apache.hadoop.fs.{FileSystem, Path}

val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._

// Hand-built schema for the input files; the field list is cut off in the gist preview.
def getschemametod(): StructType = {
  StructType(
    Seq(

// Read a delimited text file into a DataFrame, optionally applying an explicit schema.
def csvToDF(file: Path, delimiter: String, charset: String = "UTF8",
            useHeader: Boolean = true, schema: Option[StructType] = None) = {
  val df = schema match {
    case Some(schema) => sqlContext.read
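      // Continuation sketch, not the gist's own code (the preview stops at the line above):
      // it assumes the com.databricks.spark.csv reader commonly paired with SQLContext in Spark 1.x.
      .format("com.databricks.spark.csv")
      .option("header", useHeader.toString)
      .option("delimiter", delimiter)
      .option("charset", charset)
      .schema(schema)
      .load(file.toString)
    case None => sqlContext.read
      .format("com.databricks.spark.csv")
      .option("header", useHeader.toString)
      .option("delimiter", delimiter)
      .option("charset", charset)
      .option("inferSchema", "true")
      .load(file.toString)
  }
  df
}
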
val sqlloggik4_df = """
SELECT *,
       CAST(id AS BIGINT) * 10000 + SUM(new_session)
           OVER (PARTITION BY id ORDER BY starttid) AS session_id
FROM (
  SELECT *,
         unix_timestamp(l.starttid) - LAG(unix_timestamp(l.starttid))
             OVER (PARTITION BY l.id ORDER BY l.starttid) AS timesincelast,
         -- a gap of 30 minutes or more starts a new session
         CASE
           WHEN unix_timestamp(l.starttid) - LAG(unix_timestamp(l.starttid))
                    OVER (PARTITION BY l.id ORDER BY l.starttid) >= 30 * 60
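             -- Sketch continuation, not shown in the gist preview (it stops at the line above);
             -- the source table name "logg" below is hypothetical.
             THEN 1
             ELSE 0
         END AS new_session
  FROM logg l
)
"""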
// Luhn checksum for a Swedish personal number (personnummer).
def checkSEPnr(pnr: String) = {
  val chars = pnr.toList
  val removeMinus = chars.view.filter(_ != '-')   // drop the '-' separator
  val charToInt = removeMinus.view.map(_ - '0')   // characters -> digits
  // Weight the first nine digits 2,1,2,1,... and sum the digits of each product.
  val lunsum: Int = charToInt.take(9).foldLeft((0, 2)) { (r, c) =>
    (r._1 + (c * r._2) / 10 + (c * r._2) % 10, if (r._2 == 2) 1 else 2)
  }._1 % 10
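  // The gist preview ends here. A sketch of the usual finish (an assumption, not shown
  // in the source): the number is valid when its 10th digit equals the Luhn check digit.
  val digits = charToInt.toList
  digits.length == 10 && digits(9) == (10 - lunsum) % 10
}

// e.g. checkSEPnr("811218-9876")  // true: a made-up but checksum-valid example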
oluies / vectorsum.scala
Created September 12, 2016 13:04
Summarize a vector in Spark
import org.apache.spark.sql.Row
import breeze.linalg.DenseVector
import org.apache.spark.mllib.linalg.{Vector, Vectors}

val t_df = sqlContext.read.parquet("/user/s89718/Pivoted_cust_weekday_total_with_Clusters.parquet")
val tm_df = t_df.select("IP_ID", "assembled")

// Zero vector used as the seed for the element-wise sum (one slot per weekday).
val emptyVector = DenseVector(Array.fill(7)(0.0))
val zeVector = tm_df
  .rdd
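  // The gist preview stops at .rdd above. A sketch of one way to finish the element-wise
  // sum (assumption: the "assembled" column holds mllib Vectors of length 7).
  .map { case Row(_, v: Vector) => DenseVector(v.toArray) }  // mllib Vector -> Breeze
  .fold(emptyVector)(_ + _)                                  // sum the vectors across all rows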
oluies / spark_unix_timestamp.scala
Created August 19, 2016 11:57
Spark: parse timestamp text into a timestamp and extract some date parts
val df = Seq((1L, "03JUN2015 19.28.00"), (2L, "#$@#@#")).toDF("id", "dts")

import org.apache.spark.sql.functions.{dayofmonth, month, unix_timestamp, year}

// Parse the text column with the matching pattern, then pull out day-of-month, month and year.
df.withColumn("ts", unix_timestamp($"dts", "ddMMMyyyy HH.mm.ss").cast("timestamp"))
  .withColumn("dom", dayofmonth($"ts"))
  .withColumn("month", month($"ts"))
  .withColumn("year", year($"ts"))
  .show(2, false)
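Not in the original gist: a row whose dts does not match the pattern (the second row above) comes back with null in ts and in every derived column, since unix_timestamp returns null when parsing fails.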