Skip to content

Instantly share code, notes, and snippets.

View ahoy-jon's full-sized avatar
🐋
Thinking!

Jonathan Winandy ahoy-jon

🐋
Thinking!
View GitHub Profile
package meta
import org.apache.spark.SparkConf
import org.apache.spark.sql.types.{ArrayType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}
import org.sat4j.core.{Vec, VecInt}
import org.sat4j.maxsat.{MinCostDecorator, SolverFactory, WeightedMaxSatDecorator}
import org.sat4j.pb.IPBSolver
import utils.Gouache
@ahoy-jon
ahoy-jon / testJoins.scala
Created December 28, 2017 22:39
Spark Joins by forcing the partitioner on materialized data
package stategolf
import java.util.UUID
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession}
import org.apache.spark.storage.StorageLevel
object TryOps {
import scala.util.{Failure, Try, Success}
import scala.annotation.tailrec
/*can be used as :
(Try {
HTTP.get(".....")
}).retryGet(3)
*/
#Query: select cntan, count(*) as nbrows from dw_ssa_audience.modeleh m, (select count(*) cntan from m.annotations) acn where no_event=20170612 group by cntan
cntan,nbvisiteurs
463,2
578,3
974,2
192,26
207,11
982,2
125,49
883,1
import org.apache.spark.rdd.{CoGroupedRDD, RDD}
import org.apache.spark.sql.SparkSession
import org.apache.spark.{HashPartitioner, Partitioner}
import shapeless._
import shapeless.ops.hlist.{ToTraversable, Tupler}
import shapeless.ops.traversable.FromTraversable
import scala.language.higherKinds
import scala.reflect.ClassTag
package io.univalence.sparktools.kpialgebra
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import shapeless.contrib.spire._
import spire.algebra._
import spire.implicits._
import scala.reflect.ClassTag
val result = stRdd.groupBy(_.id).map({
case (ig, vars) =>
val svars: Seq[SomeTable] = vars.toSeq.sortBy(_.updateAt).reverse
def ex(f: SomeTable => String): String = svars.map(f).find(_ != null).orNull
def ex2(f: SomeTable => String): (String, Timestamp) = {
svars.map(x => (f(x), x.updateAt)).find(_._1 != null).getOrElse((null, null))
}
@ahoy-jon
ahoy-jon / RDD.scala
Last active October 14, 2016 14:26
BlogBetterUADF
val stRdd: RDD[SomeTable] = ???
val result: RDD[SomeTable] = stRdd.groupBy(_.id).map({
case (ig, vars) =>
//Tri par date à l'intérieur du group
val svars: Seq[SomeTable] = vars.toSeq.sortBy(_.updateAt).reverse
//Définition de la fonction pour trouver le premier attribut non null de SomeTable
def ex(f: SomeTable => String): String = svars.map(f).find(_ != null).orNull
//Définition de max pour un attribut de SomeTable
// Materialize the data as a DataFrame so it can be queried with Spark SQL.
// NOTE(review): `parallelize` is defined outside this excerpt — presumably a Seq/RDD of SomeTable
// with implicit encoders in scope; confirm against the surrounding gist.
val f1: DataFrame = parallelize.toDF()
// Register the DataFrame as a SQL temp table named "st".
// NOTE(review): registerTempTable is deprecated since Spark 2.0 — prefer createOrReplaceTempView.
f1.registerTempTable("st")
// show(false): print rows without truncating long column values.
f1.show(false)
/*
+---+------+------+----------------------+------+------+
|id |value1|value2|updateAt |value3|value4|
+---+------+------+----------------------+------+------+
|id1|t1 |r1 |2016-10-14 05:23:50.51|10 |null |
|id1|t2 |null |2016-10-15 05:23:50.51|12 |null |
@ahoy-jon
ahoy-jon / bibliothèque 2014-09
Last active September 6, 2016 15:24
Stockage 2014-09 suite à un déménagement.
L’orient ancien et nous
Programming Erlang, second edition: software for a concurrent world
Programming Collective intelligence
Gödel escher bach
Carnet de croquis (l’éléphant nantes )
Ethics of big data
Real world haskell
Trois volumes « The Art of Computer Programming »
Getting started with storm
Les grands philosophes