Skip to content

Instantly share code, notes, and snippets.

View gist:2a6e3e22266ca51a6aef
import com.typesafe.config.ConfigFactory
import{JdbcReader, Reader}
import star.{Loader, StarConfig}
* Created by markmo on 12/03/2016.
class StarSpec extends UnitSpec {
val starConf = new StarConfig(ConfigFactory.load("star.conf"))
markmo / gist:7653ba15bd054887286e
Created Mar 12, 2016
Transformation DSL Example
View gist:7653ba15bd054887286e
"DSL notation" should "execute a pipeline" in {
import diamond.transformation._
// import helper functions such as fieldLocator
import RowTransformation._
// create a new context object that can pass state between transformations
val ctx = new TransformationContext
View gist:b057e2537b377e23005c
import java.util.Calendar
import{CSVSink, CSVSource}
import diamond.models.{AttributeType, Event, Feature}
import{FeatureStore, FeatureStoreRepository}
import diamond.transform.TransformationContext
import diamond.transform.row.{AppendColumnRowTransformation, RowTransformation}
import diamond.transform.sql.NamedSQLTransformation
import diamond.transform.table.RowTransformationPipeline
import diamond.utility.dateFunctions._
View gist:2c775674307bff284f2b
import java.util.Calendar
import diamond.models.Event
import diamond.transform.eventFunctions._
import diamond.utility.dateFunctions._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
import org.apache.spark.streaming.Duration
View gist:84c4fa00a7e03f673d62
* Created by markmo on 20/01/2016.
class DemoSpec extends UnitSpec {
"Data" should "be processed" in {
val demo ="$BASE_URI/$LAYER_RAW/Customer_Demographics.parquet")
val tx ="$BASE_URI/$LAYER_RAW/Customer_Transactions.parquet")
View gist:71fc668207b2f9cb2e4d
* Alternative implementation using a Custom Interpolator.
* @param sc StringContext
implicit class TemplateHelper(val sc: StringContext) extends AnyVal {
def t(args: Any*): String = {
val strings =
val expressions = args.iterator
View gist:87e2a11bce0fadf8e773
package diamond.transform
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType, TimestampType}
* Created by markmo on 12/12/2015.
object schemas {
val inputSchema = StructType(Seq(
markmo / gist:2f79e46397567bca7acf
Created Mar 12, 2016
Spark Transformation DSL
View gist:2f79e46397567bca7acf
package diamond.transform
import org.apache.spark.sql.{Row, DataFrame}
import org.apache.spark.sql.types.DataType
import scala.language.implicitConversions
* All this is WIP - NOT READY
View gist:c8ebb803b06b649ad48d
trait FeatureType[V] {
val name: String
trait FeatureX[V] extends FeatureType[V] {
val value: V
case class FeatureCollection(id: Int,
View gist:2493df23cc96aca8f851
package diamond.models
import java.util.Date
* @param entity String entity id (usually hashed)
* @param eventType String attribute name
* @param ts Date event timestamp
* @param namespace String logical grouping
* @param session Option[String] session id