Skip to content

Instantly share code, notes, and snippets.

View ldacosta's full-sized avatar

Luis Da Costa ldacosta

View GitHub Profile
from os import path
from Utils.programming.ut_find_folders import *
from ReinforcementLearning.NHL.playbyplay.playbyplay_data import *
# Filesystem locations used by this module: the database root, the code
# checkout, and two sub-folders that live under the database root.
db_root = '/Users/hectorb/Desktop/Younes/Databases/Hockey'
repoCode = '/Users/hectorb/Desktop/Younes/Code/NHL_stats_SL'
repoPbP, repoPSt = (
    path.join(db_root, sub_folder)
    for sub_folder in ('PlayByPlay', 'PlayerStats/player')
)
import itertools
from abc import abstractmethod
from typing import List, Set, Callable, Tuple, Dict
from enum import Enum, auto
class PlayerType(Enum):
    """Three-way player category: defensive, offensive or neutral.

    Member values are produced by ``auto()``, so they are 1, 2 and 3 in
    declaration order; compare members by identity rather than by value.
    """
    DEFENSIVE = auto()
    OFFENSIVE = auto()
    NEUTRAL = auto()
def summarize2(dsSEMUnionClean: Dataset[SEMUnionClean]): Dataset[SEMUnionClean] = {
import dsSEMUnionClean.sparkSession.implicits._
import org.apache.spark.sql.expressions.scalalang.typed.{
count => typedCount,
sum => typedSum
}
val xxx =
dsSEMUnionClean.groupByKey(r => (r.account_descriptive_name, r.advertiser_id, r.advertiser_name, r.campaign_id, r.campaign_name, r.source))
.agg(
package com.mediative.mpn.brain.datascience.platform
package nationals.recommendations.strategy
import com.cra.figaro.library.atomic.continuous.{ AtomicNormal, Normal }
import com.holdenkarau.spark.testing.{ DatasetGenerator, DatasetSuiteBase }
import com.mediative.mpn.brain.datascience.platform.nationals.generators
import org.apache.spark.sql.Dataset
import org.scalacheck.Gen
import org.scalatest.FreeSpec
import org.scalatest.prop.GeneratorDrivenPropertyChecks
@ldacosta
ldacosta / GenericDataWriter.scala
Last active February 12, 2016 21:58
I want to write a data writer that is generic for the Warehouse API's data
/**
* I want to write a data writer that is generic for the Warehouse API's data
* however, this won't compile as d.toDF needs some implicits that can only be found if
* (1) T is a case class (that is why T <: Product) and
* (2) T is defined in the proper place (see https://issues.scala-lang.org/browse/SI-6649 or http://stackoverflow.com/questions/33704831/value-todf-is-not-a-member-of-org-apache-spark-rdd-rdd)
*
* I think step (1) is OK, but the typetags (step (2)) are not working.
*/
case class GenericDataWriter[T <: Product](name: String, sqlC: SQLContext, stage: Stage, fmt: Format) extends Serializable with Logging with DataWriter[T] {
/////////////////////////////////
// Generic
/**
 * Generic model that is trained on pairs and then queried for predictions.
 *
 * @tparam T1 type of the first element of each training pair; also the argument type of `predict`
 * @tparam T2 type of the second element of each training pair; paired with a `Confidence` in the result of `predict`
 */
trait Model[T1,T2] {
// Trains on a list of (T1, T2) pairs; currently declared to return nothing.
def train(t: List[(T1, T2)]): Unit // or Boolean?
// Returns a set of (T2, Confidence) pairs for the given T1 value.
// NOTE(review): `Confidence` is not defined in this snippet — presumably a score type; confirm.
def predict(t: T1): Set[(T2, Confidence)]
}
/**
 * A [[Model]] that carries an underlying linear model instance.
 *
 * @tparam T1 first type parameter, passed straight through to [[Model]]
 * @tparam T2 second type parameter, passed straight through to [[Model]]
 */
trait LinearModel[T1, T2] extends Model[T1, T2] {
  // Supplied by the implementing class. Declared `protected` rather than
  // `private`: this member has no initializer, so it is abstract, and Scala
  // rejects the `private` modifier on abstract members.
  protected val m: MLLibLinearModel // -ish
}
@ldacosta
ldacosta / jodaDateTime.scala
Created January 7, 2016 21:29
Getting used to calculations with JODA DateTime
import com.github.nscala_time.time.Imports._
import org.joda.time.Days
// Current time, read in the America/Toronto zone.
val nowInToronto: DateTime = {
  val torontoZone = DateTimeZone.forID("America/Toronto")
  DateTime.now(torontoZone)
}

// A second DateTime built from the first one, but in the UTC zone.
val nowInTorontoInUTC: DateTime =
  new org.joda.time.DateTime(nowInToronto, DateTimeZone.UTC)

// Print both representations, Toronto first.
Seq(nowInToronto, nowInTorontoInUTC).foreach(dt => println(dt.toString))

// Sanity check: no whole day lies between the two values.
assert(
  Days.daysBetween(nowInToronto, nowInTorontoInUTC).getDays == 0
)
private def templateForATest(dir: String, sql: SQLContext) = {
// whatever structure we want to test
val good = CleanData(reportDate = Timestamp.valueOf("2015-08-01 19:20:21"), // yyyy-[m]m-[d]d hh:mm:ss
date = Timestamp.valueOf("2015-08-01 19:20:21"),
impressions = 10,
clicks = 20,
totalConversions = 33,
impressionsConvergenceProbability = Some(0.99),
clicksConvergenceProbability = Some(0.99),
totalConversionsConvergenceProbability = Some(0.99))
@ldacosta
ldacosta / MixedinTrait.scala
Last active August 29, 2015 14:24
I want to mix in ATrait (below) to get its functionality on MyClass. However, ATrait defines AType, which I want to reference at declaration time. Is this possible?
// Stuff below doesn't compile
// Trait intended to be mixed in: it requires `something` and declares the nested type `AType`.
trait ATrait {
// Abstract member: must be provided by whatever extends this trait.
val something: Int
// <some stuff here>
// AType is declared as a member of ATrait, so it is in scope inside the trait body.
case class AType[T](value: T)
}
// Fails to compile: AType is a member of ATrait and is not in scope in MyClass's constructor parameter list.
// NOTE(review): AType also takes a type parameter, which `AType => Int` does not supply — confirm the intended type.
case class MyClass(something: Int, f: AType => Int) extends ATrait // error: not found: type AType
// this version needs _something_ to be defined:
object Wrapper extends ATrait { // error: something undefined
//
/**
 * Base abstraction for a model parameterised by `T`.
 *
 * @tparam T type the model is parameterised over (not used by the members declared here)
 */
trait Model[T] {
// Name of the model.
def name: String // TODO: not sure of its utility
// Returns a sequence of doubles.
// NOTE(review): presumably a random sample drawn from the model — confirm against implementations.
def randomPick: Seq[Double]
}
/**
 * A `Model[T]` that additionally requires an implicit `Continuous[T]` instance.
 *
 * @tparam T type the model is parameterised over
 */
trait ContinuousModel[T] extends Model[T] {
// Abstract implicit value: implementers must supply it.
// NOTE(review): `Continuous` is not defined in this snippet — presumably a type class for continuous values; confirm.
implicit val continuousOpt: Continuous[T]
}