Afsal Thaj (afsalthaj)
import scalaz._, Scalaz._
import StateInfo._

// A nice functional way to produce a simple table from a tree. The code is minimal, but in
// real life the tree can be nested far more deeply, and that is where threading State becomes
// important. The traversal walks the tree's keys and values and pads each cell with spaces to
// match the maximum key and value widths encountered up to that point, so the columns line up.
sealed trait Tree
case class Leaf(key: String, value: String) extends Tree
case class Both(left: Tree, right: Tree) extends Tree
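As a minimal sketch of that idea, here is a simplified two-pass variant (the `Widths` state and the helper names below are assumptions, not the gist's actual code) that threads the running column widths through the traversal with scalaz's `State`:

```scala
// Running maximum widths of the key and value columns (hypothetical helper).
final case class Widths(key: Int, value: Int)

// First pass: accumulate the widest key and value seen so far.
def widths(tree: Tree): State[Widths, Unit] = tree match {
  case Leaf(k, v) => State.modify(w => Widths(w.key max k.length, w.value max v.length))
  case Both(l, r) => widths(l).flatMap(_ => widths(r))
}

// Second pass: flatten to rows and pad every cell to the maximum width.
def rows(tree: Tree): List[(String, String)] = tree match {
  case Leaf(k, v) => List((k, v))
  case Both(l, r) => rows(l) ++ rows(r)
}

def table(tree: Tree): String = {
  val w = widths(tree).exec(Widths(0, 0))
  rows(tree)
    .map { case (k, v) => k.padTo(w.key, ' ') + " | " + v.padTo(w.value, ' ') }
    .mkString("\n")
}
```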
# If you come from bash you might have to change your $PATH.
# export PATH=$HOME/bin:/usr/local/bin:$PATH
# Path to your oh-my-zsh installation.
export ZSH="/Users/afsalthaj/.oh-my-zsh"
# Set name of the theme to load --- if set to "random", it will
# load a random theme each time oh-my-zsh is loaded, in which case,
# to know which specific one was loaded, run: echo $RANDOM_THEME
# See https://github.com/robbyrussell/oh-my-zsh/wiki/Themes
object RecursionSchemes extends App {
  import PropertyTreeX._

  sealed trait PropertyTreeX[+K, +V, +A]

  object PropertyTreeX {
    final case object EmptyX extends PropertyTreeX[Nothing, Nothing, Nothing]
  }

  // Type-inequality evidence: implicit search finds `neq` when C and D differ,
  // and becomes ambiguous (failing with the message below) when they are equal.
  trait =!=[C, D]

  object NoFeatureS {
    implicit def neq[E, F]: E =!= F = new =!=[E, F] {}

    @annotation.implicitAmbiguous("Could not prove ${J} =!= ${J}")
    implicit def neqAmbig1[G, H, J]: J =!= J = new =!=[J, J] {}
    implicit def neqAmbig2[I]: I =!= I = new =!=[I, I] {}
  }
}
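A quick hypothetical use of `=!=` (the `distinct` method below is illustrative, not from the gist): it constrains a method to compile only when its two type arguments differ.

```scala
import NoFeatureS._

// Accepts two values only if their static types are different.
def distinct[A, B](a: A, b: B)(implicit ev: A =!= B): (A, B) = (a, b)

distinct(1, "one")  // compiles: Int =!= String is derivable
// distinct(1, 1)   // fails: "Could not prove Int =!= Int" (ambiguous implicits)
```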
class CustomStringIndexer(override val uid: String)
    extends Estimator[CustomStringIndexerModel]
    with Params
    with HasConstrainedHandleInvalid
    with MaxLabelSize
    with HasInputCols
    with HasOutputCols
    with ConsiderNullAsCategory
    with DefaultParamsWritable {
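Being an `Estimator`, it would be used with the standard Spark ML fit/transform pattern; a hypothetical call (`df` and the uid string are assumptions, and column configuration depends on the custom traits above):

```scala
val indexer = new CustomStringIndexer("customStringIndexer")
val model: CustomStringIndexerModel = indexer.fit(df)  // learn the label index
val indexed = model.transform(df)                      // append indexed columns
```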
import org.apache.spark.ml.tuning.{CrossValidator, CrossValidatorModel}
import org.apache.spark.sql.Dataset

/**
 * Warning: this custom validator focuses predominantly on performance improvements
 * and departs from the original CrossValidator in a few significant ways.
 * It hasn't been tested thoroughly, although we have run it successfully and with
 * better performance.
 *
 * TODO: test thoroughly. Use at your own risk.
 */
class BetterCrossValidator extends CrossValidator {
override def fit(dataset: Dataset[_]): CrossValidatorModel = {
```scala
scala> list.collect
res0: Array[org.apache.spark.sql.Row] = Array([1.0], [1.0], [1.0], [2.0], [2.0], [0.0])

scala> list
res1: org.apache.spark.sql.DataFrame = [a_indexed: double]

scala> val table1 = list
table1: org.apache.spark.sql.DataFrame = [a_indexed: double]
```

Spark with Bucketing

Inspect the execution plan of Spark on bucketed datasets and verify that it is smart enough to avoid a wide dependency (i.e., a shuffle).

PS: When trying things out in spark-shell, note that for small datasets the join will probably show up as a broadcast exchange in the physical execution plan by default. Example:

./spark-shell
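A minimal sketch of the experiment (table names, bucket count, and data sizes are assumptions): disable auto-broadcast so the small test data doesn't mask the shuffle behaviour, write both sides bucketed on the join key, and inspect the plan.

```scala
// Force a sort-merge join even for tiny data, so bucketing effects are visible.
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1L)

// Write both sides bucketed (and sorted) on the join key, with the same bucket count.
spark.range(100).withColumnRenamed("id", "key")
  .write.bucketBy(4, "key").sortBy("key").saveAsTable("t1")
spark.range(100).withColumnRenamed("id", "key")
  .write.bucketBy(4, "key").sortBy("key").saveAsTable("t2")

// With matching bucket counts there should be no Exchange on either side of the join.
spark.table("t1").join(spark.table("t2"), "key").explain()
```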

import zio.{Task, ZIO}

trait ConfigModule {
  def configService: ConfigService
}

trait ConfigService {
  def config: Task[String]
}
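A hypothetical live wiring of the module (the environment-variable lookup is an assumption; `Task.effect` is the ZIO 1.x constructor for suspending side effects):

```scala
val live: ConfigModule = new ConfigModule {
  val configService: ConfigService = new ConfigService {
    // Suspend the side-effecting read in a Task.
    def config: Task[String] =
      Task.effect(sys.env.getOrElse("APP_CONFIG", "default"))
  }
}

// Call sites depend only on the trait:
val program: Task[String] = live.configService.config
```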
import com.cronutils.model._
import com.cronutils.model.definition._
import java.time._
import com.cronutils.parser.CronParser
import com.cronutils.model.time.ExecutionTime
import CronOps._
import StepDirection._
import java.util.Optional
import scala.util.Try
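As a minimal sketch of what these imports enable (the QUARTZ cron type and the expression are assumptions), cron-utils parses an expression and computes the next execution time:

```scala
// Build a parser for Quartz-style cron expressions.
val definition: CronDefinition =
  CronDefinitionBuilder.instanceDefinitionFor(CronType.QUARTZ)
val parser = new CronParser(definition)

// "At 12:00 every day" in Quartz syntax.
val cron: Cron = parser.parse("0 0 12 * * ?")

// Next execution after now; the Optional is empty if there is none.
val next: Optional[ZonedDateTime] =
  ExecutionTime.forCron(cron).nextExecution(ZonedDateTime.now())
```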