Afsal Thaj afsalthaj

## tree_to_table.scala
import scalaz.State
import scalaz._, Scalaz._
import StateInfo._

// A nice functional way to produce a simple table from a tree (minimal code; in real life the tree can be nested further and state becomes super important).
// There is nowhere it can go wrong: it traverses the table keys and values back and forth and pads with empty spaces to match the maximum size of the keys and values encountered
// up until then.
/** A binary tree whose leaves carry key/value pairs. */
sealed trait Tree

/**
 * Terminal node holding a single entry.
 *
 * @param key   the entry's key
 * @param value the entry's value
 */
final case class Leaf(key: String, value: String) extends Tree

/**
 * Inner node combining two subtrees.
 *
 * @param left  the left subtree
 * @param right the right subtree
 */
final case class Both(left: Tree, right: Tree) extends Tree

## .zshrc_file
# If you come from bash you might have to change your $PATH.
# export PATH=$HOME/bin:/usr/local/bin:$PATH

# Path to your oh-my-zsh installation.
export ZSH="/Users/afsalthaj/.oh-my-zsh"

# Set name of the theme to load --- if set to "random", it will
# load a random theme each time oh-my-zsh is loaded, in which case,
# to know which specific one was loaded, run: echo $RANDOM_THEME
# See https://github.com/robbyrussell/oh-my-zsh/wiki/Themes

## ok_recursion_schemes.scala

object RecursionSchemes extends App {
  import PropertyTreeX._

  sealed trait PropertyTreeX[+K, +V, +A]

  object PropertyTreeX {

    final case object EmptyX extends PropertyTreeX[Nothing, Nothing, Nothing]

## notAvalue.scala
/**
 * Marker type with no members, written infix as `C =!= D`.
 *
 * Judging by the error message used in [[NoFeatureS]], an implicit value of this type is
 * meant to serve as evidence that `C` and `D` are different types.
 */
trait =!=[C, D]

/** Implicit instances of [[=!=]]. */
object NoFeatureS {
  /** Provides an `E =!= F` instance for any pair of types `E` and `F`. */
  implicit def neq[E, F]: E =!= F = new =!=[E, F] {}

  // `neqAmbig1` and `neqAmbig2` both provide an instance where the two type arguments are the
  // same type. This is presumably intended to make implicit search ambiguous (and so fail)
  // when the two types are equal; `implicitAmbiguous` supplies the message reported in that case.
  // NOTE(review): type parameters `G` and `H` of `neqAmbig1` are not used in its signature or
  // body - confirm whether they are intentional.
  @annotation.implicitAmbiguous("Could not prove ${J} =!= ${J}")
  implicit def neqAmbig1[G, H, J]: J =!= J = new =!=[J, J] {}
  implicit def neqAmbig2[I]: I =!= I = new =!=[I, I] {}

}

## better_string_indexing.scala

class CustomStringIndexer(override val uid: String) extends Estimator[CustomStringIndexerModel]
  with Params
  with HasConstrainedHandleInvalid
  with MaxLabelSize
  with HasInputCols
  with HasOutputCols
  with ConsiderNullAsCategory
  with DefaultParamsWritable {

## better_crossvalidator.scala

/**
 * Warning: The custom validator predominantly focuses on performance improvements,
 * and makes a few significant changes with respect to the original cross validator.
 * This isn't thoroughly tested, although we had a successful run with better performance.
 *
 * TODO: Not tested thoroughly. Use at your own risk:
 */
class BetterCrossValidator extends CrossValidator {
  override def fit(dataset: Dataset[_]): CrossValidatorModel = {

## spark_optimiser_is_not_a_silver_bullet.scala
```scala

scala> list.collect
res0: Array[org.apache.spark.sql.Row] = Array([1.0], [1.0], [1.0], [2.0], [2.0], [0.0])

scala> list
res1: org.apache.spark.sql.DataFrame = [a_indexed: double]

scala> val table1 =list
table1: org.apache.spark.sql.DataFrame = [a_indexed: double]

## spark_with_bucketing.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                afsalthaj
                / spark_with_bucketing.md
            
            
              Last active
              August 10, 2019 04:50
            
          
    Spark with Bucketing

Examine Spark's execution plan on bucketed datasets and verify whether it is smart enough to avoid a wide dependency.
PS: When trying things in spark-shell,
note that, for small datasets, the join will probably be a broadcast exchange in the physical execution plan by default.
Example:
./spark-shell

  
## zio_pattern.scala
import zio.{Task, ZIO }

/** Module that exposes a [[ConfigService]] through its `configService` member. */
trait ConfigModule { def configService: ConfigService }

/** Service whose `config` member yields a `String` wrapped in a `Task`. */
trait ConfigService { def config: Task[String] }

## cron_scala.scala
import com.cronutils.model._
import com.cronutils.model.definition._
import java.time._

import com.cronutils.parser.CronParser
import com.cronutils.model.time.ExecutionTime
import CronOps._
import StepDirection._
import java.util.Optional
import scala.util.Try
	import scalaz.State
	import scalaz._, Scalaz._
	import StateInfo._

	// A nice functional way to produce a simple table from a tree (minimal code; in real life the tree can be nested further and state becomes super important).
	// There is nowhere it can go wrong: it traverses the table keys and values back and forth and pads with empty spaces to match the maximum size of the keys and values encountered
	// up until then.
	/** A binary tree whose leaves carry key/value pairs. */
	sealed trait Tree

	/**
	 * Terminal node holding a single entry.
	 *
	 * @param key   the entry's key
	 * @param value the entry's value
	 */
	final case class Leaf(key: String, value: String) extends Tree

	/**
	 * Inner node combining two subtrees.
	 *
	 * @param left  the left subtree
	 * @param right the right subtree
	 */
	final case class Both(left: Tree, right: Tree) extends Tree
	# If you come from bash you might have to change your $PATH.
	# export PATH=$HOME/bin:/usr/local/bin:$PATH

	# Path to your oh-my-zsh installation.
	export ZSH="/Users/afsalthaj/.oh-my-zsh"

	# Set name of the theme to load --- if set to "random", it will
	# load a random theme each time oh-my-zsh is loaded, in which case,
	# to know which specific one was loaded, run: echo $RANDOM_THEME
	# See https://github.com/robbyrussell/oh-my-zsh/wiki/Themes

	object RecursionSchemes extends App {
	import PropertyTreeX._

	sealed trait PropertyTreeX[+K, +V, +A]

	object PropertyTreeX {

	final case object EmptyX extends PropertyTreeX[Nothing, Nothing, Nothing]
	/**
	 * Marker type with no members, written infix as `C =!= D`.
	 *
	 * Judging by the error message used in [[NoFeatureS]], an implicit value of this type is
	 * meant to serve as evidence that `C` and `D` are different types.
	 */
	trait =!=[C, D]

	/** Implicit instances of [[=!=]]. */
	object NoFeatureS {
	/** Provides an `E =!= F` instance for any pair of types `E` and `F`. */
	implicit def neq[E, F]: E =!= F = new =!=[E, F] {}

	// `neqAmbig1` and `neqAmbig2` both provide an instance where the two type arguments are the
	// same type. This is presumably intended to make implicit search ambiguous (and so fail)
	// when the two types are equal; `implicitAmbiguous` supplies the message reported in that case.
	// NOTE(review): type parameters `G` and `H` of `neqAmbig1` are not used in its signature or
	// body - confirm whether they are intentional.
	@annotation.implicitAmbiguous("Could not prove ${J} =!= ${J}")
	implicit def neqAmbig1[G, H, J]: J =!= J = new =!=[J, J] {}
	implicit def neqAmbig2[I]: I =!= I = new =!=[I, I] {}

	}

	class CustomStringIndexer(override val uid: String) extends Estimator[CustomStringIndexerModel]
	with Params
	with HasConstrainedHandleInvalid
	with MaxLabelSize
	with HasInputCols
	with HasOutputCols
	with ConsiderNullAsCategory
	with DefaultParamsWritable {

	/**
	* Warning: The custom validator predominantly focuses on performance improvements,
	* and makes a few significant changes with respect to the original cross validator.
	* This isn't thoroughly tested, although we had a successful run with better performance.
	*
	* TODO: Not tested thoroughly. Use at your own risk:
	*/
	class BetterCrossValidator extends CrossValidator {
	override def fit(dataset: Dataset[_]): CrossValidatorModel = {
	```scala

	scala> list.collect
	res0: Array[org.apache.spark.sql.Row] = Array([1.0], [1.0], [1.0], [2.0], [2.0], [0.0])

	scala> list
	res1: org.apache.spark.sql.DataFrame = [a_indexed: double]

	scala> val table1 =list
	table1: org.apache.spark.sql.DataFrame = [a_indexed: double]
	import zio.{Task, ZIO }

	/** Module that exposes a [[ConfigService]] through its `configService` member. */
	trait ConfigModule { def configService: ConfigService }

	/** Service whose `config` member yields a `String` wrapped in a `Task`. */
	trait ConfigService { def config: Task[String] }
	import com.cronutils.model._
	import com.cronutils.model.definition._
	import java.time._

	import com.cronutils.parser.CronParser
	import com.cronutils.model.time.ExecutionTime
	import CronOps._
	import StepDirection._
	import java.util.Optional
	import scala.util.Try