Skip to content

Instantly share code, notes, and snippets.

Pathikrit Bhowmick pathikrit

View GitHub Profile
@pathikrit
pathikrit / Morph.scala
Created Apr 29, 2020
Case class morpher in scala
View Morph.scala
import shapeless._, syntax.singleton._, record._, ops.hlist._
/**
* Given an instance of A, its generic representation AR, and a function f: AR => BR,
* we can convert A to B if we also have the generic representation of B as BR.
* We also handle misalignments using shapeless's align typeclass (https://stackoverflow.com/questions/29242873/shapeless-turn-a-case-class-into-another-with-fields-in-different-order)
*/
case class Morph[A, AR](a: A)(implicit reprA: LabelledGeneric.Aux[A, AR]) {
// Why this DSL you say? Hack to get around scalac idiocy: https://stackoverflow.com/a/46614684/471136
def to[B] = new {
@pathikrit
pathikrit / GzipSplitter.scala
Last active Dec 12, 2019
Split a file into multiple GZIP files
View GzipSplitter.scala
import java.io.InputStream
import better.files._
import squants.information._, InformationConversions._
object GzipSplitter {
/** Splits the $inputStream into approximately equal chunks of $splitSize gzip files under $outputDirectory */
def split(
inputStream : InputStream,
outputDirectory : File = File.newTemporaryDirectory(),
@pathikrit
pathikrit / SparkDataLoad.scala
Last active Jun 1, 2020
Spark utils to ship data
View SparkDataLoad.scala
import java.nio.charset.{ Charset, StandardCharsets }
import org.apache.spark.sql._
import org.apache.spark.sql.types._
object SparkDataLoad {
def fromCsv[A : Encoder](
path: Set[String],
encoding: Charset = StandardCharsets.UTF_8,
useHeader: Boolean = false,
@pathikrit
pathikrit / SphericalDistance.scala
Last active Sep 24, 2019
Distance calculator between 2 coordinates on a planet
View SphericalDistance.scala
/** Distance between 2 coordinates (in degrees) */
def dist(
p1: (Double, Double), // Coordinate 1 (in degrees)
p2: (Double, Double), // Coordinate 2 (in degrees)
manhattanDist: Boolean = false, // If true, calculate Manhattan distance on the sphere :)
diameter: Double = 7917.5 // Diameter of Earth in miles; set this to whatever planet/units you want
): Double = {
import Math._
// Haversine function: hav(theta) = (1 - cos(theta)) / 2.
// `cos` is brought in by the `import Math._` above, so theta is expected in radians.
def haversine(theta: Double) = (1 - cos(theta))/2
View BooleanMonitor.scala
import java.util.concurrent.TimeUnit
import scala.concurrent.duration.Duration
import com.google.common.util.concurrent.Monitor
class BooleanMonitor(monitor: Monitor = new Monitor())(check: => Boolean) {
private val guard = new Monitor.Guard(monitor) { override def isSatisfied = check }
def whenSatisfied[U](timeout: Duration = Duration.Inf)(f: => U): U = {
View SparkSchemaDsl.scala
import org.apache.spark.sql.types._
import org.apache.spark.sql._
object SchemaDsl {
/**
 * Pairs a Scala type with the Spark SQL data type used to represent it.
 *
 * @tparam ScalaType  the Scala-side type; it does not appear in any member and only tags the instance
 * @param sparkType   the Spark `DataType` that stands for `ScalaType`
 * @param isNullable  nullability given to fields built from this mapping (defaults to `false`)
 */
case class ScalaToSparkType[ScalaType](sparkType: DataType, isNullable: Boolean = false) {

  /** Builds a `StructField` called `name` that carries this mapping's data type and nullability. */
  def toField(name: String) = StructField(name, sparkType, isNullable)
}
// Implicit mapping instances: Scala String -> Spark StringType, Scala Int -> Spark IntegerType.
// Neither passes isNullable, so both use the ScalaToSparkType default of `false` (non-nullable fields).
implicit val stringType: ScalaToSparkType[String] = ScalaToSparkType(StringType)
implicit val intType: ScalaToSparkType[Int] = ScalaToSparkType(IntegerType)
@pathikrit
pathikrit / script.scala
Last active Dec 12, 2018
Move duplicate files to a directory
View script.scala
import better.files._
def moveDupes(
dir: File,
logFile: File = (File.home / "dupes.txt"),
dupeFolder: File = (File.home / 'dupes).createDirectory()
) = {
for {
log <- logFile.printWriter()
(hash, toKeep :: toMove) <- dir.listRecursively.toSeq.groupBy(_.md5).mapValues(_.toList)
@pathikrit
pathikrit / GitPunchCard.scala
Last active Apr 15, 2018
Scala Script to print Git PunchCard
View GitPunchCard.scala
/**
* Quick and dirty Scala app to print git commit punch-card e.g.
*
* ┃08┃09┃10┃11┃12┃13┃14┃15┃16┃17┃18┃19┃20┃21┃22┃23┃00┃01┃02┃03┃04┃05┃06┃07┃
* Sun┃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Mon┃▁▁▁▄▄▄▅▅▅▅▅▅▄▄▄▆▆▆▇▇▇▇▇▇███▆▆▆▅▅▅▄▄▄▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Tue┃▁▁▁▃▃▃▆▆▆▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Wed┃▁▁▁▄▄▄▅▅▅▇▇▇▅▅▅▅▅▅███▇▇▇▅▅▅▆▆▆▇▇▇▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Thu┃▁▁▁▂▂▂▄▄▄▆▆▆▅▅▅▆▆▆▇▇▇▇▇▇▆▆▆▇▇▇▅▅▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Fri┃▁▁▁▂▂▂▄▄▄▅▅▅▅▅▅▄▄▄▄▄▄▅▅▅▅▅▅▃▃▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
View local.sbt
// Use sbt's built-in `Watched.clearWhenTriggered` as the message shown on each triggered (~) execution.
// NOTE(review): per its name this clears the screen between runs — confirm against the sbt version in use.
triggeredMessage := Watched.clearWhenTriggered
// Add Ammonite (latest release) in the "test" configuration, cross-versioned against the full Scala version.
libraryDependencies += "com.lihaoyi" % "ammonite" % "latest.release" % "test" cross CrossVersion.full
// When the Test-scoped console starts, run this snippet first, which launches the Ammonite REPL.
initialCommands in (Test, console) := """ammonite.Main().run()"""
watchSources ++= (
(baseDirectory.value * "*.sbt").get
++ (baseDirectory.value / "project" * "*.scala").get
++ (baseDirectory.value / "project" * "*.sbt").get
@pathikrit
pathikrit / MajorityElement.scala
Last active Jun 12, 2017
Boyer–Moore majority vote algorithm
View MajorityElement.scala
import scala.collection.generic.Growable
/**
* Boyer–Moore majority vote algorithm (https://en.wikipedia.org/wiki/Boyer–Moore_majority_vote_algorithm)
* A Data structure that supports O(1) tracking of the majority element in streaming data
* (i.e. something that occurs strictly > 50% of the time)
*/
class MajorityElement[A] extends Growable[A] {
private[this] var majorityElement = Option.empty[A]
private[this] var count = 0
You can’t perform that action at this time.