Skip to content

Instantly share code, notes, and snippets.

View ryanmiville's full-sized avatar

Ryan Miville ryanmiville

View GitHub Profile
@ryanmiville
ryanmiville / pyspark_utils.py
Last active August 2, 2023 02:06
pyspark utilities
from pyspark.sql import SparkSession
def most_recent_path(path: str, spark: SparkSession) -> str:
"""Get the most recent path in a directory using the Hadoop file system.
In practice, this is used to get the most recent partition in a table.
"""
sc = spark.sparkContext
URI = sc._jvm.java.net.URI
@ryanmiville
ryanmiville / utils.scala
Last active August 1, 2023 22:25
Spark utility functions
//> using scala "2.12.17"
//> using dep "org.apache.spark::spark-sql:3.3.2"
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.hadoop.fs._
import java.net._
object utils {
//> using scala "2.12"
//> using dep "com.chuusai::shapeless:2.3.3"
//> using dep "org.scalameta::munit:0.7.29"
/**
* Parser is a type class for parsing command-line arguments in the format
* '--key value' into a case class. The arguments can be in any order, and
* extra arguments that do not correspond to a field in your case class are
* ignored. Each field name in your case class must match its argument name
* exactly (except for the leading --).
//> using scala "2.12.17"
//> using dep "org.apache.spark::spark-sql:3.3.2"
//> using dep "org.scalameta::munit:0.7.29"
import org.apache.spark.sql.{Dataset, SparkSession}
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.core.LoggerContext
import org.apache.logging.log4j.Level
@ryanmiville
ryanmiville / Setup.scala
Created March 8, 2022 17:58
Child span behavior with single stream
//> using scala "2.13"
//> using lib "com.armanbilge::bayou:0.1-4fb42c8"
//> using lib "org.typelevel::cats-effect:3.3.7"
//> using lib "co.fs2::fs2-core:3.2.5"
//> using lib "org.typelevel::log4cats-core:2.2.0"
//> using lib "org.typelevel::log4cats-slf4j:2.2.0"
//> using lib "org.tpolecat::natchez-core:0.1.6"
//> using lib "org.tpolecat::natchez-log:0.1.6"
import cats.effect.{Trace => _, _}
@ryanmiville
ryanmiville / Merge.scala
Last active March 8, 2022 17:57
Strange child span behavior with merging streams
//> using scala "2.13"
//> using lib "com.armanbilge::bayou:0.1-4fb42c8"
//> using lib "org.typelevel::cats-effect:3.3.7"
//> using lib "co.fs2::fs2-core:3.2.5"
//> using lib "org.typelevel::log4cats-core:2.2.0"
//> using lib "org.typelevel::log4cats-slf4j:2.2.0"
//> using lib "org.tpolecat::natchez-core:0.1.6"
//> using lib "org.tpolecat::natchez-log:0.1.6"
import cats.effect.{Trace => _, _}
@ryanmiville
ryanmiville / official_race_class_list.md
Last active March 20, 2024 07:38
All races and classes from official D&D sources
@ryanmiville
ryanmiville / WsPingPong.scala
Last active March 1, 2018 00:15
A simple example of responding to a websocket message with backpressure using Akka
class WsPingPong extends Actor {
import WsPingPong._
private implicit val materializer = ActorMaterializer()
private val queue =
Source.queue[Message](Int.MaxValue, OverflowStrategy.backpressure)
.via(Http().webSocketClientFlow(WebSocketRequest("wss://pingpongsockets.org")))
.to(Sink.actorRefWithAck(self, Init, Ack, Complete))
.run()