Skip to content

Instantly share code, notes, and snippets.

View dyno's full-sized avatar
🏠
Working from home

Dyno Fu dyno

🏠
Working from home
View GitHub Profile
@dyno
dyno / line_sweep_x2.py
Last active October 7, 2023 16:56
Line sweep in 2 dimensions
#!/usr/bin/env python3
"""
Each point is represented by [x, y], x and y both are floating points.
unit square: with edges either horizontal or vertical, side length 1
Input: n points Output: max number of points can be covered by a unit square
Example:
input:[[0.1,0.2], [0.5, 0.6], [100, 200], [0.9,0.8]]
@dyno
dyno / sparkjson.sc
Created December 1, 2022 00:12
Spark Json
import org.apache.spark.sql.types.{IntegerType, MapType, StringType, StructField, StructType}
import org.apache.spark.sql.{Column, DataFrame, Encoders}
// Schema with three string columns named A, B and C, each declared nullable.
// NOTE(review): the fields were previously tagged "required" although every one of them
// is nullable — confirm which of the two is intended.
val DataSchema: StructType = StructType(
  List("A", "B", "C").map(name => StructField(name, StringType, nullable = true))
)
@dyno
dyno / SharedSparkContext.scala
Created March 10, 2022 22:20
Gradle Spark Unittest Setup
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.{BeforeAndAfterAll, Suite}
// https://www.slideshare.net/SparkSummit/spark-summit-eu-talk-by-ted-malaska
trait SharedSparkContext extends BeforeAndAfterAll { self: Suite =>
@transient private var _sc: SparkContext = _
def sc: SparkContext = _sc
def spark: SparkSession = SparkSession.builder().config(_sc.getConf).getOrCreate()
@dyno
dyno / GsonAdapter.scala
Created November 14, 2021 03:47
Gson Adapter for Scala
import com.google.gson._
import sun.reflect.generics.reflectiveObjects.ParameterizedTypeImpl
import java.lang.reflect.{ParameterizedType, Type}
import java.util.{ArrayList => JArrayList, LinkedHashMap => JListMap, List => JList}
import scala.collection.JavaConverters._
import scala.collection.immutable.ListMap
object GsonAdapter {
@dyno
dyno / README.md
Created November 4, 2021 18:03 — forked from davideicardi/README.md
Write and read Avro records from bytes array

Avro serialization

There are 4 possible serialization formats when using Avro:

@dyno
dyno / emr_support.sh
Created June 18, 2021 06:12
EMR support checklist
#!/usr/bin/env bash
# Trace each command as it runs so the captured output shows which command produced it.
set -x
# Processes sorted by CPU usage, highest first; head keeps the header line plus 9 processes.
ps auxwww --sort -%cpu | head -10
# Same listing sorted by resident memory (RSS), highest first.
ps auxwww --sort -rss | head -10
# List systemd units of type "service".
systemctl --type=service
@dyno
dyno / aws_s3_touch.sh
Created May 27, 2021 17:47
aws s3 touch
#!/usr/bin/env bash
# https://stackoverflow.com/questions/13455168/is-there-a-way-to-touch-a-file-in-amazon-s3
# Usage: aws_s3_touch s3://bucket/path/to/key
# Splits the URL into bucket and key with Python's urlparse, then calls
# `aws s3api put-object` for that bucket/key.
# NOTE(review): put-object is invoked without --body, so this presumably writes an empty
# object and would replace existing content at that key — confirm this is intended.
aws_s3_touch() {
  local url="$1"
  # The URL is handed to Python as an argument (sys.argv) rather than spliced into the
  # Python source, so spaces, quotes or shell metacharacters in it cannot break the
  # command line or inject code.
  python3 -c 'import sys; from urllib.parse import urlparse; u = urlparse(sys.argv[1]); print(f"{u.netloc} {u.path[1:]}")' "$url" |
    while read -r bucket key; do
      # Quoted so a key containing spaces or glob characters reaches aws as one argument.
      aws s3api put-object --bucket "$bucket" --key "$key"
    done
}
@dyno
dyno / single_process_lock.py
Created May 16, 2021 06:23
Make sure only one process is running on the system
import fcntl
import sys
from contextlib import contextmanager
from os.path import isfile
from absl import logging as log
@contextmanager
def single_process_lock(lockfile: str):
// Copy task that writes a rewritten copy of the generated Maven POM into the libs directory.
// It reads $buildDir/publications/maven/pom-default.xml, writes it to $buildDir/libs/ as
// "<jar archive file name>.pom.xml", and substitutes the Scala placeholders on the way.
// NOTE(review): scalaSuffix and scalaVersion are not defined in this block — presumably
// project properties set elsewhere in the build; confirm.
task rewriteMavenPom(type: Copy) {
from file("$buildDir/publications/maven/pom-default.xml")
into file("$buildDir/libs/")
rename("pom-default.xml", "${jar.archiveFileName.get()}.pom.xml")
// Applied to each line of the copied file: "_%%" becomes "_" + scalaSuffix and
// "%scala-version%" becomes scalaVersion. replaceAll treats its first argument as a regex.
filter { it.replaceAll('_%%', '_' + scalaSuffix).replaceAll('%scala-version%', scalaVersion) }
// Run only after the jar task and the POM generation task it depends on.
dependsOn(jar, generatePomFileForMavenPublication)
}
@dyno
dyno / pyconindia_dsl.py
Created October 20, 2020 23:26 — forked from siddhi/pyconindia_dsl.py
Domain Specific Languages in Python - Pycon India - 17 Sep 2011
from pyparsing import *
# By default, PyParsing treats \n as whitespace and ignores it
# In our grammar, \n is significant, so tell PyParsing not to ignore it
ParserElement.setDefaultWhitespaceChars(" \t")
def parse(input_string):
def convert_prop_to_dict(tokens):
"""Convert a list of field property tokens to a dict"""
prop_dict = {}