Skip to content

Instantly share code, notes, and snippets.

// Row type for the incoming data; the field list is elided in this snippet.
case class InputData(...)

// Spark schema derived from InputData by reflection.
val dataSchema =
  ScalaReflection
    .schemaFor[InputData]
    .dataType
    .asInstanceOf[StructType]

// Read the parquet data at incomingPath using the explicit schema above and
// expose the result as a typed Dataset.
val dataset: Dataset[InputData] = {
  val reader = spark.read.schema(dataSchema)
  reader.parquet(incomingPath).as[InputData]
}
@visualskyrim
visualskyrim / build.sbt
Last active April 13, 2020 02:19
Finatra project setup
ThisBuild / scalaVersion := "2.12.10"
ThisBuild / version := "0.1"
ThisBuild / organization := ""
ThisBuild / organizationName := ""
lazy val root = (project in file("."))
.enablePlugins(BuildInfoPlugin)
.settings(
name := "your-api",
fork := true,
@visualskyrim
visualskyrim / kill_airflow_schedulers.sh
Created March 9, 2020 04:54
A script to kill all local airflow schedulers
# Kill all local Airflow scheduler processes.
# pgrep -f matches the pattern against each process's full command line and
# never reports itself, so the result holds only real matches; a
# `ps -ef | grep` pipeline would also list its own grep process.
pids=$(pgrep -f "airflow scheduler")
# Skip kill when nothing matched: kill without a PID is a usage error.
if [ -n "$pids" ]; then
  # Left unquoted on purpose so each PID becomes a separate argument.
  kill $pids
fi
@visualskyrim
visualskyrim / run_spark_job.sh
Created March 7, 2020 03:06
A template to run spark job with possible options
#########################################################
# The purpose of this script is <----------->
#
# Arguments:
# VAR_1
# VAR_2
#########################################################
if [ $# != 2 ]
then
git log v1.0.0..v1.0.1 --oneline --graph --decorate
@visualskyrim
visualskyrim / build.sbt
Created April 23, 2018 07:55
scala style setting
// Task key for running scalastyle against the Compile configuration.
lazy val compileScalastyle = taskKey[Unit]("compileScalastyle")

// scalastyle >= 0.9.0
compileScalastyle := (scalastyle in Compile).toTask("").value

// Make compilation depend on the style check task.
compile.in(Compile) := compile.in(Compile).dependsOn(compileScalastyle).value
assemblyMergeStrategy in assembly := {
case PathList("META-INF", xs @ _*) => MergeStrategy.discard
case x => MergeStrategy.first
@visualskyrim
visualskyrim / hdfs_job_target.py
Created December 28, 2017 06:09
A luigi task target deciding if a task is done or not by checking the _SUCCESS file in the given directory
class HdfsJobTarget(HdfsTarget):
def exists(self):
return self.fs.exists(os.path.join(self.path, '_SUCCESS'))
def copy(self, destination):
target_dir = os.path.dirname(destination)
if not self.fs.exists(target_dir):
self.fs.mkdir(target_dir)
self.fs.copy(self.path, target_dir)
@visualskyrim
visualskyrim / docker-compose.yml
Created December 27, 2017 14:16
Docker Compose File: influxdb + grafana
version: "1"
services:
influxdb:
image: influxdb
volumes:
- <local-mnt>:/var/lib/influxdb
ports:
- 8084:8083
- 8086:8086
@visualskyrim
visualskyrim / modify_ulimit.yml
Last active August 18, 2022 09:27
An Ansible playbook to modify ulimit
- hosts: all
become: true
tasks:
- name: configure system settings, file descriptors and number of threads
pam_limits:
domain: <--your-username-->
limit_type: "{{item.limit_type}}"
limit_item: "{{item.limit_item}}"
value: "{{item.value}}"
with_items:
@visualskyrim
visualskyrim / change_ulimit.yml
Created November 27, 2017 02:34
Ansible: Change ulimit
- hosts: all
become: true
tasks:
- name: configure system settings, file descriptors and number of threads
pam_limits:
domain: ratuser
limit_type: "{{item.limit_type}}"
limit_item: "{{item.limit_item}}"
value: "{{item.value}}"
with_items: