Bernhard Schäfer (bernhardschaefer)

bernhardschaefer / augmentations.py
Created October 19, 2022 11:55
Albumentations Augmentation Pipeline
from albumentations import LongestMaxSize, OneOf, RandomResizedCrop

transforms = [
    OneOf([
        # 70% of the time: rescale so that the longest side is 1333 px
        LongestMaxSize(p=0.7, max_size=1333),
        # 30% of the time: take a random crop and resize it to 1333x1333
        RandomResizedCrop(
            p=0.3, height=1333, width=1333,
            scale=(0.2, 1.0), ratio=(0.5, 2.0)
        ),
    ], p=1.0),
    # ... (the remaining transforms are cut off in the gist preview)
]
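To apply the pipeline, the transform list is wrapped in a Compose; a minimal usage sketch (the input image is a placeholder array, not part of the gist):

import numpy as np
from albumentations import Compose

pipeline = Compose(transforms)
image = np.zeros((800, 1200, 3), dtype=np.uint8)  # placeholder HxWxC image
augmented_image = pipeline(image=image)["image"]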
bernhardschaefer / spark-submit-streaming-yarn.sh
Last active March 21, 2022 05:04
spark-submit template for running Spark Streaming on YARN (referenced in https://www.inovex.de/blog/247-spark-streaming-on-yarn-in-production/)
#!/bin/bash
# Minimum TODOs on a per job basis:
# 1. define name, application jar path, main class, queue and log4j-yarn.properties path
# 2. remove properties not applicable to your Spark version (Spark 1.x vs. Spark 2.x)
# 3. tweak num_executors, executor_memory (+ overhead), and backpressure settings
# the two most important settings:
num_executors=6
executor_memory=3g
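# The preview stops after the resource settings; a hedged sketch of how they
# typically feed into the actual submit call (all flags are standard
# spark-submit options; name, class, queue, and jar path are placeholders):
spark-submit \
    --master yarn \
    --deploy-mode cluster \
    --name my-streaming-job \
    --class com.example.MyStreamingJob \
    --queue default \
    --num-executors ${num_executors} \
    --executor-memory ${executor_memory} \
    --conf spark.streaming.backpressure.enabled=true \
    /path/to/my-streaming-job.jar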
import torch

# Class-specific predictor weights of a maskrcnn-benchmark checkpoint; removing them lets the rest initialize a model with a new class count.
keys_to_remove = [
    'module.roi_heads.box.predictor.cls_score.weight',
    'module.roi_heads.box.predictor.cls_score.bias',
    'module.roi_heads.box.predictor.bbox_pred.weight',
    'module.roi_heads.box.predictor.bbox_pred.bias',
    'module.roi_heads.mask.predictor.mask_fcn_logits.weight',  # mask head
    'module.roi_heads.mask.predictor.mask_fcn_logits.bias'  # mask head
]

def trim_maskrcnn_benchmark_model(model_path: str, trimmed_model_path: str):
    # Reconstructed body (the preview is truncated); assumes the state dict sits under the 'model' key, as in maskrcnn-benchmark checkpoints.
    checkpoint = torch.load(model_path, map_location="cpu")
    for key in keys_to_remove:
        checkpoint["model"].pop(key, None)
    torch.save(checkpoint, trimmed_model_path)
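A hypothetical invocation (file names are placeholders); the trimmed checkpoint can then initialize fine-tuning on a dataset with a different number of classes:

trim_maskrcnn_benchmark_model(
    "e2e_mask_rcnn_R_50_FPN_1x.pth",         # pretrained checkpoint
    "e2e_mask_rcnn_R_50_FPN_1x_trimmed.pth"  # trimmed output
)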
# Spark Streaming Logging Configuration
# See also: http://spark.apache.org/docs/2.0.2/running-on-yarn.html#debugging-your-application
log4j.rootLogger=INFO, stderr
# application namespace configuration
log4j.logger.de.inovex.mysparkapp=INFO, stderr, stdout
# Write all logs to standard Spark stderr file
log4j.appender.stderr=org.apache.log4j.RollingFileAppender
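# The preview cuts off after the appender class; a typical completion, using
# the container log dir documented in Spark's YARN debugging docs (file sizes
# are illustrative):
log4j.appender.stderr.File=${spark.yarn.app.container.log.dir}/stderr
log4j.appender.stderr.MaxFileSize=50MB
log4j.appender.stderr.MaxBackupIndex=10
log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
log4j.appender.stderr.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n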
val checkpointDirectory = "hdfs:///path/to/checkpoint/dir"

def main(args: Array[String]): Unit = {
  // Get StreamingContext from checkpoint data or create a new one
  val context = StreamingContext.getOrCreate(checkpointDirectory, functionToCreateContext _)
  // Start the context and block until the streaming job terminates
  context.start()
  context.awaitTermination() // added here; the gist preview is truncated
}
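// Sketch of the factory referenced above, following the standard Spark
// Streaming checkpointing pattern (app name, batch interval, and the DStream
// definitions are placeholders; assumes the usual org.apache.spark and
// org.apache.spark.streaming imports):
def functionToCreateContext(): StreamingContext = {
  val sparkConf = new SparkConf().setAppName("MySparkApp")
  val ssc = new StreamingContext(sparkConf, Seconds(10))
  // ... define the job's DStreams here ...
  ssc.checkpoint(checkpointDirectory) // persist metadata for recovery
  ssc
}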
import org.apache.spark.groupon.metrics.{SparkTimer, UserMetricsSystem}
import org.apache.spark.rdd.RDD

class SparkTimerExample[T] {
  // lazy, so the timer is created on the executor rather than serialized from the driver
  lazy val timer: SparkTimer = UserMetricsSystem.timer("BulkPutTimer")

  def bulkPut(rdd: RDD[T]): Unit = {
    rdd.foreachPartition(partitionOfRecords => {
      val timerCtx = timer.time()
      // ... write the partition records here (cut off in the gist preview) ...
      timerCtx.stop() // closes the timing context, Dropwizard-style
    })
  }
}
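// spark-metrics requires a one-time, driver-side initialization before any
// metric can be created on the executors (the namespace string is a
// placeholder; sc is the application's SparkContext):
UserMetricsSystem.initialize(sc, "MySparkAppMetrics")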
-- TerminalVim.app
-- This creates a shim Application that will enable you to open files from the Finder in vim using iTerm
-- To use this script:
-- 1. Open Automator and create a new Application
-- 2. Add the "Run AppleScript" action
-- 3. Paste this script into the Run AppleScript section
-- 4. Save the application as TerminalVim.app in your Applications folder
-- 5. In the Finder, right click on a file and select "Open With". In that window you can set TerminalVim as a default
on run {input, parameters}
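    -- The gist preview ends here; a minimal completion of the handler,
    -- assuming iTerm2's current AppleScript dictionary (command names can
    -- differ between iTerm versions):
    set filepath to POSIX path of (item 1 of input)
    tell application "iTerm"
        activate
        tell current session of (create window with default profile)
            write text "vim " & quoted form of filepath
        end tell
    end tell
    return input
end run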