Bernhard Schäfer (bernhardschaefer)

bernhardschaefer / augmentations.py
Created October 19, 2022 11:55
Albumentations Augmentation Pipeline
from albumentations import LongestMaxSize, OneOf, RandomResizedCrop

transforms = [
    OneOf([
        # 70% of the time: rescale so that the longest side is 1333 px
        LongestMaxSize(p=0.7, max_size=1333),
        # 30% of the time: take a random crop and resize it to 1333x1333
        RandomResizedCrop(
            p=0.3, height=1333, width=1333,
            scale=(0.2, 1.0), ratio=(0.5, 2.0)
        ),
    ], p=1.0),
    # ... (the remaining transforms are cut off in the gist preview)
]
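To apply the pipeline, the transform list is wrapped in a Compose; a minimal usage sketch (the input image is a placeholder array, not part of the gist):

import numpy as np
from albumentations import Compose

pipeline = Compose(transforms)
image = np.zeros((800, 1200, 3), dtype=np.uint8)  # placeholder HxWxC image
augmented_image = pipeline(image=image)["image"]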
bernhardschaefer / spark-submit-streaming-yarn.sh
Last active March 21, 2022 05:04
spark-submit template for running Spark Streaming on YARN (referenced in https://www.inovex.de/blog/247-spark-streaming-on-yarn-in-production/)
#!/bin/bash
# Minimum TODOs on a per job basis:
# 1. define name, application jar path, main class, queue and log4j-yarn.properties path
# 2. remove properties not applicable to your Spark version (Spark 1.x vs. Spark 2.x)
# 3. tweak num_executors, executor_memory (+ overhead), and backpressure settings
# the two most important settings:
num_executors=6
executor_memory=3g
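# The preview stops after the resource settings; a hedged sketch of how they
# typically feed into the actual submit call (all flags are standard
# spark-submit options; name, class, queue, and jar path are placeholders):
spark-submit \
    --master yarn \
    --deploy-mode cluster \
    --name my-streaming-job \
    --class com.example.MyStreamingJob \
    --queue default \
    --num-executors ${num_executors} \
    --executor-memory ${executor_memory} \
    --conf spark.streaming.backpressure.enabled=true \
    /path/to/my-streaming-job.jar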
import torch

# Class-specific predictor weights of a maskrcnn-benchmark checkpoint; removing them lets the rest initialize a model with a new class count.
keys_to_remove = [
    'module.roi_heads.box.predictor.cls_score.weight',
    'module.roi_heads.box.predictor.cls_score.bias',
    'module.roi_heads.box.predictor.bbox_pred.weight',
    'module.roi_heads.box.predictor.bbox_pred.bias',
    'module.roi_heads.mask.predictor.mask_fcn_logits.weight',  # mask head
    'module.roi_heads.mask.predictor.mask_fcn_logits.bias'  # mask head
]

def trim_maskrcnn_benchmark_model(model_path: str, trimmed_model_path: str):
    # Reconstructed body (the preview is truncated); assumes the state dict sits under the 'model' key, as in maskrcnn-benchmark checkpoints.
    checkpoint = torch.load(model_path, map_location="cpu")
    for key in keys_to_remove:
        checkpoint["model"].pop(key, None)
    torch.save(checkpoint, trimmed_model_path)
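A hypothetical invocation (file names are placeholders); the trimmed checkpoint can then initialize fine-tuning on a dataset with a different number of classes:

trim_maskrcnn_benchmark_model(
    "e2e_mask_rcnn_R_50_FPN_1x.pth",         # pretrained checkpoint
    "e2e_mask_rcnn_R_50_FPN_1x_trimmed.pth"  # trimmed output
)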
# Spark Streaming Logging Configuration
# See also: http://spark.apache.org/docs/2.0.2/running-on-yarn.html#debugging-your-application
log4j.rootLogger=INFO, stderr
# application namespace configuration
log4j.logger.de.inovex.mysparkapp=INFO, stderr, stdout
# Write all logs to standard Spark stderr file
log4j.appender.stderr=org.apache.log4j.RollingFileAppender
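# The preview cuts off after the appender class; a typical completion, using
# the container log dir documented in Spark's YARN debugging docs (file sizes
# are illustrative):
log4j.appender.stderr.File=${spark.yarn.app.container.log.dir}/stderr
log4j.appender.stderr.MaxFileSize=50MB
log4j.appender.stderr.MaxBackupIndex=10
log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
log4j.appender.stderr.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n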
val checkpointDirectory = "hdfs:///path/to/checkpoint/dir"

def main(args: Array[String]): Unit = {
  // Get StreamingContext from checkpoint data or create a new one
  val context = StreamingContext.getOrCreate(checkpointDirectory, functionToCreateContext _)
  // Start the context and block until the streaming job terminates
  context.start()
  context.awaitTermination() // added here; the gist preview is truncated
}
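// Sketch of the factory referenced above, following the standard Spark
// Streaming checkpointing pattern (app name, batch interval, and the DStream
// definitions are placeholders; assumes the usual org.apache.spark and
// org.apache.spark.streaming imports):
def functionToCreateContext(): StreamingContext = {
  val sparkConf = new SparkConf().setAppName("MySparkApp")
  val ssc = new StreamingContext(sparkConf, Seconds(10))
  // ... define the job's DStreams here ...
  ssc.checkpoint(checkpointDirectory) // persist metadata for recovery
  ssc
}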
import org.apache.spark.groupon.metrics.{SparkTimer, UserMetricsSystem}
import org.apache.spark.rdd.RDD

class SparkTimerExample[T] {
  // lazy, so the timer is created on the executor rather than serialized from the driver
  lazy val timer: SparkTimer = UserMetricsSystem.timer("BulkPutTimer")

  def bulkPut(rdd: RDD[T]): Unit = {
    rdd.foreachPartition(partitionOfRecords => {
      val timerCtx = timer.time()
      // ... write the partition records here (cut off in the gist preview) ...
      timerCtx.stop() // closes the timing context, Dropwizard-style
    })
  }
}
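// spark-metrics requires a one-time, driver-side initialization before any
// metric can be created on the executors (the namespace string is a
// placeholder; sc is the application's SparkContext):
UserMetricsSystem.initialize(sc, "MySparkAppMetrics")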
-- TerminalVim.app
-- This creates a shim Application that will enable you to open files from the Finder in vim using iTerm
-- To use this script:
-- 1. Open Automator and create a new Application
-- 2. Add the "Run AppleScript" action
-- 3. Paste this script into the Run AppleScript section
-- 4. Save the application as TerminalVim.app in your Applications folder
-- 5. In the Finder, right click on a file and select "Open With". In that window you can set TerminalVim as a default
on run {input, parameters}
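    -- The gist preview ends here; a minimal completion of the handler,
    -- assuming iTerm2's current AppleScript dictionary (command names can
    -- differ between iTerm versions):
    set filepath to POSIX path of (item 1 of input)
    tell application "iTerm"
        activate
        tell current session of (create window with default profile)
            write text "vim " & quoted form of filepath
        end tell
    end tell
    return input
end run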