Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

# Create a Python 3.6 conda environment and register it as a Jupyter kernel.
# ipykernel is installed explicitly: an environment created with only
# python=3.6 does not contain it, so "python -m ipykernel" would fail.
conda create -n py36-test python=3.6 ipykernel
# "conda activate"/"conda deactivate" replace the deprecated
# "source activate"/"source deactivate" (conda >= 4.6; the shell must have
# been set up once with "conda init").
conda activate py36-test
python -m ipykernel install --name py36-test
conda deactivate
@mleuthold
mleuthold / Alias for Jupyter Lab as Docker container
Last active September 7, 2020 15:13
Jupyter Lab as Docker container
# Start Jupyter Lab from the jupyter/all-spark-notebook image: publishes port
# 8888, mounts $HOME at /home/jovyan/, runs as the calling user's uid:gid
# (plus the "users" and "root" groups), and removes the container on exit.
# The single quotes defer $HOME, $(id -u) and $(id -g) until the alias is used.
alias jupyter-lab='docker run -it --rm -p 8888:8888 -v "$HOME":/home/jovyan/ --user $(id -u):$(id -g) --group-add users --group-add root -e JUPYTER_ENABLE_LAB=yes jupyter/all-spark-notebook'
@mleuthold
mleuthold / prepare-pypspark-env.sh
Last active October 4, 2023 19:42
Use PySpark 3.0.0 with pyenv
pyenv install 3.8.1
pyenv virtualenv 3.8.1 spark3
pyenv shell spark3
pip install pyspark
# Directory that pip installed pyspark into. The match is anchored to the
# "Location: " field so other lines mentioning the word are ignored, and the
# whole remainder of the line is kept so paths containing spaces survive.
MY_PYSPARK_LOCATION=$(pip show pyspark | sed -n 's/^Location: //p')
# /home/marleu/.pyenv/versions/3.8.1/envs/spark3/lib/python3.8/site-packages
# Package name as reported by pip, taken from the "Name: " field only.
MY_PYSPARK_NAME=$(pip show pyspark | sed -n 's/^Name: //p')
# pyspark
#########################
# logging.conf
#########################
[loggers]
keys=root,sampleLogger
[handlers]
keys=consoleHandler
# ./conf/log4j.properties
[...]
# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} - %c{1} - %p - %m%n
[...]
@mleuthold
mleuthold / copy_kafka_messages_between_clusters.py
Created February 15, 2020 22:41
How to copy Kafka messages from one to another Kafka cluster
#!/usr/bin/env python3
import os
from kafka import KafkaConsumer
from kafka import KafkaProducer
# Access all environment variables
print('*----------------------------------*')
print(os.environ)
print('*----------------------------------*')
@mleuthold
mleuthold / shortcut for Parquet Tools as Docker container
Last active December 8, 2020 09:40
Make parquet-tools available
# Run parquet-tools from the nathanhowell/parquet-tools image with the current
# directory mounted at /data. The alias is single-quoted so that $(pwd) is
# evaluated each time the alias is used, not once when it is defined; the
# inner double quotes keep directories containing spaces intact.
alias parquet-tools='docker run --rm --workdir /data -it -v "$(pwd)":/data nathanhowell/parquet-tools'
### OR
grep -qxF "### PARQUETTOOLS BEGIN
### PARQUETTOOLS END" ~/.zshrc || echo "\n### PARQUETTOOLS BEGIN\n### PARQUETTOOLS END" >> ~/.zshrc
command='''
parquet_tools(){
docker run --rm --workdir /data -it -v $(pwd):/data nathanhowell/parquet-tools "$@"
@mleuthold
mleuthold / Makefile
Created April 4, 2019 16:54
How to write Make targets for parallel execution with partial ordering
# run deploy phases in sequence
deploy:
$(MAKE) deploy.infrastructure
$(MAKE) deploy.kafka-topics
$(MAKE) deploy.pipeline
# deploy infrastructure in parallel
deploy.infrastructure: \
deploy.kafka-cluster \
deploy.kafka-client
@mleuthold
mleuthold / Makefile
Created April 4, 2019 16:40
Using Kubernetes context and Docker endpoint of Minikube in a Makefile
ifeq ($(ENV), local)
# if local, then use the Docker endpoint of minikube
export DOCKER_TLS_VERIFY := $(shell minikube docker-env | grep DOCKER_TLS_VERIFY | cut -d\= -f2 | tr -d \")
export DOCKER_HOST := $(shell minikube docker-env | grep DOCKER_HOST | cut -d\= -f2 | tr -d \")
export DOCKER_CERT_PATH := $(shell minikube docker-env | grep DOCKER_CERT_PATH | cut -d\= -f2 | tr -d \")
export DOCKER_API_VERSION := $(shell minikube docker-env | grep DOCKER_API_VERSION | cut -d\= -f2 | tr -d \")
# if local, then use the Kubernetes context of minikube
export MY_CONTEXT := $(shell kubectl config use-context minikube; echo "using minikube context")
else
# use kubernetes context specified in configuration file
@mleuthold
mleuthold / install-plugin-tillerless-for-helm.sh
Last active August 19, 2019 14:21
Install plugin Tillerless for Helm.
#!/usr/bin/env bash
set -xe
export HELM_TILLER_SILENT=true
export HELM_TILLER_HISTORY_MAX=5
NAMESPACE="${MY_KUBERNETES_NAMESPACE:-default}"
helm init --client-only