Skip to content

Instantly share code, notes, and snippets.

@easel
Last active January 18, 2019 18:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save easel/b7dc78afcaf58cb2e68f84454bc73271 to your computer and use it in GitHub Desktop.
Save easel/b7dc78afcaf58cb2e68f84454bc73271 to your computer and use it in GitHub Desktop.
Kubernetes Stuff
# Extends jupyter/all-spark-notebook with:
#   * the Almond Scala kernel (bootstrapped through coursier),
#   * the conda packages listed in environment.yml (applied to the base env),
#   * the JupyterLab git extension (server side and lab side).
FROM jupyter/all-spark-notebook

# Plain local file, no archive extraction or URL fetch needed, so COPY is enough.
COPY environment.yml .

# Single layer: fetch coursier, build the Almond launcher, install the kernel,
# delete the launcher, then update conda and enable the git extension.
# SCALA_VERSION / ALMOND_VERSION are only exported for the duration of this RUN.
RUN \
    curl -L -o coursier https://git.io/coursier && \
    chmod +x coursier && \
    export SCALA_VERSION=2.12.8 ALMOND_VERSION=0.2.1 && \
    ./coursier bootstrap -r jitpack -i user \
        -I user:sh.almond:scala-kernel-api_$SCALA_VERSION:$ALMOND_VERSION \
        sh.almond:scala-kernel_$SCALA_VERSION:$ALMOND_VERSION --sources \
        --default=true -o almond && \
    ./almond --install --global && \
    rm -f almond && \
    conda env update -n base -f environment.yml && \
    jupyter serverextension enable --py jupyterlab_git --sys-prefix && \
    jupyter labextension install @jupyterlab/git

# Rewrite the UMASK entry in /etc/login.defs from 022 to 002; this needs root,
# after which the image drops back to the jovyan user.
USER root
RUN sed -i 's/\(UMASK.*\)022/\1002/g' /etc/login.defs
USER jovyan
# Conda environment specification for the notebook image.
# Channels are listed in priority order; packages are unpinned unless a
# version is given explicitly (python, and the pip-installed packages below).
name: almond-spark-notebook
channels:
- fastai
- pytorch
- conda-forge
- defaults
dependencies:
- absl-py
- alembic
- arrow-cpp
- asn1crypto
- astor
- async_generator
- attrs
- backcall
- blas
- bleach
- bokeh
- boost-cpp
- bottleneck
- bzip2
- c-ares
- ca-certificates
- cairo
- certifi
- cffi
- chardet
- cloudpickle
- configurable-http-proxy
- cryptography
- cryptography-vectors
- curl
- cycler
- cython
- cytoolz
- dask-core
- decorator
- dill
- entrypoints
- expat
- fastcache
- findspark
- fontconfig
- freetype
- gast
- gensim
- gettext
- glib
- gmpy2
- graphite2
- grpcio
- gsl
- h5py
- harfbuzz
- hdf5
- icu
- idna
- imageio
- implicit
- ipykernel
- ipython
- ipython_genutils
- ipywidgets
- jedi
- jinja2
- jpeg
- jsonschema
- jupyter_client
- jupyter_core
- jupyterlab-git
- kiwisolver
- libffi
- libiconv
- libpng
- libprotobuf
- libsodium
- libssh2
- libtiff
- libxcb
- libxml2
- llvmlite
- mako
- markdown
- markupsafe
- matplotlib
- matplotlib-base
- metakernel
- mistune
- mpc
- mpfr
- mpmath
- msgpack-numpy
- msgpack-python
- murmurhash
- nbconvert
- nbformat
- networkx
- ninja
- notebook
- numba
- numexpr
- numpy
- olefile
- openblas
- openssl
- packaging
- pamela
- pandas
- pandoc
- pandocfilters
- parquet-cpp
- parso
- patsy
- pcre
- pexpect
- pickleshare
- pillow
- pip
- pixman
- prometheus_client
- prompt_toolkit
- protobuf
- psutil
- pthread-stubs
- pyarrow
- pycosat
- pycparser
- pygments
- pyopenssl
- pyparsing
- pyqt
- pyrsistent
- pysocks
- pytest
- python=3.7
- python-dateutil
- python-editor
- python-oauth2
- pytz
- pywavelets
- pyyaml
- pyzmq
- qt
- regex
- requests
- ruamel_yaml
- scikit-image
- scikit-learn
- scipy
- seaborn
- send2trash
- setuptools
- simplejson
- sip
- six
- spylon
- spylon-kernel
- sqlalchemy
- sqlite
- statsmodels
- sympy
- tensorboard
- termcolor
- terminado
- thinc
- tk
- toolz
- tornado
- tqdm
- typing
- ujson
- urllib3
- vincent
- wcwidth
- webencodings
- werkzeug
- wheel
- widgetsnbextension
- wrapt
- xlrd
- xz
- yaml
- zeromq
- zlib
- cymem
- intel-openmp
- libcurl
- mkl
- preshed
- pycurl
- spacy
- dataclasses
- fastai
- fastprogress
- pytorch
- pytorch-cpu
- torchvision
- torchvision-cpu
# Packages installed with pip rather than conda. They must be nested under
# the `pip:` key; at the same level they would be read as conda packages.
- pip:
  - black
  - dask
  - pysolr
  - msgpack
  - pyspark==2.4.0
  - torch==1.0.0
  - toree==0.3.0
# Helm values for the JupyterHub chart from
# https://github.com/jupyterhub/zero-to-jupyterhub-k8s
#
#   helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/
#   helm repo update
#   helm upgrade --install jhub jupyterhub/jupyterhub --namespace jhub --version 0.7.0 --values jhub.yaml
proxy:
  # Placeholder only: replace with a real secret before deploying.
  secretToken: "SECRET"

rbac:
  enabled: true

singleuser:
  serviceAccountName: spark
  defaultUrl: "/lab"
  memory:
    guarantee: 4G
  image:
    name: jupyter/all-spark-notebook
    tag: v8
  storage:
    # Host directories exposed to every user pod at the same paths.
    extraVolumes:
      - name: jhub-scratch
        hostPath:
          path: /local/scratch
      - name: jhub-data
        hostPath:
          path: /data
    extraVolumeMounts:
      - name: jhub-scratch
        mountPath: /local/scratch
      - name: jhub-data
        mountPath: /data
  extraEnv:
    # Environment variable values must be strings, so the numbers are quoted.
    NUM_CORES: "7"
    MKL_NUM_THREADS: "7"
    OMP_NUM_THREADS: "7"
    # Folded into a single line. The backtick and ${HOSTNAME} expressions are
    # passed through literally and rely on whatever consumes SPARK_OPTS
    # evaluating them in a shell (TODO confirm for the kernels in use).
    # The executor volume settings mirror the hostPath mounts declared above.
    SPARK_OPTS: >-
      --deploy-mode=client
      --master k8s://https://kubernetes.default.svc
      --driver-memory=4g
      --executor-memory=31g
      --executor-cores=7
      --conf spark.executor.instances=1
      --conf spark.driver.pod.name=${HOSTNAME}
      --conf spark.driver.host=`hostname -i`
      --conf spark.driver.port=19998
      --conf spark.kubernetes.executor.request.cores=4
      --conf spark.kubernetes.container.image=docker.io/7thsense/spark:spark24
      --conf spark.kubernetes.namespace=`cat /var/run/secrets/kubernetes.io/serviceaccount/namespace`
      --conf spark.kubernetes.authenticate.oauthTokenFile=/var/run/secrets/kubernetes.io/serviceaccount/token
      --conf spark.kubernetes.authenticate.caCertFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      --conf spark.kubernetes.executor.volumes.hostPath.jhub-scratch.mount.path=/local/scratch
      --conf spark.kubernetes.executor.volumes.hostPath.jhub-scratch.options.path=/local/scratch
      --conf spark.kubernetes.executor.volumes.hostPath.jhub-data.mount.path=/data
      --conf spark.kubernetes.executor.volumes.hostPath.jhub-data.options.path=/data

hub:
  # Python snippet appended to the hub configuration: start JupyterLab for
  # each user and grant the account named 'user' admin access.
  extraConfig: |
    c.Spawner.cmd = ['jupyter-labhub']
    c.Authenticator.admin_users = {'user'}
    c.JupyterHub.admin_access = True

ingress:
  enabled: true
  hosts:
    - jhub
# Helm-based deployment helpers. None of the targets produce files, so all of
# them are declared phony. Running plain `make` executes `ingress`, the first
# regular target.
.PHONY: jhub ingress hostpath-provisioner

# nginx ingress controller, configured from nginx-ingress.yaml.
ingress:
	helm upgrade --install ingress stable/nginx-ingress --namespace kube-system --values nginx-ingress.yaml

# JupyterHub chart 0.7.0, configured from jhub.yaml.
jhub:
	helm upgrade --install jhub jupyterhub/jupyterhub \
		--namespace jhub \
		--version 0.7.0 \
		--values jhub.yaml

# hostPath volume provisioner; registers the rimusz chart repository first.
hostpath-provisioner:
	helm repo add rimusz https://charts.rimusz.net
	helm repo update
	helm upgrade --install hostpath-provisioner --namespace kube-system rimusz/hostpath-provisioner
# Deploy with: kubectl apply -f solr.yaml
# You'll need an ingress controller with support for path routing and a
# default volume provider. Every resource lives in the `solr` namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: solr-cloud-embedded
  namespace: solr
  labels:
    app: solr-cloud-embedded
spec:
  replicas: 1
  selector:
    matchLabels:
      app: solr-cloud-embedded
  template:
    metadata:
      labels:
        app: solr-cloud-embedded
    spec:
      volumes:
        # Backed by the solr-home PersistentVolumeClaim.
        - name: solr-home
          persistentVolumeClaim:
            claimName: solr-home
      containers:
        - name: solr-cloud-embedded
          image: solr:latest
          ports:
            - containerPort: 8983
          command: ['solr-foreground', '-c']
          env:
            - name: INIT_SOLR_HOME
              value: "yes"
            # Points Solr at the mounted volume below.
            - name: SOLR_HOME
              value: "/solr-home/"
          volumeMounts:
            - name: solr-home
              mountPath: "/solr-home"
---
# 100Gi single-writer filesystem claim named solr-home in the solr namespace.
# No storageClassName is set, so the cluster's default class is used.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: solr-home
  namespace: solr
spec:
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  resources:
    requests:
      storage: 100Gi
---
# Service forwarding TCP 8983 to pods labelled app=solr-cloud-embedded.
apiVersion: v1
kind: Service
metadata:
  name: solr-service
  namespace: solr
spec:
  selector:
    app: solr-cloud-embedded
  ports:
    - protocol: TCP
      port: 8983
      targetPort: 8983
---
# Routes http://solr/solr to solr-service:8983 through the nginx ingress
# controller, with the proxy body size limit set to 1024m.
# NOTE(review): extensions/v1beta1 is the legacy Ingress API group; confirm
# the target cluster still serves it before applying.
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: solr-ingress
  namespace: solr
  annotations:
    nginx.ingress.kubernetes.io/proxy-body-size: "1024m"
spec:
  rules:
    - host: "solr"
      http:
        paths:
          - path: /solr
            backend:
              serviceName: solr-service
              servicePort: 8983
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment