# Build the Spark base and Spark history images, then run the history server
# and open it through a minikube service.

# Use VM built-in docker daemon.
# The command substitution must be written $(...): a bare (...) after eval is a
# syntax error in bash/sh. Quoting keeps the emitted export lines intact.
eval "$(minikube docker-env)"

# Creation of spark base
# NOTE(review): this build uses the same tag (spark-history:v1) as the history
# build below, so the second build re-points the tag. Confirm which tag the
# history Dockerfile expects for its base image.
# '&&' ensures the build only runs if the directory change succeeded.
cd spark-2.2.0-k8s-0.5.0-bin-2.7.3 &&
  docker build -f Dockerfile -t spark-history:v1 . # Build docker image for spark base

# Creation of spark history
cd dockerfiles/history-service &&
  docker build -f Dockerfile -t spark-history:v1 . # Build docker image spark history derived from base

# Run the history image, listening on port 18080.
# NOTE(review): the next command expects a deployment named spark-history;
# confirm that 'kubectl run' creates a deployment with the kubectl version in use.
kubectl run spark-history --image=spark-history:v1 --port=18080
kubectl expose deployment spark-history --type=LoadBalancer

# Ask minikube for access to the spark-history service.
minikube service spark-history
-
How to share read/write-enabled storage between pods (they may run on different nodes)?
- Storage as a service with multiple simultaneous write access (OK, since each spark-submit writes to a different file)
- Storage as a platform (Ceph, ...)
-
For minikube, hostPath might be enough since we only have one node anyway, but this needs to be covered by the k8s Spark implementation (request for hostPath-typed storage in the API call?)
# spark-submit options enabling event logging; --conf takes one key=value argument.
--conf spark.eventLog.enabled=true \
--conf spark.eventLog.dir=file:///tmp/spark-events/
#
# Creation of a Spark history server deployment used to keep and browse Spark
# event logs. The container mounts the node directory /tmp/spark-events, the
# same path that spark.eventLog.dir points to in the spark-submit options.
#
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    name: spark-history
    spark-version: 2.2.0
  name: spark-history-deployment
spec:
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: spark-history
      spark-version: 2.2.0
  template:
    metadata:
      labels:
        app: spark-history
        spark-version: 2.2.0
    spec:
      volumes:
        # Directory on the node holding the Spark event logs.
        - name: temp-volume
          hostPath:
            path: '/tmp/spark-events'
      containers:
        - name: spark-history
          image: spark-history:v1
          # Use the image already present in the node's Docker daemon if there is one.
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 18080
          volumeMounts:
            - mountPath: '/tmp/spark-events'
              name: temp-volume
          resources:
            requests:
              cpu: "1"
            limits:
              cpu: "1"
# Create the resources defined in the kubernetes-shuffle-service.yaml manifest
# (the path is relative to the current working directory).
kubectl create --filename=Downloads/spark-2.2.0-k8s-0.5.0-bin-2.7.3/conf/kubernetes-shuffle-service.yaml