Created February 14, 2019 09:35
spark-on-k8s python pyspark
# I was having problems creating Docker images for spark-on-k8s with docker-image-tool.sh in Spark (it always produced the spark-r image).
# So here is the Dockerfile to create an image for running PySpark.
# The image is available as sunnybingome/spark8s:pyspark240py368, built with Spark 2.4.0.
# For client mode (interactive mode), please use pipenv so the driver's Python version matches; the image uses Python 3.6(.8). A sketch follows below.
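# A rough client-mode sketch (hedged; the exact commands depend on your host setup,
# and nothing here is baked into the image itself):
#   pipenv --python 3.6
#   pipenv install pyspark==2.4.0
#   pipenv run pyspark        # or: pipenv run spark-submit ... --deploy-mode client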
# Modified by Sunny from spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/Dockerfile
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM openjdk:8-alpine

ARG spark_jars=jars
ARG img_path=kubernetes/dockerfiles
ARG k8s_tests=kubernetes/tests
## Sunny: install Python; without it entrypoint.sh fails with "/opt/entrypoint.sh: line 75: python: command not found"
RUN apk update \
    && apk upgrade \
    && apk add --no-cache bash \
    && apk add --no-cache --virtual=build-dependencies unzip \
    && apk add --no-cache curl \
    && apk add --no-cache openjdk7-jre

RUN apk add --no-cache python3 \
    && python3 -m ensurepip \
    && pip3 install --upgrade pip setuptools \
    && rm -r /usr/lib/python*/ensurepip \
    && if [ ! -e /usr/bin/pip ]; then ln -s pip3 /usr/bin/pip ; fi \
    && if [[ ! -e /usr/bin/python ]]; then ln -sf /usr/bin/python3 /usr/bin/python; fi \
    && rm -r /root/.cache
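# PySpark Python package, pinned to match the Spark 2.4.0 jars copied into the image below.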
RUN pip3 install pyspark==2.4.0
# Before building the docker image, first build and make a Spark distribution following
# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
# If this docker file is being used in the context of building your images from a Spark
# distribution, the docker build command should be invoked from the top level directory
# of the Spark distribution. E.g.:
# docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .
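# A hedged, concrete example for this particular file (assuming it is saved as
# kubernetes/dockerfiles/spark/Dockerfile.pyspark inside the unpacked
# spark-2.4.0-bin-hadoop2.7 distribution; registry name and tag are placeholders):
#   docker build -t <your-registry>/spark8s:pyspark240py368 -f kubernetes/dockerfiles/spark/Dockerfile.pyspark .
#   docker push <your-registry>/spark8s:pyspark240py368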
RUN set -ex && \
    apk upgrade --no-cache && \
    apk add --no-cache bash tini libc6-compat linux-pam && \
    mkdir -p /opt/spark && \
    mkdir -p /opt/spark/work-dir && \
    touch /opt/spark/RELEASE && \
    rm /bin/sh && \
    ln -sv /bin/bash /bin/sh && \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
    chgrp root /etc/passwd && chmod ug+rw /etc/passwd
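# Copy the jars, launcher scripts, entrypoint, examples, k8s integration tests, and data
# directories from the Spark distribution into the image.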
COPY ${spark_jars} /opt/spark/jars
COPY bin /opt/spark/bin
COPY sbin /opt/spark/sbin
COPY ${img_path}/spark/entrypoint.sh /opt/
COPY examples /opt/spark/examples
COPY ${k8s_tests} /opt/spark/tests
COPY data /opt/spark/data

ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir

ENTRYPOINT [ "/opt/entrypoint.sh" ]
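As a rough usage sketch (cluster mode; the API server address and service account below are placeholders, not values taken from this gist), the bundled Pi example could be submitted against this image roughly like this:

    spark-submit \
        --master k8s://https://<k8s-apiserver-host>:<port> \
        --deploy-mode cluster \
        --name pyspark-pi \
        --conf spark.executor.instances=2 \
        --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
        --conf spark.kubernetes.container.image=sunnybingome/spark8s:pyspark240py368 \
        local:///opt/spark/examples/src/main/python/pi.py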