Skip to content

Instantly share code, notes, and snippets.

@kk17
Created March 28, 2019 07:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kk17/37991242e3f86e48cf1882d9c5bfa638 to your computer and use it in GitHub Desktop.
Save kk17/37991242e3f86e48cf1882d9c5bfa638 to your computer and use it in GitHub Desktop.
Spark-SQL-Ranger
# Compose stack for trying out Spark SQL with Ranger authorization:
#   * mysql         - MySQL 5.7 holding the "metastore" database
#   * thrift-server - image built from the Dockerfile in this directory,
#                     running the Spark Thrift Server
version: '3.1'
services:
  mysql:
    image: mysql:5.7
    environment:
      MYSQL_ROOT_PASSWORD: root
      MYSQL_DATABASE: metastore
    volumes:
      - ./mysql-data:/var/lib/mysql
    ports:
      - "3306:3306"
  thrift-server:
    build:
      context: .
      # These values are passed to the Dockerfile as build args; the key names
      # (including the exact spelling of AUDIT_DB_USEr) must match its ARG
      # declarations.
      args:
        METASTORE_CONNECTION_URL: jdbc:mysql://mysql:3306/metastore?useSSL=false
        METASTORE_CONNECTION_USER_NAME: root
        METASTORE_CONNECTION_PASSWORD: root
        # NOTE(review): 43006 is an unusual MySQL port - confirm it matches the
        # port the Ranger audit database is published on by the host.
        AUDIT_DB_URL: jdbc:mysql://host.docker.internal:43006/ranger?useSSL=false
        AUDIT_DB_USEr: rangerlogger
        AUDIT_DB_PASSWORD: rangerlogger
        RANGER_REST_URL: http://host.docker.internal:6080
    depends_on:
      - mysql
    # Start the Thrift Server, then tail its log file as the foreground command.
    command: bash -c "/usr/lib/spark/sbin/start-thriftserver.sh && tail -fn100 /usr/lib/spark/logs/spark--org.apache.spark.sql.hive.thriftserver.HiveThriftServer2-1*.out"
    ports:
      # Quoted like the mysql mapping so it is always read as a string.
      - "10001:10001"
    volumes:
      - ../data:/data
# Base image: CentOS 7 with build tooling, Python 3.6 and OpenJDK 8.
FROM centos:7
RUN yum -y update
RUN yum -y install wget gcc openldap-devel
# Repository release package; presumably the source of the python36u packages
# installed on the next line.
# NOTE(review): confirm this URL and the python36u package names still resolve.
RUN yum -y install https://centos7.iuscommunity.org/ius-release.rpm
RUN yum -y install python36u python36u-libs python36u-devel python36u-pip
# JAVA
# /dev/urandom is used as random source, which is perfectly safe
# according to http://www.2uo.de/myths-about-urandom/
RUN yum install -y \
    java-1.8.0-openjdk-1.8.0.191.b12-1.el7_6 \
    java-1.8.0-openjdk-devel-1.8.0.191.b12-1.el7_6 \
    && echo "securerandom.source=file:/dev/urandom" >> /usr/lib/jvm/java/jre/lib/security/java.security \
    && yum clean all
# SPARK
# Downloads the Spark distribution selected by SPARK_VERSION / HADOOP_VERSION,
# unpacks it under /usr/lib and renames it to SPARK_HOME, then adds the MySQL
# JDBC driver to Spark's jars directory and puts Spark's bin directory on PATH.
ARG SPARK_VERSION=2.3.2
ARG HADOOP_VERSION=2.7
ENV SPARK_HOME=/usr/lib/spark
ARG SPARK_ARCHIVE=https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
# -f makes curl exit non-zero on an HTTP error instead of piping an error page
# into tar; pipefail makes that failure abort the build step.
RUN set -o pipefail && curl -fsSL ${SPARK_ARCHIVE} | tar -xz -C /usr/lib/ && mv /usr/lib/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} ${SPARK_HOME}
# Maven Central is served from repo1.maven.org over HTTPS.
ADD https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.38/mysql-connector-java-5.1.38.jar ${SPARK_HOME}/jars/mysql-connector-java-5.1.38.jar
ENV PATH=$PATH:${SPARK_HOME}/bin
# Render hive-site.xml from a template so that the metastore database
# connection settings can be supplied as build args.
ADD hive-site.xml.local.template /home/hadoop/hive-site.xml.template
# No defaults are declared for these args, so they are empty unless passed at
# build time.
ARG METASTORE_CONNECTION_URL
ARG METASTORE_CONNECTION_USER_NAME
ARG METASTORE_CONNECTION_PASSWORD
# Export the args, then let the shell expand the template: sed backslash-escapes
# every double quote in the template, and eval runs echo "<template text>" so
# that ${...} references are substituted. The result is written to
# /usr/lib/spark/conf/hive-site.xml. Because the text goes through eval, the
# values (including the password) end up in the image, and any shell syntax in
# the template is interpreted.
RUN export METASTORE_CONNECTION_URL=$METASTORE_CONNECTION_URL && \
export METASTORE_CONNECTION_USER_NAME=$METASTORE_CONNECTION_USER_NAME && \
export METASTORE_CONNECTION_PASSWORD=$METASTORE_CONNECTION_PASSWORD && \
eval "echo \"$(sed 's/"/\\"/g' /home/hadoop/hive-site.xml.template)\"" > /usr/lib/spark/conf/hive-site.xml
# Ranger hive plugin
# The plugin archive is added from the build context; the next step copies the
# jars from its extracted lib directory into Spark's jars directory and removes
# the extracted tree.
ADD ranger-0.5.3-hive-plugin.tar.gz .
RUN cp -r ranger-0.5.3-hive-plugin/lib/* $SPARK_HOME/jars && rm -rf ranger-0.5.3-hive-plugin
# Templates for the two Ranger configuration files rendered below.
ADD ranger-hive-security.xml.template /home/hadoop/ranger-hive-security.xml.template
ADD ranger-hive-audit.xml.template /home/hadoop/ranger-hive-audit.xml.template
# NOTE(review): this jar is fetched from dl.bintray.com at build time - confirm
# the URL is still reachable.
ADD https://dl.bintray.com/spark-packages/maven/yaooqinn/spark-authorizer/2.1.1/spark-authorizer-2.1.1.jar ${SPARK_HOME}/jars/
# Build args substituted into the Ranger templates. The spelling AUDIT_DB_USEr
# (lower-case r) is the name that must be used when passing the build arg.
ARG AUDIT_DB_URL
ARG AUDIT_DB_USEr
ARG AUDIT_DB_PASSWORD
ARG RANGER_REST_URL
# Export the args, render both templates into /usr/lib/spark/conf by escaping
# their double quotes with sed and expanding ${...} references through
# eval/echo, then append the SQL extension class and the warehouse directory to
# spark-defaults.conf.
RUN export AUDIT_DB_URL=$AUDIT_DB_URL && \
export AUDIT_DB_USEr=$AUDIT_DB_USEr && \
export AUDIT_DB_PASSWORD=$AUDIT_DB_PASSWORD && \
export RANGER_REST_URL=$RANGER_REST_URL && \
eval "echo \"$(sed 's/"/\\"/g' /home/hadoop/ranger-hive-security.xml.template)\"" > /usr/lib/spark/conf/ranger-hive-security.xml && \
eval "echo \"$(sed 's/"/\\"/g' /home/hadoop/ranger-hive-audit.xml.template)\"" > /usr/lib/spark/conf/ranger-hive-audit.xml && \
echo "spark.sql.extensions org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension" >> $SPARK_HOME/conf/spark-defaults.conf && \
echo "spark.sql.warehouse.dir /data/spark-warehouse" >> $SPARK_HOME/conf/spark-defaults.conf
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more -->
<!-- contributor license agreements. See the NOTICE file distributed with -->
<!-- this work for additional information regarding copyright ownership. -->
<!-- The ASF licenses this file to You under the Apache License, Version 2.0 -->
<!-- (the "License"); you may not use this file except in compliance with -->
<!-- the License. You may obtain a copy of the License at -->
<!-- -->
<!-- http://www.apache.org/licenses/LICENSE-2.0 -->
<!-- -->
<!-- Unless required by applicable law or agreed to in writing, software -->
<!-- distributed under the License is distributed on an "AS IS" BASIS, -->
<!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -->
<!-- See the License for the specific language governing permissions and -->
<!-- limitations under the License. -->

<!-- Template for hive-site.xml. The image build expands the shell-style -->
<!-- placeholders in the value elements below, so any text added to this file -->
<!-- must stay free of shell metacharacters. -->
<configuration>
  <!-- Metastore database connection (values come from build args) -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>${METASTORE_CONNECTION_URL}</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>${METASTORE_CONNECTION_USER_NAME}</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>${METASTORE_CONNECTION_PASSWORD}</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>${METASTORE_CONNECTION_DRIVER_NAME:-com.mysql.jdbc.Driver}</value>
  </property>

  <property>
    <name>hive.support.concurrency</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.thrift.port</name>
    <value>${THRIFT_PORT:-10001}</value>
  </property>

  <!-- Metastore schema handling -->
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
    <description>
      Enforce metastore schema version consistency.
      True: Verify that version information stored in metastore is compatible with one from Hive jars. Also disable
      automatic schema migration attempt. Users are required to manually migrate schema after Hive upgrade which
      ensures proper metastore schema migration. (Default)
      False: Warn if the version information stored in metastore doesn't match with one from Hive jars.
    </description>
  </property>
  <property>
    <name>hive.metastore.schema.verification.record.version</name>
    <value>false</value>
    <description>
      When true the current MS version is recorded in the VERSION table. If this is disabled and verification is
      enabled the MS will be unusable.
    </description>
  </property>

  <!-- Authorization through the Ranger Hive plugin -->
  <property>
    <name>hive.security.authorization.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.security.authorization.manager</name>
    <value>org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory</value>
  </property>
  <property>
    <name>hive.security.authenticator.manager</name>
    <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value>
  </property>
  <property>
    <name>hive.conf.restricted.list</name>
    <value>hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager</value>
  </property>

  <!-- Declared as a direct child like every other property, without an -->
  <!-- extra nested configuration wrapper. -->
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/data/spark-warehouse</value>
    <description>Hive Metastore location</description>
  </property>
</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Template for ranger-hive-audit.xml. The image build expands the -->
<!-- shell-style placeholders in the value elements from build args. -->
<configuration>
  <property>
    <name>xasecure.audit.is.enabled</name>
    <value>true</value>
  </property>
  <!-- NOTE(review): the database destination is switched off here, so the -->
  <!-- JDBC settings below are presumably unused until it is set to true. -->
  <property>
    <name>xasecure.audit.destination.db</name>
    <value>false</value>
  </property>
  <property>
    <name>xasecure.audit.destination.db.jdbc.driver</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>xasecure.audit.destination.db.jdbc.url</name>
    <value>${AUDIT_DB_URL}</value>
  </property>
  <property>
    <name>xasecure.audit.destination.db.password</name>
    <value>${AUDIT_DB_PASSWORD}</value>
  </property>
  <!-- The user comes from the user build arg (spelled with a lower-case r -->
  <!-- in the Dockerfile), not from the password placeholder. -->
  <property>
    <name>xasecure.audit.destination.db.user</name>
    <value>${AUDIT_DB_USEr}</value>
  </property>
</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Template for ranger-hive-security.xml. The image build expands the -->
<!-- REST URL placeholder from a build arg. -->
<configuration>

  <!-- Where policies are fetched from and which client class fetches them -->
  <property>
    <name>ranger.plugin.hive.policy.rest.url</name>
    <value>${RANGER_REST_URL}</value>
  </property>

  <property>
    <name>ranger.plugin.hive.service.name</name>
    <value>metastore</value>
  </property>

  <!-- Relative directory used for the policy cache -->
  <property>
    <name>ranger.plugin.hive.policy.cache.dir</name>
    <value>metastore/policycache</value>
  </property>

  <!-- Policy poll interval, in milliseconds -->
  <property>
    <name>ranger.plugin.hive.policy.pollIntervalMs</name>
    <value>5000</value>
  </property>

  <property>
    <name>ranger.plugin.hive.policy.source.impl</name>
    <value>org.apache.ranger.admin.client.RangerAdminRESTClient</value>
  </property>

</configuration>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment