Last active
September 30, 2021 15:00
-
-
Save rluta/a228c26d04afea491950c976470eac79 to your computer and use it in GitHub Desktop.
EMR 6.3 Bootstrap script for Spark RAPIDS plugin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# EMR bootstrap action for the Spark RAPIDS plugin.
#
# Installs the CUDA 11.2 packages, removes the cuDF / RAPIDS jars already
# present on the node, and installs the versions staged in S3 together with
# the GPU discovery script.
#
# Usage: bootstrap-rapids.sh <bucket_path>
#   bucket_path  S3 URI prefix (e.g. s3://my-bucket) that contains
#                cudf-${CUDF_VERSION}.jar,
#                rapids-4-spark_${RAPIDS_VERSION}.jar and getGpusResources.sh
#
# Optional environment overrides: CUDF_VERSION, RAPIDS_VERSION.
set -euxo pipefail

# Fail fast with a clear message instead of building paths like "/cudf-….jar".
bucket_path=${1:?usage: $0 <s3-bucket-path>}
# Drop a trailing slash so "${bucket_path}/file" never contains "//"
# (S3 would treat that as a different object key).
bucket_path=${bucket_path%/}

export CUDF_VERSION=${CUDF_VERSION:-21.08.2-cuda11}
export RAPIDS_VERSION=${RAPIDS_VERSION:-2.12-21.08.0}

readonly rapids_lib_dir=/usr/share/aws/emr/spark-rapids/lib
readonly spark_jars_dir=/usr/lib/spark/jars
readonly gpu_scripts_dir=/usr/lib/spark/scripts/gpu

echo "Give YARN authorization to manage devices"
sudo chmod a+rwx -R /sys/fs/cgroup/cpu,cpuacct
sudo chmod a+rwx -R /sys/fs/cgroup/devices

echo "Install the cuda-compat-11-2"
sudo yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
sudo yum clean all
sudo yum -y install cuda-toolkit-11-2 cuda-compat-11-2 openssl11

echo "Clean-up default EMR jars"
# The globs are intentionally unquoted so the shell expands them.
sudo rm -f "${spark_jars_dir}"/rapids-*.jar
sudo rm -f "${rapids_lib_dir}"/rapids-*.jar
sudo rm -f "${spark_jars_dir}"/cudf-*.jar
sudo rm -f "${rapids_lib_dir}"/cudf-*.jar
sudo mkdir -p "${rapids_lib_dir}/"
sudo mkdir -p "${spark_jars_dir}/"

echo "Install cuDF and Spark RAPIDS"
for jar in "cudf-${CUDF_VERSION}.jar" "rapids-4-spark_${RAPIDS_VERSION}.jar"; do
  sudo aws s3 cp "${bucket_path}/${jar}" "${rapids_lib_dir}/${jar}"
  # -f/-n: replace an existing link instead of failing when re-run.
  sudo ln -sfn "${rapids_lib_dir}/${jar}" "${spark_jars_dir}/${jar}"
done

echo "Install gpu discovery scripts"
sudo mkdir -p "${gpu_scripts_dir}/"
sudo aws s3 cp "${bucket_path}/getGpusResources.sh" "${gpu_scripts_dir}/getGpusResources.sh"
sudo chmod +x "${gpu_scripts_dir}/getGpusResources.sh"

echo "Done"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"Classification":"spark", | |
"Properties":{ | |
"enableSparkRapids":"false" | |
} | |
}, | |
{ | |
"Classification":"yarn-site", | |
"Properties":{ | |
"yarn.nodemanager.resource-plugins":"yarn.io/gpu", | |
"yarn.resource-types":"yarn.io/gpu", | |
"yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices":"auto", | |
"yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables":"/usr/bin", | |
"yarn.nodemanager.linux-container-executor.cgroups.mount":"true", | |
"yarn.nodemanager.linux-container-executor.cgroups.mount-path":"/sys/fs/cgroup", | |
"yarn.nodemanager.linux-container-executor.cgroups.hierarchy":"yarn", | |
"yarn.nodemanager.container-executor.class":"org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor" | |
} | |
}, | |
{ | |
"Classification":"container-executor", | |
"Properties":{ | |
}, | |
"Configurations":[ | |
{ | |
"Classification":"gpu", | |
"Properties":{ | |
"module.enabled":"true" | |
} | |
}, | |
{ | |
"Classification":"cgroups", | |
"Properties":{ | |
"root":"/sys/fs/cgroup", | |
"yarn-hierarchy":"yarn" | |
} | |
} | |
] | |
}, | |
{ | |
"Classification":"capacity-scheduler", | |
"Properties":{ | |
"yarn.scheduler.capacity.resource-calculator":"org.apache.hadoop.yarn.util.resource.DominantResourceCalculator" | |
} | |
}, | |
{ | |
"Classification":"spark-defaults", | |
"Properties":{ | |
"spark.plugins":"com.nvidia.spark.SQLPlugin", | |
"spark.rapids.sql.enabled":"true", | |
"spark.executor.resource.gpu.discoveryScript":"/usr/lib/spark/scripts/gpu/getGpusResources.sh", | |
"spark.executor.extraLibraryPath":"/usr/local/cuda-11.2/targets/x86_64-linux/lib:/usr/local/cuda-11.2/extras/CUPTI/lib64:/usr/local/cuda-11.2/compat/:/usr/local/cuda-11.2/lib:/usr/local/cuda-11.2/lib64:/usr/lib/hadoop/lib/native:/docker/usr/lib/hadoop/lib/native", | |
"spark.rapids.shims-provider-override": "com.nvidia.spark.rapids.shims.spark311.SparkShimServiceProvider", | |
"spark.kryo.registrator":"com.nvidia.spark.rapids.GpuKryoRegistrator", | |
"spark.rapids.sql.concurrentGpuTasks":"2", | |
"spark.executor.resource.gpu.amount":"1", | |
"spark.executor.cores":"4", | |
"spark.executor.memory":"9000M", | |
"spark.task.cpus":"1", | |
"spark.task.resource.gpu.amount":"0.25", | |
"spark.rapids.memory.pinnedPool.size":"0", | |
"spark.executor.memoryOverhead":"2G", | |
"spark.sql.sources.useV1SourceList":"", | |
"spark.sql.sources.ignoreDataLocality.enabled":"true", | |
"spark.sql.files.maxPartitionBytes":"402653184", | |
"spark.rapids.sql.incompatibleOps.enabled":"true", | |
"spark.rapids.sql.decimalType.enabled":"true" | |
} | |
} | |
] | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# | |
# Licensed to the Apache Software Foundation (ASF) under one or more | |
# contributor license agreements. See the NOTICE file distributed with | |
# this work for additional information regarding copyright ownership. | |
# The ASF licenses this file to You under the Apache License, Version 2.0 | |
# (the "License"); you may not use this file except in compliance with | |
# the License. You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# | |
# This script is a basic example script to get resource information about NVIDIA GPUs. | |
# It assumes the drivers are properly installed and the nvidia-smi command is available. | |
# It is not guaranteed to work on all setups so please test and customize as needed | |
# for your environment. It can be passed into SPARK via the config | |
# spark.{driver/executor}.resource.gpu.discoveryScript to allow the driver or executor to discover | |
# the GPUs it was allocated. It assumes you are running within an isolated container where the | |
# GPUs are allocated exclusively to that driver or executor. | |
# It outputs a JSON formatted string that is expected by the | |
# spark.{driver/executor}.resource.gpu.discoveryScript config. | |
# | |
# Example output: {"name": "gpu", "addresses":["0","1","2","3","4","5","6","7"]} | |
# Join the newline-separated values read from stdin with the three characters
# "," (quote, comma, quote), e.g. the lines 0, 1, 2 become: 0","1","2
# The caller supplies the outermost pair of quotes.
join_gpu_indices() {
  sed -e ':a' -e 'N' -e '$!ba' -e 's/\n/","/g'
}

# One GPU index per line, as printed by nvidia-smi, joined for the JSON array.
ADDRS=$(nvidia-smi --query-gpu=index --format=csv,noheader | join_gpu_indices)

# printf with a quoted format keeps the brackets from being treated as a glob.
printf '{"name": "gpu", "addresses":["%s"]}\n' "$ADDRS"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Stages the Spark RAPIDS artifacts in a newly created S3 bucket and launches
# an EMR 6.3.0 cluster that uses bootstrap-rapids.sh as a bootstrap action.
#
# Run it from the directory that holds bootstrap-rapids.sh,
# getGpusResources.sh and emr-rapids-configuration.json: the whole current
# directory (plus the two downloaded jars) is synced to the bucket.
set -euo pipefail

# Random suffix for the bucket name. This must be a command substitution;
# plain parentheses would create an array whose first element is "openssl".
id=$(openssl rand -hex 8)
bucket_uri="s3://bootstrap-emr-${id}"

aws s3 mb "${bucket_uri}" --region eu-west-1

wget https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/21.08.0/rapids-4-spark_2.12-21.08.0.jar
wget https://repo1.maven.org/maven2/ai/rapids/cudf/21.08.2/cudf-21.08.2-cuda11.jar

aws s3 sync . "${bucket_uri}/"

# NOTE(review): the configuration file is read from the current directory,
# alongside the other staged files; the original pointed at the filesystem
# root (file:///emr-rapids-configuration.json) - confirm the intended location.
#
# The bootstrap script takes the bucket URI as its first argument, so it is
# passed through Args.
aws emr create-cluster \
  --release-label emr-6.3.0 \
  --applications Name=Hadoop Name=Spark \
  --service-role EMR_DefaultRole \
  --ec2-attributes KeyName=my-key-pair,InstanceProfile=EMR_EC2_DefaultRole \
  --instance-groups \
    InstanceGroupType=MASTER,InstanceCount=1,InstanceType=m5a.2xlarge \
    InstanceGroupType=CORE,InstanceCount=1,InstanceType=g4dn.2xlarge \
    InstanceGroupType=TASK,InstanceCount=1,InstanceType=g4dn.xlarge \
  --configurations file://./emr-rapids-configuration.json \
  --bootstrap-actions "Name=Rapids 21.08.0 Bootstrap action,Path=${bucket_uri}/bootstrap-rapids.sh,Args=[${bucket_uri}]"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment