Created
July 29, 2018 23:16
-
-
Save wesm/04e9a1c11f446887ad40978ddd7f8383 to your computer and use it in GitHub Desktop.
Wes's Apache Arrow helper bash scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash

# ----------------------------------------------------------------------
# Compiler selection. ARROW_LLVM_VERSION simply mirrors the clang version.
export ARROW_CLANG_VERSION="6.0"
export ARROW_LLVM_VERSION="${ARROW_CLANG_VERSION}"
export ARROW_GCC="gcc"
export ARROW_GXX="g++"

# ----------------------------------------------------------------------
# PYARROW_* switches. Nothing in the visible part of this file reads them
# back; presumably they are consumed by the pyarrow build -- TODO confirm.
export PYARROW_WITH_ORC="1"
export PYARROW_WITH_PARQUET="1"
export PYARROW_WITH_PLASMA="1"
export PYARROW_BUNDLE_ARROW_CPP="0"
export PYARROW_BUNDLE_BOOST="0"
export PYARROW_PARALLEL="4"
export PYARROW_CMAKE_GENERATOR="Ninja"

# ----------------------------------------------------------------------
# Default Xcode location; DEVELOPER_DIR is kept equal to XCODE_ROOT.
export XCODE_ROOT="/Applications/Xcode.app/Contents/Developer"
export DEVELOPER_DIR="${XCODE_ROOT}"

# ----------------------------------------------------------------------
# CMake generator flag that is spliced into ARROW_GCC_OPTIONS and
# PARQUET_GCC_OPTIONS. Swap in the empty assignment to drop it.
# export USE_NINJA_BUILD=
export USE_NINJA_BUILD="-GNinja"
# Select the macOS toolchain: gcc/g++ from under $XCODE_ROOT, a fixed
# deployment target, and a fixed conda environment path.
# Globals read:     XCODE_ROOT
# Globals exported: MACOSX_DEPLOYMENT_TARGET, CC, CXX, CONDA_ENV_PATH
osx_toolchain() {
  local xcode_bin="${XCODE_ROOT}/usr/bin"

  export CONDA_ENV_PATH="/Users/wesm/anaconda/envs/arrow-test"
  export MACOSX_DEPLOYMENT_TARGET="10.9"
  export CC="${xcode_bin}/gcc"
  export CXX="${xcode_bin}/g++"
}
# Select the Linux toolchain: versioned clang/clang++ plus the two
# toolchain prefixes under $HOME.
# Globals read:     ARROW_CLANG_VERSION, HOME
# Globals exported: CC, CXX, CPP_TOOLCHAIN, CPP_RUNTIME_TOOLCHAIN
linux_toolchain() {
  export CPP_TOOLCHAIN="${HOME}/cpp-toolchain"
  export CPP_RUNTIME_TOOLCHAIN="${HOME}/cpp-runtime-toolchain"
  export CC="clang-${ARROW_CLANG_VERSION}"
  export CXX="clang++-${ARROW_CLANG_VERSION}"
}
# Repoint XCODE_ROOT / DEVELOPER_DIR at the Xcode-6 install and re-run the
# macOS toolchain setup so CC/CXX pick up the new root.
xcode64() {
  XCODE_ROOT="/Applications/Xcode-6.app/Contents/Developer"
  DEVELOPER_DIR="${XCODE_ROOT}"
  export XCODE_ROOT DEVELOPER_DIR

  osx_toolchain
}
# Dispatch to the toolchain setup matching the current OS: any OSTYPE that
# starts with "darwin" gets osx_toolchain, everything else linux_toolchain.
system_toolchain() {
  case "${OSTYPE}" in
    darwin*) osx_toolchain ;;
    *) linux_toolchain ;;
  esac
}
# Pick CC/CXX (and the toolchain prefixes) for the current OS first.
system_toolchain

# Optional Arrow components; these feed -DARROW_GPU / -DARROW_TENSORFLOW
# in the cmake invocations further down.
export ARROW_BUILD_GPU="ON"
export ARROW_BUILD_TENSORFLOW="ON"

# Path of llvm-symbolizer as found on PATH (empty when it is not found).
ASAN_SYMBOLIZER_PATH="$(type -p llvm-symbolizer)"
export ASAN_SYMBOLIZER_PATH

export TOOLCHAIN_CUDA_VERSION="8.0"

# Install prefixes for the two build flavours; debug is the default.
DEBUG_TP_DIR="${HOME}/local"
RELEASE_TP_DIR="${HOME}/local-release"
export TP_DIR="${DEBUG_TP_DIR}"
export TOOLCHAIN_BUILD_TYPE="debug"

export PARQUET_ROOT="${HOME}/code/parquet-cpp"
export PARQUET_TEST_DATA="${PARQUET_ROOT}/data"

# Snapshots used by set_build_env to rebuild PATH / LD_LIBRARY_PATH.
# LD_LIBRARY_PATH_BAK keeps an earlier non-empty value; PATH_BAK is always
# refreshed.
# NOTE(review): both snapshots are taken before the CUDA and ruby entries
# below are added, so set_build_env drops those entries again -- confirm
# that this is intended.
: "${LD_LIBRARY_PATH_BAK:=${LD_LIBRARY_PATH}}"
export LD_LIBRARY_PATH_BAK
export PATH_BAK="${PATH}"

export ARROW_LIBHDFS3_DIR="${HOME}/anaconda3/lib"

# CUDA toolkit location derived from TOOLCHAIN_CUDA_VERSION.
export CUDA_HOME="/usr/local/cuda-${TOOLCHAIN_CUDA_VERSION}"
export CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}"
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
export PATH="${CUDA_HOME}/bin:${PATH}"

# Use local ruby
export PATH="${HOME}/ruby/bin:${PATH}"
# Export the full build environment for the currently selected flavour and
# print a summary of it.
#
# Globals read:
#   TP_DIR, TOOLCHAIN_BUILD_TYPE, CPP_TOOLCHAIN, CPP_RUNTIME_TOOLCHAIN,
#   PATH_BAK, LD_LIBRARY_PATH_BAK, PKG_CONFIG_PATH, USE_NINJA_BUILD,
#   ARROW_BUILD_GPU, EXTRA_ARROW_FLAGS, EXTRA_PARQUET_FLAGS, CC, CXX,
#   ARROW_CXXFLAGS
# Globals exported:
#   ARROW_BUILD_TOOLCHAIN, PARQUET_BUILD_TOOLCHAIN, BOOST_ROOT, PROTOBUF_HOME,
#   ARROW_HOME, PARQUET_HOME, PATH, LD_LIBRARY_PATH, PKG_CONFIG_PATH,
#   PYARROW_BUILD_TYPE, ARROW_GCC_OPTIONS, PARQUET_GCC_OPTIONS,
#   PARQUET_CXXFLAGS, PYARROW_CXXFLAGS
# Outputs: a human-readable summary on stdout.
function set_build_env() {
  local tp_pkgconfig="${TP_DIR}/lib/pkgconfig"

  echo "Thirdparty dir: $TP_DIR"

  # Everything toolchain-related points at the same prefix.
  export ARROW_BUILD_TOOLCHAIN="${CPP_TOOLCHAIN}"
  export PARQUET_BUILD_TOOLCHAIN="${CPP_TOOLCHAIN}"
  export BOOST_ROOT="${CPP_TOOLCHAIN}"
  # libprotobuf used by Orc EP build
  export PROTOBUF_HOME="${CPP_TOOLCHAIN}"
  # export GTEST_HOME=$CPP_TOOLCHAIN

  export ARROW_HOME="${TP_DIR}"
  export PARQUET_HOME="${TP_DIR}"

  # PATH and LD_LIBRARY_PATH are rebuilt from their *_BAK snapshots so that
  # switching flavours repeatedly does not accumulate entries.
  export PATH="${CPP_TOOLCHAIN}/bin:${PATH_BAK}"
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH_BAK}:${TP_DIR}/lib:${CPP_RUNTIME_TOOLCHAIN}/lib"

  # Append the install prefix's pkgconfig dir once: no leading ':' when the
  # variable starts out empty, and no duplicate entry on repeated calls.
  case ":${PKG_CONFIG_PATH}:" in
    *":${tp_pkgconfig}:"*) ;;
    *) PKG_CONFIG_PATH="${PKG_CONFIG_PATH:+${PKG_CONFIG_PATH}:}${tp_pkgconfig}" ;;
  esac
  export PKG_CONFIG_PATH

  export PYARROW_BUILD_TYPE="${TOOLCHAIN_BUILD_TYPE}"

  # export CXXFLAGS="-Werror -Wall -fno-omit-frame-pointer"

  # The *_GCC_OPTIONS values are flat strings that callers expand unquoted,
  # so they are word-split into individual cmake arguments at the call site.
  ARROW_GCC_OPTIONS="${USE_NINJA_BUILD}"
  ARROW_GCC_OPTIONS+=" -DCMAKE_INSTALL_PREFIX=${TP_DIR}"
  ARROW_GCC_OPTIONS+=" -DCMAKE_BUILD_TYPE=${TOOLCHAIN_BUILD_TYPE}"
  ARROW_GCC_OPTIONS+=" -DCMAKE_CXX_FLAGS='-D_GLIBCXX_USE_CXX11_ABI=0'"
  ARROW_GCC_OPTIONS+=" -DARROW_VERBOSE_THIRDPARTY_BUILD=off"
  ARROW_GCC_OPTIONS+=" -DARROW_NO_DEPRECATED_API=on"
  ARROW_GCC_OPTIONS+=" -DARROW_EXTRA_ERROR_CONTEXT=on"
  ARROW_GCC_OPTIONS+=" -DARROW_BOOST_USE_SHARED=on"
  ARROW_GCC_OPTIONS+=" -DARROW_BUILD_BENCHMARKS=on"
  ARROW_GCC_OPTIONS+=" -DARROW_BUILD_TESTS=on"
  ARROW_GCC_OPTIONS+=" -DARROW_HDFS=on"
  ARROW_GCC_OPTIONS+=" -DARROW_ORC=on"
  ARROW_GCC_OPTIONS+=" -DARROW_PYTHON=on"
  ARROW_GCC_OPTIONS+=" -DARROW_GPU=${ARROW_BUILD_GPU}"
  ARROW_GCC_OPTIONS+=" ${EXTRA_ARROW_FLAGS}"
  export ARROW_GCC_OPTIONS

  export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API"
  export PYARROW_CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0"

  PARQUET_GCC_OPTIONS="${USE_NINJA_BUILD}"
  PARQUET_GCC_OPTIONS+=" -DCMAKE_INSTALL_PREFIX=${TP_DIR}"
  PARQUET_GCC_OPTIONS+=" -DCMAKE_BUILD_TYPE=${TOOLCHAIN_BUILD_TYPE}"
  PARQUET_GCC_OPTIONS+=" -DCMAKE_CXX_FLAGS='-D_GLIBCXX_USE_CXX11_ABI=0'"
  PARQUET_GCC_OPTIONS+=" -DPARQUET_BOOST_USE_SHARED=on"
  PARQUET_GCC_OPTIONS+=" -DPARQUET_BUILD_BENCHMARKS=on"
  PARQUET_GCC_OPTIONS+=" -DPARQUET_THRIFT_USE_BOOST=off"
  PARQUET_GCC_OPTIONS+=" ${EXTRA_PARQUET_FLAGS}"
  export PARQUET_GCC_OPTIONS

  echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
  echo "CC: $CC"
  echo "CXX: $CXX"
  echo "ARROW_CXXFLAGS: $ARROW_CXXFLAGS"
  echo "ARROW_OPTIONS: $ARROW_GCC_OPTIONS"
  echo "PARQUET_OPTIONS: $PARQUET_GCC_OPTIONS"
  echo "PARQUET_CXXFLAGS: $PARQUET_CXXFLAGS"
  # Print the variable the label names (previously printed ARROW_CXXFLAGS).
  echo "PYARROW_CXXFLAGS: $PYARROW_CXXFLAGS"
}
# Switch to the debug flavour: install into $DEBUG_TP_DIR, turn on CHECKIN
# warning levels for both projects, then refresh the build environment.
# Globals read:     DEBUG_TP_DIR, ARROW_TOOLCHAIN_FLAGS
# Globals exported: TP_DIR, TOOLCHAIN_BUILD_TYPE, EXTRA_ARROW_FLAGS,
#                   EXTRA_PARQUET_FLAGS, ASAN_IF_ENABLED
debug() {
  export ASAN_IF_ENABLED="OFF"
  export TOOLCHAIN_BUILD_TYPE="debug"
  export TP_DIR="${DEBUG_TP_DIR}"

  export EXTRA_PARQUET_FLAGS="-DPARQUET_BUILD_WARNING_LEVEL=CHECKIN -Werror"
  # The toolchain flags and the warning level are newline-separated; the
  # value is word-split when it is finally handed to cmake.
  export EXTRA_ARROW_FLAGS="${ARROW_TOOLCHAIN_FLAGS}"$'\n'"-DBUILD_WARNING_LEVEL=CHECKIN"

  set_build_env
}
# Switch to the release flavour: install into $RELEASE_TP_DIR, clear the
# extra cmake flags, then refresh the build environment.
# Globals read:     RELEASE_TP_DIR
# Globals exported: TP_DIR, TOOLCHAIN_BUILD_TYPE, EXTRA_ARROW_FLAGS,
#                   EXTRA_PARQUET_FLAGS, ASAN_IF_ENABLED
release() {
  export EXTRA_ARROW_FLAGS=""
  export EXTRA_PARQUET_FLAGS=""
  export ASAN_IF_ENABLED="OFF"
  export TOOLCHAIN_BUILD_TYPE="release"
  export TP_DIR="${RELEASE_TP_DIR}"

  set_build_env
}
# Re-apply the flavour named by $TOOLCHAIN_BUILD_TYPE: "release" runs
# release, anything else (including empty or unset) runs debug.
function set_build_type_flags() {
  # [[ ]] with a quoted operand: an empty or unset TOOLCHAIN_BUILD_TYPE must
  # quietly select debug instead of tripping a "[: unary operator" error.
  if [[ "${TOOLCHAIN_BUILD_TYPE:-}" == "release" ]]; then
    release
  else
    debug
  fi
}
# Select versioned clang/clang++ as CC/CXX, set the clang-specific cmake
# flags (fuzzing on, ASAN according to $ASAN_IF_ENABLED), then re-apply the
# current build flavour.
# Globals read:     ARROW_CLANG_VERSION, ASAN_IF_ENABLED
# Globals exported: CC, CXX, ARROW_TOOLCHAIN_FLAGS
function toolchain_clang {
  # export CC=$CLANG_TOOLS_PATH/clang
  # export CXX=$CLANG_TOOLS_PATH/clang++
  export CC="clang-${ARROW_CLANG_VERSION}"
  export CXX="clang++-${ARROW_CLANG_VERSION}"

  # ASAN_IF_ENABLED is only assigned by debug/release, which run *after* this
  # string is built. Default to OFF so the first call does not produce a
  # dangling "-DARROW_USE_ASAN=".
  export ARROW_TOOLCHAIN_FLAGS="-DARROW_FUZZING=ON -DARROW_TEST_MEMCHECK=off -DARROW_USE_ASAN=${ASAN_IF_ENABLED:-OFF}"

  set_build_type_flags
}
# Select $ARROW_GCC / $ARROW_GXX as CC/CXX, set the gcc cmake flags
# (fuzzing and ASAN off), then re-apply the current build flavour.
# Globals read:     ARROW_GCC, ARROW_GXX
# Globals exported: CC, CXX, ARROW_TOOLCHAIN_FLAGS
toolchain_gcc() {
  export ARROW_TOOLCHAIN_FLAGS="-DARROW_FUZZING=OFF -DARROW_TEST_MEMCHECK=off -DARROW_USE_ASAN=OFF"
  export CC="${ARROW_GCC}"
  export CXX="${ARROW_GXX}"

  set_build_type_flags
}
# Select gcc-4.8 / g++-4.8 as CC/CXX with the same cmake flags as the
# generic gcc toolchain, then re-apply the current build flavour.
# Globals exported: CC, CXX, ARROW_TOOLCHAIN_FLAGS
toolchain_gcc48() {
  export ARROW_TOOLCHAIN_FLAGS="-DARROW_FUZZING=OFF -DARROW_TEST_MEMCHECK=off -DARROW_USE_ASAN=OFF"
  export CC="gcc-4.8"
  export CXX="g++-4.8"

  set_build_type_flags
}
# clang is the default toolchain when this file is sourced.
toolchain_clang

export PATH="${CPP_TOOLCHAIN}/bin:${PATH}"
export ARROW_USE_CCACHE="1"

# export TERM=xterm-color

# Using Impala's thirdparty bits. Looking at output of impala-config.sh
export JAVA_HOME="/usr/lib/jvm/java-8-oracle"

# Prefer the cdh5.7.0 Hadoop tree; when that directory is missing, switch to
# the cdh5.8.0-SNAPSHOT tree (whether or not that one exists).
export HADOOP_HOME="/home/wesm/code/cloudera/impala/thirdparty/hadoop-2.6.0-cdh5.7.0"
# export HADOOP_HOME=/home/wesm/code/cloudera/impala/thirdparty/hadoop-2.6.0-cdh5.7.0-SNAPSHOT
[ -d "${HADOOP_HOME}" ] \
  || export HADOOP_HOME="/home/wesm/code/cloudera/impala/thirdparty/hadoop-2.6.0-cdh5.8.0-SNAPSHOT"

# This avoids native-hadoop loading error / warning =( =(
# Only done when the chosen Hadoop tree is actually present.
if [ -d "${HADOOP_HOME}" ]; then
  CLASSPATH="$("${HADOOP_HOME}/bin/hadoop" classpath --glob)"
  export CLASSPATH
  export HADOOP_OPTS="${HADOOP_OPTS} -Djava.library.path=${HADOOP_HOME}/lib/native"
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native/"
fi

# Connection settings for the HDFS tests.
export ARROW_HDFS_TEST_HOST="localhost"
export ARROW_HDFS_TEST_PORT="20500"
export ARROW_HDFS_TEST_USER="wesm"
# Run cmake against the parent directory with $PARQUET_GCC_OPTIONS plus
# -DPARQUET_CXXFLAGS taken from $PARQUET_CXXFLAGS.
parquet_cmake() {
  # PARQUET_GCC_OPTIONS is a flat string; it is deliberately left unquoted
  # so that it splits into one cmake argument per word.
  # shellcheck disable=SC2206
  local -a cmake_args=(
    ${PARQUET_GCC_OPTIONS}
    "-DPARQUET_CXXFLAGS=${PARQUET_CXXFLAGS}"
    ..
  )
  cmake "${cmake_args[@]}"
}
# Rebuild parquet-cpp from scratch in ~/code/parquet-cpp/library-build and
# install it into $TP_DIR.
# Globals read: TP_DIR, TOOLCHAIN_BUILD_TYPE, PARQUET_CXXFLAGS
# Returns: non-zero if the build directory cannot be entered or if any of
#          cmake / ninja clean / ninja / ninja install fails.
function parquet_cpp_update {
  local build_dir
  local status=0
  build_dir=~/code/parquet-cpp/library-build

  # rm -rf ~/code/parquet-cpp/library-build
  mkdir -p -- "${build_dir}" || return 1

  # Never fall through to the wipe below when the directory change failed:
  # it would delete the contents of whatever directory the caller is in.
  pushd "${build_dir}" || return 1
  rm -rf -- ./*

  # Stop at the first failing step so a broken configure or build is not
  # followed by an install of stale artifacts.
  cmake -GNinja \
    "-DCMAKE_INSTALL_PREFIX=${TP_DIR}" \
    "-DCMAKE_BUILD_TYPE=${TOOLCHAIN_BUILD_TYPE}" \
    -DCMAKE_CXX_FLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' \
    -DPARQUET_BUILD_TESTS=OFF \
    -DPARQUET_BUILD_EXECUTABLES=OFF \
    -DPARQUET_BOOST_USE_SHARED=ON \
    "-DPARQUET_CXXFLAGS=${PARQUET_CXXFLAGS}" \
    .. \
    && ninja clean \
    && ninja \
    && ninja install \
    || status=$?

  # Always restore the caller's directory, even after a failed build.
  popd || return 1
  return "${status}"
}
# Rebuild Arrow C++ from scratch in ~/code/arrow/cpp/library-build and
# install it into $TP_DIR (tests and benchmarks off).
# Globals read: TP_DIR, TOOLCHAIN_BUILD_TYPE, ARROW_BUILD_GPU,
#               ARROW_BUILD_TENSORFLOW
# Returns: non-zero if the build directory cannot be entered or if any of
#          cmake / ninja clean / ninja / ninja install fails.
function arrow_cpp_update {
  local build_dir
  local status=0
  build_dir=~/code/arrow/cpp/library-build

  # rm -rf ~/code/arrow/cpp/library-build
  mkdir -p -- "${build_dir}" || return 1

  # Never fall through to the wipe below when the directory change failed:
  # it would delete the contents of whatever directory the caller is in.
  pushd "${build_dir}" || return 1
  rm -rf -- ./*

  # -DCMAKE_BUILD_TYPE is passed once (it used to be repeated at the end of
  # the argument list with the same value). Stop at the first failing step so
  # a broken build is not followed by an install.
  cmake -GNinja \
    "-DCMAKE_INSTALL_PREFIX=${TP_DIR}" \
    "-DCMAKE_BUILD_TYPE=${TOOLCHAIN_BUILD_TYPE}" \
    -DCMAKE_CXX_FLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' \
    -DARROW_EXTRA_ERROR_CONTEXT=ON \
    -DARROW_NO_DEPRECATED_API=OFF \
    -DARROW_BOOST_USE_SHARED=ON \
    -DARROW_BUILD_BENCHMARKS=off \
    "-DARROW_GPU=${ARROW_BUILD_GPU}" \
    -DARROW_ORC=on \
    -DARROW_PLASMA=on \
    -DARROW_PYTHON=on \
    "-DARROW_TENSORFLOW=${ARROW_BUILD_TENSORFLOW}" \
    -DARROW_BUILD_TESTS=off \
    .. \
    && ninja clean \
    && ninja \
    && ninja install \
    || status=$?

  # Always restore the caller's directory, even after a failed build.
  popd || return 1
  return "${status}"
}
# Run cmake against the parent directory with $ARROW_GCC_OPTIONS, Plasma
# enabled, -Werror prepended to $ARROW_CXXFLAGS, and the current build type.
arrow_cmake() {
  # ARROW_GCC_OPTIONS is a flat string; it is deliberately left unquoted so
  # that it splits into one cmake argument per word.
  # shellcheck disable=SC2206
  local -a cmake_args=(
    ${ARROW_GCC_OPTIONS}
    -DARROW_PLASMA=on
    "-DARROW_CXXFLAGS=-Werror ${ARROW_CXXFLAGS}"
    -DCMAKE_BUILD_TYPE=${TOOLCHAIN_BUILD_TYPE}
    ..
  )
  cmake "${cmake_args[@]}"
}
# Configure Arrow with gcc: switch the toolchain, then run arrow_cmake in
# the current directory.
arrow_gcc() {
  toolchain_gcc
  arrow_cmake
}
# Configure Arrow with clang: switch the toolchain, then run arrow_cmake in
# the current directory.
arrow_clang() {
  toolchain_clang
  arrow_cmake
}
# Build the pyarrow extensions in place (with Parquet and Plasma) using the
# setup.py found in the current directory.
build_pyarrow() {
  local -a build_ext_args=(--inplace --with-parquet --with-plasma)
  python setup.py build_ext "${build_ext_args[@]}"
}
# Rebuild Arrow C++, then clean, configure, build, install and test the
# GLib bindings in $HOME/code/arrow/c_glib against $TP_DIR.
# Globals read:     HOME, TP_DIR, PKG_CONFIG_PATH
# Globals exported: PKG_CONFIG_PATH, GI_TYPELIB_PATH
# Returns: non-zero without touching anything if the c_glib directory
#          cannot be entered.
function arrow_glib_test {
  local glib_cxxflags="-D_GLIBCXX_USE_CXX11_ABI=0"

  arrow_cpp_update

  # `git clean -fdx .` deletes every untracked and ignored file below the
  # current directory, so it must never run after a failed directory change.
  pushd "${HOME}/code/arrow/c_glib" || return 1
  git clean -fdx .

  export PKG_CONFIG_PATH="${TP_DIR}/lib/pkgconfig:${PKG_CONFIG_PATH}"

  ./autogen.sh
  ./configure "CXXFLAGS=${glib_cxxflags}" "CFLAGS=${glib_cxxflags}" \
    "--prefix=${TP_DIR}" --enable-gtk-doc
  CXXFLAGS="${glib_cxxflags}" CFLAGS="${glib_cxxflags}" make -j8
  make install

  export GI_TYPELIB_PATH="${TP_DIR}/lib/girepository-1.0"
  NO_MAKE=yes test/run-test.sh
  popd
}
# Pre-submit checks: configure Arrow in $HOME/code/arrow/cpp/preflight and
# run the ninja format and lint targets, then run flake8 (plain and with the
# Cython config) over pyarrow in $HOME/code/arrow/python.
# Globals written: ARROW_PREFLIGHT_DIR
# Returns: non-zero if either working directory cannot be entered; the
#          remaining steps for that directory are then skipped.
function arrow_preflight {
  ARROW_PREFLIGHT_DIR="${HOME}/code/arrow/cpp/preflight"
  mkdir -p -- "${ARROW_PREFLIGHT_DIR}" || return 1

  # Guard each pushd: a failed one would run the tools in the caller's
  # directory and leave the following popd popping someone else's entry.
  pushd "${ARROW_PREFLIGHT_DIR}" || return 1
  arrow_cmake
  ninja format
  ninja lint
  popd || return 1

  pushd "${HOME}/code/arrow/python" || return 1
  flake8 --count pyarrow
  flake8 --count --config=.flake8.cython pyarrow
  popd
}
# Switch to the gcc toolchain and configure the parent directory with
# Cython building turned off.
pandas_gcc() {
  toolchain_gcc
  cmake -DPANDAS_BUILD_CYTHON=off ..
}
# Switch to the clang toolchain and configure the parent directory with
# Cython building turned off.
pandas_clang() {
  toolchain_clang
  cmake -DPANDAS_BUILD_CYTHON=off ..
}
# Configure parquet-cpp with clang: switch the toolchain, then run
# parquet_cmake in the current directory.
parquet_clang_cmake() {
  toolchain_clang
  parquet_cmake
}
# Download the apache/arrow source archive for a commit or tag and print
# its SHA-256.
# Arguments: $1 - commit hash or tag name used in the GitHub archive URL
# Outputs:   "<sha256> <temporary file name>" on stdout
# Returns:   2 when no argument is given, non-zero when the download or the
#            checksum fails (nothing is printed on stdout in those cases)
function get_arrow_sha256 {
  local archive checksum_line

  if [[ -z "${1:-}" ]]; then
    echo "usage: get_arrow_sha256 <commit-or-tag>" >&2
    return 2
  fi

  # Unpredictable name outside the caller's working directory.
  archive="$(mktemp "${TMPDIR:-/tmp}/arrow-archive.XXXXXX")" || return 1

  # Do not hash an empty or partial file when the download fails.
  if ! wget "https://github.com/apache/arrow/archive/$1.tar.gz" -O "${archive}"; then
    rm -f -- "${archive}"
    return 1
  fi

  if ! checksum_line="$(sha256sum -- "${archive}")"; then
    rm -f -- "${archive}"
    return 1
  fi
  rm -f -- "${archive}"

  # Same shape as before: digest, one space, file name.
  printf '%s %s\n' "${checksum_line%% *}" "${archive}"
}
# Download the apache/parquet-cpp source archive for a commit or tag and
# print its SHA-256.
# Arguments: $1 - commit hash or tag name used in the GitHub archive URL
# Outputs:   "<sha256> <temporary file name>" on stdout
# Returns:   2 when no argument is given, non-zero when the download or the
#            checksum fails (nothing is printed on stdout in those cases)
function get_parquet_sha256 {
  local archive checksum_line

  if [[ -z "${1:-}" ]]; then
    echo "usage: get_parquet_sha256 <commit-or-tag>" >&2
    return 2
  fi

  # Unpredictable name outside the caller's working directory.
  archive="$(mktemp "${TMPDIR:-/tmp}/parquet-archive.XXXXXX")" || return 1

  # Do not hash an empty or partial file when the download fails.
  if ! wget "https://github.com/apache/parquet-cpp/archive/$1.tar.gz" -O "${archive}"; then
    rm -f -- "${archive}"
    return 1
  fi

  if ! checksum_line="$(sha256sum -- "${archive}")"; then
    rm -f -- "${archive}"
    return 1
  fi
  rm -f -- "${archive}"

  # Same shape as before: digest, one space, file name.
  printf '%s %s\n' "${checksum_line%% *}" "${archive}"
}
# Clear the ccache cache, then rebuild and reinstall Arrow C++ followed by
# parquet-cpp.
update_tp_toolchain() {
  ccache -C

  arrow_cpp_update
  parquet_cpp_update
}
# Rebuild the C++ toolchain libraries, then rebuild the pyarrow extensions
# in place from a clean build/ directory in $HOME/code/arrow/python.
# Returns: non-zero without deleting anything if the python directory
#          cannot be entered.
function update_pyarrow {
  update_tp_toolchain

  # Guarded: `rm -rf build/` must only ever run inside the pyarrow checkout,
  # not in whatever directory the caller happens to be in.
  pushd "${HOME}/code/arrow/python" || return 1
  rm -rf build/
  python setup.py build_ext --inplace
  popd
}
#----------------------------------------------------------------------
# Spark stuff

export SPARK_HOME="${HOME}/code/spark"
export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
# Put the local Maven install ahead of anything else on PATH.
export PATH="${HOME}/java/maven-3.3.9/bin:${PATH}"

# ----------------------------------------------------------------------
# Ocaml stuff

# Best effort: all output is discarded and a missing or failing init script
# is deliberately ignored.
# shellcheck disable=SC1091
source /home/wesm/.opam/opam-init/init.sh > /dev/null 2>&1 || true
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment