Skip to content

Instantly share code, notes, and snippets.

@AlenkaF
Last active December 2, 2023 11:39
Show Gist options
  • Save AlenkaF/afe4ef98d51f107b3b3831ccdd1695c2 to your computer and use it in GitHub Desktop.
Save AlenkaF/afe4ef98d51f107b3b3831ccdd1695c2 to your computer and use it in GitHub Desktop.
Apache Arrow build commands
# Load Homebrew's shell environment, but only when Homebrew is installed at
# the expected location, so a missing install does not print an error on
# every shell start.
if [[ -x /opt/homebrew/bin/brew ]]; then
  eval "$(/opt/homebrew/bin/brew shellenv)"
fi

# SECURITY: ARROW_GITHUB_API_TOKEN and APACHE_JIRA_TOKEN used to be hard-coded
# on these lines. Any token that ever appeared in this file must be treated as
# compromised and revoked. The values are now read from a private file
# (chmod 600, never committed or shared) which is expected to contain:
#   export ARROW_GITHUB_API_TOKEN=...
#   export APACHE_JIRA_TOKEN=...
# Override the file location with ARROW_SECRETS_FILE.
arrow_secrets_file="${ARROW_SECRETS_FILE:-${HOME}/.arrow_secrets}"
if [[ -r "${arrow_secrets_file}" ]]; then
  source "${arrow_secrets_file}"
fi
unset arrow_secrets_file
# Print each NAME=value argument, then export it into the current shell.
_arrow_export() {
  local assignment
  for assignment in "$@"; do
    printf '%s\n' "$assignment"
    export "$assignment"
  done
}

# Run a command ($2...) inside directory $1 and always return to the previous
# directory. The command is NOT run when the directory cannot be entered, so
# nothing executes in the wrong place and popd never pops an unrelated entry.
# Returns the command's exit status (1 when the directory is missing).
_arrow_run_in() {
  local target_dir="$1" rc=0
  shift
  pushd "$target_dir" || return 1
  "$@" || rc=$?
  popd
  return "$rc"
}

# Build and install Arrow C++; install is skipped when the build fails.
_arrow_ninja_build() {
  ninja -j8 && ninja install
}

# Configure the Arrow C++ build in cpp/build for PyArrow development.
# Arguments: $1 - CMake build type; $2... - extra cmake flags specific to
#            that build type (appended after the shared flags).
# Globals:   ARROW_HOME (read), ARROW_BUILD_TYPE (exported)
_arrow_cmake_configure() {
  local build_type="$1"
  shift
  if [[ -z "${ARROW_HOME:-}" ]]; then
    # An empty install prefix would configure an install into the filesystem root.
    echo "arrow: ARROW_HOME is not set; run 'arrow vars' first" >&2
    return 1
  fi
  _arrow_export "ARROW_BUILD_TYPE=${build_type}"
  local -a shared_flags
  shared_flags=(
    "-DCMAKE_INSTALL_PREFIX=${ARROW_HOME}"
    "-DCMAKE_PREFIX_PATH=${ARROW_HOME}"
    -DCMAKE_INSTALL_LIBDIR=lib
    "-DCMAKE_BUILD_TYPE=${build_type}"
    -DGTest_SOURCE=BUNDLED
    -DARROW_WITH_BZ2=ON
    -DARROW_WITH_ZLIB=ON
    -DARROW_WITH_ZSTD=ON
    -DARROW_WITH_LZ4=ON
    -DARROW_WITH_SNAPPY=ON
    -DARROW_WITH_BROTLI=ON
    -DARROW_PARQUET=ON
    -DPARQUET_REQUIRE_ENCRYPTION=OFF
    -DARROW_DATASET=ON
    -DARROW_FLIGHT=ON
    -DARROW_S3=ON
    -DARROW_CSV=ON
    -DARROW_GCS=OFF
    -DARROW_DEPENDENCY_SOURCE=AUTO
    -DARROW_INSTALL_NAME_RPATH=OFF
    -DARROW_EXTRA_ERROR_CONTEXT=ON
  )
  _arrow_run_in cpp/build cmake "${shared_flags[@]}" "$@" -GNinja ..
}

# Print the list of supported subcommands.
_arrow_usage() {
  printf '%s\n' \
    "***" \
    "Usage:" \
    "arrow submodule" \
    "arrow env" \
    "arrow env-4" \
    "arrow env-9" \
    "arrow vars" \
    "arrow build-vars" \
    "arrow new <branch-name>" \
    "arrow rebase" \
    "arrow remote add <github-user>" \
    "arrow merge-pr" \
    "***" \
    "Usage build:" \
    "arrow cmake-release" \
    "arrow cmake-reldebinfo" \
    "arrow cmake-debug" \
    "arrow cpp" \
    "arrow pyarrow-clean" \
    "arrow pyarrow" \
    "***" \
    "Usage documentation:" \
    "arrow docs-clean" \
    "arrow docs" \
    "arrow sphinx-docs"
}

#######################################
# Helper for day-to-day Apache Arrow / PyArrow development.
# Must be defined in (not executed by) the interactive shell, because several
# subcommands change the working directory or export variables.
# Globals:
#   ARROW_REPOS_DIR (read, optional) - directory holding the virtualenvs and
#     the arrow checkout; defaults to /Users/alenkafrim/repos
#   ARROW_HOME (read by cmake-*; exported by vars)
#   PYARROW_*, ARROW_*, *_TEST_DATA, CMAKE_PREFIX_PATH, LD_LIBRARY_PATH,
#     R_LD_LIBRARY_PATH (exported by build-vars / vars / cmake-*)
# Arguments:
#   $1 - subcommand; $2, $3 - subcommand arguments (see the usage text,
#        printed for a missing or unknown subcommand)
# Returns:
#   Status of the underlying command; non-zero when a required directory or
#   argument is missing.
#######################################
arrow() {
  local repos_dir="${ARROW_REPOS_DIR:-/Users/alenkafrim/repos}"
  # Upstream default branch; kept in one place so `new` and `rebase` agree.
  local base_branch="main"
  local stash_output="" rc=0

  case "${1:-}" in
    submodule)
      git submodule update --init
      ;;
    env)
      cd "$repos_dir" || return 1
      # python3 -m venv pyarrow-dev
      source ./pyarrow-dev/bin/activate || return 1
      cd arrow
      ;;
    env-4)
      cd "$repos_dir" || return 1
      source ./pyarrow-triaging-4/bin/activate
      ;;
    env-9)
      cd "$repos_dir" || return 1
      source ./pyarrow-triaging-9/bin/activate
      ;;
    build-vars)
      # --- Set env vars ---
      _arrow_export \
        "PYARROW_WITH_FLIGHT=1" \
        "PYARROW_WITH_GANDIVA=0" \
        "PYARROW_WITH_ORC=0" \
        "PYARROW_WITH_PARQUET=1" \
        "PYARROW_WITH_DATASET=1" \
        "PYARROW_WITH_S3=1" \
        "PYARROW_PARALLEL=4" \
        "ARROW_COMPUTE=1" \
        "PYARROW_WITH_SUBSTRAIT=1" \
        "ARROW_CSV=1" \
        "PYARROW_WITH_GCS=0" \
        "PYARROW_WITH_PARQUET_ENCRYPTION=0" \
        "PYARROW_BUILD_TYPE=debug"
      # _arrow_export "PYARROW_BUNDLE_ARROW_CPP=1"
      ;;
    vars)
      # Run this inside arrow folder: every path is derived from $PWD.
      export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data"
      export ARROW_TEST_DATA="${PWD}/testing/data"
      export ARROW_HOME="${PWD}/dist"
      printenv ARROW_HOME
      export CMAKE_PREFIX_PATH="${ARROW_HOME}"
      # Only add the ':' separator when there is an existing value; a trailing
      # ':' is an empty entry, i.e. the current directory.
      export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
      export R_LD_LIBRARY_PATH="${LD_LIBRARY_PATH}"
      # export DYLD_LIBRARY_PATH=$(pwd)/python/build/dist/lib
      printenv LD_LIBRARY_PATH
      printenv R_LD_LIBRARY_PATH
      # printenv DYLD_LIBRARY_PATH
      ;;
    new)
      # This creates a new branch based on the remote upstream default branch,
      # carrying local uncommitted changes over via the stash.
      if [[ -z "${2:-}" ]]; then
        echo "Usage: arrow new <branch-name>" >&2
        return 1
      fi
      stash_output=$(git stash) || return 1
      # Chain the steps: `reset --hard` must never run unless the checkout of
      # the base branch succeeded, or it would reset the current branch.
      git fetch upstream &&
        git checkout "$base_branch" &&
        git reset --hard "upstream/${base_branch}" &&
        git checkout -b "$2" || rc=$?
      # Restore the stashed changes even when a step above failed.
      if [[ "$stash_output" != "No local changes to save" ]]; then
        git stash pop || rc=$?
      fi
      return "$rc"
      ;;
    rebase)
      # This rebases the current branch on the remote upstream default branch
      git fetch upstream
      git rebase "upstream/${base_branch}"
      ;;
    remote)
      # This adds someone else's fork as a remote so you can check out their branch
      if [[ "${2:-}" != "add" ]]; then
        _arrow_usage
        return
      fi
      if [[ -z "${3:-}" ]]; then
        echo "Usage: arrow remote add <github-user>" >&2
        return 1
      fi
      # Fetch even when the remote already exists (add fails in that case).
      git remote add "$3" "git@github.com:$3/arrow.git"
      git fetch "$3"
      ;;
    cmake-release)
      # This assembles the cmake flags for PyArrow,
      # Run this inside arrow folder, then
      #   arrow cpp
      # to build
      _arrow_cmake_configure release \
        -DARROW_COMPUTE=ON \
        -DARROW_JEMALLOC=OFF \
        -DARROW_JSON=ON \
        -DARROW_BUILD_TESTS=OFF
      ;;
    cmake-reldebinfo)
      # Same as cmake-release, but optimized with debug info and tests enabled.
      _arrow_cmake_configure RelWithDebInfo \
        -DARROW_CXXFLAGS=-ggdb \
        -DARROW_JEMALLOC=OFF \
        -DARROW_BUILD_TESTS=ON
      ;;
    cmake-debug)
      # Debug configuration: shared libraries only, tests and Substrait enabled.
      _arrow_cmake_configure debug \
        -DARROW_CXXFLAGS=-ggdb \
        -DARROW_BUILD_STATIC=OFF \
        -DARROW_JEMALLOC=ON \
        -DARROW_JSON=ON \
        -DARROW_SUBSTRAIT=ON \
        -DARROW_BUILD_TESTS=ON
      ;;
    cpp)
      # Run this inside arrow folder
      _arrow_run_in cpp/build _arrow_ninja_build
      ;;
    pyarrow)
      # Run this inside arrow folder
      _arrow_run_in python python setup.py build_ext --inplace
      ;;
    pyarrow-clean)
      # Run this inside arrow folder
      _arrow_run_in python python setup.py clean
      ;;
    docs)
      # Run this inside arrow folder. The HTML build is attempted even when
      # the doxygen step fails; a failure of either is reported in the status.
      _arrow_run_in cpp/apidoc doxygen || rc=$?
      _arrow_run_in docs make html || rc=$?
      return "$rc"
      ;;
    sphinx-docs)
      # Run this inside arrow folder
      _arrow_run_in docs make html
      ;;
    docs-clean)
      # Run this inside arrow folder
      _arrow_run_in docs make clean
      ;;
    merge-pr)
      ./dev/merge_arrow_pr.sh
      ;;
    *)
      _arrow_usage
      ;;
  esac
}
# Setting PATH for Python 3.11
# The original version is saved in .zprofile.pysave
# PATH="/Library/Frameworks/Python.framework/Versions/3.11/bin:${PATH}"
# export PATH
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment