Building `arrow`, `parquet-cpp`, and `pyarrow`
conda
Boost (>= 1.54)
A reasonably recent C/C++ compiler (e.g., GCC 4.9 or newer; the exact minimum version is unverified)
Create a Conda environment
This will create an environment with the latest version of Python. If you want a different version of Python, add `python=X.Y` (e.g., `python=3.5`) to the `conda create` package list.
# Create a conda environment named "pyarrow" with every package taken from
# the conda-forge channel.
conda create --name pyarrow --channel conda-forge \
  pandas pyarrow arrow-cpp cmake parquet-cpp cython \
  brotli snappy zlib lz4 flatbuffers rapidjson jemalloc thrift-cpp \
  pytest

# Drop the packaged arrow-cpp and parquet-cpp again; the later steps of this
# guide build and install both from source into the same environment.
# NOTE(review): presumably they are installed first only so that conda pulls
# in their dependencies -- confirm.
conda remove --name pyarrow arrow-cpp parquet-cpp
# Fetch the parquet-cpp and arrow sources into the current directory.
# Use https:// URLs: GitHub no longer serves the unauthenticated git://
# protocol, so the git:// form of these clones fails.
git clone https://github.com/apache/parquet-cpp
git clone https://github.com/apache/arrow
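Activate your new conda environment (e.g., `source activate pyarrow`, or `conda activate pyarrow` with conda 4.4 or newer) so that `$CONDA_PREFIX` points at it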
Set environment variables so that arrow and parquet-cpp can find dependencies
# Point every *_HOME variable read by the arrow and parquet-cpp builds at the
# active conda environment's prefix.
export \
  FLATBUFFERS_HOME="$CONDA_PREFIX" \
  RAPIDJSON_HOME="$CONDA_PREFIX" \
  JEMALLOC_HOME="$CONDA_PREFIX" \
  BROTLI_HOME="$CONDA_PREFIX" \
  ARROW_HOME="$CONDA_PREFIX" \
  THRIFT_HOME="$CONDA_PREFIX" \
  SNAPPY_HOME="$CONDA_PREFIX" \
  ZLIB_HOME="$CONDA_PREFIX"
# Configure, build and install the Arrow C++ libraries into the conda prefix.
# The build happens out of tree in arrow/cpp/cmake-debug-build, relative to
# the directory the sources were cloned into (the current directory).
# Paths are written without embedded spaces and are quoted, so the assignment
# and each -D option reach the shell/cmake as a single word.
ARROW_BUILD_DIR="$PWD/arrow/cpp/cmake-debug-build"
mkdir -p "$ARROW_BUILD_DIR"
pushd "$ARROW_BUILD_DIR"
cmake \
  -DCMAKE_INSTALL_PREFIX="$CONDA_PREFIX" \
  -DCMAKE_INSTALL_LIBDIR="$CONDA_PREFIX/lib" \
  -DARROW_PYTHON=on \
  -DPYTHON_EXECUTABLE="$CONDA_PREFIX/bin/python" \
  -DARROW_CXXFLAGS=" -Werror" \
  -DARROW_TEST_MEMCHECK=on \
  -DARROW_BUILD_TESTS=on \
  -DARROW_BUILD_BENCHMARKS=on \
  -DARROW_BOOST_USE_SHARED=on \
  -DARROW_JEMALLOC=on \
  ..
# One make job per CPU reported by nproc.
make -j "$(nproc)"
make install
popd
# Configure, build and install parquet-cpp into the conda prefix.
# The build happens out of tree in parquet-cpp/cmake-debug-build, relative to
# the directory the sources were cloned into (the current directory).
# Paths are written without embedded spaces and are quoted, so the assignment
# and each -D option reach the shell/cmake as a single word.
PARQUET_BUILD_DIR="$PWD/parquet-cpp/cmake-debug-build"
mkdir -p "$PARQUET_BUILD_DIR"
pushd "$PARQUET_BUILD_DIR"
cmake \
  -DCMAKE_INSTALL_PREFIX="$CONDA_PREFIX" \
  -DCMAKE_INSTALL_LIBDIR="$CONDA_PREFIX/lib" \
  -DPARQUET_ARROW=on \
  -DPARQUET_ZLIB_VENDORED=off \
  -DPARQUET_BOOST_USE_SHARED=on \
  -DPARQUET_BUILD_TESTS=on \
  -DPARQUET_BUILD_EXECUTABLES=on \
  ..
# One make job per CPU reported by nproc.
make -j "$(nproc)"
make install
popd
# Enter the Python package directory and build the pyarrow extension modules
# in place, with the parquet and jemalloc options enabled.
# No matching popd follows in this section, so the shell stays in
# arrow/python afterwards.
pushd arrow/python
python setup.py build_ext \
  --inplace \
  --with-parquet \
  --with-jemalloc
Make sure we can import `pyarrow` and `pyarrow.parquet`
# Smoke test: both modules must import without error.
# The code passed to -c starts at column 0; a leading space makes Python
# versions that do not dedent -c input raise "IndentationError: unexpected
# indent".
python -c 'import pyarrow'
python -c 'import pyarrow.parquet'