-
-
Save bskaggs/fc3c8d0d553be54e2645616236fdc8c6 to your computer and use it in GitHub Desktop.
# Builds Apache Arrow C++ and the pyarrow Python package from a release
# tarball and installs both under /usr/local.
FROM python:3.7-alpine3.8

# Build tools and -dev packages installed ahead of the Arrow build.
RUN apk add --no-cache \
        autoconf \
        bash \
        bison \
        boost-dev \
        build-base \
        cmake \
        flex \
        jemalloc-dev \
        zlib-dev

# Python packages installed before pyarrow is built; pandas is kept in its
# own layer, as in the original recipe.
RUN pip install --no-cache-dir six pytest numpy cython
RUN pip install --no-cache-dir pandas

# ARROW_SHA1 must be the SHA-1 of the tarball for ARROW_VERSION; override both
# together (--build-arg) when building a different release.
ARG ARROW_VERSION=0.12.0
ARG ARROW_SHA1=2ede75769e12df972f0acdfddd53ab15d11e0ac2
ARG ARROW_BUILD_TYPE=release

ENV ARROW_HOME=/usr/local \
    PARQUET_HOME=/usr/local

# Download and build apache-arrow.
# The checksum line feeds "<hash> *<path>" to `sha1sum -c -`, which exits
# non-zero on a mismatch and aborts the build. (Passing the file as an
# argument instead only prints its digest and never compares it.)
# Sources and the tarball are removed in the same RUN so they do not persist
# in the layer.
RUN mkdir /arrow \
    && apk add --no-cache curl \
    && curl -o /tmp/apache-arrow.tar.gz -fSL https://github.com/apache/arrow/archive/apache-arrow-${ARROW_VERSION}.tar.gz \
    && echo "$ARROW_SHA1 */tmp/apache-arrow.tar.gz" | sha1sum -c - \
    && tar -xvf /tmp/apache-arrow.tar.gz -C /arrow --strip-components 1 \
    && mkdir -p /arrow/cpp/build \
    && cd /arrow/cpp/build \
    && cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
        -DCMAKE_INSTALL_LIBDIR=lib \
        -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
        -DARROW_PARQUET=on \
        -DARROW_PYTHON=on \
        -DARROW_PLASMA=on \
        -DARROW_BUILD_TESTS=OFF \
        .. \
    && make -j$(nproc) \
    && make install \
    && cd /arrow/python \
    && python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --with-parquet \
    && python setup.py install \
    && rm -rf /arrow /tmp/apache-arrow.tar.gz
0.14.0 builds fine for me; however, I have hit the same issue trying to build 0.15.1.
@jensenity have you been able to get past this since reporting?
Haven't bottomed out the flatbuffers_ep issue with 0.15.1, but building PyArrow from master using the above approach worked fine.
Are there instructions for compiling from scratch, but aimed at using it from R? After compiling it this way, can I still use it in R?
# Builds Apache Arrow C++ and pyarrow from the current master branch archive
# and installs both under /usr/local.
FROM python:3.6-alpine

# System build dependencies plus the Python packages installed before the
# pyarrow build. --no-cache-dir keeps pip's download cache out of the layer.
RUN apk update \
    && apk upgrade \
    && apk add --no-cache build-base \
        cmake \
        bash \
        boost-dev \
        autoconf \
        zlib-dev \
        libressl-dev \
        flex \
        bison \
    && pip install --no-cache-dir six pytest numpy cython pandas

ARG ARROW_BUILD_TYPE=release

ENV ARROW_HOME=/usr/local \
    PARQUET_HOME=/usr/local

# Download, build and install Arrow, then clean up in the same layer.
# The archive is unpacked under /tmp (rather than the implicit working
# directory) so that the final rm can remove exactly what was created: the
# zip that was actually downloaded and the extracted arrow-master tree.
RUN mkdir -p /arrow \
    && apk add --no-cache curl \
    && curl -o /tmp/apache-arrow.zip -SL https://codeload.github.com/apache/arrow/zip/master \
    && unzip /tmp/apache-arrow.zip -d /tmp \
    && mv /tmp/arrow-master/* /arrow/ \
    && mkdir -p /arrow/cpp/build \
    && cd /arrow/cpp/build \
    && cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
        -DOPENSSL_ROOT_DIR=/usr/local/ssl \
        -DCMAKE_INSTALL_LIBDIR=lib \
        -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
        -DARROW_WITH_BZ2=ON \
        -DARROW_WITH_ZLIB=ON \
        -DARROW_WITH_ZSTD=ON \
        -DARROW_WITH_LZ4=ON \
        -DARROW_WITH_SNAPPY=ON \
        -DARROW_PARQUET=ON \
        -DARROW_PYTHON=ON \
        -DARROW_PLASMA=ON \
        -DARROW_BUILD_TESTS=OFF \
        .. \
    && make -j$(nproc) \
    && make install \
    && cd /arrow/python \
    && python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --with-parquet \
    && python setup.py install \
    && rm -rf /arrow /tmp/arrow-master /tmp/apache-arrow.zip
This works to build arrow 0.15.1
For Arrow 3.0.0 with Python 3.8:
# Builds Apache Arrow C++ and the pyarrow Python package from a release
# tarball and installs both under /usr/local.
FROM python:3.8-alpine

# Build tools and -dev packages installed ahead of the Arrow build.
RUN apk update \
    && apk upgrade \
    && apk add --no-cache build-base \
        autoconf \
        bash \
        bison \
        boost-dev \
        cmake \
        flex \
        libressl-dev \
        zlib-dev

# Python packages installed before pyarrow is built; pandas is kept in its
# own layer, as in the original recipe.
RUN pip install --no-cache-dir six pytest numpy cython
RUN pip install --no-cache-dir pandas

# ARROW_SHA1 must be the SHA-1 of the tarball for ARROW_VERSION; override both
# together (--build-arg) when building a different release.
ARG ARROW_VERSION=3.0.0
ARG ARROW_SHA1=c1fed962cddfab1966a0e03461376ebb28cf17d3
ARG ARROW_BUILD_TYPE=release

ENV ARROW_HOME=/usr/local \
    PARQUET_HOME=/usr/local

# Download and build apache-arrow.
# The checksum line feeds "<hash> *<path>" to `sha1sum -c -`, which exits
# non-zero on a mismatch and aborts the build. (Passing the file as an
# argument instead only prints its digest and never compares it.)
# Sources and the tarball are removed in the same RUN so they do not persist
# in the layer.
RUN mkdir /arrow \
    && wget -q https://github.com/apache/arrow/archive/apache-arrow-${ARROW_VERSION}.tar.gz -O /tmp/apache-arrow.tar.gz \
    && echo "${ARROW_SHA1} */tmp/apache-arrow.tar.gz" | sha1sum -c - \
    && tar -xvf /tmp/apache-arrow.tar.gz -C /arrow --strip-components 1 \
    && mkdir -p /arrow/cpp/build \
    && cd /arrow/cpp/build \
    && cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
        -DOPENSSL_ROOT_DIR=/usr/local/ssl \
        -DCMAKE_INSTALL_LIBDIR=lib \
        -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
        -DARROW_WITH_BZ2=ON \
        -DARROW_WITH_ZLIB=ON \
        -DARROW_WITH_ZSTD=ON \
        -DARROW_WITH_LZ4=ON \
        -DARROW_WITH_SNAPPY=ON \
        -DARROW_PARQUET=ON \
        -DARROW_PYTHON=ON \
        -DARROW_PLASMA=ON \
        -DARROW_BUILD_TESTS=OFF \
        .. \
    && make -j$(nproc) \
    && make install \
    && cd /arrow/python \
    && python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --with-parquet \
    && python setup.py install \
    && rm -rf /arrow /tmp/apache-arrow.tar.gz
Can someone try this on python3.9-alpine?
Doesn't work for me.
I've tried a few different configurations:
Alpine with Python 3.9.14
without the pandas packages
but with ARROW_VERSION 9.0.0
-> failed
With pandas it also failed. Do you have a solution for the newer releases of Arrow?
#12 164.8 -- stderr output is:
#12 164.8 In file included from /arrow/cpp/build/thrift_ep-prefix/src/thrift_ep/lib/cpp/src/thrift/transport/TSocket.cpp:37:
#12 164.8 /usr/include/sys/poll.h:1:2: warning: #warning redirecting incorrect #include <sys/poll.h> to <poll.h> [-Wcpp]
#12 164.8 1 | #warning redirecting incorrect #include <sys/poll.h> to <poll.h>
#12 164.8 | ^~~~~~~
#12 164.8 In file included from /arrow/cpp/build/thrift_ep-prefix/src/thrift_ep/lib/cpp/src/thrift/transport/TServerSocket.cpp:33:
#12 164.8 /usr/include/sys/poll.h:1:2: warning: #warning redirecting incorrect #include <sys/poll.h> to <poll.h> [-Wcpp]
#12 164.8 1 | #warning redirecting incorrect #include <sys/poll.h> to <poll.h>
#12 164.8 | ^~~~~~~
#12 164.8 In file included from /arrow/cpp/build/thrift_ep-prefix/src/thrift_ep/lib/cpp/src/thrift/transport/TSSLSocket.cpp:34:
#12 164.8 /usr/include/sys/poll.h:1:2: warning: #warning redirecting incorrect #include <sys/poll.h> to <poll.h> [-Wcpp]
#12 164.8 1 | #warning redirecting incorrect #include <sys/poll.h> to <poll.h>
#12 164.8 | ^~~~~~~
#12 164.8 /arrow/cpp/build/thrift_ep-prefix/src/thrift_ep/lib/cpp/src/thrift/transport/TSSLSocket.cpp: In function 'void apache::thrift::transport::cleanupOpenSSL()':
#12 164.8 /arrow/cpp/build/thrift_ep-prefix/src/thrift_ep/lib/cpp/src/thrift/transport/TSSLSocket.cpp:157:3: error: 'OPENSSL_thread_stop' was not declared in this scope; did you mean 'OPENSSL_realloc'?
#12 164.8 157 | OPENSSL_thread_stop();
#12 164.8 | ^~~~~~~~~~~~~~~~~~~
#12 164.8 | OPENSSL_realloc
#12 164.8 /arrow/cpp/build/thrift_ep-prefix/src/thrift_ep/lib/cpp/src/thrift/transport/TSSLSocket.cpp: In member function 'virtual void apache::thrift::transport::TSSLSocket::close()':
#12 164.8 /arrow/cpp/build/thrift_ep-prefix/src/thrift_ep/lib/cpp/src/thrift/transport/TSSLSocket.cpp:395:5: error: 'OPENSSL_thread_stop' was not declared in this scope; did you mean 'OPENSSL_realloc'?
#12 164.8 395 | OPENSSL_thread_stop();
#12 164.8 | ^~~~~~~~~~~~~~~~~~~
#12 164.8 | OPENSSL_realloc
#12 164.8 make[5]: *** [lib/cpp/CMakeFiles/thrift.dir/build.make:566: lib/cpp/CMakeFiles/thrift.dir/src/thrift/transport/TSSLSocket.cpp.o] Error 1
#12 164.8 make[5]: *** Waiting for unfinished jobs....
#12 164.8 make[4]: *** [CMakeFiles/Makefile2:125: lib/cpp/CMakeFiles/thrift.dir/all] Error 2
#12 164.8 make[3]: *** [Makefile:156: all] Error 2
#12 164.8
#12 164.8 CMake Error at /arrow/cpp/build/thrift_ep-prefix/src/thrift_ep-stamp/thrift_ep-build-RELEASE.cmake:47 (message):
#12 164.8 Stopping after outputting logs.
#12 164.8
#12 164.8
#12 164.8 make[2]: *** [CMakeFiles/thrift_ep.dir/build.make:86: thrift_ep-prefix/src/thrift_ep-stamp/thrift_ep-build] Error 1
#12 164.8 make[1]: *** [CMakeFiles/Makefile2:940: CMakeFiles/thrift_ep.dir/all] Error 2
#12 164.8 make[1]: *** Waiting for unfinished jobs....
#12 187.8 -- re2_ep build command succeeded. See also /arrow/cpp/build/re2_ep-prefix/src/re2_ep-stamp/re2_ep-build-*.log
#12 187.8 [ 20%] Performing install step for 're2_ep'
#12 188.7 -- re2_ep install command succeeded. See also /arrow/cpp/build/re2_ep-prefix/src/re2_ep-stamp/re2_ep-install-*.log
#12 188.7 [ 20%] Completed 're2_ep'
#12 188.7 [ 20%] Built target re2_ep
#12 212.4 -- jemalloc_ep build command succeeded. See also /arrow/cpp/build/jemalloc_ep-prefix/src/jemalloc_ep-stamp/jemalloc_ep-build-*.log
#12 212.4 [ 20%] Performing install step for 'jemalloc_ep'
#12 212.5 -- jemalloc_ep install command succeeded. See also /arrow/cpp/build/jemalloc_ep-prefix/src/jemalloc_ep-stamp/jemalloc_ep-install-*.log
#12 212.5 [ 20%] Completed 'jemalloc_ep'
#12 212.5 [ 20%] Built target jemalloc_ep
#12 212.5 make: *** [Makefile:146: all] Error 2
------
executor failed running [/bin/sh -c mkdir /arrow && wget -q https://github.com/apache/arrow/archive/apache-arrow-${ARROW_VERSION}.tar.gz -O /tmp/apache-arrow.tar.gz && tar -xvf /tmp/apache-arrow.tar.gz -C /arrow --strip-components 1 && mkdir -p /arrow/cpp/build && cd /arrow/cpp/build && cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE -DOPENSSL_ROOT_DIR=/usr/local/ssl -DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_INSTALL_PREFIX=$ARROW_HOME -DARROW_WITH_BZ2=ON -DARROW_WITH_ZLIB=ON -DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON -DARROW_PARQUET=ON -DARROW_PYTHON=ON -DARROW_PLASMA=ON -DARROW_BUILD_TESTS=OFF .. && make -j$(nproc) && make install && cd /arrow/python && python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --with-parquet && python setup.py install && rm -rf /arrow /tmp/apache-arrow.tar.gz]: exit code: 2
No, I don't. I tried a few solutions that I found by Googling, but none of them work. I'm currently totally lost.
# Application image: installs system packages, copies the project into /code
# and installs its Python requirements.
FROM python:3.7.15-alpine3.16

# System packages, one per line in alphabetical order.
RUN apk add --no-cache \
        autoconf \
        bash \
        bison \
        boost-dev \
        build-base \
        cmake \
        flex \
        g++ \
        gcc \
        gettext \
        git \
        make \
        musl-dev \
        postgresql-dev \
        zlib-dev

# The whole build context is copied before the requirements are installed.
WORKDIR /code
COPY . /code

RUN pip install --no-cache --upgrade pip wheel
RUN pip install --no-cache -r requirements.txt

# NOTE(review): this removal runs in its own layer, so the packages are still
# stored in the earlier `apk add` layer and the image does not get smaller.
RUN apk del git make cmake g++
-- Could NOT find Arrow (missing: Arrow_DIR)
-- Checking for module 'arrow'
-- Package 'arrow', required by 'virtual:world', not found
CMake Error at /usr/share/cmake/Modules/FindPackageHandleStandardArgs.cmake:230 (message):
Could NOT find Arrow (missing: ARROW_INCLUDE_DIR ARROW_LIB_DIR
ARROW_FULL_SO_VERSION ARROW_SO_VERSION)
Call Stack (most recent call first):
/usr/share/cmake/Modules/FindPackageHandleStandardArgs.cmake:594 (_FPHSA_FAILURE_MESSAGE)
cmake_modules/FindArrow.cmake:419 (find_package_handle_standard_args)
cmake_modules/FindArrowPython.cmake:46 (find_package)
CMakeLists.txt:218 (find_package)
I can't make this work either. If someone has knowledge about what the underlying problem is, I will gladly put in some time and effort to try to make this work. Unfortunately, my knowledge about this is quite limited so far. I would really like to work with the Alpine base image, as it is a safe and small starting point. I am using python:3.11-alpine as a base.
And these are my arguments. Do I have to update the ARROW_SHA1?