Created
September 11, 2020 22:56
-
-
Save colinfang/02b45e6751264b044e02cb7edd209c09 to your computer and use it in GitHub Desktop.
openblas 2821
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# syntax=docker/dockerfile:1
# Reproducer image for OpenMathLib/OpenBLAS#2821: scipy.optimize.linprog
# segfaults in OpenBLAS after libhdfs.so has been loaded via JNI.
# Versions below are intentionally pinned — do not bump them, or the
# reproduction may stop working.
FROM ubuntu:20.04

# Combine update + install in one layer (avoids the stale apt-cache bug) and
# clean the lists in the same layer so they never persist in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -qy --no-install-recommends \
        ca-certificates \
        openjdk-8-jdk \
        wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt

# Hadoop ships $HADOOP_HOME/lib/native/libhdfs.so, which pyarrow loads
# through JNI at runtime. Remove the tarball in the same layer it was
# downloaded in, so it does not bloat the image.
RUN wget -q https://downloads.apache.org/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz \
    && tar -xzf hadoop-3.2.1.tar.gz \
    && rm hadoop-3.2.1.tar.gz
ENV HADOOP_HOME=/opt/hadoop-3.2.1

# Using pip's numpy wheel works well (does NOT reproduce the segfault):
# RUN apt-get install -qy python3 python3-pip
# RUN pip3 install "numpy==1.18" "scipy==1.5" "pyarrow==1.0"
# RUN ldd /usr/local/lib/python3.8/dist-packages/numpy/core/_multiarray_umath.cpython-38-x86_64-linux-gnu.so

# Conda's numpy (linked against conda's OpenBLAS) is what triggers the bug.
ARG conda_path=/opt/miniconda3
ARG conda_bin=${conda_path}/bin
ARG conda_exe=${conda_bin}/conda
RUN wget -q -O miniconda3.sh "https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" \
    && sh miniconda3.sh -b -p ${conda_path} \
    && rm miniconda3.sh
RUN ${conda_exe} init
RUN ${conda_exe} install -y "numpy=1.18" "scipy=1.5" "nomkl" "pyarrow=0.15"
# pip install into the conda env also works well (does not segfault):
# RUN ${conda_bin}/pip install "numpy==1.18" "scipy==1.5" "pyarrow==1.0"

# Show which BLAS the conda numpy is linked against (diagnostic output).
RUN ldd ${conda_path}/lib/python3.8/site-packages/numpy/core/_multiarray_umath.cpython-38-x86_64-linux-gnu.so

WORKDIR /src
# COPY, not ADD, for a plain local file.
COPY test.py test.py

# docker build -t debug .
# docker run --rm -it debug bash
# OPENBLAS_NUM_THREADS=2 python test.py
# OPENBLAS_NUM_THREADS=2 gdb python -ex 'r test.py'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Reproducer for OpenMathLib/OpenBLAS#2821.

Loading ``$HADOOP_HOME/lib/native/libhdfs.so`` via JNI (through pyarrow's
``HadoopFileSystem``) and then calling ``scipy.optimize.linprog`` segfaults
inside OpenBLAS. Run as:

    OPENBLAS_NUM_THREADS=2 python test.py
"""
import pyarrow
import numpy as np
import scipy
import scipy.optimize
from pyarrow.hdfs import HadoopFileSystem

try:
    # Connecting forces JNI to load `$HADOOP_HOME/lib/native/libhdfs.so`
    # at runtime. 'foo' is a deliberately fake host: the connection is
    # expected to fail, and the failure is irrelevant to the reproduction.
    HadoopFileSystem('foo')
except Exception:
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt still
    # propagate; the connection error itself is intentionally ignored.
    pass

# Fixed numeric fixture; linprog on this problem crashes in OpenBLAS once
# libhdfs.so has been loaded (see the issue link below).
evs = np.array([ 0.01855396, 0.02888079, 0.01484719, 0.01187566, 0.01350127,
0.0152477 , 0.02978069, 0.01184938, 0.0152477 , 0.01967369,
0.02334463, -0.00964757, -0.0084154 , 0.0093229 , 0.00074653])
A_eq = np.array([[-0.17128674, 0.17588126, -0.21854693, 0.35221215, 0.32877443,
0.35090059, -0.28819657, -0.17272982, 0.35090059, 0.32671732,
-0.13842946, 0.23981023, 0.1866889 , 0.15406733, 0.24219247],
[ 0.27321495, -0.28669058, 0.355471 , 0.24540659, 0.16261506,
0.24417405, -0.20448798, 0.27555701, 0.24417405, 0.16159759,
-0.19235484, -0.38261073, -0.30371767, -0.25482233, -0.16266994]])
b_eq = [0,0]
scipy.optimize.linprog(-evs, A_eq=A_eq, b_eq=b_eq, bounds=[(0, 1)] * len(evs))
# https://github.com/xianyi/OpenBLAS/issues/2821
(base) root@4d2ad75f092e:/src# OPENBLAS_NUM_THREADS=2 python test.py
2020-09-11 23:32:14,815 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
hdfsBuilderConnect(forceNewInstance=1, nn=foo, port=0, kerbTicketCachePath=(NULL), userName=(NULL)) error:
UnknownHostException: foojava.lang.IllegalArgumentException: java.net.UnknownHostException: foo
at org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:447)
at org.apache.hadoop.hdfs.NameNodeProxiesClient.createProxyWithClientProtocol(NameNodeProxiesClient.java:139)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:356)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:290)
at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:171)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3303)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3352)
at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3326)
at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:532)
at org.apache.hadoop.fs.FileSystem$2.run(FileSystem.java:502)
at org.apache.hadoop.fs.FileSystem$2.run(FileSystem.java:499)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:499)
Caused by: java.net.UnknownHostException: foo
... 16 more
Segmentation fault
The Java error is expected (because I put in a fake host, `foo`). It doesn't block the program.
We hit the openblas segfault.
Hmm. I cannot reproduce the segfault with your Dockerfile unfortunately - I do get the Hadoop exception and its backtrace down to the "16 more", but what looks like normal completion afterwards. Is there anything missing in the reproducer, or am I just (un)lucky ?
@martin-frbg I made a repo https://github.com/colinfang/openblas2821 so that I can use docker hub at https://hub.docker.com/repository/docker/colinfang/openblas2821
I can reproduce the error using the image built from Docker Hub. It segfaults every time.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The code segfaults on
OPENBLAS_NUM_THREADS=2 python test.py
OpenMathLib/OpenBLAS#2821
traceback
In order to reproduce it, we need
$HADOOP_HOME/lib/native/libhdfs.so
to be loaded at runtime (via JNI)