Last active
August 1, 2018 06:41
-
-
Save ebernhardson/c1fbe7c7c2425afe5131d8d6130572f8 to your computer and use it in GitHub Desktop.
LightGBM + HDFS Demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build lightgbm container with hdfs | |
host$ docker build -t lightgbm . | |
# Pull the cloudera image | |
host$ docker pull cloudera/quickstart | |
# Create a Docker network so the two containers can reach each other by hostname | |
host$ docker network create lgbtest | |
# Start hadoop in one shell | |
host$ docker run -ti --network lgbtest --name=cloudera --hostname=quickstart.cloudera --privileged=true cloudera/quickstart /usr/bin/docker-quickstart | |
# Start lightgbm in another shell | |
host$ docker run -ti --network lgbtest --name=lightgbm --hostname=lightgbm lightgbm:latest | |
# Copy training files from lightgbm to cloudera | |
host$ docker cp lightgbm:/root/LightGBM/examples/regression . | |
host$ docker cp regression cloudera:/root | |
# Copy the files into hdfs from inside the cloudera container | |
cloudera$ hdfs dfs -copyFromLocal /root/regression /user/cloudera/ | |
# Start up an interactive python interpreter in the lightgbm container | |
lightgbm$ CLASSPATH=$(hadoop classpath --glob) /root/venv/bin/python | |
# Train something with hdfs datasets | |
>>> import lightgbm as lgb | |
>>> ds_train, ds_test = (lgb.Dataset('hdfs://quickstart.cloudera/user/cloudera/regression/regression.' + x) for x in ('train', 'test')) | |
>>> gbm = lgb.train({'objective': 'regression', 'verbose': 1}, ds_train, num_boost_round=2, valid_sets=ds_test) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-----BEGIN PGP PUBLIC KEY BLOCK----- | |
Version: GnuPG v1.4.9 (GNU/Linux) | |
mQGiBEnvgi0RBADLx1qQlXlrvHOo13dUvoWL97Ny/0s0S/GcMEgAqYvZzUPVcq8H | |
GUsOb4PLTfcL1H7Ptq9fqr02uIb5Bc/ltdwE9GFaT2nvdfBx9T8jr8LrW9JE2xJq | |
dCyFO5yP9YbZeFAxNO3yBxeP85lQ9CdWWLvyYdtQ+T84EYerqkcVbSvYRwCg6zyx | |
EE3jWYvyVv/3HTrVTYpgHgMD/2kMR1Z2vEYOSM7h4cnRnxiadhefqJ2WCm4L30Rx | |
/F9JBLAEuIuUndiOShoB043iDY+rrqCHqHQ/uI2D4piW9cDYMo7EJlsFtQ5g2SFg | |
PcS4+DLhU464dTQsTGAhvcv+F0VQV4iu1HdD2/kKJkCS/MZL4rr4emqsh6VIBDdG | |
ytPaA/9cyRJZe2BrBM2pECGncE5RUaM3g37Ka+VnmMVOXgZdzgCxwFZyVhyxzssD | |
kB4jcm75UEZx8BiaoPQDQEsBongdx5M4Vwv5XnvUq7sK7eZLmUzW9hmkPjgLea0/ | |
znchvPsLeTNqSfIcH14TbFt6B2y1G3Vbi5/6UiAaIqLrqjZlCrQXQ2xvdWRlcmEg | |
QXB0IFJlcG9zaXRvcnmIYAQTEQIAIAUCSe+CLQIbAwYLCQgHAwIEFQIIAwQWAgMB | |
Ah4BAheAAAoJEDJ1dO4CqBjdGQUAnitydC/NGEh0aZXDN1v22pWFpRzTAJ46N4gT | |
Zx25oWfyppX3R7fSH+7TPrkCDQRJ74ItEAgAq8s4iMsGhk9nnMF6wlarqHjws4Dw | |
NFZBzA1Ah8KnMtrdr8t99OfzY1b7PNzHXujcaTTqL6L881ezdsls9aHp2kr24Btr | |
8nqEZJHSjCnQscAGu+NrhoH2KvK+tMRCHGRcy5UNQbLTJi4Hf8Lo+zv0WUy9BCDu | |
7HoDlwrrh1Rw5oOwLFc2UXSTEB6BwYna0mZcNjVpfKNHa//wJcKR0AtsCwRT9znP | |
GS0Hpqi1l0/iU7sJhNWyyF427ANg+Jv2n4IP+dd734ZiFeJ9tWCtBjfc3MZJKETk | |
tiCtX7FVIIqBAmYLwPqcvZMGJMrNzLBtRuuiBv5bFcPpMEhoD40oQEG8uwADBQf/ | |
f3NpQbuAcZLMzbrHYu3FB/+4ETvDJXJIEUiQUdobWancSBUhuNPOqIgLzIWM1jRu | |
jWGIpkeP6iqNW9kDrq26CuMFP2CoVvnaMiLXUvyf62HWAiYXXlZle5O97bvhYMtM | |
Y4o5sMo2ktI9IcgYIFicFwcmuGyGL7nJ3Bo9FAUV2LvMe++O/f13jsPpygoTZgGT | |
6w0erglWgrgf5pXt8ajlI4TUrlMVg9Iy/tB9ZzVHnpk21o4vLHwZkgXe1WlK/Rze | |
ZCruXyXHaFyEJN2zlP2xNj2F2WisL+/HEnl/qzU4IpNI2LQV2aiY9Nt8MBXgSHAh | |
gWKWkjiB+tswgzuNsBOTM4hJBBgRAgAJBQJJ74ItAhsMAAoJEDJ1dO4CqBjd988A | |
oJ1WlEx2BcFA7W1RMyErejcvB6thAKCf3t0thSQvkoGi3AOJ4Haj/C3yUQ== | |
=H6IR | |
-----END PGP PUBLIC KEY BLOCK----- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
deb http://archive.cloudera.com/cdh5/debian/jessie/amd64/cdh jessie-cdh5 contrib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Package: * | |
Pin: release o=Cloudera, l=Cloudera | |
Pin-Priority: 501 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LightGBM compiled with HDFS support (-DUSE_HDFS=1) on Wikimedia's Debian
# jessie base image. libhdfs comes from the Cloudera CDH5 apt repository that
# cloudera.list / cloudera.pref / archive.key (copied below) configure.
# NOTE(review): the base image is untagged, so builds float with whatever the
# registry currently serves; pin a tag or digest if reproducibility matters.
FROM docker-registry.wikimedia.org/wikimedia-jessie

# The container is an interactive sandbox: it starts a plain (non-login) bash.
ENTRYPOINT ["/bin/bash"]

# Cloudera apt source, its pin priority, and the repository signing key.
COPY cloudera.list /etc/apt/sources.list.d/cloudera.list
COPY cloudera.pref /etc/apt/preferences.d/cloudera.pref
COPY archive.key /root/archive.key

# Hadoop tooling reads HADOOP_CONF_DIR; the previous name (HADDOP_CONF) was a
# misspelling that nothing consumed.
ENV HADOOP_CONF_DIR=/etc/hadoop/conf

# System packages. cmake is taken from jessie-backports; the toolchain, JDK,
# libhdfs and Python come from the default/Cloudera sources. apt-get is used
# instead of apt because apt's CLI is not meant for scripted use, and the
# package index is removed in the same layer so it does not bloat the image.
# The jawt.h symlink exposes the header shipped with gcj inside the OpenJDK
# include directory -- presumably because the headless JDK does not provide it
# and the CMake JNI detection expects it (TODO confirm).
RUN apt-key add /root/archive.key && \
    apt-get update && \
    apt-get install -y -t jessie-backports cmake cmake-data && \
    apt-get install -y --force-yes \
        build-essential \
        gcj-4.9-jdk \
        git-core \
        libhdfs0 \
        libhdfs0-dev \
        openjdk-8-jdk-headless \
        python3 \
        virtualenv && \
    rm -rf /var/lib/apt/lists/* && \
    ln -s /usr/lib/gcc/x86_64-linux-gnu/4.9/include/jawt.h /usr/lib/jvm/java-8-openjdk-amd64/include/

# Python virtualenv plus the LightGBM build itself. `cd` is kept inside the RUN
# (rather than WORKDIR) so the container's starting directory is unchanged.
# The final line writes a profile.d snippet exporting the Hadoop CLASSPATH that
# libhdfs needs at runtime; note that profile.d is only sourced by login
# shells, so with the plain /bin/bash entrypoint CLASSPATH still has to be set
# by hand (or bash started with -l) before importing lightgbm.
RUN virtualenv --python python3 /root/venv && \
    /root/venv/bin/pip install --upgrade pip wheel && \
    /root/venv/bin/pip install numpy scipy scikit-learn && \
    git clone --recursive https://github.com/Microsoft/LightGBM /root/LightGBM && \
    mkdir /root/LightGBM/build && \
    cd /root/LightGBM/build && \
    JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 cmake -DUSE_HDFS=1 .. && \
    make -j 4 && \
    cd /root/LightGBM/python-package && \
    /root/venv/bin/python setup.py install --precompile && \
    echo 'export CLASSPATH=$(hadoop classpath --glob)' > /etc/profile.d/hadoop.sh
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment