Last active
May 6, 2019 17:29
-
-
Save jcrist/75e81f6792e610f81ffa86b4474a9b1f to your computer and use it in GitHub Desktop.
Hadoop Pseudodistributed Skein debugging
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Explanation: Prefer Cloudera's CDH5 packages over Ubuntu's own.
Explanation: A priority above 500 wins over the default archive,
Explanation: so the Cloudera repo's hadoop packages are installed.
Package: *
Pin: release o=Cloudera, l=Cloudera
Pin-Priority: 501
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Single-node (pseudo-distributed) CDH5 Hadoop image for debugging Skein.
# xenial matches the CDH5 xenial apt repository configured below.
FROM ubuntu:xenial

# Base tooling for fetching keys/installers; apt lists are removed in the
# same layer so they never persist in the image.
RUN apt-get update && \
    apt-get install -y -q curl bzip2 git && \
    rm -rf /var/lib/apt/lists/*

# Install CDH5 in a single node: Pseudo Distributed
# Docs: https://www.cloudera.com/documentation/enterprise/5-6-x/topics/cdh_qs_yarn_pseudo.html
# COPY (not ADD) for a plain local file; the pref file pins Cloudera packages
# above Ubuntu's.
COPY cloudera.pref /etc/apt/preferences.d/cloudera.pref
# curl -fsS: -f fails on HTTP errors instead of piping an error page into
# apt-key; -S still reports errors despite -s.
RUN curl -fsS https://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh/archive.key | apt-key add - && \
    echo 'deb [arch=amd64] http://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh xenial-cdh5 contrib' > /etc/apt/sources.list.d/cloudera.list && \
    echo 'deb-src http://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh xenial-cdh5 contrib' >> /etc/apt/sources.list.d/cloudera.list && \
    apt-get update && \
    apt-get install -y -q sudo openjdk-8-jre-headless hadoop-conf-pseudo && \
# Format the namenode, then bring HDFS up just long enough (at build time)
# to lay down the standard directory tree and a home dir for testuser.
    sudo -u hdfs hdfs namenode -format -force && \
    for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done && \
    bash /usr/lib/hadoop/libexec/init-hdfs.sh && \
# HDFS ownership is by name only, so this works even though the OS user
# `testuser` is only created in a later layer.
    sudo -u hdfs hdfs dfs -mkdir /user/testuser && \
    sudo -u hdfs hdfs dfs -chown testuser /user/testuser && \
    rm -rf /var/lib/apt/lists/*

RUN useradd -m testuser

USER testuser

# Install conda & build conda environments:
# -f: fail on HTTP errors; -L: follow redirects from repo.continuum.io.
RUN curl -fsSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh \
    && /bin/bash /tmp/miniconda.sh -b -p /home/testuser/miniconda \
    && echo 'export PATH="/home/testuser/miniconda/bin:$PATH"' >> /home/testuser/.bashrc \
    && rm /tmp/miniconda.sh

# Root is required at runtime so start.sh can `service ... start`.
USER root

COPY start.sh /tmp/start.sh

# -d keeps the container alive after the Hadoop services come up.
CMD ["bash", "/tmp/start.sh", "-d"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Bring up the pseudo-distributed Hadoop services (HDFS, then YARN).
# Pass -d to block forever afterwards, for use as a container CMD.

for svc in hadoop-hdfs-namenode \
           hadoop-hdfs-datanode \
           hadoop-yarn-resourcemanager \
           hadoop-yarn-nodemanager; do
    sudo service "$svc" start
done
echo "HDFS Started"

# Daemon mode: keep the script (and hence the container) alive.
if [[ $1 == "-d" ]]; then
    sleep infinity
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Smoke-test Skein container scaling against a pseudo-distributed YARN.

Simulates a dask-style cluster (one scheduler, scalable workers) without a
dask dependency: every service just runs ``sleep infinity``.
"""
import time

import skein

# NOTE: the YAML indentation below is load-bearing; `worker` starts at 0
# instances and is scaled up after submission.
spec = skein.ApplicationSpec.from_yaml("""
name: test
services:
  scheduler:
    instances: 1
    resources:
      vcores: 1
      memory: 256
    script: sleep infinity
  worker:
    instances: 0
    resources:
      vcores: 1
      memory: 256
    depends:
      - scheduler
    script: sleep infinity
""")

client = skein.Client()
app = client.submit_and_connect(spec)

# Request two workers, then poll (every 2s, ~60s budget) until both RUNNING.
app.scale('worker', 2)
timeout = 60
while timeout >= 0:
    n = len(app.get_containers(services=["worker"], states=['RUNNING']))
    print("N workers: %d" % n)
    if n == 2:
        break
    time.sleep(2)
    timeout -= 2
else:
    # while/else: runs only if the budget was exhausted without a break.
    print("FAILED TO GET WORKERS IN TIME")

app.shutdown()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To set up, copy all of the files above into the same directory. Then, from that directory:
On my machine the test outputs: