- Ubuntu 16.04 x64
- Cloudera 5.7.00
- docker
- docker image: cloudera/quickstart
install-docker-ubuntu-16-04.md
# Fetch the Docker image: pulls the `latest` tag of cloudera/quickstart.
docker pull cloudera/quickstart:latest
# Start an interactive container from the cloudera/quickstart image.
# The container gets the hostname quickstart.cloudera, runs privileged with a
# TTY and stdin attached, publishes ports 8888, 80, 8088, 8042 and 19888 on the
# same host ports, and executes /usr/bin/docker-quickstart as its command.
docker run \
  --hostname quickstart.cloudera \
  --privileged \
  -it \
  --publish 8888:8888 \
  --publish 80:80 \
  --publish 8088:8088 \
  --publish 8042:8042 \
  --publish 19888:19888 \
  cloudera/quickstart \
  /usr/bin/docker-quickstart
# Make the cloudera user the owner of /user/cloudera in HDFS.
# The chown is issued as the hdfs user in one non-interactive command, so no
# nested shell has to be opened and exited by hand and the step also works
# when these commands are run as a script.
sudo -u hdfs hadoop fs -chown cloudera /user/cloudera
# Create the wordcount input directory (and its parent directory) in HDFS as
# the cloudera user. -p creates missing parents and does not fail when the
# directories already exist, so the step can be repeated safely.
sudo -u cloudera hadoop fs -mkdir -p /user/cloudera/wordcount/input
# Add some input data: write three sample text files into the current
# directory, then upload them to the wordcount input directory in HDFS.
# The files are listed by name so that other file* entries in the working
# directory are not uploaded by accident, and -f overwrites copies left in
# HDFS by an earlier run instead of failing.
printf '%s\n' "Hadoop is an elephant" > file0 && \
printf '%s\n' "Hadoop is as yellow as can be" > file1 && \
printf '%s\n' "Oh what a yellow fellow is Hadoop" > file2 && \
hadoop fs -put -f file0 file1 file2 /user/cloudera/wordcount/input
# Run the WordCount job and print its result.
# The previous output directory is removed first because the job aborts with
# FileAlreadyExistsException when its output directory already exists. -f keeps
# the removal from returning an error when there is no previous output (e.g. on
# the very first run), which would otherwise short-circuit the && chain before
# the job starts.
# The glob is quoted so the local shell passes it through unchanged and it is
# expanded against HDFS rather than the local file system.
hadoop fs -rm -r -f /user/cloudera/wordcount/output && \
hadoop jar wc-inmapper.jar wc.WordCount /user/cloudera/wordcount/input /user/cloudera/wordcount/output && \
hadoop fs -cat '/user/cloudera/wordcount/output/*'
# Create the input directory (and its parent directory) for the crystal-ball
# example in HDFS. -p creates missing parents and does not fail when the
# directories already exist.
# No `exit` follows this command: the commands below must run in this same
# shell, and there is no nested `sudo su` shell to leave at this point.
hadoop fs -mkdir -p /user/cloudera/cb/input
# Prepare the input for the crystal-ball job, run it, and print the result.
# Only file0 and file1 are uploaded, by name: a file* glob would also pick up
# the file2 left over from the wordcount example and pollute this job's input.
# -put -f overwrites input files left in HDFS by an earlier run, and -rm -f
# does not fail when the output directory does not exist yet, so the && chain
# reaches the job on the first run as well as on re-runs.
# The final glob is quoted so it is expanded against HDFS, not the local
# file system.
printf '%s\n' "B12 C31 D76 A12 B76 C31 D76 C31 A10 B12 D76" > file0 && \
printf '%s\n' "D76 D76 B12 A12 C31 D76 B12 A12 D76 A12 C31" > file1 && \
hadoop fs -put -f file0 file1 /user/cloudera/cb/input && \
hadoop fs -rm -r -f /user/cloudera/cb/output && \
hadoop jar crystal-ball-hadoop.jar /user/cloudera/cb/input /user/cloudera/cb/output && \
hadoop fs -cat '/user/cloudera/cb/output/*'
For more information, please visit:
Example: WordCount Sourcecode
Cloudera - Hadoop Tutorial - Example: Running WordCount v1.0
YouTube video: How to use official Cloudera quickstart Docker image
Error
Exception in thread "main" org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory hdfs://quickstart.cloudera:8020/user/cloudera/ach/input already exists:
![screen shot 2018-04-16 at 5 13 18 pm](https://user-images.githubusercontent.com/5343215/38837919-8672e282-4199-11e8-9b55-7b3b2798c46a.png)
Solution
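Run the `hadoop jar` command without the main class name (for example, leave out `com.AverageComputation`).
When the JAR's manifest already declares a main class, `hadoop jar` passes the class name on as the first program argument, which shifts the input path into the output position and causes the "Output directory ... already exists" error above.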