Instantly share code, notes, and snippets.

View hadoop-vm-hadoop-start.sh
# Run Hadoop's start-dfs.sh and then start-yarn.sh launcher scripts from
# $HADOOP_HOME/sbin.
#
# Requires: HADOOP_HOME (environment) pointing at the Hadoop installation.
# The ${VAR:?msg} guard stops with a clear message when HADOOP_HOME is unset
# or empty, instead of silently trying to execute /sbin/start-dfs.sh.
# The expansion is quoted so an install path containing spaces still works.
"${HADOOP_HOME:?HADOOP_HOME must be set}/sbin/start-dfs.sh"
"${HADOOP_HOME:?HADOOP_HOME must be set}/sbin/start-yarn.sh"
View hadoop-vm-bashrc.sh
# Hadoop-related environment for the shell.
#   HADOOP_HOME     Hadoop installation directory
#   HADOOP_JAR_LIB  share/hadoop/tools/lib beneath HADOOP_HOME
#   JAVA_HOME       JDK location (OpenJDK 8, amd64)
#   PATH            existing PATH with $HADOOP_HOME/bin appended
HADOOP_HOME=/srv/hadoop
HADOOP_JAR_LIB="${HADOOP_HOME}/share/hadoop/tools/lib"
JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
PATH="${PATH}:${HADOOP_HOME}/bin"
# Export everything in one statement so child processes inherit the values.
export HADOOP_HOME HADOOP_JAR_LIB JAVA_HOME PATH
View hadoop-vm-hadoop-setup.sh
# One-time setup: create an SSH key and authorize it for this account, make
# ~/hadoop_start.sh executable, format the NameNode, then run the start script.

# Generate an RSA key pair. Just press Enter when asked for a file path and a
# passphrase, so the key lands in the default ~/.ssh/id_rsa with no passphrase.
# The key type is explicit because the next step reads id_rsa.pub, and
# ssh-keygen's default key type differs between OpenSSH releases.
ssh-keygen -t rsa
# Append (>>) instead of overwrite (>) so keys that are already authorized
# for this account are not lost.
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
chmod 755 ~/hadoop_start.sh
# "hadoop namenode -format" is the deprecated spelling; the hdfs command is
# the supported entry point for NameNode operations.
hdfs namenode -format
~/hadoop_start.sh
View hadoop-vm-root-setup.sh
# Install basic development tools.
# These commands call apt-get/easy_install/pip directly (no sudo), so they
# presumably need to run as root -- confirm.
# Refresh the package index, then upgrade the packages already installed.
apt-get update
apt-get upgrade
# Build toolchain plus ssh, lzop, git, rsync and curl.
apt-get install build-essential ssh lzop git rsync curl
# Python development headers and setuptools (setuptools provides easy_install).
apt-get install python-dev python-setuptools
# libcurl development files (OpenSSL flavour).
apt-get install libcurl4-openssl-dev
# Bootstrap pip through easy_install, then use pip for the Python packages.
easy_install pip
pip install virtualenv virtualenvwrapper python-dateutil
# Create a hadoop user.
# NOTE(review): no commands follow this heading in the visible listing; the
# preview appears to be cut off here -- confirm against the full file.
View hadoop-vm-sysctl.conf
# Disable IPv6 via the disable_ipv6 switch on three scopes:
#   all     - the "all" interfaces entry
#   default - the "default" entry
#   lo      - the loopback interface
# NOTE(review): presumably done so the Hadoop daemons only deal with IPv4
# addresses -- confirm the motivation.
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
View hadoop-vm-yarn-site.xml
<configuration>
<!-- Declare one NodeManager auxiliary service, named mapreduce_shuffle. -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Implementation class for the mapreduce_shuffle auxiliary service declared
     above; the service name is part of this property's key. -->
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
View hadoop-vm-hdfs-site.xml
<configuration>
  <!-- Set dfs.replication to 1.
       NOTE(review): presumably because this VM runs a single node; confirm. -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
View hadoop-vm-mapred-site.xml
<configuration>
  <!-- Select "yarn" as the MapReduce framework. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
View hadoop-vm-core-site.xml
<configuration>
  <!-- Default filesystem URI: HDFS on localhost, port 9000.
       fs.defaultFS replaces the deprecated key fs.default.name. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <!-- Base directory for Hadoop's temporary/working data. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/var/app/hadoop/data</value>
  </property>
</configuration>