Skip to content

Instantly share code, notes, and snippets.

@alvaromuir
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alvaromuir/5131956a2c34eca54dfc to your computer and use it in GitHub Desktop.
Save alvaromuir/5131956a2c34eca54dfc to your computer and use it in GitHub Desktop.
base data-science vagrant linux build
# base data-science vagrant linux build
# last update, 9.28.14
# @alvaromuir
## Base installs ##
# Bring every installed package up to date, then clear yum's caches.
for yum_action in update upgrade; do
  sudo yum -y "$yum_action"
done
sudo yum clean all
## Administrative ##
# Install the EPEL 6 release package, then the SSH, NTP, DNS and diagnostic
# utilities used on the box.
sudo rpm -Uvh http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
admin_packages=(openssh-clients openssh-server ntp bind-utils yum-utils htop nmap)
sudo yum -y install "${admin_packages[@]}"
## JAVA ##
# Download and install the Oracle JDK 7u67 RPM, register it with
# `alternatives`, and publish JAVA_HOME/JAVACMD/PATH for login shells.
# The Cookie header sends the Oracle license-acceptance cookie with the request.
jdk_rpm=/usr/local/src/jdk-7-linux-x64.rpm
sudo wget --no-cookies --no-check-certificate \
  --header "Cookie: oraclelicense=accept-securebackup-cookie" \
  "http://download.oracle.com/otn-pub/java/jdk/7u67-b01/jdk-7u67-linux-x64.rpm" \
  -O "$jdk_rpm"
sudo rpm -Uvh "$jdk_rpm"
sudo alternatives --install /usr/bin/java java /usr/java/latest/bin/java 2
# Quoted heredoc: the $-references are written literally and only expand when
# the profile script is sourced.
# JAVACMD names the launcher itself ($JAVA_HOME/bin/java); the earlier value
# $JAVA_HOME/bin/java/bin/java treated the java binary as a directory.
sudo tee /etc/profile.d/java.sh > /dev/null <<'JAVA_PROFILE'
export JAVA_HOME=/usr/java/default
export JAVACMD=$JAVA_HOME/bin/java
export PATH=$PATH:$JAVA_HOME/bin
JAVA_PROFILE
source /etc/profile.d/java.sh
## Core development additions ##
# Compiler toolchain, language runtimes, VCS clients, and Python tooling.
sudo yum -y groupinstall "Development tools"
sudo yum -y install python-setuptools golang nodejs npm screen hg git
sudo easy_install pip
sudo pip install virtualenvwrapper
# Resolve the interpreter's library directory now and append it to PATH for
# login shells. print(...) with parentheses works on both Python 2 and 3.
python_site_dir=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')
# Single-quoted format: $PATH must reach the profile script unexpanded.
printf 'export PATH=$PATH:%s\n' "$python_site_dir" | sudo tee /etc/profile.d/python.sh > /dev/null
source /etc/profile.d/python.sh
# Go workspace under each user's home directory ($HOME expands at login).
sudo tee /etc/profile.d/go.sh > /dev/null <<'GO_PROFILE'
export GOPATH=$HOME/go
export PATH=$PATH:$HOME/go/bin
GO_PROFILE
source /etc/profile.d/go.sh
# Run through sudo like every other yum call in this script; the pattern is
# quoted so yum, not the shell, expands it against package names.
sudo yum -y install 'postgresql-jdbc*'
## ANT ##
# Unpack Apache Ant 1.9.4 into /opt/ant, link the launcher into /usr/bin, and
# publish ANT_HOME for login shells.
ant_zip=/usr/local/src/ant-1.9.4-bin.zip
sudo wget http://apache.tradebit.com/pub//ant/binaries/apache-ant-1.9.4-bin.zip -O "$ant_zip"
sudo unzip "$ant_zip" -d /opt
sudo mv /opt/apache-ant-1.9.4 /opt/ant
sudo ln -s /opt/ant/bin/ant /usr/bin/ant
# ANT_HOME is exported so child processes inherit it; a bare assignment would
# stay local to the shell that sources the profile script.
sudo tee /etc/profile.d/ant.sh > /dev/null <<'ANT_PROFILE'
export ANT_HOME=/opt/ant
export CLASSPATH=.
ANT_PROFILE
source /etc/profile.d/ant.sh
## Maven ##
# Unpack Apache Maven 3.2.1 into /opt/maven, link the launcher into /usr/bin,
# and publish MAVEN_HOME/MAVEN_OPTS for login shells.
maven_zip=/usr/local/src/maven-3.2.1.zip
sudo wget http://www.motorlogy.com/apache/maven/maven-3/3.2.1/binaries/apache-maven-3.2.1-bin.zip -O "$maven_zip"
sudo unzip "$maven_zip" -d /opt
sudo mv /opt/apache-maven-3.2.1 /opt/maven
sudo ln -s /opt/maven/bin/mvn /usr/bin/mvn
# Both variables are exported so the mvn launcher (a child process) sees them.
# The option list used to contain -Xmx twice ("-Xmx2g -Xmx512m"), so the
# second value replaced the 2g limit; the second flag is now -Xms512m.
# NOTE(review): presumably an initial heap of 512m was intended - confirm.
sudo tee /etc/profile.d/maven.sh > /dev/null <<'MAVEN_PROFILE'
export MAVEN_HOME=/opt/maven
export MAVEN_OPTS="-Xmx2g -Xms512m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m"
export CLASSPATH=.
MAVEN_PROFILE
source /etc/profile.d/maven.sh
# SCALA #
# Install the Scala 2.11.2 RPM directly from its download URL and add its bin
# directory to PATH for login shells.
scala_rpm_url=http://downloads.typesafe.com/scala/2.11.2/scala-2.11.2.rpm
sudo yum -y localinstall "$scala_rpm_url"
# Quoted heredoc keeps $PATH and $SCALA_HOME literal in the profile script.
sudo tee /etc/profile.d/scala.sh > /dev/null <<'SCALA_PROFILE'
export SCALA_HOME=/usr/share/scala
export PATH=$PATH:$SCALA_HOME/bin
SCALA_PROFILE
source /etc/profile.d/scala.sh
## Data Science Stuff ##
# Data Science Toolkit python tools: unpacked into the current directory,
# installed, then the extracted trees are removed.
sudo curl -o /usr/local/src/python_tools.zip http://www.datasciencetoolkit.org/python_tools.zip
sudo unzip /usr/local/src/python_tools.zip
sudo python/install
sudo rm -rf python __MACOSX
sudo pip install csvkit
# Register the home:tange repository before the yum transaction below so its
# packages can be resolved there.
# NOTE(review): presumably this repo is the source of `parallel` - confirm.
# The file name ends in ".repo" (it was misspelled ".repol"). -f keeps curl
# from saving an HTTP error page as a repo file, and -L follows redirects.
sudo curl -fL -o /etc/yum.repos.d/tange.repo http://download.opensuse.org/repositories/home:/tange/CentOS_CentOS-6/home:tange.repo
sudo yum -y install numpy scipy python-matplotlib ipython python-pandas sympy python-nose R parallel octave
# Standalone command-line helpers.
sudo curl -o /usr/local/bin/jq http://stedolan.github.io/jq/download/linux64/jq
sudo chmod +x /usr/local/bin/jq
sudo npm -g install xml2json-command
sudo curl -o /usr/local/bin/Rio https://raw.githubusercontent.com/jeroenjanssens/data-science-at-the-command-line/master/tools/Rio
sudo chmod +x /usr/local/bin/Rio
# Lower-case aliases that point at the resolved absolute paths of Rio and R.
printf 'alias rio=%s\nalias r=%s\n' "$(command -v Rio)" "$(command -v R)" \
  | sudo tee /etc/profile.d/aliases.sh > /dev/null
## Julia ##
# Build Julia from source in /usr/share/julia and link the resulting binary
# into /usr/bin.
sudo yum -y install libXp openmotif openmotif22
julia_src=/usr/share/julia
# Cloned over HTTPS: GitHub no longer serves the unauthenticated git:// protocol.
sudo git clone https://github.com/JuliaLang/julia.git "$julia_src"
# The build runs in a subshell with an explicit path. The earlier `cd !$`
# depended on interactive history expansion, which scripts do not perform.
(
  cd "$julia_src" || exit 1
  # Build options appended to Make.user before compiling.
  printf '%s\n' 'override USE_SYSTEM_BLAS = 1' 'OPENBLAS_DYNAMIC_ARCH=0' \
    | sudo tee -a Make.user > /dev/null
  sudo make -C deps cleanall
  sudo make -C deps clean-openblas
  sudo make
)
cd ~
sudo bash -c "echo 'export JULIA_HOME=/usr/share/julia' > /etc/profile.d/julia.sh"
# Source the profile script BEFORE using $JULIA_HOME: with the variable unset
# the link source collapsed to /usr/bin/julia, i.e. a link to itself.
source /etc/profile.d/julia.sh
sudo ln -s "$JULIA_HOME/usr/bin/julia" /usr/bin/
## Hadoop ##
# NOTE: Hadoop will still have to be configured
# See /etc/hadoop/conf
# Replace any existing HDP/Ambari repo definitions with the pinned versions.
sudo rm -rf /etc/yum.repos.d/hdp.repo /etc/yum.repos.d/ambari.repo
sudo curl -o /etc/yum.repos.d/hdp.repo http://public-repo-1.hortonworks.com/HDP/centos6/2.x/updates/2.1.4.0/hdp.repo
sudo curl -o /etc/yum.repos.d/ambari.repo http://public-repo-1.hortonworks.com/ambari/centos6/1.x/updates/1.6.1/ambari.repo
# "sqoop" was misspelled "squoop" in the package list. The argument-less
# `yum -y install` that followed the ambari-server install has been dropped.
sudo yum -y install hadoop hdfs hbase hcatalog hive hue mahout oozie pig storm sqoop webhcat
sudo yum -y install ambari-server
# Quoted heredoc keeps $HADOOP_CONF_DIR literal so it expands at login time.
sudo tee /etc/profile.d/hadoop.sh > /dev/null <<'HADOOP_PROFILE'
export HADOOP_CONF_DIR=/etc/hadoop/conf
export YARN_CONF_DIR=$HADOOP_CONF_DIR
HADOOP_PROFILE
source /etc/profile.d/hadoop.sh
## Spark ##
# Download the Spark 1.1.0 sources, build them with Maven against YARN and
# Hadoop 2.4 with Hive support, and install the tree under /usr/share/spark.
spark_tgz=/usr/local/src/spark.tgz
# Save the tarball where tar reads it from; without -o curl wrote the archive
# to stdout and /usr/local/src/spark.tgz never existed.
sudo curl -o "$spark_tgz" http://d3kbcqa49mib13.cloudfront.net/spark-1.1.0.tgz
# Extracted without sudo so the tree belongs to the user running the
# unprivileged mvn build below; -C pins the location the later mv expects.
tar -xf "$spark_tgz" -C "$HOME"
(
  cd "$HOME/spark-1.1.0" || exit 1
  # `clean package` already includes the clean phase, so no separate `mvn clean`.
  mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -DskipTests clean package
)
cd ~
sudo mv "$HOME/spark-1.1.0" /usr/share/spark
# Overwrite rather than append so re-running does not duplicate the PATH entry.
# Single-quoted so $PATH reaches the profile script unexpanded.
printf '%s\n' 'export PATH=$PATH:/usr/share/spark/bin' | sudo tee /etc/profile.d/spark.sh > /dev/null
source /etc/profile.d/spark.sh
## Services ##
# Boot-time service setup. Every call goes through sudo; three of these were
# previously issued without it.
sudo chkconfig ntpd on
sudo service ntpd start
sudo chkconfig iptables off
sudo chkconfig postgresql on
sudo service postgresql start
## PACKAGE CLEANUP ##
# NOTE: `sudo su -` opens an interactive root login shell. The commands up to
# the first `exit` are meant to be entered in that root shell; the last three
# lines then run back in the invoking user's shell.
sudo su -
# Zero free space to aid VM compression. dd has no count here, so it writes
# until the filesystem is full; the file is then removed.
dd if=/dev/zero of=/EMPTY bs=1M
rm -f /EMPTY
# Remove bash history
unset HISTFILE
rm -rf /root/.bash_history
rm -rf /home/vagrant/.bash_history
# Truncate every log file in place. NUL-delimited names and quoting keep
# paths containing spaces or backslashes intact.
find /var/log -type f -print0 | while IFS= read -r -d '' log_file; do : > "$log_file"; done
# Whiteout root: write zeros over (available 1K blocks - 1), then delete.
count=$(df --sync -kP / | tail -n1 | awk '{print $4}')
count=$((count - 1))
dd if=/dev/zero of=/tmp/whitespace bs=1024 count="$count"
rm -rf /tmp/whitespace
# Whiteout /boot the same way.
count=$(df --sync -kP /boot | tail -n1 | awk '{print $4}')
count=$((count - 1))
dd if=/dev/zero of=/boot/whitespace bs=1024 count="$count"
rm -rf /boot/whitespace
# Zero the swap device listed last in /proc/swaps, then recreate and
# re-enable it.
swappart=$(grep -v Filename /proc/swaps | tail -n1 | awk '{print $1}')
if [ -n "$swappart" ]; then
  swapoff "$swappart"
  dd if=/dev/zero of="$swappart"
  mkswap "$swappart"
  swapon "$swappart"
fi
# Clear root's in-memory history and leave the root shell.
history -cw && history -c && exit
# Back in the invoking user's shell: same history cleanup, then log out.
unset HISTFILE
rm -rf .bash_history
history -cw && history -c && exit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment