Skip to content

Instantly share code, notes, and snippets.

View obar1's full-sized avatar
🏠
Working from home

obar1 obar1

🏠
Working from home
View GitHub Profile
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
# Installs a Gradle distribution under /opt/gradle.
#   - other installed versions are left in place (each unpacks to its own
#     gradle-<version> directory)
#   - upgrade/downgrade by re-running with a different gradle_version
#   - $GRADLE_HOME points to the latest *installed* (not released) version
# The version can be overridden from the environment, e.g.
#   gradle_version=2.10 bash this-script.sh
gradle_version="${gradle_version:-2.9}"
gradle_zip="gradle-${gradle_version}-all.zip"

# Create the install root with sudo, like every later step that writes to it.
sudo mkdir -p /opt/gradle

# -N: only re-download when the remote file is newer than the local copy.
# TLS verification is left enabled; if an outdated CA bundle makes this fail,
# update ca-certificates rather than passing --no-check-certificate.
wget -N "https://services.gradle.org/distributions/${gradle_zip}"

# -u (update) extracts new files and refreshes older ones; the previous -f
# (freshen) only touches files that already exist, so a first-time install
# would extract nothing. -o: never prompt, -q: quiet.
sudo unzip -uoq "${gradle_zip}" -d /opt/gradle

# Relative link target: /opt/gradle/latest -> gradle-<version>.
# -n replaces an existing "latest" link instead of descending into it.
sudo ln -sfn "gradle-${gradle_version}" /opt/gradle/latest

# `sudo printf ... > file` does not work: the redirection is performed by the
# calling (non-root) shell. Pipe into `sudo tee` so the write happens as root.
# Single quotes keep $PATH/$GRADLE_HOME literal in the generated profile script.
printf 'export GRADLE_HOME=/opt/gradle/latest\nexport PATH=$PATH:$GRADLE_HOME/bin\n' \
  | sudo tee /etc/profile.d/gradle.sh > /dev/null
@obar1
obar1 / mapr_python
Last active December 25, 2015 08:25
# Installs build prerequisites, downloads the CPython source tarball for
# version $PV, unpacks it in the current directory and runs ./configure.
# PV is the single place to change the Python version.
export PV=3.4.3

# Elevate per command with sudo. A bare `su` inside a script opens an
# interactive root shell; the following lines would only run after it exits,
# and then without root privileges.
# -y keeps both installs non-interactive.
sudo yum groupinstall -y "Development tools"
sudo yum install -y gcc make openssl-devel openssl

wget "https://www.python.org/ftp/python/${PV}/Python-${PV}.tgz" -O "/var/tmp/Python-${PV}.tgz"

# Equivalent to `gunzip -c ... | tar xvf -`: extracts into the current directory.
tar -xzvf "/var/tmp/Python-${PV}.tgz"

# The tarball is expected to unpack into Python-<version>/, which is where the
# configure script lives; only run it if the directory change succeeded.
cd "Python-${PV}" && ./configure
# Colored prompt of the form user@host:cwd$
#   \u (user) in cyan, \h (host) in green, \w (working dir) in bold yellow;
#   \[ ... \] marks the escape sequences as non-printing for line editing.
PS1="\[\033[36m\]\u\[\033[m\]@\[\033[32m\]\h:\[\033[33;1m\]\w\[\033[m\]\$ "

# Color settings read by ls: CLICOLOR switches color on, LSCOLORS is the palette.
CLICOLOR=1
LSCOLORS=ExFxBxDxCxegedabagacad

# Export everything in one place so child processes inherit the settings.
export PS1 CLICOLOR LSCOLORS

# Default ls flags for interactive use.
alias ls='ls -GFh'
# setup: download Spark 1.6.2 (build for Hadoop 2.6), unpack it under /opt
# and create a "spark" symlink to it in the home directory.
# Download index: http://www.apache.org/dist/spark/spark-1.6.2/
spark_pkg=spark-1.6.2-bin-hadoop2.6

wget "http://www.apache.org/dist/spark/spark-1.6.2/${spark_pkg}.tgz"
sudo mv "${spark_pkg}.tgz" /opt

# The tarball was moved into /opt with sudo, so extract with sudo as well;
# an unprivileged `tar` inside /opt fails when /opt is root-owned.
# --no-same-owner: files end up owned by root instead of the uid/gid recorded
# in the archive (tar run as root preserves archive ownership by default).
sudo tar -xvf "/opt/${spark_pkg}.tgz" -C /opt --no-same-owner

# Back to the home directory, where the convenience link is created.
cd
sudo ln -s "/opt/${spark_pkg}/" spark
@obar1
obar1 / vm_setup.md
Last active November 1, 2016 20:53

tools

yum groupinstall "Development Tools"
yum install net-tools kernel-headers kernel-devel gcc
yum update
reboot

vmware tool

## get the extension
# https://addons.mozilla.org/en-US/firefox/addon/export-cookies/
# login
# https://www.safaribooksonline.com/accounts/login/?next=/s/
## export the cookies
# Replaces the distribution's OpenJDK packages with the Oracle JDK 8 RPM.
cd ~

# Quote the pattern so that yum, not the shell, expands it; unquoted, the
# shell would substitute any matching file names found in the current directory.
sudo yum remove 'java-1.*.0-openjdk-*'

# The Cookie header pre-accepts the Oracle license so the download is not
# redirected to the license page.
wget --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn-pub/java/jdk/8u40-b25/jdk-8u40-linux-x64.rpm"
# latest
wget --no-check-certificate --no-cookies --header "Cookie: oraclelicense=accept-securebackup-cookie" http://download.oracle.com/otn-pub/java/jdk/8u102-b14/jdk-8u102-linux-x64.rpm

### TODO: the install step below still uses the 8u40 RPM; switch it to the
### newer 8u102 download once that has been verified.
# Use sudo for the single privileged command: a bare `su` in a script opens an
# interactive root shell and does not elevate the lines that follow it.
sudo rpm -ivh jdk-8u40-linux-x64.rpm
@obar1
obar1 / clousterdock 16gb
Last active December 1, 2016 13:25
clousterdock 16gb using cloudera clusterdock
# on my centos 7: enable the EPEL repository
cd /tmp
wget https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm

# Show which RPM files are present before installing.
ls *.rpm

# Install the release package that was just downloaded. The file name must
# match the wget target above (epel-release-latest-7.noarch.rpm); the
# previously referenced epel-release-7.noarch.rpm is never fetched here.
sudo yum install epel-release-latest-7.noarch.rpm
# some tgz with random data
#!/bin/bash
for year in `seq 2013 2015`;
do
for month in `seq 1 12`;
do
mkdir $year-$month
echo $RANDOM > $year-$month/ratings.tsv