Skip to content

Instantly share code, notes, and snippets.

View tomz's full-sized avatar

Tom Zeng tomz

View GitHub Profile
@tomz
tomz / install-apache-zeppelin-on-amazon-emr.sh
Last active August 29, 2015 14:26 — forked from andershammar/install-apache-zeppelin-on-amazon-emr.sh
Bootstrap script for installing Apache Zeppelin on an Amazon EMR Cluster.
#!/bin/bash -ex
# Install the build tools: Git (via yum) and Apache Maven 3.3.3 (from a
# binary tarball unpacked under /opt/apache-maven).

# Install Git
sudo yum -y install git

# Install Maven
wget -P /tmp http://apache.mirrors.spacedump.net/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
# -p: this script runs with -e, so a plain mkdir would abort the whole
# run if /opt/apache-maven already exists (e.g. when re-executed).
sudo mkdir -p /opt/apache-maven
sudo tar -xvzf /tmp/apache-maven-3.3.3-bin.tar.gz -C /opt/apache-maven
@tomz
tomz / sparkr-demo
Last active November 11, 2018 13:47 — forked from shivaram/sparkr-demo
SparkR 1.4.1 Demo
# The `sparkPackages=` argument of sparkR.init() was only added in
# Spark 1.4.1. If you are using Spark 1.4, launch SparkR with
#
#   ./bin/sparkR --packages com.databricks:spark-csv_2.10:1.0.3
#
# instead. The call below will work in Spark 1.4.1.
# NOTE(review): `spark_link` is not defined in the visible snippet --
# confirm it is set before this point.
sc <- sparkR.init(
  spark_link,
  sparkPackages = "com.databricks:spark-csv_2.10:1.0.3"
)
sqlContext <- sparkRSQL.init(sc)

# Read the flights CSV through the spark-csv data source.
flights <- read.df(
  sqlContext,
  path = "s3n://sparkr-data/nycflights13.csv",
  source = "com.databricks.spark.csv",
  header = "true"
)
@tomz
tomz / rstudo-sparkr.R
Last active August 29, 2015 14:25 — forked from shivaram/rstudo-sparkr.R
Rstudio local setup
# Set SPARK_HOME and put the R/lib directory beneath it first on the
# library search path, so that library(SparkR) looks there first.
Sys.setenv(SPARK_HOME = "/Users/shivaram/spark-1.4.1")
.libPaths(
  c(
    file.path(Sys.getenv("SPARK_HOME"), "R", "lib"),
    .libPaths()
  )
)
library(SparkR)

# Initialise SparkR with master "local", then the SQL context.
sc <- sparkR.init(master = "local")
sqlContext <- sparkRSQL.init(sc)

# Build a SparkR DataFrame from the `faithful` data set.
df <- createDataFrame(sqlContext, faithful)

# Select one column
head(select(df, df$eruptions))
@tomz
tomz / gist:0e2a5ff23903515c11c0
Last active August 29, 2015 14:25 — forked from sebsto/gist:19b99f1fa1f32cae5d00
Install Maven with Yum on Amazon Linux
# Download the apache-maven repo definition into yum's repo directory.
sudo wget -O /etc/yum.repos.d/epel-apache-maven.repo \
  http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo
# Replace every literal "$releasever" in the repo file with 6.
sudo sed -i 's/\$releasever/6/g' /etc/yum.repos.d/epel-apache-maven.repo
# Install Maven, then print its version.
sudo yum -y install apache-maven
mvn --version
# Install ElasticSearch from its .deb package.
#
# Check http://www.elasticsearch.org/download/ for the latest version of
# ElasticSearch and set ES_VERSION accordingly (it can also be supplied
# through the environment). The version is kept in one place so the
# download URL and the file passed to dpkg can never disagree.
ES_VERSION="${ES_VERSION:-0.90.2}"
ES_DEB="elasticsearch-${ES_VERSION}.deb"

cd ~
sudo apt-get update
sudo apt-get install openjdk-7-jre-headless -y

# NEW WAY / EASY WAY
wget "https://download.elasticsearch.org/elasticsearch/elasticsearch/${ES_DEB}"
sudo dpkg -i "${ES_DEB}"
# Documentation for HAProxy
# http://code.google.com/p/haproxy-docs/w/list
# http://haproxy.1wt.eu/download/1.2/doc/architecture.txt
# NOTES:
# The open-files limit needs to be > 256000; use `ulimit -n` to set it (on most POSIX systems)
global
log 127.0.0.1 local0
log 127.0.0.1 local1 notice