Skip to content

Instantly share code, notes, and snippets.

View tomz's full-sized avatar

Tom Zeng tomz

View GitHub Profile
@tomz
tomz / rstudio_sparkr_emr4-proc.sh
Created October 29, 2016 18:38
rstudio_sparkr_emr4-proc.sh
#!/bin/bash
set -x -e
# AWS EMR bootstrap script
# for installing open-source R (www.r-project.org) with RHadoop packages and RStudio on AWS EMR
#
# tested with AMI 4.0.0 (hadoop 2.6.0)
#
# schmidbe@amazon.de
# 24. September 2014
@tomz
tomz / rstudio_sparkr_emr4.sh
Created October 29, 2016 18:37
rstudio_sparkr_emr4.sh
#!/bin/bash
set -x -e
# Usage:
# --no-rstudio - don't install rstudio-server
# --sparklyr - install RStudio's sparklyr package
# --sparkr - install SparkR package
# --shiny - install Shiny server
#
# --user - set user for rstudio, default "hadoop"
# JRuby setup for a Spark script: load the jars, import the Java classes, read AWS credentials.
# Put every jar found in ./spark/lib on the classpath (requiring a .jar file is a JRuby feature).
Dir["./spark/lib/*.jar"].each { |jar| require jar }
# JDBC driver jar; judging by the file name this is a SQL Server driver — TODO confirm.
require "~/sqljdbc/sqljdbc42.jar"
# Java classes made available by short name; where they are used is not visible in this excerpt.
import java.util.HashMap
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.SQLContext
# AWS credentials are read from the environment; each constant is nil when its variable is unset.
AWS_KEY = ENV["AWS_KEY"]
AWS_SECRET = ENV["AWS_SECRET"]
# prestogres jdbc
# JRuby script: prepares the JDBC URL and connection properties for a PostgreSQL-protocol endpoint.
require "java"
java_import java.util.Properties
# Load the PostgreSQL JDBC driver jar from the current directory.
require "./postgresql-9.4-1203.jdbc41.jar"
# Reference the driver class so JRuby loads it — presumably to register it with DriverManager; TODO confirm.
Java::org.postgresql.Driver
# Target: database "hive" on localhost, port 5439.
db_url = "jdbc:postgresql://localhost:5439/hive"
props = Properties.new
# NOTE(review): protocol version 2 is presumably what the Prestogres endpoint requires — verify.
props.set_property "protocolVersion", "2"
props.set_property "user","presto"
@tomz
tomz / install-apache-zeppelin-on-amazon-emr.sh
Last active August 29, 2015 14:26 — forked from andershammar/install-apache-zeppelin-on-amazon-emr.sh
Bootstrap script for installing Apache Zeppelin on an Amazon EMR Cluster.
#!/bin/bash -ex
# First steps of the bootstrap script: install Git and Apache Maven 3.3.3.
# The script runs with -e, so any failing command aborts the bootstrap.

# Install Git
sudo yum -y install git

# Install Maven
# Download from archive.apache.org rather than a third-party mirror: mirrors
# only carry current releases, so a pinned old version eventually disappears
# from them, while the archive keeps every release. HTTPS also protects the
# tarball in transit.
wget -P /tmp https://archive.apache.org/dist/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
# -p keeps the script from aborting when /opt/apache-maven already exists
# (e.g. when the bootstrap is re-run on the same node).
sudo mkdir -p /opt/apache-maven
sudo tar -xvzf /tmp/apache-maven-3.3.3-bin.tar.gz -C /opt/apache-maven
@tomz
tomz / sparkr-demo
Last active November 11, 2018 13:47 — forked from shivaram/sparkr-demo
SparkR 1.4.1 Demo
# SparkR demo: read the nycflights13 CSV through the spark-csv package.
#
# If you are using Spark 1.4 (before 1.4.1), launch SparkR with the command
#
#   ./bin/sparkR --packages com.databricks:spark-csv_2.10:1.0.3
#
# because the `sparkPackages=` argument was only added in Spark 1.4.1.
#
# The call below works in Spark 1.4.1.
# NOTE(review): `spark_link` is not defined in this excerpt — it must be set before this line runs.
sc <- sparkR.init(spark_link, sparkPackages = "com.databricks:spark-csv_2.10:1.0.3")
# SQL context built on top of the Spark context.
sqlContext <- sparkRSQL.init(sc)
# Load the CSV via the spark-csv data source; header="true" is passed through to that source.
flights <- read.df(sqlContext, "s3n://sparkr-data/nycflights13.csv","com.databricks.spark.csv", header="true")
@tomz
tomz / rstudo-sparkr.R
Last active August 29, 2015 14:25 — forked from shivaram/rstudo-sparkr.R
Rstudio local setup
# Use SparkR from an R session against a local Spark installation.
# Point SPARK_HOME at the local Spark directory (machine-specific path — adjust before running).
Sys.setenv(SPARK_HOME="/Users/shivaram/spark-1.4.1")
# Prepend $SPARK_HOME/R/lib to the library search path so library(SparkR) can find the package.
.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
library(SparkR)
# Start a Spark context with master "local" and a SQL context on top of it.
sc <- sparkR.init(master="local")
sqlContext <- sparkRSQL.init(sc)
# Convert R's built-in `faithful` data frame into a Spark DataFrame.
df <- createDataFrame(sqlContext, faithful)
# Select one column
head(select(df, df$eruptions))
@tomz
tomz / gist:0e2a5ff23903515c11c0
Last active August 29, 2015 14:25 — forked from sebsto/gist:19b99f1fa1f32cae5d00
Install Maven with Yum on Amazon Linux
# Install Apache Maven through yum using the epel-apache-maven repository definition.
# Download the repo file straight into yum's repository directory.
sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo
# Replace the literal text "$releasever" in the repo file with 6
# (presumably because Amazon Linux's release version does not match the repo's EL numbering — TODO confirm).
sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo
sudo yum install -y apache-maven
# Print the installed Maven version to confirm the installation.
mvn --version
// Open the local single-page app with CasperJS and print the rendered HTML
// of the whole document to stdout.
var casper = require('casper').create();
var url = 'http://localhost:3000/#!/';

casper.start(url, function() {
    // evaluate() runs the callback inside the page and passes its result back
    // in serialized form, so a live DOM object such as `document` cannot be
    // returned reliably. Return the markup of the root <html> element as a
    // plain string instead (the same element the original reached through
    // document.all[0]).
    var html = this.evaluate(function() {
        return document.documentElement.outerHTML;
    });
    this.echo(html);
});

casper.run();