Liang-Chi Hsieh (viirya)
@viirya
viirya / gist:1558006
Created January 4, 2012 01:47 — forked from tty/gist:298175
# Basic text search with relevancy for MongoDB.
# See http://blog.tty.nl/2010/02/08/simple-ranked-text-search-for-mongodb/
# Copythingie 2010 - Ward Bekker - ward@tty.nl
#create (or empty) a docs collection
doc_col = MongoMapper.connection.db('example_db').collection('docs')
doc_col.remove({})
#add some sample data
doc_col.insert({ "txt" => "it is what it is"})
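The preview stops before the search itself; per the linked post, documents are scored by how many of the query's words they contain. A minimal Scala sketch of that scoring idea over an in-memory collection (names and data are illustrative, not from the gist):

val docs = Seq("it is what it is", "what is it", "it is a banana")

// Score a document by the number of distinct query terms it contains.
def score(doc: String, query: String): Int = {
  val terms = doc.toLowerCase.split("\\W+").toSet
  query.toLowerCase.split("\\W+").distinct.count(terms.contains)
}

// Keep the matching documents, ranked by descending score.
val ranked = docs
  .map(d => (d, score(d, "what is a banana")))
  .filter(_._2 > 0)
  .sortBy(-_._2)
// ranked.head is ("it is a banana", 3)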
@viirya
viirya / distance.js
Created June 5, 2012 07:22 — forked from clauswitt/distance.js
Get the distance between two (world) coordinates - a nodejs module
/*
 * Simple Node.js module to get the distance between two coordinates.
 *
 * Code transformed from Chris Veness's example code - please refer to his
 * website for licensing questions.
 *
 * Latitude/longitude spherical geodesy formulae & scripts (c) Chris Veness 2002-2011
 *   www.movable-type.co.uk/scripts/latlong.html
 */
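The preview ends before the module's actual code. For reference, a minimal Scala sketch of the haversine formula Veness's scripts are built on (the object and method names are illustrative, not from the gist):

object Haversine {
  private val EarthRadiusKm = 6371.0

  // Great-circle distance between two (lat, lon) points given in degrees, in kilometres.
  def distanceKm(lat1: Double, lon1: Double, lat2: Double, lon2: Double): Double = {
    val dLat = math.toRadians(lat2 - lat1)
    val dLon = math.toRadians(lon2 - lon1)
    val a = math.pow(math.sin(dLat / 2), 2) +
      math.cos(math.toRadians(lat1)) * math.cos(math.toRadians(lat2)) *
        math.pow(math.sin(dLon / 2), 2)
    2 * EarthRadiusKm * math.atan2(math.sqrt(a), math.sqrt(1 - a))
  }
}

// e.g. Haversine.distanceKm(52.2297, 21.0122, 41.8919, 12.5113) is roughly 1316 (Warsaw to Rome)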
@viirya
viirya / en_json.pl
Last active December 14, 2015 11:48
Simple Perl script to encode processed tweets as JSON for visualization on Google Maps.
use strict;
use JSON;
use Data::Dumper;
open(TWEET_STAT, "<$ARGV[0]") or die "Cannot open $ARGV[0]: $!";
my $rows = [];
while (<TWEET_STAT>) {
    chomp;
    # The gist preview is truncated here; the full script parses each line into a hash.
    push @$rows, $_;
}
print encode_json($rows), "\n";

# See: http://www.lecloud.net/post/61401763496/install-update-to-python-2-7-and-latest-pip-on-ec2
# install build tools
sudo yum install make automake gcc gcc-c++ kernel-devel git-core -y
# install python 2.7 and change default python symlink
# python27-devel or python27-python-devel.x86_64
sudo yum install python27-devel -y
sudo rm /usr/bin/python
sudo ln -s /usr/bin/python2.7 /usr/bin/python
# Install cmake, boost, swig, blas and lapack development packages
sudo yum install cmake boost-devel.x86_64 boost-python.x86_64 boost-serialization.x86_64 -y
sudo yum install swig blas-devel.x86_64 lapack-devel.x86_64 -y
# Install Python packages
sudo pip install numpy bitarray
@viirya
viirya / prepareCudaInstanceForDeepLearning.md
Last active April 5, 2017 13:22
Prepare environment on AWS EC2 to run Caffe or other deep learning frameworks

Basic environment

Instance: p2.xlarge

AMI ID: ubuntu/images/hvm-ssd/ubuntu-xenial-16.04-amd64-server-20170221

EBS volume for root: 30GB

Install Ubuntu packages

@viirya
viirya / MiscBenchmark-results.txt
Created May 20, 2019 15:17
MiscBenchmark-results.txt
================================================================================================
filter & aggregate without group
================================================================================================
OpenJDK 64-Bit Server VM 1.8.0_212-8u212-b03-0ubuntu1.18.04.1-b03 on Linux 4.15.0-1021-aws
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
range/filter/sum:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)   Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
range/filter/sum wholestage off              46264           47546        1814        45.3          22.1       1.0X
range/filter/sum wholestage on                3156            3523         206       664.5           1.5      14.7X
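The two rows differ only in whether whole-stage code generation is enabled. A minimal Scala sketch of the kind of query being measured, so the on/off comparison can be reproduced by hand (the row count is illustrative, not the benchmark's exact setup):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("filter-sum").getOrCreate()

// Flip between "true" and "false" to compare the two benchmark rows.
spark.conf.set("spark.sql.codegen.wholeStage", "false")

// Range, then filter, then an aggregate without grouping, as in the section title.
spark.range(500L * 1000 * 1000)
  .filter("(id % 2) = 0")
  .selectExpr("sum(id)")
  .show()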
@viirya
viirya / gist:8f96ec46424379a83dd2ca23f3c0a1ff
Last active December 12, 2020 18:11
How to run KubernetesSuite in Spark
1. Install minikube
2. Start minikube with enough CPUs and memory:
minikube start --memory='8196mb' --cpus=4
3. Spark's pods don't specify a service account, so they run as "default". Since Spark creates the pods, the "default" service account needs
enough permissions. Create a role with kubectl and bind it to the "default" service account:
kubectl create role default --verb=get,list,watch,create,update,patch,delete --resource=pods,pods/status
kubectl create rolebinding default-binding --role=default --serviceaccount=default:default --namespace=default
4. Build the Spark images. Remember to build the PySpark image too:
./bin/docker-image-tool.sh -m -t dev -p resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile build
@viirya
viirya / nestedColumnExtractor.scala
Created January 24, 2020 19:29
Snippet for extracting a nested column from an input row in Spark
import java.io.{ByteArrayOutputStream, File}
import java.nio.charset.StandardCharsets
import java.sql.{Date, Timestamp}
import java.util.UUID
import java.util.concurrent.atomic.AtomicLong
import scala.util.Random
import org.scalatest.Matchers._
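The preview shows only the test file's imports. A minimal sketch of one way to pull a nested field out of an InternalRow with Catalyst expressions (the person schema and values here are illustrative, not from the gist):

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{BoundReference, GetStructField}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

// Input schema: a single struct column `person` with fields (name: String, age: Int).
val personType = StructType(Seq(
  StructField("name", StringType),
  StructField("age", IntegerType)))

// One input row holding one struct value.
val row = InternalRow(InternalRow(UTF8String.fromString("someone"), 30))

// Bind to column 0 of the input row, then select field ordinal 1 ("age") from the struct.
val person = BoundReference(0, personType, nullable = true)
val age = GetStructField(person, 1, Some("age"))

assert(age.eval(row) == 30)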