Jeremy Pierre j14159

## assert_match.ml
open Ppxlib

exception No_match

let assert_match_ext =
  Extension.declare
    "ppx_assert_match.assert_match"
    Extension.Context.Expression
    Ast_pattern.(ppat __ __)
    (fun ~loc ~path:_ patt guard ->

## main.go
package main

// mesa holds two integer dimensions: height and width (in that field order).
type mesa struct {
	height, width int
}

type position struct {
	x         int
	y         int

## gist:aa869c3d04cac59e567f
list_test_() ->
    [?_assertMatch({{t_list, t_float}, _},
                   top_typ_of("1.0 : []")),
     ?_assertMatch({{t_list, t_int}, _},
                   top_typ_of("1 : 2 : []")),
     ?_assertMatch({error, _}, top_typ_of("1 : 2.0 : []")),
     ?_assertMatch({{t_arrow,
                     [{unbound, A, _}, {t_list, {unbound, A, _}}],
                     {t_list, {unbound, A, _}}}, _},
                   top_typ_of("f x y = x : y")),

## gist:19d100a556effacd1475

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              1 star
            
          
                j14159
                / gist:19d100a556effacd1475
            
            
              Last active
              August 29, 2015 14:15
            
          
    I'm putting this list together as a sort of reading plan for myself in order to learn more about general cluster scheduling/utilization and various ways of generically programming to them.  Lists of direct links to PDFs here in the order I think makes some sense from skimming reference sections.
Happy to hear of any additions that might be sensible.
The Basics


Google File System since everything references it and data locality is a thing.
Google MapReduce because it's one of the earlier well-known functional approaches to programming against a cluster.
Dryad for a more general (iterative?) programming model.
Quincy for a different take on scheduling.
[Delay Scheduling](h


## gist:404f1dc86aeafff53a12
/*
 * A more recent version of my S3N RDD.  This exists because I needed
 * a reliable way to distribute the fetching of S3 data using instance
 * credentials as well as a simple way to filter out the inputs that
 * I didn't want in the RDD.
 *
 * This version is more eager than the last one and also provides a
 * simple RDD that allows you to tag each line with information about
 * its partition/source.
 *

## gist:88a91fc9b5e926d86f86
#!/bin/sh
#
# Launch mesos-slave in the background, using the value of this instance's
# "mesos-master" EC2 tag as the --master address.

# Instance id and availability zone are read from the instance metadata
# endpoint; the region is the availability zone with one trailing letter
# stripped by sed.
# $(...) is used instead of backticks so the nested double quotes below are
# parsed the same way by every POSIX shell.
INSTANCE_ID="$(curl http://169.254.169.254/latest/meta-data/instance-id)"
REGION="$(curl http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/[a-zA-Z]$//')"

# cut -f5 keeps the fifth tab-separated field of the text output
# (presumably the tag value -- confirm against the aws CLI output format).
ZK_HOSTS="$(aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" "Name=key,Values=mesos-master" --region "$REGION" --output=text | cut -f5)"

# Raise the open-file limit for this shell and the slave it starts.
ulimit -n 200000

# Quote the master address so it always reaches mesos-slave as one argument.
LD_LIBRARY_PATH=/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/jamvm nohup /usr/local/sbin/mesos-slave --master="${ZK_HOSTS}" --ip=0.0.0.0 --isolation=cgroups --no-switch_user --log_dir=/var/log/mesos &

## gist:79631a8beab70f83f1cf
;; Package archives: Marmalade is added at the front of `package-archives',
;; MELPA is appended at the end (third argument t).
(require 'package)
(add-to-list 'package-archives
             (cons "marmalade" "http://marmalade-repo.org/packages/"))
(add-to-list 'package-archives
             (cons "melpa" "http://melpa.milkbox.net/packages/")
             t)

;; Indent with spaces rather than tabs by default.
(setq-default indent-tabs-mode nil)

;; Global key bindings.
(global-set-key (kbd "C-c SPC") 'ace-jump-mode)
(global-set-key (kbd "C-x g") 'magit-status)

## gist:dce718012e971b624236
#!/bin/bash

#
# WARNING:  This will wipe and encrypt the device given.  For Mesos workers,
# this is run on EVERY BOOT so you will constantly lose existing data.
#

# I have based this script on the following links:
# https://github.com/matthew-lucidchart/aws-ephemeral-mounts/blob/master/boot_luks.sh
# http://nineofclouds.blogspot.ca/2013/10/how-to-use-lvm-and-luks-with-ebs-volumes.html

## gist:d5355107ebb0aad59930
val pool = new BoneCP(config)

val getConn = () => pool.getConnection()
val relConn = (c: Connection) => pool.releaseConnection(c)

class MyActor(get: () => Connection, rel: (c: Connection) => Unit) extends Actor {
  lazy val c = get()

  override def preRestart(why: Throwable, msg: Option[Any]): Unit = {
    rel(c)

## gist:ca191b61a73382316f9c
trait PersonClient {
  // supply a router with a pool of PersonDao:
  val personPool: ActorRef

  // how long should we wait for a response from PersonDao:
  val timeoutInMillis: Long

  implicit val timeout = Timeout(timeoutInMillis millis)

  def addPerson(p: Person): Future[Int] =
	open Ppxlib

	exception No_match

	let assert_match_ext =
	Extension.declare
	"ppx_assert_match.assert_match"
	Extension.Context.Expression
	Ast_pattern.(ppat __ __)
	(fun ~loc ~path:_ patt guard ->
	package main

	type mesa struct {
	height int
	width int
	}

	type position struct {
	x int
	y int
	list_test_() ->
	[?_assertMatch({{t_list, t_float}, _},
	top_typ_of("1.0 : []")),
	?_assertMatch({{t_list, t_int}, _},
	top_typ_of("1 : 2 : []")),
	?_assertMatch({error, _}, top_typ_of("1 : 2.0 : []")),
	?_assertMatch({{t_arrow,
	[{unbound, A, _}, {t_list, {unbound, A, _}}],
	{t_list, {unbound, A, _}}}, _},
	top_typ_of("f x y = x : y")),
	/*
	* A more recent version of my S3N RDD. This exists because I needed
	* a reliable way to distribute the fetching of S3 data using instance
	* credentials as well as a simple way to filter out the inputs that
	* I didn't want in the RDD.
	*
	* This version is more eager than the last one and also provides a
	* simple RDD that allows you to tag each line with information about
	* its partition/source.
	*
	#!/bin/sh

	INSTANCE_ID="`curl http://169.254.169.254/latest/meta-data/instance-id`"
	REGION="`curl http://169.254.169.254/latest/meta-data/placement/availability-zone \| sed s'/[a-zA-Z]$//'`"
	ZK_HOSTS="`aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" "Name=key,Values=mesos-master" --region $REGION --output=text \| cut -f5`"

	ulimit -n 200000

	LD_LIBRARY_PATH=/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/jamvm nohup /usr/local/sbin/mesos-slave --master=${ZK_HOSTS} --ip=0.0.0.0 --isolation=cgroups --no-switch_user --log_dir=/var/log/mesos &
	(require 'package)
	(add-to-list 'package-archives
	'("marmalade" . "http://marmalade-repo.org/packages/"))
	(add-to-list 'package-archives
	'("melpa" . "http://melpa.milkbox.net/packages/") t)

	(setq-default indent-tabs-mode nil)

	(define-key global-map (kbd "C-c SPC") 'ace-jump-mode)
	(define-key global-map (kbd "C-x g") 'magit-status)
	#!/bin/bash

	#
	# WARNING: This will wipe and encrypt the device given. For Mesos workers,
	# this is run on EVERY BOOT so you will constantly lose existing data.
	#

	# I have based this script on the following links:
	# https://github.com/matthew-lucidchart/aws-ephemeral-mounts/blob/master/boot_luks.sh
	# http://nineofclouds.blogspot.ca/2013/10/how-to-use-lvm-and-luks-with-ebs-volumes.html
	val pool = new BoneCP(config)

	val getConn = () => pool.getConnection()
	val relConn = (c: Connection) => pool.releaseConnection(c)

	class MyActor(get: () => Connection, rel: (c: Connection) => Unit) extends Actor {
	lazy val c = get()

	override def preRestart(why: Throwable, msg: Option[Any]): Unit = {
	rel(c)
	trait PersonClient {
	// supply a router with a pool of PersonDao:
	val personPool: ActorRef

	// how long should we wait for a response from PersonDao:
	val timeoutInMillis: Long

	implicit val timeout = Timeout(timeoutInMillis millis)

	def addPerson(p: Person): Future[Int] =