John Ramey ramhiser

## spark-master-controller.log
16/09/06 19:00:49 INFO Master: Registered signal handlers for [TERM, HUP, INT]
16/09/06 19:00:50 INFO SecurityManager: Changing view acls to: root
16/09/06 19:00:50 INFO SecurityManager: Changing modify acls to: root
16/09/06 19:00:50 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
16/09/06 19:00:51 INFO Slf4jLogger: Slf4jLogger started
16/09/06 19:00:51 INFO Remoting: Starting remoting
16/09/06 19:00:51 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
16/09/06 19:00:51 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
16/09/06 19:00:51 INFO Master: Starting Spark master at spark://spark-master:7077
16/09/06 19:00:51 INFO Master: Running Spark version 1.5.2

## bayesian-billiards.r
# Problem Definition: https://priorprobability.com/2014/04/27/bayesian-billiards/
# Referenced Shiny app: http://jason.bryer.org/posts/2016-02-21/Bayes_Billiards_Shiny.html

library(dplyr)

# Fix the RNG seed so the simulated values below are reproducible.
set.seed(424242)
# The "unknown" success probability: a single draw from Uniform(0, 1).
true_p <- runif(1)

# Simulate `num_draws` independent 0/1 outcomes (sampling with replacement),
# where 1 is drawn with probability `true_p` and 0 with probability 1 - true_p.
num_draws <- 1000
draws <- sample(c(0, 1), num_draws, replace=TRUE, prob=c(1-true_p, true_p))

## cloudvision.py
# Implemented with urllib/urllib2 instead of *requests* to avoid third-party dependencies.
import urllib
import urllib2

import argparse
import base64
import json

# URL of the Cloud Vision v1 `images:annotate` REST method.
API_URL = 'https://vision.googleapis.com/v1/images:annotate'
# NOTE(review): this looks like a placeholder, not a usable key -- presumably
# it must be replaced with a real API key before running; confirm.
API_KEY = 'FLUFFY BUNNIES'

## remove_substrings.py
from collections import defaultdict

def remove_substrings(words):
    """Remove any substrings of other strings in the list.

    O(n) solution from...
    Source: http://stackoverflow.com/a/24049808/234233
    """
    longest = defaultdict(str)
    for word in words:                                 # O(n)

## download-youtube.py
import argparse
import logging

import requests
from lxml import html
from pytube import YouTube


def get_youtube_links(youtube_userid):
    YT_ROOT = r'http://www.youtube.com'

## tornado-example.py
import tornado
import tornado.ioloop
import tornado.web

from tornado.httpclient import AsyncHTTPClient


class MainHandler(tornado.web.RequestHandler):
    @gen.coroutine
    def get(self):

## sphinx2vtt.py
#!/usr/bin/env python
import argparse
import sys
import time
from itertools import izip, count


def parse_sphinx_line(line):
    '''Parse a line from Sphinx's closed captioning alignment'''
    line_split = line.split()

## awk.ftw
# For each row of a CSV, print the number of characters in a given column (example: 2nd column)
awk -F, '{print length($2)}'

## sample.js
// Weighted random sample from a vector
//
// By default, the `weights` are set to 1. This equates to equal weighting.
// Loosely based on http://codereview.stackexchange.com/a/4265
//
// If any weight is `null`, revert to default weights (i.e., all 1).
//
// A random-number generator (RNG) seed is optionally set via seedrandom.js.
// NOTE: The JS file is loaded via jQuery.
// Details: https://github.com/davidbau/seedrandom

## thd.py
%matplotlib inline
from yahoo_finance import Share
import matplotlib.pylab
import pandas as pd
import numpy as np

# Create a yahoo_finance Share object for the 'HD' ticker symbol.
thd = Share('HD')
# Request historical data between 2010-01-01 and 2015-06-01.
# NOTE(review): presumably this returns a list of per-day records and
# performs a network request -- confirm against the yahoo_finance docs.
thd_prices = thd.get_historical('2010-01-01', '2015-06-01')

# Convert the returned data into a pandas DataFrame, rebinding the same name.
thd_prices = pd.DataFrame(thd_prices)
	16/09/06 19:00:49 INFO Master: Registered signal handlers for [TERM, HUP, INT]
	16/09/06 19:00:50 INFO SecurityManager: Changing view acls to: root
	16/09/06 19:00:50 INFO SecurityManager: Changing modify acls to: root
	16/09/06 19:00:50 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
	16/09/06 19:00:51 INFO Slf4jLogger: Slf4jLogger started
	16/09/06 19:00:51 INFO Remoting: Starting remoting
	16/09/06 19:00:51 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
	16/09/06 19:00:51 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
	16/09/06 19:00:51 INFO Master: Starting Spark master at spark://spark-master:7077
	16/09/06 19:00:51 INFO Master: Running Spark version 1.5.2
	# Problem Definition: https://priorprobability.com/2014/04/27/bayesian-billiards/
	# Referenced Shiny app: http://jason.bryer.org/posts/2016-02-21/Bayes_Billiards_Shiny.html

	library(dplyr)

	set.seed(424242)
	true_p <- runif(1)

	num_draws <- 1000
	draws <- sample(c(0, 1), num_draws, replace=TRUE, prob=c(1-true_p, true_p))
	# Did this without requests to avoid dependencies.
	import urllib
	import urllib2

	import argparse
	import base64
	import json

	API_URL = 'https://vision.googleapis.com/v1/images:annotate'
	API_KEY = 'FLUFFY BUNNIES'
	from collections import defaultdict

	def remove_substrings(words):
	"""Remove any substrings of other strings in the list.

	O(n) solution from...
	Source: http://stackoverflow.com/a/24049808/234233
	"""
	longest = defaultdict(str)
	for word in words: # O(n)
	import argparse
	import logging

	import requests
	from lxml import html
	from pytube import YouTube


	def get_youtube_links(youtube_userid):
	YT_ROOT = r'http://www.youtube.com'
	import tornado
	import tornado.ioloop
	import tornado.web

	from tornado.httpclient import AsyncHTTPClient


	class MainHandler(tornado.web.RequestHandler):
	@gen.coroutine
	def get(self):
	#!/usr/bin/env python
	import argparse
	import sys
	import time
	from itertools import izip, count


	def parse_sphinx_line(line):
	'''Parse a line from Sphinx's closed captioning alignment'''
	line_split = line.split()
	# Print each row's number of characters in column of CSV (example: 2nd column)
	awk -F, '{print length($2)}'
	// Weighted random sample from a vector
	//
	// By default, the `weights` are set to 1. This equates to equal weighting.
	// Loosely based on http://codereview.stackexchange.com/a/4265
	//
	// If any weight is `null`, revert to default weights (i.e., all 1).
	//
	// A random-number generator (RNG) seed is optionally set via seedrandom.js.
	// NOTE: The JS file is loaded via jQuery.
	// Details: https://github.com/davidbau/seedrandom
	%matplotlib inline
	from yahoo_finance import Share
	import matplotlib.pylab
	import pandas as pd
	import numpy as np

	thd = Share('HD')
	thd_prices = thd.get_historical('2010-01-01', '2015-06-01')

	thd_prices = pd.DataFrame(thd_prices)