Cameron Davidson-Pilon CamDavidsonPilon

## 538.json
{
  "lines.linewidth": 2.0,
  "examples.download": true,
  "patch.linewidth": 0.5,
  "legend.fancybox": true,
  "axes.color_cycle": [
    "#30a2da",
    "#fc4f30",
    "#e5ae38",
    "#6d904f",

## load_538.py
import matplotlib
import json

# 538.json is from https://gist.github.com/CamDavidsonPilon/5238b6499b14604367ac
# Read the rcParams overrides inside a context manager so the file handle is
# closed deterministically rather than left to the garbage collector.
with open("538.json") as style_file:
    s = json.load(style_file)

# Apply every key/value pair from the JSON file to matplotlib's global rcParams.
matplotlib.rcParams.update(s)

# plots now use FiveThirtyEight styles

## class_imbalance.py
import numpy as np
from numpy.random import binomial, beta
import pandas as pd

# 1-D integer array of the multiples of 750 from 750 up to 21750 (29 values).
N = np.arange(1, 30) * 750
# NOTE(review): presumably the probability of assignment to one group -- confirm
# against the code that consumes it.
p_assignment = 0.5

def sample_beta_posterior(N, C):
    """Draw a sample from a Beta(1 + C, N - C + 1) distribution.

    Both arguments are forwarded directly into the shape parameters of
    ``numpy.random.beta``. Presumably ``N`` is a number of trials and ``C`` a
    number of successes -- confirm against callers.
    """
    shape_a = 1 + C
    shape_b = N - C + 1
    return beta(shape_a, shape_b)

## variances.csv
,0.01,0.04,0.06,0.09,0.11,0.14,0.16,0.19,0.22,0.24,0.27,0.29,0.32,0.35,0.37,0.40,0.42,0.45,0.47,0.50
750,5.3862512173306086e-05,0.00018431921564253194,0.00031355844405735097,0.00042940089912076783,0.0005413071841306608,0.0006332685115330671,0.0007476803364486353,0.0008116122112611769,0.0009076983192572439,0.0009633715254949598,0.0010418319478846378,0.0011075363347131727,0.0011778591911268729,0.00121129482248262,0.001259617421627161,0.0012865336098251596,0.0012832222488798127,0.001354041119620631,0.0013650443271181398,0.001315568626527553
1500,2.650184911783411e-05,9.209479830082467e-05,0.0001528774978470515,0.00020787484098875884,0.0002699076554215256,0.0003258727426287213,0.00035788667314687285,0.0004111510809490972,0.0004359117655034314,0.00047771994212569596,0.0005288493018160943,0.0005515370514559378,0.000580798803038502,0.000606253199804767,0.0006222858253985169,0.0006462444695299379,0.0006482967265578801,0.0006743000904418961,0.0006642287272113608,0.0006635825250303563
2250,1.754325587021407e-05,6.23366

## test_name.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                CamDavidsonPilon
                / test_name.ipynb
            
            
              Created
              November 5, 2014 00:53
            
              
                test name gist
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## gist:7bcdebce13959964269f
{
 "metadata": {
  "name": "",
  "signature": "sha256:6c05f68f3874a728b39c0c5b111d6f90a2c8408facda8e5807cccf7fa086e4c6"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [

## one_bucket_theta.py
from random import randint
from math import sqrt


def theta_join(S,T, join_condition = lambda s,t: s==t):
    """
    This implements the 1-bucket-theta algorithm found in
    http://www.ccs.neu.edu/home/mirek/papers/2011-SIGMOD-ParallelJoins.pdf

    S,T: rdds of (key, value)

## example.py
# after importing one_bucket_theta.py, https://gist.github.com/CamDavidsonPilon/8750e37242c4942c1984

from datetime import datetime

S = sc.parallelize([
    {'start': datetime(2000,10,11), 'end': datetime(2001,01,01)},
    {'start': datetime(2001,01,01), 'end': datetime(2002,01,01)},
    {'start': datetime(2002,01,01), 'end': datetime(2003,01,01)},
    {'start': datetime(2003,01,01), 'end': datetime(2004,01,01)},
    {'start': datetime(2004,01,01), 'end': datetime(2005,01,01)},

## coinflips.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                CamDavidsonPilon
                / coinflips.ipynb
            
            
              Created
              January 7, 2015 00:15
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## original.py
import pymc as pymc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def linear_setup ( ind_cols, dep_col):
    '''
        Inputs:list of strings for the independent variables,
        single string for the dependent variable
        Output: PyMC Model
	{
	"lines.linewidth": 2.0,
	"examples.download": true,
	"patch.linewidth": 0.5,
	"legend.fancybox": true,
	"axes.color_cycle": [
	"#30a2da",
	"#fc4f30",
	"#e5ae38",
	"#6d904f",
	import matplotlib
	import json

	# 538.json is from https://gist.github.com/CamDavidsonPilon/5238b6499b14604367ac
	s = json.load( open("538.json") )
	matplotlib.rcParams.update(s)

	# plots now use FiveThirtyEight styles
	import numpy as np
	from numpy.random import binomial, beta
	import pandas as pd

	N = np.r_[ 750*np.arange(1,30) ]
	p_assignment = 0.5

	def sample_beta_posterior(N, C):
	return beta(1+C,N-C+1)
	,0.01,0.04,0.06,0.09,0.11,0.14,0.16,0.19,0.22,0.24,0.27,0.29,0.32,0.35,0.37,0.40,0.42,0.45,0.47,0.50
	750,5.3862512173306086e-05,0.00018431921564253194,0.00031355844405735097,0.00042940089912076783,0.0005413071841306608,0.0006332685115330671,0.0007476803364486353,0.0008116122112611769,0.0009076983192572439,0.0009633715254949598,0.0010418319478846378,0.0011075363347131727,0.0011778591911268729,0.00121129482248262,0.001259617421627161,0.0012865336098251596,0.0012832222488798127,0.001354041119620631,0.0013650443271181398,0.001315568626527553
	1500,2.650184911783411e-05,9.209479830082467e-05,0.0001528774978470515,0.00020787484098875884,0.0002699076554215256,0.0003258727426287213,0.00035788667314687285,0.0004111510809490972,0.0004359117655034314,0.00047771994212569596,0.0005288493018160943,0.0005515370514559378,0.000580798803038502,0.000606253199804767,0.0006222858253985169,0.0006462444695299379,0.0006482967265578801,0.0006743000904418961,0.0006642287272113608,0.0006635825250303563
	2250,1.754325587021407e-05,6.23366
	{
	"metadata": {
	"name": "",
	"signature": "sha256:6c05f68f3874a728b39c0c5b111d6f90a2c8408facda8e5807cccf7fa086e4c6"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	from random import randint
	from math import sqrt


	def theta_join(S,T, join_condition = lambda s,t: s==t):
	"""
	This implements the 1-bucket-theta algorithm found in
	http://www.ccs.neu.edu/home/mirek/papers/2011-SIGMOD-ParallelJoins.pdf

	S,T: rdds of (key, value)
	# after importing one_bucket_theta.py, https://gist.github.com/CamDavidsonPilon/8750e37242c4942c1984

	from datetime import datetime

	S = sc.parallelize([
	{'start': datetime(2000,10,11), 'end': datetime(2001,01,01)},
	{'start': datetime(2001,01,01), 'end': datetime(2002,01,01)},
	{'start': datetime(2002,01,01), 'end': datetime(2003,01,01)},
	{'start': datetime(2003,01,01), 'end': datetime(2004,01,01)},
	{'start': datetime(2004,01,01), 'end': datetime(2005,01,01)},
	import pymc as pymc
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	def linear_setup ( ind_cols, dep_col):
	'''
	Inputs:list of strings for the independent variables,
	single string for the dependent variable
	Output: PyMC Model