Ed Berry eddjberry

## r_vs_py.R
# R -------------------------
# Build a 2 x 3 matrix, filling it row by row (byrow = TRUE).
x <- matrix(c(1, 2, 0, 4, 3, 7), ncol = 3, byrow = TRUE)
x
#       [,1] [,2] [,3]
# [1,]    1    2    0
# [2,]    4    3    7

# row means (NB: R indexes from 1, so MARGIN = 1 selects rows)
apply(x, MARGIN = 1, mean)
# 1.000000 4.666667

## select_pages.tex
\documentclass{article}
\usepackage{pdfpages}

\begin{document}

\includepdf[pages={30, 31, 32, 37}]{/path/to/file}

\end{document}

## time_functions.py
import timeit
import numpy as np
from faker import Faker

# create the faker object
fake = Faker()

# np.random.choice function
def np_choice(N=1000):
  """Sample N distinct integers from 0..N via NumPy.

  The sample is not returned; the call is made only for its execution
  (presumably so it can be timed -- confirm against the rest of the file).
  """
  _ = np.random.choice(N + 1, size=N, replace=False)

## fake_customers.py
import pandas as pd
from faker import Faker

# set the seed
Faker.seed(10)

# set the locale to GB
fake = Faker("en_GB")

# how many customers to fake

## shap_dependence_plot_grid.py
# Dependencies ----------------------
import math
import shap
import matplotlib.pyplot as plt

# shap_dependence_plot_grid ---------
def shap_dependence_plot_grid(cols,
                              shap_values,
                              X,
                              interaction_index = None,

## partial_dependence_data.py
def partial_dependency_data(df, model, col, values, sample_fraction = 0.1):

  # empty list for predictions
  avg_predictions = list()

  # take a sample of the data to use
  df_sample = df.sample(fraction = sample_fraction)

  # loop through the values
  for val in values:

## knit_dir.R
#!/usr/bin/env Rscript

# to run from command line:
## chmod +x knit_dir.R
## ./knit_dir.R <dir-name>

# from https://stackoverflow.com/a/49950761
# to avoid conflicts between packages
# breaking things
clean_search <- function() {

## plot_by_group.R
#=================================================
# geom_line() + geom_ribbon()
#=================================================
# plots by group
plot_by_group <- function(df, x, colour) {
  # create the summary data using group_prop()
  df_summary <- df %>%
    dplyr::filter(!is.na({{ colour }})) %>%
    group_prop({{ x }}, {{ colour }})


## sim_ab_tests.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                eddjberry
                / sim_ab_tests.md
            
            
              Last active
              October 18, 2019 15:42
            
              
                Some functions to simulate simple A/B Tests making use of data.table. (The inline code will work when copied into an Rmd file)
              
          
  title
  author
  date
  
  
  Simulating A/B Tests
  Ed Berry
  04/10/2019
  
  
library(broom)
library(janitor)
library(data.table)

  
## prop_test_power_curves.R
#========================================================#
# Setup
#========================================================#

library(dplyr)
library(ggplot2)
library(here)
library(pwr)
library(scales)
library(stringr)
	# R -------------------------
	# Build a 2 x 3 matrix, filling it row by row (byrow = TRUE).
	x <- matrix(c(1, 2, 0, 4, 3, 7), ncol = 3, byrow = TRUE)
	x
	#       [,1] [,2] [,3]
	# [1,]    1    2    0
	# [2,]    4    3    7

	# row means (NB: R indexes from 1, so MARGIN = 1 selects rows)
	apply(x, MARGIN = 1, mean)
	# 1.000000 4.666667
	\documentclass{article}
	\usepackage{pdfpages}

	\begin{document}

	\includepdf[pages={30, 31, 32, 37}]{/path/to/file}

	\end{document}
	import timeit
	import numpy as np
	from faker import Faker

	# create the faker object
	fake = Faker()

	# np.random.choice function
	def np_choice(N=1000):
		"""Sample N distinct integers from 0..N with np.random.choice.

		The sample is not returned; the call is made only for its execution
		(presumably so it can be timed -- confirm against the rest of the file).
		"""
		# The body must be indented under the def; at the def's own indent
		# Python raises IndentationError.
		np.random.choice(N + 1, N, replace=False)
	import pandas as pd
	from faker import Faker

	# set the seed
	Faker.seed(10)

	# set the locale to GB
	fake = Faker("en_GB")

	# how many customers to fake
	# Dependencies ----------------------
	import math
	import shap
	import matplotlib.pyplot as plt

	# shap_dependence_plot_grid ---------
	def shap_dependence_plot_grid(cols,
	shap_values,
	X,
	interaction_index = None,
	def partial_dependency_data(df, model, col, values, sample_fraction = 0.1):

	# empty list for predictions
	avg_predictions = list()

	# take a sample of the data to use
	df_sample = df.sample(fraction = sample_fraction)

	# loop through the values
	for val in values:
	#!/usr/bin/env Rscript

	# to run from command line:
	## chmod +x knit_dir.R
	## ./knit_dir.R <dir-name>

	# from https://stackoverflow.com/a/49950761
	# to avoid conflicts between packages
	# breaking things
	clean_search <- function() {
	#=================================================
	# geom_line() + geom_ribbon()
	#=================================================
	# plots by group
	plot_by_group <- function(df, x, colour) {
	# create the summary data using group_prop()
	df_summary <- df %>%
	dplyr::filter(!is.na({{ colour }})) %>%
	group_prop({{ x }}, {{ colour }})
	#========================================================#
	# Setup
	#========================================================#

	library(dplyr)
	library(ggplot2)
	library(here)
	library(pwr)
	library(scales)
	library(stringr)