Demetri Pananos (Dpananos)
@Dpananos
Dpananos / ex.R
Created April 13, 2024 16:09
Confounding example
library(tidyverse)
# Genuine confounding example: sex confounds the relationship between drug and death
set.seed(0)
n <- 1000000
is_male <- rbinom(n, 1, 0.5)
drug <- rbinom(n, 1, 0.6 + 0.3*is_male)
y <- rbinom(n, 1, 0.4 - 0.1*drug + 0.4*is_male)
d <- tibble(drug, is_male, y)
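The preview stops once the data are simulated. A minimal continuation (my sketch under the gist's own data-generating model, not code from the gist) contrasting the crude and sex-stratified estimates:

# Crude comparison is confounded; here the risk difference even flips sign (about +0.06)
d %>% group_by(drug) %>% summarise(risk = mean(y))
# Stratifying on sex recovers the true -0.1 risk difference within each stratum
d %>% group_by(is_male, drug) %>% summarise(risk = mean(y), .groups = "drop")
# A linear probability model adjusting for sex also recovers roughly -0.1 for drug
lm(y ~ drug + is_male, data = d)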
@Dpananos
Dpananos / discrete_dynamic.py
Created October 1, 2023 18:58
Discrete dynamics
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import multinomial
import cmdstanpy
def simulate(n_transitions):
    A = 1000
    B = 100
@Dpananos
Dpananos / risk_diff.R
Created July 7, 2023 16:11
Risk Difference using Marginal Effects in ANCOVA-style logistic regression
library(tidyverse)
library(marginaleffects)
g <- LETTERS[1:4]
N <- rpois(length(g), 100000)
theta <- c(0.1, 0.12, 0.14, 0.08)
y <- rbinom(length(g), N, theta)
d <- tibble(g, N, y)
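The preview ends once the aggregated data are built. A plausible continuation following the recipe in the title (an assumed sketch, not the gist's code): fit an aggregated-binomial logistic regression, then ask marginaleffects for the risk differences.

# Successes and failures per group; g enters as a factor
fit <- glm(cbind(y, N - y) ~ g, data = d, family = binomial())
# Average risk differences between groups on the probability scale
# (if the cbind response gives marginaleffects trouble, expand the data
#  to one Bernoulli row per subject first)
avg_comparisons(fit, variables = "g")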
@Dpananos
Dpananos / marginal_effect.R
Created June 17, 2023 19:34
Marginal effect for Cox model
library(survival)
library(tidyverse)
library(broom)
library(marginaleffects)
# Make a toy model
gr <- sample(letters[1:2], size = 10000, replace = TRUE)
g <- as.numeric(gr=='a')
etime <- rexp(length(gr), rate = (5+1*g)/10)
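The preview cuts off at the event times. A hedged sketch of what likely follows, per the title: censor, fit the Cox model, then compute a marginal contrast with marginaleffects. The censoring time and prediction type here are my assumptions, not the gist's.

# Administrative censoring at t = 1 (assumed)
time <- pmin(etime, 1)
status <- as.numeric(etime <= 1)
d <- tibble(time, status, g)
fit <- coxph(Surv(time, status) ~ g, data = d)
# Marginal contrast for g on the relative-risk scale
avg_comparisons(fit, variables = "g", type = "risk")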
@Dpananos
Dpananos / hierarchical_normal.py
Created July 23, 2022 20:54
Implementation of 8 schools, with a twist.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pymc as pm
import arviz as az
N = 50_000
# Conversion counts for 12 treatment and 12 control variants (each out of N trials, presumably)
treatment_conversions = np.array([541, 557, 559, 556, 530, 532, 516, 532, 528, 544, 519, 552])
control_conversions = np.array([496, 524, 486, 500, 516, 475, 507, 475, 490, 506, 512, 489])
import numpy as np
from statsmodels.nonparametric.smoothers_lowess import lowess
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, RepeatedKFold, GridSearchCV, cross_val_score
from sklearn.metrics import make_scorer, brier_score_loss
from sklearn.utils import resample
# Likelihood ratio test: do two multinomial samples share the same cell probabilities?
data = c(27,24,3,14,9,14,4,6,7,8)
m = matrix(data, nrow = 2)
theta_null = colSums(m)/sum(m)
theta_1 = m[1,]/sum(m[1,])
theta_2 = m[2,]/sum(m[2,])
# Null log-likelihood: evaluate both rows at the pooled probabilities
# (evaluating the pooled counts instead would leave non-cancelling multinomial coefficients)
L0 = dmultinom(m[1,], prob = theta_null, log = TRUE) + dmultinom(m[2,], prob = theta_null, log = TRUE)
# Alternative log-likelihood: each row gets its own MLE probabilities
L1 = dmultinom(m[1,], prob = theta_1, log = TRUE) + dmultinom(m[2,], prob = theta_2, log = TRUE)
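The snippet stops at the two log-likelihoods; the statistic itself would follow. The alternative has 2 × 4 free parameters and the null has 4, so the reference distribution is chi-squared with 4 degrees of freedom:

# Likelihood ratio statistic and asymptotic p-value
stat = 2 * (L1 - L0)
pchisq(stat, df = 4, lower.tail = FALSE)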
library(tidyverse)
set.seed(7)
N = 1000
x = rnorm(N)
p = plogis(0.2*x - 0.8)
y = rbinom(N, 1, p)
tibble(x, y) %>%
  mutate(z = cut_number(x, 5)) %>%
  # Plausible completion (the gist preview is truncated at the pipe):
  # empirical event rate within each quintile of x
  group_by(z) %>%
  summarise(p_hat = mean(y), n = n())
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Small linear dataset: y = 2x + 1 plus noise
x = np.random.normal(size = 10)
y = 2*x + 1 + np.random.normal(0, 0.3, size=x.size)
# Grid of candidate (intercept, slope) pairs, presumably for plotting a loss surface
grid = np.linspace(-12, 20, 25)
b0, b1 = np.meshgrid(grid, grid)