Martin Chan martinctc

## ci-plots.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                martinctc
                / ci-plots.md
            
            
              Last active
              September 11, 2025 09:33
            
              
                [Python and R code for Causal Inference plots] #Python #R
              
          
    Forest plot

import matplotlib.pyplot as plt
import numpy as np

# Forest-plot inputs, kept as one row per intervention so each label stays
# next to its own numbers, then split into the four parallel lists.
_effects = [
    # (intervention, ATE, lower CI, upper CI)
    ("Leadership Digest", 0.045, 0.015, 0.075),
    ("Peer Nudges", 0.032, 0.005, 0.059),
    ("Training Email", 0.018, -0.004, 0.040),
]
interventions, ATEs, lower_CIs, upper_CIs = (list(col) for col in zip(*_effects))

  
## describe_categorical_combinations.R
#' @title
#' Analyze Categorical Variable Combinations to Describe Data Populations
#'
#' @description
#' This function analyzes categorical variables in a data frame to identify
#' the most common combinations of values. It generates all possible combinations
#' of the specified categorical variables (from single variables up to all
#' variables combined) and calculates their frequencies and proportions.
#'
#' The function is useful for understanding the composition of your data,

## Postcodes to location.R
library(tidyverse)
library(PostcodesioR)

# Read the postcode data; replace the path with the location of your own CSV
df_with_postcodes <- read_csv(file = "path/data/postcodes.csv")

# Pull out the postcode column; change "postcode" if your column is named
# differently
postcode_column <- df_with_postcodes[["postcode"]]

## apply_noise.R
#' @title Apply Noise to Specified Columns in a Data Frame
#'
#' @description This function applies normally distributed noise to specified
#'   columns in a data frame, grouped by a specified variable. The noise is
#'   scaled to a range of -0.2 to 0.2.
#'
#' @param df Data frame containing the columns to which noise is applied.
#' @param group_var String specifying the grouping variable.
#' @param cols Vector of column names to apply the noise to.
#' @param scale_from Numeric value specifying the lower bound of the scaling range.

## simulate_and_modify_by_rnorm.R
# This script simulates a dataset, duplicates it over time, and modifies it to
# create a bell curve-like distribution.

# Set up
library(tidyverse)
library(uuid)

# Simulate dataset
temp_df <-
  tibble(

## run-stats-tests.R
#' @title Perform a Statistical Test
#'
#' @description This function performs a statistical test (e.g., chi-squared, t-test) given a data frame, variable names, and any other parameters needed.
#'
#' @details Insert more detailed information here about what the function does, the assumptions it makes, and how it should be used.
#'
#' @param data A data frame containing the variables of interest.
#' @param var1 A string or symbol specifying the first variable.
#' @param var2 A string or symbol specifying the second variable (if applicable).
#' @param ... Additional arguments passed to the underlying test function.

## approx_num.R
#' @title Convert a numeric value into a natural language approximation string
#'
#' @description
#' This function takes a numeric value and returns a string that approximates the value in natural language.
#'
#' @param x A numeric value.
#'
#' @examples
#' approx_num(0.5)
#' # [1] "increased by a half"

## test-python-rf-runtime.py
# data cleaning and utility
import numpy as np
import pandas as pd
import vivainsights as vi
import os

# timing code
import time
import random
import sys

## get-pypi-stats.py
import requests
import pandas as pd

package_name = "vivainsights"
api_endpoint = f"https://pypistats.org/api/packages/{package_name}/overall"

response = requests.get(api_endpoint)

if response.status_code == 200:
    data = response.json()

## power-analysis.R
# See <https://rpubs.com/mbounthavong/sample_size_power_analysis_R>

library(pwr)

# Required sample size for comparing two proportions (0.60 vs 0.50)
# at a 5% significance level and 80% power.
# `pwr::ES.h()` converts the two proportions into the effect size `h`;
# the `n` reported in the result is the required sample size.

p0 <- pwr.2p.test(
  h = ES.h(p1 = 0.60, p2 = 0.50),
  sig.level = 0.05,
  power = 0.80
)
plot(p0)
	#' @title
	#' Analyze Categorical Variable Combinations to Describe Data Populations
	#'
	#' @description
	#' This function analyzes categorical variables in a data frame to identify
	#' the most common combinations of values. It generates all possible combinations
	#' of the specified categorical variables (from single variables up to all
	#' variables combined) and calculates their frequencies and proportions.
	#'
	#' The function is useful for understanding the composition of your data,
	library(tidyverse)
	library(PostcodesioR)

	# Read the postcode data; replace the path with the location of your own CSV
	df_with_postcodes <- read_csv(file = "path/data/postcodes.csv")

	# Pull out the postcode column; change "postcode" if your column is named
	# differently
	postcode_column <- df_with_postcodes[["postcode"]]
	#' @title Apply Noise to Specified Columns in a Data Frame
	#'
	#' @description This function applies normally distributed noise to specified
	#' columns in a data frame, grouped by a specified variable. The noise is
	#' scaled to a range of -0.2 to 0.2.
	#'
	#' @param df Data frame containing the columns to which noise is applied.
	#' @param group_var String specifying the grouping variable.
	#' @param cols Vector of column names to apply the noise to.
	#' @param scale_from Numeric value specifying the lower bound of the scaling range.
	# This script simulates a dataset, duplicates it over time, and modifies it to
	# create a bell curve-like distribution.

	# Set up
	library(tidyverse)
	library(uuid)

	# Simulate dataset
	temp_df <-
	tibble(
	#' @title Perform a Statistical Test
	#'
	#' @description This function performs a statistical test (e.g., chi-squared, t-test) given a data frame, variable names, and any other parameters needed.
	#'
	#' @details Insert more detailed information here about what the function does, the assumptions it makes, and how it should be used.
	#'
	#' @param data A data frame containing the variables of interest.
	#' @param var1 A string or symbol specifying the first variable.
	#' @param var2 A string or symbol specifying the second variable (if applicable).
	#' @param ... Additional arguments passed to the underlying test function.
	#' @title Convert a numeric value into a natural language approximation string
	#'
	#' @description
	#' This function takes a numeric value and returns a string that approximates the value in natural language.
	#'
	#' @param x A numeric value.
	#'
	#' @examples
	#' approx_num(0.5)
	#' # [1] "increased by a half"
	# data cleaning and utility
	import numpy as np
	import pandas as pd
	import vivainsights as vi
	import os

	# timing code
	import time
	import random
	import sys
	import requests
	import pandas as pd

	package_name = "vivainsights"
	api_endpoint = f"https://pypistats.org/api/packages/{package_name}/overall"

	response = requests.get(api_endpoint)

	if response.status_code == 200:
	data = response.json()
	# See <https://rpubs.com/mbounthavong/sample_size_power_analysis_R>

	library(pwr)

	# Required sample size for comparing two proportions (0.60 vs 0.50)
	# at a 5% significance level and 80% power.
	# `pwr::ES.h()` converts the two proportions into the effect size `h`;
	# the `n` reported in the result is the required sample size.

	p0 <- pwr.2p.test(
	  h = ES.h(p1 = 0.60, p2 = 0.50),
	  sig.level = 0.05,
	  power = 0.80
	)
	plot(p0)