import matplotlib.pyplot as plt
import numpy as np
# One row per intervention: (label, estimate, lower bound, upper bound).
# Going by the variable names these are presumably average treatment effects
# with their confidence-interval bounds -- confirm against the analysis source.
# Keeping each intervention's numbers on one row makes the pairing explicit.
_effect_rows = [
    ('Leadership Digest', 0.045, 0.015, 0.075),
    ('Peer Nudges', 0.032, 0.005, 0.059),
    ('Training Email', 0.018, -0.004, 0.040),
]

# Unpack the rows column-wise into the four index-aligned lists.
interventions, ATEs, lower_CIs, upper_CIs = (
    list(column) for column in zip(*_effect_rows)
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @title
#' Analyze Categorical Variable Combinations to Describe Data Populations
#'
#' @description
#' This function analyzes categorical variables in a data frame to identify
#' the most common combinations of values. It generates all possible combinations
#' of the specified categorical variables (from single variables up to all
#' variables combined) and calculates their frequencies and proportions.
#'
#' The function is useful for understanding the composition of your data,
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the tidyverse (provides `read_csv()`) and PostcodesioR.
# NOTE(review): PostcodesioR is not used in the lines visible here; presumably
# it is used further down for postcode lookups -- confirm.
library(tidyverse)
library(PostcodesioR)

# Customize with your own path
df_with_postcodes <- read_csv(
  "path/data/postcodes.csv"
)

# Update with column name containing postcode
# `[[` extracts the column as a plain vector rather than a one-column data frame.
postcode_column <- df_with_postcodes[["postcode"]]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @title Apply Noise to Specified Columns in a Data Frame
#'
#' @description This function applies a normal distribution-based noise to
#' specified columns in a data frame, grouped by a specified variable. The
#' noise is scaled to a range of -0.2 to 0.2.
#'
#' @param df Data frame to apply the normal distribution to for creating noise.
#' @param group_var String specifying the grouping variable.
#' @param cols Vector of column names to apply the noise to.
#' @param scale_from Numeric value specifying the lower bound of the scaling range.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script simulates a dataset, duplicates it over time, and modifies it to
# create a bell curve-like distribution.
# Set up
library(tidyverse) | |
library(uuid) | |
# Simulate dataset | |
temp_df <- | |
tibble( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @title Perform a Statistical Test
#'
#' @description This function performs a statistical test (e.g., chi-squared, t-test) given a data frame, variable names, and any other parameters needed.
#'
#' @details Insert more detailed information here about what the function does, the assumptions it makes, and how it should be used.
#'
#' @param data A data frame containing the variables of interest.
#' @param var1 A string or symbol specifying the first variable.
#' @param var2 A string or symbol specifying the second variable (if applicable).
#' @param ... Additional arguments passed to the underlying test function.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @title Convert a numeric value into a natural language approximation string
#'
#' @description
#' This function takes a numeric value and returns a string that approximates the value in natural language.
#'
#' @param x A numeric value.
#'
#' @examples
#' approx_num(0.5)
#' # [1] "increased by a half"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# data cleaning and utility | |
import numpy as np | |
import pandas as pd | |
import vivainsights as vi | |
import os | |
# timing code | |
import time | |
import random | |
import sys |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import pandas as pd | |
package_name = "vivainsights" | |
api_endpoint = f"https://pypistats.org/api/packages/{package_name}/overall" | |
response = requests.get(api_endpoint) | |
if response.status_code == 200: | |
data = response.json() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# See <https://rpubs.com/mbounthavong/sample_size_power_analysis_R>
library(pwr)

# Sample size estimation for comparing two proportions.
# `pwr::ES.h()` computes the effect size for two proportions (here 0.60 vs 0.50).
# `n` is not supplied, so `pwr.2p.test()` solves for it: the `n` in the result
# provides the required sample size (per group, according to the pwr
# documentation) at a 0.05 significance level and 0.80 power.
p0 <- pwr.2p.test(h = ES.h(p1 = 0.60, p2 = 0.50), sig.level = 0.05, power = .80)

# Plot the power analysis result.
plot(p0)
NewerOlder