Bryan Shalloway brshallo

## predict-interval.R
library(tidyverse)
library(tidymodels)

# Control function used as part of `prep_interval()`
ctrl_fit_recipe <- function(x){
  list(fit = workflows::pull_workflow_fit(x),
       recipe = workflows::pull_workflow_prepped_recipe(x)
  )
}

## risk-ratios-sim.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                brshallo
                / risk-ratios-sim.md
            
            
              Created
              March 26, 2024 02:34
            
              
                simulating distribution of bootstrapped risk ratios https://x.com/nntaleb/status/1770385630163312902?s=20
              
          
    library(tidyverse)

sim_risk_ratios <- function(x){
  events <- map2(rep(c(TRUE, FALSE), 5), c(31, 414 - 31, 82, 1492 - 82, 252, 4832 - 252, 423, 11831 - 423, 52, 1509-52), rep) %>% unlist()
  outcomes <- tibble(
    group = map2(c("<8 h", "8<10 h", "10<12 h", "12-16 h", ">16 h"), c(414, 1492, 4832, 11831, 1509), rep) %>% unlist()) %>% 
    mutate(event_sim = sample(events, n(), replace = TRUE)) %>% 
    group_by(group) %>% 
 summarise(risk = mean(event_sim))

  
## rmse-interval.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                brshallo
                / rmse-interval.md
            
            
              Last active
              February 13, 2024 02:12
            
              
                The RMSE intervall method is based on the solution suggested on cross validated: https://stats.stackexchange.com/a/78318/193123
              
          
    library(palmerpenguins)
library(dplyr)

#' @param rmse Root mean squared error on your sample
#' @param df Degrees of Freedom in your model. In this case it should be the
#'   same as the number of observations in your sample.
rmse_interval <- function(rmse, deg_free, p_lower = 0.025, p_upper = 0.975){
  tibble(.pred_lower = sqrt(deg_free / qchisq(p_upper, df = deg_free)) * rmse,
         .pred_upper = sqrt(deg_free / qchisq(p_lower, df = deg_free)) * rmse)

  
## find_in_files.R
library(magrittr)

find_in_files <- function(path, pattern){
  path %>%
    fs::dir_ls(recurse = TRUE, type = "file", regexp = "(\\.[rR])$") %>%
    purrr::map(~grep(pattern, readLines(.x, warn = FALSE), value = TRUE)) %>%
    purrr::keep(~length(.x) > 0)
}

## seattle-units-added-new-permits.md

      
              1 file
            
          
              0 forks
            
          
              1 comment
            
          
              0 stars
            
          
                brshallo
                / seattle-units-added-new-permits.md
            
            
              Created
              July 26, 2023 19:56
            
              
                Housing units added, new permits
              
          
    library(tidyverse)
library(httr)
library(jsonlite)

# downloaded data from: https://data.seattle.gov/Permitting/Building-Permits/76t5-zqzr
data_permits <- read_csv("Building_Permits.csv")

data_permits %>% 
 filter(PermitTypeDesc == "New") %&gt;% 

  
## permits-issued.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                brshallo
                / permits-issued.md
            
            
              Created
              July 26, 2023 20:16
            
              
                Before finding Seattle's API data source I'd at first pulled their permits issued data via their excel sheets. This was kind of hassle... See related gist here: https://gist.github.com/brshallo/7a14235134f8e10139f71c3369f8d50f
              
          
    library(tidyverse)

urls <- tibble(month = month.name, month_num = 1:12) %>% 
  cross_join(tibble(year = 2019:2023)) %>% 
  arrange(year) %>% 
  mutate(year_month = make_date(year = year, month = month_num)) %>% 
  filter(year_month < floor_date(today(), "months")) %>% 
  mutate(urls = paste0("https://www.seattle.gov/documents/Departments/SDCI/Resources/Stats/", year, month, "Summary", ".xlsx"))

  
## rolling-mean-conditioned-date.R
# This example only includes a value in the rolling mean() if the close date on
# the historical dates comes after the snapshot date for row of interest


### CREATE SAMPLE DATA
library(tidyverse)
library(slider)
library(lubridate)

sample_size <- 5000

## rolling-mean-conditioned-on-iteration-date.R
# This example only includes a value in the rolling mean() if the close date on
# the historical dates comes after the snapshot date for row of interest


### CREATE SAMPLE DATA
library(tidyverse)
library(slider)

sample_size <- 5000
obs_per_day <- 100

## conf_mat_weighted.R
library(dplyr)

#' Confusion Matrix With Observation Weights
#'
#' @param df dataframe
#' @param truth Column that represents 'truth'
#' @param extimate Columns that rrepresents class prediction
#' @param wt Column with observation weights.
#' @param scale_weights_one Whether observations in confusion matrix should equal number of observations.
#' @param dnn Character vector of dimnames for the table

## source-rmd-chunks.r
library(magrittr)
library(stringr)
library(readr)
library(purrr)
library(glue)
library(knitr)

source_rmd_chunks <- function(file, chunk_labels, skip_plots = TRUE, output_temp = FALSE){

  temp <- tempfile(fileext=".R")
	library(tidyverse)
	library(tidymodels)

	# Control function used as part of `prep_interval()`
	ctrl_fit_recipe <- function(x){
	list(fit = workflows::pull_workflow_fit(x),
	recipe = workflows::pull_workflow_prepped_recipe(x)
	)
	}
	library(magrittr)

	find_in_files <- function(path, pattern){
	path %>%
	fs::dir_ls(recurse = TRUE, type = "file", regexp = "(\\.[rR])$") %>%
	purrr::map(~grep(pattern, readLines(.x, warn = FALSE), value = TRUE)) %>%
	purrr::keep(~length(.x) > 0)
	}
	# This example only includes a value in the rolling mean() if the close date on
	# the historical dates comes after the snapshot date for row of interest


	### CREATE SAMPLE DATA
	library(tidyverse)
	library(slider)
	library(lubridate)

	sample_size <- 5000
	library(dplyr)

	#' Confusion Matrix With Observation Weights
	#'
	#' @param df dataframe
	#' @param truth Column that represents 'truth'
	#' @param extimate Columns that rrepresents class prediction
	#' @param wt Column with observation weights.
	#' @param scale_weights_one Whether observations in confusion matrix should equal number of observations.
	#' @param dnn Character vector of dimnames for the table
	library(magrittr)
	library(stringr)
	library(readr)
	library(purrr)
	library(glue)
	library(knitr)

	source_rmd_chunks <- function(file, chunk_labels, skip_plots = TRUE, output_temp = FALSE){

	temp <- tempfile(fileext=".R")