This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(broom) | |
library(purrr) | |
create_data = function(n, trt) { | |
tibble(donut = rnorm(n), | |
sleep = rnorm(n)) %>% | |
mutate(lifting = rnorm(n) + .1*sleep + .3*donut, | |
coffee = rnorm(n) + .2*sleep, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(purrr) | |
# Predicted values from a straight line.
#   x    - coefficient vector: x[1] is used as the intercept, x[2] as the slope
#   xdat - numeric vector of predictor values
# Returns x[1] + x[2]*xdat, one value per element of xdat.
predfunc = function(x, xdat) x[1] + x[2]*xdat
# Sum of absolute residuals, each raised to the power pwr, for the line given by x.
#   x    - coefficient vector passed on to predfunc()
#   xdat - numeric vector of predictor values
#   ydat - numeric vector of observed outcomes
#   pwr  - exponent applied to each absolute residual
#          (pwr = 2 gives the sum of squared residuals, pwr = 1 the sum of absolute residuals)
# Returns a single number.
residsum = function(x, xdat, ydat, pwr) sum(abs((predfunc(x, xdat)-ydat))^pwr)
set.seed(1000) | |
exdat = bind_rows( | |
tibble(label = 'Outlier X', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table); library(ggplot2) | |
# Read the poll spreadsheet from the working directory into a data.table.
dat = as.data.table(readxl::read_excel("Polls-per-user-CEU_R_Python_Stata_2022.xlsx"))
# Skip the first row and keep only columns 7 and 8.
# seq_len(n)[-1] equals 2:n when n >= 2 but yields an empty index when the
# table has fewer than two rows, where 2:nrow(dat) would count backwards.
dat = dat[seq_len(nrow(dat))[-1], 7:8]
setnames(dat, c('After','Before'))
# Keep rows where Before is not missing and neither answer is 'Other'.
dat = dat[!is.na(Before) & !(Before == 'Other') & !(After == 'Other')]
# For each language: number of 'After' answers minus number of 'Before' answers.
# vapply pins the result to one integer per language, so an unexpected result
# fails loudly instead of silently changing the column type.
results = data.table(
  Language = c('Python','R','Stata'),
  Change = vapply(
    c('Python','R','Stata'),
    \(x) dat[, sum(After == x) - sum(Before == x)],
    integer(1)
  )
)
ggplot(results, aes(x = Change, y = 0, label = Language)) + | |
geom_hline(yintercept = 0, size = 1) + geom_point(size = 3, color = 'darkblue') + geom_text(vjust = -1) + | |
scale_x_continuous(breaks = c(-5, 0, 5, 10), limits = c(-7, 12)) + | |
ggthemes::theme_economist() + |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
library(glmnet) | |
library(ggplot2) | |
MUTATION_RATE = .5 | |
generate_random_data = function(N = 1000, truth = c(.5, .5, -.5, 0, 0, 0, 0, 0, .1)) { | |
dat = data.table(x = rnorm(N)) | |
dat = dat[, y := generate_predictions(x, truth) + rnorm(N)] | |
return(dat) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import statsmodels.formula.api as sm | |
# There is a cem package but it doesn't seem to work that well | |
# So we will do this by hand | |
br = pd.read_csv("broockman2013.csv") | |
# Create bins for our continuous matching variables | |
# cut creates evenly spaced bins | |
# while qcut cuts based on quantiles |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Download SafeGraph data from AWS COVID Response | |
#' | |
#' This is a thin wrapper for \code{aws.s3::s3sync} that will aim you at the right directory to synchronize. | |
#' | |
#' This function doesn't add too much, but it does make the default behavior you probably want a bit easier. If you plan to specify the \code{aws.s3::s3sync} "bucket" option yourself, this function is largely useless. | |
#' | |
#' See catalog.safegraph.io for more description of the various buckets. | |
#' | |
#' @param path The local directory to synchronize. | |
#' @param dataset The SafeGraph bucket to get from. Can be "weekly" (OLD VERSION), "weekly-new" (new method since December 2020), "weekly-backfill" (the new method for times before December 2020), "monthly" (OLD VERSION, but also includes the backfill data for the new version), "monthly-backfill" (method since Dec 2020), "distancing", "transactions", "core" (before Nov 2020), "core-new" (Nov 2020 or later), "geo-supplement" or, to get the baseline bucket, "none". v2 versions always selected. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########## THIS FILE REQUIRES data.table TO BE LOADED TO FUNCTION. USE library(data.table) | |
#' Patterns File Lookup | |
#' | |
#' This function, given a date or range of dates, will return a character vector of folder paths you will need to read in with \code{list.files()} (or just set \code{list_files = TRUE} to return the full set of filepaths), which must be run through \code{list.files(pattern = '.csv.gz', full.names = TRUE)} after downloading files. This is done because the subfolder after this is based on the hour the data is released, which can't be predicted ahead of time for future weeks. | |
#' | |
#' For the period from mid-June-early December, 2020, data is available in both "old" (\code{patterns_backfill}) and "new" (\code{patterns}) formats. This function will generate filepaths to the "new" format.
#' | |
#' @param dates A vector of \code{Date} objects (perhaps taking a single \code{Date} object and adding \code{+lubridate::days(0:finish)}) to find the associated files for. | |
#' @param dir If specified, will append \code{di |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import statsmodels.formula.api as sm | |
# Read in data | |
gm = pd.read_csv('gapminder.csv') | |
# Put GDP per capita in log format since it's very skewed | |
gm['logGDPpercap'] = np.log(gm['gdpPercap']) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
# Read in data | |
gm = pd.read_csv('gapminder.csv') | |
# Put GDP per capita in log format since it's very skewed | |
gm['logGDPpercap'] = np.log(gm['gdpPercap']) | |
# Transform gives me one row per original row |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Necessary functions | |
# (never worked with functions in R before? Just run these lines, the functions will be stored in memory | |
# sort of like if you load a package) | |
cpct <- function(df, var, name, append) { | |
# Limit to nonmissings and see how many nonmissings there are | |
df <- df %>% filter_at(var, any_vars(!is.na(.))) | |
N <- nrow(df) | |
df <- df %>% |
NewerOlder