Skip to content

Instantly share code, notes, and snippets.

View dgrtwo's full-sized avatar

David Robinson dgrtwo

View GitHub Profile
@dgrtwo
dgrtwo / gist:39af4512dff5b7357b534a04a971405d
Created March 14, 2018 22:21
Early attempt at creating materialized SQL views from dplyr
separate_sql <- function(expression) {
s <- paste(deparse(expression), collapse = "\n")
s <- stringr::str_replace(s, "%>%", "%>%\n ")
s <- stringr::str_split(s, "\n")[[1]]
val <- eval(expression)
list(expression = expression,
dplyr_code = s,
sql = as.character(dbplyr:::remote_query(val)),
library(purrr)
# Simulate a walk through a Markov chain.
#
# steps: number of transitions to simulate.
# start: the initial state, compared against the row indices of `mat`.
# mat:   transition matrix; the row matching the current state supplies the
#        sampling weights for the next state.
#
# Returns whatever purrr::accumulate() produces for the walk: the start
# state followed by one sampled state per step.
transition_mc <- function(steps, start, mat) {
  states <- seq_len(nrow(mat))

  # (states == current) is an indicator vector, so multiplying it by `mat`
  # selects the row of weights for the current state. The step counter that
  # accumulate() passes as the second argument is deliberately unused.
  next_state <- function(current, step) {
    sample(states, 1, prob = (states == current) %*% mat)
  }

  accumulate(seq_len(steps), next_state, .init = start)
}
---
title: "R Notebook"
output: html_notebook
---
```{r}
library(purrr)
transition_mc <- function(steps, start, mat) {
i <- seq_len(nrow(mat))
@dgrtwo
dgrtwo / mnist_pairs.R
Created May 31, 2017 18:56
Comparing pairs of MNIST digits based on one pixel
library(tidyverse)
# Data is downloaded from here:
# https://www.kaggle.com/c/digit-recognizer
# The training CSV is expected in the local Downloads folder.
kaggle_data <- read_csv("~/Downloads/train.csv")
# Reshape to long form: tag each input row with an `instance` number, then
# gather every column except `label` and `instance` into pixel/value pairs.
pixels_gathered <- kaggle_data %>%
mutate(instance = row_number()) %>%
gather(pixel, value, -label, -instance) %>%
# Keep only the run of digits from the gathered column name; convert = TRUE
# lets tidyr convert the extracted text to a more specific type
# (presumably an integer pixel index -- confirm against the CSV header).
extract(pixel, "pixel", "(\\d+)", convert = TRUE)
@dgrtwo
dgrtwo / month_tag.csv
Created May 11, 2017 18:51
Number of Stack Overflow questions asked about each tag per month; data behind the Stack Overflow Trends tool
We can't make this file beautiful and searchable because it's too large.
Year,Month,Questions,Tag,MonthTotal,TagTotal
2008,8,4,.htaccess,3929,57155
2008,9,12,.htaccess,14543,57155
2008,10,10,.htaccess,14951,57155
2008,11,16,.htaccess,12967,57155
2008,12,12,.htaccess,12324,57155
2009,1,24,.htaccess,16152,57155
2009,2,35,.htaccess,17889,57155
2009,3,43,.htaccess,20785,57155
2009,4,46,.htaccess,21701,57155
@dgrtwo
dgrtwo / code-faces.R
Last active April 14, 2017 19:28
geom_vase vs geom_faces
library(readr)
library(dplyr)
library(ggplot2)
# Read faces.csv (path is relative to the working directory).
faces <- read_csv("faces.csv")
# Stack the (x1, y) and (x2, y) column pairs into one table with columns
# x and y; the x2 half is sorted by y before being appended.
vase <- bind_rows(select(faces, x = x1, y),
select(faces, x = x2, y) %>% arrange(y))
faces %>%
library(tidyverse)
library(broom)
library(scales)
theme_set(theme_minimal())
crossing(n = round(10 ^ seq(1, 7, .1)),
delta = 10 ^ seq(-1, -3, -1),
sig.level = .05,
sd = .3) %>%
invoke(power.t.test, ., type = "one.sample") %>%
Type RouteName Percent
Unregistered Questions/Show 0.96941
Unregistered Home/Index 0.00723
Unregistered Questions/ListByTag 0.00532
Unregistered UsersShow/Show 0.00298
Established (1000-19,999) Home/Index 0.11612
Established (1000-19,999) Questions/ListByTag 0.06254
Established (1000-19,999) Questions/Show 0.65319
Established (1000-19,999) UsersShow/Show 0.08317
Registered (< 1000) Home/Index 0.02911
library(ggplot2)
library(igraph)
library(ggraph)
library(scales)
library(ggforce)
# Theme used for the network plots: theme_no_axes() with the panel border
# removed as well.
network_theme <- theme_no_axes() +
theme(panel.border = element_blank())
# Install it as the active ggplot2 theme for plots created after this point.
theme_set(network_theme)
# randomly select min_n_plates from each strain
randomly_sampled_dataframe <- x %>%
dplyr::group_by_(grouping_var, plate_var, sample_var) %>%
tidyr::nest(.key = id_data) %>%
dplyr::group_by_(grouping_var, plate_var) %>%
dplyr::sample_n(min_n_id_per_plate) %>%
tidyr::nest(.key = plate_data) %>%
dplyr::group_by_(grouping_var) %>%
dplyr::sample_n(min_n_plates_per_strain) %>%
tidyr::unnest() %>%