Skip to content

Instantly share code, notes, and snippets.

View dgrtwo's full-sized avatar

David Robinson dgrtwo

View GitHub Profile
library(tidyverse)
library(broom)
library(scales)
theme_set(theme_minimal())
crossing(n = round(10 ^ seq(1, 7, .1)),
delta = 10 ^ seq(-1, -3, -1),
sig.level = .05,
sd = .3) %>%
invoke(power.t.test, ., type = "one.sample") %>%
@dgrtwo
dgrtwo / month_tag.csv
Created May 11, 2017 18:51
Number of Stack Overflow questions asked about each tag per month; data behind the Stack Overflow Trends tool
We can't make this file beautiful and searchable because it's too large.
Year,Month,Questions,Tag,MonthTotal,TagTotal
2008,8,4,.htaccess,3929,57155
2008,9,12,.htaccess,14543,57155
2008,10,10,.htaccess,14951,57155
2008,11,16,.htaccess,12967,57155
2008,12,12,.htaccess,12324,57155
2009,1,24,.htaccess,16152,57155
2009,2,35,.htaccess,17889,57155
2009,3,43,.htaccess,20785,57155
2009,4,46,.htaccess,21701,57155
@dgrtwo
dgrtwo / code-faces.R
Last active April 14, 2017 19:28
geom_vase vs geom_faces
library(readr)
library(dplyr)
library(ggplot2)
faces <- read_csv("faces.csv")
vase <- bind_rows(select(faces, x = x1, y),
select(faces, x = x2, y) %>% arrange(y))
faces %>%
Type RouteName Percent
Unregistered Questions/Show 0.96941
Unregistered Home/Index 0.00723
Unregistered Questions/ListByTag 0.00532
Unregistered UsersShow/Show 0.00298
Established (1000-19,999) Home/Index 0.11612
Established (1000-19,999) Questions/ListByTag 0.06254
Established (1000-19,999) Questions/Show 0.65319
Established (1000-19,999) UsersShow/Show 0.08317
Registered (< 1000) Home/Index 0.02911
@dgrtwo
dgrtwo / drupal_password_hasher.py
Created April 9, 2012 15:44
Django password hasher for migration from Drupal
"""
DrupalPasswordHasher
To use, put this in any app and add to your settings.py, something like this:
PASSWORD_HASHERS = (
'django.contrib.auth.hashers.PBKDF2PasswordHasher',
'myproject.myapp.drupal_hasher.DrupalPasswordHasher',
'django.contrib.auth.hashers.PBKDF2SHA1PasswordHasher',
'django.contrib.auth.hashers.BCryptPasswordHasher',
# randomly select min_n_plates from each strain
randomly_sampled_dataframe <- x %>%
dplyr::group_by_(grouping_var, plate_var, sample_var) %>%
tidyr::nest(.key = id_data) %>%
dplyr::group_by_(grouping_var, plate_var) %>%
dplyr::sample_n(min_n_id_per_plate) %>%
tidyr::nest(.key = plate_data) %>%
dplyr::group_by_(grouping_var) %>%
dplyr::sample_n(min_n_plates_per_strain) %>%
tidyr::unnest() %>%
library(tidyverse)
library(lubridate)
questions <- readr::read_csv("~/Repositories/stacklite/questions.csv.gz")
question_tags <- readr::read_csv("~/Repositories/stacklite/question_tags.csv.gz")
r_questions <- question_tags %>%
filter(Tag == "r")
r_questions_by_month <- questions %>%
@dgrtwo
dgrtwo / separate_steps.R
Last active September 29, 2016 09:58
separate_steps.R
#' Convert a dplyr expression to a list of step objects
separate_steps <- function(expr, iscall=FALSE) {
if (iscall) {
call <- expr
} else {
call <- match.call()[["expr"]]
}
len <- length(call)
if (len == 1) {
@dgrtwo
dgrtwo / add-tally.R
Last active August 22, 2016 02:13
add_tally and add_count
#' Add a column counting or tallying observations within groups
#'
#' \code{add_tally} adds a column named "n" (or similar) to a table based on the number
#' of items within each group. These functions are to \code{tally} and
#' \code{count} as \code{mutate} is to \code{summarise}: they add an additional
#' column. They tally within the groups of the current data, and do not change them.
#'
#' @param x A table
#' @param wt (Optional) If omitted, will count the number of rows. Otherwise, use a weighted tally
#' @param sort Whether to sort the result in descending order of n
@dgrtwo
dgrtwo / grouply.R
Last active August 21, 2016 16:09
#' Wrap a function so that it operates within groups
#'
#' Builds a version of \code{f} that first groups its input table by the
#' grouping expressions supplied here, then applies \code{f}, and finally
#' ungroups the result.
#'
#' @param f A function whose first argument is a table (for example a dplyr
#'   verb such as \code{summarise} or \code{mutate}).
#' @param ... Grouping expressions. They are captured unevaluated and later
#'   spliced into \code{dplyr::group_by}.
#'
#' @return A function of \code{tbl} and \code{...}. Its extra arguments are
#'   forwarded to \code{f} after the grouped table, and the value returned by
#'   \code{f} is passed through \code{dplyr::ungroup}.
grouply <- function(f, ...) {
  # Resolve f now instead of on first use of the returned closure, so a later
  # rebinding of the caller's variable cannot change which function is wrapped.
  force(f)

  # Capture the grouping expressions together with their environments.
  # This replaces lazyeval::lazy_dots() + group_by_(.dots = ), both of which
  # dplyr has deprecated in favour of tidy evaluation.
  groups <- dplyr::enquos(...)

  function(tbl, ...) {
    # Plain nested calls rather than %>%: every other call here is namespaced,
    # so the closure should not depend on the pipe operator being attached.
    grouped <- dplyr::group_by(tbl, !!!groups)
    dplyr::ungroup(f(grouped, ...))
  }
}