Skip to content

Instantly share code, notes, and snippets.

View jrosen48's full-sized avatar

Joshua Rosenberg jrosen48

View GitHub Profile
# Read every file in the Todoist backup folder and build `fn`, a vector with
# one entry per row read, giving the path of the file that row came from.
f <- list.files("~/downloads/Todoist backup 2020-01-18", full.names = TRUE)
library(tidyverse)
l <- map(f, read_csv)
# Number of rows in each file, kept as a list (one element per file).
nr <- l %>% map(nrow)
# Repeat each path once per row of its file. Repeating `f` directly avoids the
# previous hard-coded `1:18`, which only worked when the folder held exactly
# 18 files.
fn <- rep(f, times = unlist(nr))
fn <- fn %>%
str_split("/") %>%
library(tidyverse)
library(quanteda)
library(quanteda.classifiers)
library(shiny)
library(shinythemes)
a <- read_csv("all-new-data.csv") %>%
mutate(Why = text) %>%
mutate(Why = str_c(Why, " ", lesson, " ", gen_or_spec)) %>%
mutate(row_num = row_number(),
``` r
# Load the `anorexia` data set from the MASS package.
utils::data(anorexia, package = "MASS")
# Fit a Gaussian GLM of Postwt on Prewt and Treat. Prewt appears both as a
# predictor and inside offset().
# NOTE(review): with Prewt also supplied as an offset, the estimated Prewt
# coefficient is presumably relative to a fixed slope of 1 -- confirm intent.
m1 <- glm(Postwt ~ Prewt + Treat + offset(Prewt),
family = gaussian, data = anorexia)
# Print the model summary (its output is recorded in the `#>` lines below).
summary(m1)
#>
#> Call:
#> glm(formula = Postwt ~ Prewt + Treat + offset(Prewt), family = gaussian,
# Build an xtable from the first 10 rows of iris and hand it to texPreview.
texPreview::tex_preview(
  obj = xtable::xtable(
    utils::head(datasets::iris, n = 10)
  )
)
#>   [1] "This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019) (preloaded format=pdflatex 2019.5.30)  30 MAY 2019 22:24"
#>   [2] "entering extended mode"                                                                                                 
#>   [3] " restricted \\write18 enabled."                                                                                         
#>   [4] " %&-line parsing enabled."                                                                                              
#>   [5] "**/private/var/folders/zn/9_h784cn6t5b2tchsgx4v2p00000gn/T/Rtmp12qz1x/tex_tempD"                                        
#>   [6] "oc.tex"                                                                                                                 
#>   [7] ""                                                                                                                       
#>   [8] "(/private/var/folders/zn/9_h78
@jrosen48
jrosen48 / lookup_many_tweets()
Last active May 13, 2019 00:11
A wrapper around rtweet::lookup_tweets() to pull data related to tweets when data on more than 90,000 tweets is requested; credit to @bretsw
library(tidyverse)
library(rtweet)
# The closing parenthesis was missing here, which made the whole script
# unparseable.
library(beepr)

# Placeholder: point this at the CSV file to read.
f <- "replace-with-path-to-file"
d <- read_csv(f)

# Split each status URL on "/" and keep the sixth piece as `id_str`.
# NOTE(review): presumably URLs look like https://twitter.com/<user>/status/<id>,
# so the sixth piece is the status id -- confirm against the data.
d$id_str <- str_split(d$status_url, "/") %>%
  map_chr(function(url_parts) url_parts[6])
library(rscopus)
library(tidyverse)
# ISSNs to search for, one Scopus query per ISSN.
issns <- c(
  "0022-0663", "1532-6985", "1532-690X",
  "1532-7809", "1873-782X", "1556-6501",
  "8756-3894", "0959-4752", "1090-2384"
)
# Wrap each ISSN as "ISSN(<issn>)"; str_c() is vectorised over `issns`.
query <- str_c(
  "ISSN(",
  issns,
  ")"
)
#' Run one Scopus search and return its entries as a data frame.
#'
#' @param query A query string passed to `scopus_search()`.
#' @return The result of `gen_entries_to_df()` applied to the `entries`
#'   element of the search result.
f <- function(query) {
  search_result <- scopus_search(
    query = query,
    max_count = 100000,
    count = 25,
    wait_time = 7
  )
  found_entries <- search_result$entries
  gen_entries_to_df(found_entries)
}
@jrosen48
jrosen48 / seniority
Created December 11, 2018 00:32
Modeling Ed Psych EB
library(tidyverse)
library(googlesheets)
# Look up the Google Sheet by its title and read it into `d`.
g <- gs_title("Ed Psych Journal Editorial Boards 2018")
d <- gs_read(g)
# Count rows per Seniority x Gender combination, then spread Gender into
# columns so each row is one Seniority level and each cell is a count.
m <- spread(
  count(d, Seniority, Gender),
  key = Gender,
  value = n
)
@jrosen48
jrosen48 / gist:68a13af9aeb743e40f544b23d9c2084c
Last active December 11, 2018 15:23
plotting-interaction
library(tidyverse)
d <- read_csv("Downloads/choice_interest_plot.csv")
# Width of the band around zero, in standard deviations of `interest_c`.
sd_val <- 1
# Cut-offs are symmetric: +/- sd_val standard deviations of interest_c.
upper_cut <- sd_val * sd(d$interest_c)
lower_cut <- -upper_cut
d <- d %>%
@jrosen48
jrosen48 / lme4-blup-ranef.R
Created November 24, 2018 19:29
lme4-blup-ranef
f <- function (object)
{
se.bygroup <- ranef(object, postVar = TRUE)
n.groupings <- length(se.bygroup)
for (m in 1:n.groupings) {
vars.m <- attr(se.bygroup[[m]], "postVar")
K <- dim(vars.m)[1]
J <- dim(vars.m)[3]
se.bygroup[[m]] <- array(NA, c(J, K))
for (j in 1:J) {
@jrosen48
jrosen48 / f.R
Created November 19, 2018 19:45
Detect multivariate outlier
# Small two-column example data set (columns V1 and V2, 9 rows). The second
# column is the values 2, 3, 4 repeated three times; row 8 of the first
# column is NA.
d <- as.data.frame(
  matrix(
    c(
      2, 3, 4, 1, 7, 4, 3, NA, 4,
      rep(c(2, 3, 4), times = 3)
    ),
    ncol = 2
  )
)
d
detect_multivariate_outliers <- function(data, df, alpha = .997) {
require(dplyr)
data$row_id = 1:nrow(data)
data_ss <- data[complete.cases(data), ]
mah_dist <- mahalanobis(data_ss, colMeans(data_ss), cov(data_ss))
crit_val <- qchisq(alpha, df)
id_and_logical <- data.frame(row_id = data_ss$row_id, is_outlier = (mah_dist > crit_val))