Skip to content

Instantly share code, notes, and snippets.

View jrnold's full-sized avatar

Jeffrey Arnold jrnold

View GitHub Profile
{
"embeddings": [
{
"tensorName": "Time-Varying Word2Vec Embeddings from 2014 South African Newspapers",
"tensorShape": [
30000,
100
],
"tensorPath": "https://www.dropbox.com/s/pepew73w16xmrlz/word2vec1tv-word_vectors.tsv?dl=0",
"metadataPath": "https://www.dropbox.com/s/s5qfpzgij92ew9u/tensorboard-word_vectors.tsv?dl=0"
@jrnold
jrnold / pgfkeys.R
Created August 22, 2017 20:12
Write R object to pgfkeys
#' Write an R object as pgfkeys
#'
#' S3 generic: the method is selected from the class of `x`.
#'
#' @param x Object to write; its class determines the method that runs.
#' @param ... Further arguments, passed on to the selected method.
#' @return Whatever the dispatched method returns.
write_pgfkeys <- function(x, ...) {
  # Dispatch explicitly on the first argument.
  UseMethod("write_pgfkeys", x)
}
#' Build a `\pgfkeysetvalue{<key>}{<value>}` command string
#'
#' @param key Key to place inside the first pair of braces.
#' @param value Value to place inside the second pair of braces.
#' @return The result of joining the pieces with `str_c` (presumably
#'   stringr's `str_c`, attached elsewhere in the file -- confirm).
.pgfkey <- function(key, value) {
  # The doubled backslash is R escaping for the single backslash in the macro.
  opening <- "\\pgfkeysetvalue{"
  str_c(opening, key, "}{", value, "}")
}
write_pgfkeys.default <- function(x, path=NULL, root = "/data/", ...) {
keys <- str_c(root, names(x)) %>%
@jrnold
jrnold / preprocess-twitter.py
Created August 16, 2017 05:27 — forked from tokestermw/preprocess-twitter.py
Python version of Ruby script to preprocess tweets for use in GloVe featurization http://nlp.stanford.edu/projects/glove/
"""
preprocess-twitter.py
python preprocess-twitter.py "Some random text with #hashtags, @mentions and http://t.co/kdjfkdjf (links). :)"
Script for preprocessing tweets by Romain Paulus
with small modifications by Jeffrey Pennington
with translation to Python by Motoki Wu
Translation of Ruby script to create features for GloVe vectors for Twitter data.
@jrnold
jrnold / f_filter.R
Created April 15, 2017 05:48
Filtering terms and variables in R model formulas
#' Filter variables in a model formula (non-standard-evaluation front end)
#'
#' Captures the expressions given in `...` with `lazyeval::lazy_dots()` and
#' hands them to `f_filter_vars_()`, which does the actual work.
#'
#' @param formula. Formula passed through unchanged to `f_filter_vars_()`.
#' @param ... Unevaluated selection expressions, captured lazily.
#' @return Whatever `f_filter_vars_()` returns.
f_filter_vars <- function(formula., ...) {
  captured <- lazyeval::lazy_dots(...)
  f_filter_vars_(formula., .dots = captured)
}
f_filter_vars_ <- function(formula., ..., .dots) {
dots <- lazyeval::all_dots(.dots, ...)
.terms <- terms(formula.)
.factors <- attr(.terms, "factors")
selectx <- dplyr::select_vars_(rownames(.terms), dots)
keepx <- as.logical(colSums(attr(ft, "factors")[selectx, , drop = FALSE]))
reganatomy <- function(model, variable) {
variable <- if (is.character(variable) & 1 == length(variable)) {
variable
} else {
deparse(substitute(variable))
}
mod.mat <- model.matrix(model)
var.names <- colnames(mod.mat)
var <- which(variable == var.names)
if (0 == length(var)) {
@jrnold
jrnold / gist:8b6107ee9dd6c25c14c5cbe57db2add2
Created April 12, 2017 22:00
conjugate distributions
library("R6")
library("purrr")
ConjugateDist <- R6Class("ConjugateDist",
public = list(
post_name = NULL,
post_d = NULL,
post_r = NULL,
post_p = NULL,
post_q = NULL,
@jrnold
jrnold / misc.R
Last active April 16, 2017 01:13
CSSS 564 git stuff
remote_download <- function(url, local_path = NULL, quiet = FALSE, branch = NULL,
credentials = NULL) {
local_path <- local_path %||% tools::file_path_sans_ext(basename(url))
if (!quiet) {
message("Downloading git repo ", url, " to ", local_path)
}
git2r::clone(url, local_path, credentials = credentials, progress = FALSE)
if (!is.null(x$branch)) {
r <- git2r::repository(bundle)
```{r}
#' A line with shaded area around it
#'
#' Plot a line with a shaded area around it. This is often used
#' to plot a fitted regression function with confidence intervals.
#' This is a helper function for \code{\link[ggplot2]{geom_smooth}},
#' but defaulting to `stat = "identity"`, so it expects `y`, `ymin`, and `ymax` from the data instead of generating them with a model.
#'
#' @inheritParams geom_pointrange
@jrnold
jrnold / ess.R
Created February 26, 2017 23:45
Convert Missing Values for the European Social Survey
# Lookup table of the ESS missing-value categories, kept in one list so the
# labels and values can be re-used later.
# Each entry pairs `pattern` (a string describing the category label; the
# wildcards presumably tolerate spelling/case variants -- confirm against the
# code that consumes it) with `value` (the number paired with that category).
ESS_MISS <- list(
  not_applicable = list(
    pattern = "Not +.p+lic+able",
    value = 6
  ),
  refused = list(
    pattern = "Refus..",
    value = 7
  ),
  not_available = list(
    pattern = "(No .nswer|Not .vailable)",
    value = 8
  ),
  dont_know = list(
    pattern = "Don.?t .now",
    value = 9
  )
)
#' geom_ghazi. Annotate any ggplot2 plot with Benghazi, because it's
#' always about Benghazi
library("ggplot2")
#' Annotation layer that writes "BENGHAZI" over a plot
#'
#' @param gp Graphical parameters for the text; defaults to
#'   `grid::gpar(fontsize = 75, fontface = "bold")`.
#' @param ... Further arguments forwarded to `grid::textGrob()`.
#' @return The value of `annotation_custom()` called with the text grob and
#'   infinite bounds on both axes.
geom_ghazi <- function(gp = grid::gpar(fontsize = 75, fontface = "bold"),
                       ...) {
  banner <- grid::textGrob("BENGHAZI", gp = gp, ...)
  # Infinite bounds on both axes, independent of the data range.
  annotation_custom(
    banner,
    xmin = -Inf,
    xmax = Inf,
    ymin = -Inf,
    ymax = Inf
  )
}
# Demo: scatter plot of mtcars (disp vs. mpg) with the geom_ghazi() layer
# added after the points.
ggplot(mtcars, aes(x = disp, y = mpg)) +
  geom_point() +
  geom_ghazi()