Skip to content

Instantly share code, notes, and snippets.

Avatar

Tyler Rinker trinker

View GitHub Profile
@trinker
trinker / fgsub.py
Last active Mar 29, 2021
textread fgsub equivalent in python
View fgsub.py
text = ['df dft sdf', 'sd fdggg sd dfhhh d', 'ddd']
def dbllttrwordrev(match):
    """Wrap the reversed text of a regex match in ``<<`` and ``>>``.

    Written to be passed as the ``repl`` callable of ``re.sub``.

    Args:
        match: A match object; only ``match.group()`` (the full matched
            text) is read.

    Returns:
        The matched text reversed and formatted as ``<<...>>``.
    """
    # Keep the match object and its text under separate names rather than
    # rebinding the parameter.
    matched_text = match.group()
    return '<<{}>>'.format(matched_text[::-1])
{
'function': [re.sub("\\b\\w*([a-z])(\\1{2,})\\w*\\b", dbllttrwordrev, x, flags = re.IGNORECASE) for x in text],
'lambda': [re.sub("\\b\\w*([a-z])(\\1{2,})\\w*\\b", lambda x: '<<{}>>'.format(x.group()[::-1]) , x, flags = re.IGNORECASE) for x in text]
@trinker
trinker / quanteda_wordcloud.R
Created Jul 8, 2020
Wordcloud with quanteda
View quanteda_wordcloud.R
## Load dependencies
library(quanteda)
library(sentimentr)
library(tidyverse)
library(lexicon)
## Data set from sentimentr package
dat <- presidential_debates_2012
## Display the data (auto-prints when run at top level)
dat
## Build a corpus from the data, using the `dialogue` column as the text field
corp <- corpus(dat, text_field = "dialogue")
View gist:a75144f8d90738a169dc4c99f4ad3717
## Norah's Question: Are there any words that start with chr (phonetically /k/ /r/) that don't have a short i sound following it?
library(openssl)
library(textshape)
library(tidyverse)
## Read the stressed-spelling word list from the ToBoldlyStress repo, one element per line
cmudict <- readLines('https://raw.githubusercontent.com/michelleful/ToBoldlyStress/master/stressed_spelling.txt')
## Read cmudict-0.7b, then drop the first 121 lines and the last 4 lines
## NOTE(review): presumably those are non-entry header/footer lines -- confirm against the file
cmudict7b <- readLines('http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b') %>% tail(-121) %>% head(-4)
@trinker
trinker / rowwise_subsets.R
Created Sep 9, 2019
Rowwise subsets in dplyr
View rowwise_subsets.R
library(dplyr)
## Example data: ten rows of 0/1 indicator columns with missing values
## (x1-x3, y4-y5) plus a letter label column `z`
dat <- tibble(
  x1 = c(1, 0, 0, NA, 0, 1, 1, NA, 0, 1),
  x2 = c(1, 1, NA, 1, 1, 0, NA, NA, 0, 1),
  x3 = c(0, 1, 0, 1, 1, 0, NA, NA, 0, 1),
  y4 = c(1, 0, NA, 1, 0, 0, NA, 0, 0, 1),
  y5 = c(1, 1, NA, 1, 1, 1, NA, 1, 0, 1),
  z  = head(LETTERS, 10)
)
View Histogram as Percentage
library(tidyverse)
library(gridExtra)
## Count histogram of `hp` from mtcars, split into 10 bins
plot1 <- ggplot(data = mtcars, mapping = aes(x = hp)) +
  geom_histogram(bins = 10) +
  labs(
    title = 'Count Histogram',
    y = 'Count of Awesomeness'
  )
@trinker
trinker / roc.R
Created Mar 23, 2019
Calculating AUC: the area under a ROC Curve
View roc.R
## https://blog.revolutionanalytics.com/2016/11/calculating-auc.html
## Load Dependency
library(numform)
##=======================================================
## Make some fake data
##=======================================================
## Fix the RNG seed so the simulated labels are reproducible
set.seed(10)
## Draw 100 binary labels with replacement: 0 with probability .8, 1 with
## probability .2. `TRUE` is spelled out because `T` is an ordinary variable
## that can be reassigned; `replace`/`prob` are named for readability.
actual <- sample(0:1, size = 100, replace = TRUE, prob = c(.8, .2))
@trinker
trinker / udpipeFormality.R
Created Mar 22, 2019
formality_with_udpipe
View udpipeFormality.R
##==============================================================================
## Formality
##==============================================================================
## 1. tag parts of speech
## 2. convert to generic POS
## 3. Compute formality from POS
## Download the English udpipe model; the result's `file_model` element is used below
udmodel <- udpipe::udpipe_download_model(language = "english")
## Load the model from that file, rebinding `udmodel` from the download result to the loaded model
udmodel <- udpipe::udpipe_load_model(file = udmodel$file_model)
@trinker
trinker / datasci_install.R
Last active Apr 20, 2019
Install datasci packages
View datasci_install.R
#' @param packages An optional vector of Campus Labs packages to install.
#' @param pattern An optional grep pattern of campus labs packages to install.
install_cl <- function(packages = NULL, pattern = '.', ...){
try_install_cran <- function(package){
if (!require(package, character.only = TRUE, quietly = TRUE)) {
message(sprintf('The "%s" package is missing; do you want me to install it?', package))
ans <- menu(c("Yes", "No"))
if (ans == "2") {
View topicmodeling_bit.R
## Install pacman when require() cannot load it
## NOTE(review): require() returns FALSE instead of erroring; requireNamespace()
## is the usual check for an optional install -- confirm before changing
if (!require("pacman")) install.packages("pacman")
## Load the udpipe and BTM packages through pacman
pacman::p_load(udpipe, BTM)
## Load the `brussels_reviews_anno` data set from the udpipe package
data("brussels_reviews_anno", package = "udpipe")
## Get and load the udpipe model
engmod <- udpipe_download_model(language = "english", udpipe_model_repo = "bnosac/udpipe.models.ud")
ud_engmod <- udpipe_load_model(engmod$file_model)
## Annotate the text data and merge back together
## `nr` holds the row count of sentimentr's presidential_debates_2012 data
nr <- nrow(sentimentr::presidential_debates_2012)
@trinker
trinker / EulerPlot.R
Created Feb 13, 2019
Euler Plot Demo
View EulerPlot.R
pacman::p_load(venneuler, tidyverse, ggforce)
## Make some fake data
## Fix the seed so the simulated attributes are reproducible
set.seed(10)
## Simulate 10 people with 5 binary attributes. The 50 draws fill the matrix
## column by column, with the success probability recycling through
## 1, 1/2, 1/3, 1/4, 1/5 along the draws.
dat <- local({
  draws <- rbinom(50, size = 1, prob = 1 / (1:5))
  attribute_cols <- as.data.frame(matrix(draws, ncol = 5))
  names(attribute_cols) <- paste0('Attribute_', 1:5)
  data.frame(
    Person = paste0('Person_', 1:10),
    attribute_cols,
    stringsAsFactors = FALSE
  )
})