Skip to content

Instantly share code, notes, and snippets.

@tvladeck
tvladeck / som_cluster_plot.R
Created March 12, 2017 20:24
Method for producing table of clustering results based on SOMbrero output
# Fit a 2x2 self-organizing map to the contingency table `a` using
# SOMbrero's "korresp" algorithm with chi-squared scaling.
# NOTE(review): `a` is defined outside this view — presumably a
# contingency table; confirm against the caller.
korresp_som <- trainSOM(a, scaling = "chi2", type = "korresp", dimension = c(2,2))
# Grid dimensions recovered from the fitted object (both 2 here, per the
# dimension argument above).
row_num <- korresp_som$parameters$the.grid$dim[[1]]
col_num <- korresp_som$parameters$the.grid$dim[[2]]
# Reshape the SOM cluster assignments into a data frame with one row per
# entity and a `cluster` column, keeping the original rownames as an
# explicit `entity` column.
# NOTE(review): this pipeline is truncated in this view — it continues
# past the final visible `%>%`.
korresp_clusters <-
korresp_som$clustering %>%
as.data.frame %>%
set_colnames(c("cluster")) %>%
mutate(entity = rownames(.)) %>%
@tvladeck
tvladeck / sequence_pseudo.R
Last active September 18, 2017 19:58
pseudocode for sequence clustering
# Rebase each record's order_date by subtracting first_date, turning
# calendar dates into offsets from a reference point.
# NOTE(review): `first_date` is defined outside this view — presumably
# each customer's first order date; confirm against the full script.
# normalize sequences
tx_sample <-
tx_sample %>%
mutate(
order_date = order_date - first_date
)
# Build a TraMineR event-sequence object keyed by customer.
# NOTE(review): the seqecreate() call is truncated in this view — only
# the `id` argument is visible.
# create TraMineRextras object
tx_seq <- seqecreate(
id = tx_sample$customer_id,
library(corrplot)

# Counts of player segments (rows) by engagement tiers (columns).
# The last two segments ("Escapers", "Dabblers") are not filled in
# within this snippet and remain at their initial value of zero.
segment_names <- c(
  "Connoisseurs", "Connectors", "Rivals", "Indulgers", "Escapers", "Dabblers"
)
tier_names <- c("Periphery", "Hit", "Mid-Core", "Core", "Occasionals")

playstation_data <- matrix(
  0,
  nrow = length(segment_names),
  ncol = length(tier_names),
  dimnames = list(segment_names, tier_names)
)
playstation_data["Connoisseurs", ] <- c(10, 5, 3, 101, 18)
playstation_data["Connectors", ] <- c(38, 10, 72, 18, 68)
playstation_data["Rivals", ] <- c(10, 110, 22, 28, 15)
playstation_data["Indulgers", ] <- c(80, 10, 9, 44, 70)
# Simulate 1000 payment-failure times from an exponential distribution
# with mean 90 days (rate = 1/90).
# draw from an exponential distribution with mean 90
payment_failures <- rexp(1000, rate = 1 / 90)

# Count how many simulated failures occur within the first 90 days.
# number of payment failures after 90 days
pf_90 <- sum(payment_failures < 90)

# Count failures within the first 60 days of the same draws.
# number of payment failures after 60 days (assuming same distribution)
pf_60 <- sum(payment_failures < 60)

# number of 90 day payment failures observed relative to 60 day payment failures
# We simulate draws from two different timing distributions representing
# two distinct churn processes: involuntary payment failure (mean 90
# days) and deliberate cancellation (mean 120 days).
payment_failures <- rexp(1000, rate = 1 / 90)
intentional_churn <- rexp(1000, rate = 1 / 120)

### proportion of payment failures to intentional churn at 90 day cutoff
pf_90 <- sum(payment_failures < 90)
if_90 <- sum(intentional_churn < 90)
pf_90 / (if_90 + pf_90)
# Fixed seed so the simulation below is reproducible.
set.seed(20180226)
# Simulate 1000 block-mining durations as Poisson counts with mean 10.
# NOTE(review): rpois() draws integer counts, not exponential waiting
# times — confirm this is the intended model for inter-block times.
block_mining_times <- rpois(1000, 10) # lambda = 10
# Total simulated elapsed time across all blocks.
max_time <- sum(block_mining_times)
# Timestamp at which each successive block is mined.
cumulative_times <- cumsum(block_mining_times)
# 1000 observation instants drawn uniformly over the simulated window.
rand_times <- runif(1000, min = 0, max = max_time)
# For each random instant, compute something about the blocks around it.
# NOTE(review): the sapply() body is truncated in this view — only the
# opening of the anonymous function is visible.
deltas_between_blocks <-
rand_times %>%
sapply(function(t){
cat orders_2018-05-01.csv | head | sed -e 's/,,/, ,/g' | column -s, -t | less -#5 -N -S
@tvladeck
tvladeck / max-diff-power-analysis.R
Created August 21, 2018 16:37
Computing power of a covariate in a max-diff experiment
# Setup for a parallel power simulation of a MaxDiff covariate.
library(foreach)
library(doParallel)
library(tidyverse)
library(magrittr)
# Register 40 parallel workers for the foreach loops that follow.
# NOTE(review): hard-coded worker count — machines with fewer cores will
# be oversubscribed; consider parallel::detectCores().
registerDoParallel(40)
# Number of simulated experiments to run per scenario.
runs <- 100
# Candidate per-group sample sizes to evaluate for power.
N_test <- c(250, 300, 350, 400, 500, 600) # sample in each group
@tvladeck
tvladeck / repel_plot.R
Last active February 19, 2019 21:00
repel plot from contingency table
# Dependencies for drawing a repel-labelled correspondence-analysis plot
# from a contingency table.
library(ca)
library(ggplot2)
library(ggrepel)
library(stringr)
library(scales)
library(magrittr)
library(factoextra)
# Build a repel plot from a contingency table via correspondence
# analysis (per the snippet title).
# NOTE(review): the function definition is truncated in this view — only
# the opening of the signature is visible; no parameters are shown.
repel_ca_from_table <-
function(
@tvladeck
tvladeck / gradient_heatmap.R
Last active February 19, 2019 21:00
heatmap of table
# Dependencies for rendering a colour-gradient heatmap of a table.
library(RColorBrewer)
library(fields) #to use designer.colors
library(reshape2)
library(dplyr)
# Draw a heatmap of `df`, apparently bucketing values into
# `num_percentiles` bins (default 10L per the signature).
# NOTE(review): the definition is truncated in this view — only the
# first two parameters are visible; the body is not shown.
gradient_heatmap <-
function(
df,
num_percentiles = 10,