Skip to content

Instantly share code, notes, and snippets.

cat orders_2018-05-01.csv | head | sed -e 's/,,/, ,/g' | column -s, -t | less -#5 -N -S
# Seed the RNG so every run reproduces the same simulated draws.
set.seed(20180226)

# Per-block mining times: 1000 Poisson draws with lambda = 10.
block_mining_times <- rpois(n = 1000, lambda = 10)

# Running total of the mining times, one entry per block.
cumulative_times <- cumsum(block_mining_times)

# Total simulated time over all blocks.
max_time <- sum(block_mining_times)

# 1000 time points drawn uniformly from [0, max_time].
rand_times <- runif(n = 1000, min = 0, max = max_time)
deltas_between_blocks <-
rand_times %>%
sapply(function(t){
# We simulate draws from two different timing distributions representing different processes
# (rate = 1/90 and rate = 1/120 give exponential means of 90 and 120 respectively)
payment_failures <- rexp(1000, rate = 1/90)
intentional_churn <- rexp(1000, rate = 1/120)
### share of payment failures among all churn (both processes) occurring before the 90 day cutoff
# count of simulated payment failures with a time below 90
pf_90 <- (payment_failures < 90) %>% sum
# count of simulated intentional churn events with a time below 90
if_90 <- (intentional_churn < 90) %>% sum
# fraction of the sub-90 events that are payment failures
pf_90 / (if_90 + pf_90)
# draw 1000 values from an exponential distribution with mean 90
payment_failures <- rexp(1000, rate = 1/90)
# number of payment failures occurring before day 90
pf_90 <- (payment_failures < 90) %>% sum
# number of payment failures occurring before day 60 (assuming same distribution)
pf_60 <- (payment_failures < 60) %>% sum
# number of 90 day payment failures observed relative to 60 day payment failures
library(corrplot)
# 6 x 5 numeric matrix, initialised to zero and filled in row by row below
playstation_data <- matrix(0, nrow = 6, ncol = 5)
# row and column labels (presumably player segments and game categories -- confirm)
rownames(playstation_data) <- c("Connoisseurs", "Connectors", "Rivals", "Indulgers", "Escapers", "Dabblers")
colnames(playstation_data) <- c("Periphery", "Hit", "Mid-Core", "Core", "Occasionals")
# values for rows 1-4, given in column order; rows 5 and 6 are not assigned in this excerpt
playstation_data[1, ] <- c(10, 5, 3, 101, 18)
playstation_data[2, ] <- c(38, 10, 72, 18, 68)
playstation_data[3, ] <- c(10, 110, 22, 28, 15)
playstation_data[4, ] <- c(80, 10, 9, 44, 70)
@tvladeck
tvladeck / sequence_pseudo.R
Last active September 18, 2017 19:58
pseudocode for sequence clustering
# normalize sequences: re-express each order date as an offset from first_date
# (first_date is presumably a per-customer column of tx_sample -- confirm)
tx_sample <-
tx_sample %>%
mutate(
order_date = order_date - first_date
)
# create TraMineRextras object
tx_seq <- seqecreate(
id = tx_sample$customer_id,
@tvladeck
tvladeck / som_cluster_plot.R
Created March 12, 2017 20:24
Method for producing table of clustering results based on SOMbrero output
# fit a 2 x 2 self-organising map of type "korresp" to `a`, using chi2 scaling
korresp_som <- trainSOM(a, scaling = "chi2", type = "korresp", dimension = c(2,2))
# grid dimensions read back from the fitted object
# (presumably rows first, then columns -- confirm against the SOMbrero docs)
row_num <- korresp_som$parameters$the.grid$dim[[1]]
col_num <- korresp_som$parameters$the.grid$dim[[2]]
korresp_clusters <-
korresp_som$clustering %>%
as.data.frame %>%
set_colnames(c("cluster")) %>%
mutate(entity = rownames(.)) %>%
@tvladeck
tvladeck / indexation.R
Created February 9, 2017 19:27
function to calculate indexations
#' Index observed cell values against their independence expectation
#'
#' Each cell of `A` is divided by the value it would take if it were the
#' product of its row share, its column share and the grand total of `A`.
#'
#' @param A A numeric matrix-like object accepted by `rowSums()` and
#'   `colSums()`.
#' @return `A` divided element-wise by the expected values.
indexation <- function(A) {
  row_share <- rowSums(A) / sum(rowSums(A))
  col_share <- colSums(A) / sum(colSums(A))

  # outer product of the two share vectors, scaled back up to the grand total
  expected <- (row_share %*% t(col_share)) * sum(A)

  A / expected
}
@tvladeck
tvladeck / county_binom_model.bugs.R
Last active December 31, 2016 02:33
bugs model for election analysis
model {
for(i in 1:n_obs){
hrc_votes[i] ~ dbin(p[i], n_votes[i])
p[i] ~ dbeta(alpha[i], beta[i])
# reparameterization of the beta distribution taken from this site
# http://bit.ly/2i880Oj
@tvladeck
tvladeck / bugs_code.bug
Created December 22, 2016 16:12
I don't quite get why you run _coda.samples_ twice
# Betas #
# Draw samples for the seven beta_* variables named below from the `jags` object.
# The trailing 10000 is the third positional argument
# (presumably coda.samples' n.iter, the number of iterations -- confirm).
samples <- coda.samples(jags, c('beta_age',
'beta_percent_white',
'beta_uninsured',
'beta_unemployed',
'beta_percent_degree',
'beta_income',
'beta_health'), 10000)