Skip to content

Instantly share code, notes, and snippets.

@abikoushi
Created June 14, 2017 13:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abikoushi/e3b1a1bd47395405ea46db31afae55ba to your computer and use it in GitHub Desktop.
Save abikoushi/e3b1a1bd47395405ea46db31afae55ba to your computer and use it in GitHub Desktop.
#' Summarise conversion paths into first-touch, transition and harvest tables
#'
#' Every entry of the path column is split on " > " into its touchpoints
#' (nodes). Each node occurrence is weighted by the `totalConversions` of the
#' path it belongs to, and the weighted occurrences are aggregated per node.
#'
#' @param mcf_gadata Data frame with a `totalConversions` column (coerced with
#'   `as.numeric()`) and a path column whose values are touchpoints joined by
#'   " > ".
#' @param dimension Name of the path column, as a single string.
#' @return A list with elements:
#'   * `unique_node`: distinct node labels in order of first appearance.
#'   * `trans_mat`: tibble `from`, `to`, `trans_prob`; the weighted share of
#'     non-final occurrences of `from` that are followed by `to`.
#'   * `null_node`: tibble `node`, `freq` with the rows of nodes whose every
#'     occurrence is both the first and the last step of its path.
#'   * `harvest`: tibble `node`, `harv_prob`; the weighted share of a node's
#'     occurrences that are the last step of a path.
#'   * `firsttouch`: tibble `node`, `first_prob`; the weighted first-step
#'     occurrences of a node divided by the total number of conversions.
pathanal <- function(mcf_gadata, dimension = "basicChannelGroupingPath") {
  if (!is.character(dimension) || length(dimension) != 1L ||
      !dimension %in% names(mcf_gadata)) {
    stop("`dimension` must name a single column of `mcf_gadata`.",
         call. = FALSE)
  }

  mcf_gadata <- mcf_gadata %>%
    mutate(totalConversions = as.numeric(totalConversions))

  # Look the column up by name instead of evaluating the string as code;
  # as.character() lets factor columns through strsplit().
  path <- strsplit(as.character(mcf_gadata[[dimension]]), " > ", fixed = TRUE)
  pathlength <- lengths(path)

  node <- unlist(path, use.names = FALSE)
  # NOTE(review): this strips "CLICK", ":" and "NA" wherever they occur, so a
  # label that merely contains "NA" is altered as well -- confirm that is
  # acceptable for the channel names in use.
  node <- gsub("CLICK|:|NA", "", node)
  m <- length(node)

  # d flags the last step of every path, a flags the first step.
  ends <- cumsum(pathlength)
  starts <- ends - pathlength + 1L
  a <- d <- numeric(m)
  d[ends] <- 1
  a[starts] <- 1

  # One weight per node occurrence. rep(times = ) always returns a plain
  # vector; mapply(rep, ...) collapsed to a matrix whenever all paths had the
  # same length, which misaligned the weights with `node`.
  freq <- rep(mcf_gadata$totalConversions, times = pathlength)

  dat <- tibble(node = node, a = a, d = d, freq = freq)

  harvest <- dat %>%
    group_by(node) %>%
    summarise(harv_prob = sum(d * freq) / sum(freq))

  TotalCV <- sum(mcf_gadata$totalConversions)
  firsttouch <- dat %>%
    group_by(node) %>%
    summarise(first_prob = sum(a * freq) / TotalCV)

  # lead() also pairs the last node of one path with the first node of the
  # next; the (1 - d) factor gives those pairs zero weight and the filter
  # below removes them.
  trans_mat <- dat %>%
    mutate(to = lead(node)) %>%
    group_by(node, to) %>%
    summarise(freq = sum((1 - d) * freq)) %>%
    dplyr::filter(!is.na(to) & freq > 0) %>%
    rename(from = node) %>%
    group_by(from) %>%
    mutate(trans_prob = freq / sum(freq)) %>%
    select(-freq) %>%
    ungroup()

  null_node <- dat %>%
    group_by(node) %>%
    mutate(ind = all(a == 1) & all(d == 1)) %>%
    dplyr::filter(ind) %>%
    select(node, freq) %>%
    ungroup()

  list(
    unique_node = unique(dat$node),
    trans_mat = trans_mat,
    null_node = null_node,
    harvest = harvest,
    firsttouch = firsttouch
  )
}
#' Simulate conversion paths and count how often each one occurs
#'
#' Starting from a state drawn with weights `first_prob`, the walk appends the
#' current state to the path, then either ends the path with probability
#' `harv_prob[state]` or moves on using row `state` of `trans_prob`. The loop
#' runs until `n` paths have been completed, so it only terminates if the
#' visited states can eventually be harvested.
#'
#' @param n Number of paths to simulate.
#' @param K Number of states; states are the integers `1..K`.
#' @param trans_prob Matrix of transition weights; row `i` is passed to
#'   `sample.int()` as the weights for the step after state `i`.
#' @param harv_prob Vector indexed by state giving the probability that a path
#'   ends at that state.
#' @param first_prob Weights for drawing the first state of each path.
#' @return A tibble with one row per distinct simulated path: `path` (state
#'   indices joined by " > ") and `totalConversions` (number of simulated
#'   paths equal to it).
simpath <- function(n, K, trans_prob, harv_prob, first_prob) {
  out <- vector("list", n)
  path <- integer(0)
  j <- 1L
  tmp <- sample.int(K, 1L, prob = first_prob)
  while (j <= n) {
    path <- c(path, tmp)
    if (runif(1) < harv_prob[tmp]) {
      out[[j]] <- path
      path <- integer(0)
      # The next start state is drawn even after the final path so that the
      # sequence of random draws matches earlier seeded runs.
      tmp <- sample.int(K, 1L, prob = first_prob)
      j <- j + 1L
    } else {
      tmp <- sample.int(K, 1L, prob = trans_prob[tmp, ])
    }
  }
  # vapply() keeps `path` a character column even when n is 0, where sapply()
  # would return an empty list.
  labels <- vapply(out, paste, FUN.VALUE = character(1), collapse = " > ")
  tibble(path = labels) %>%
    group_by(path) %>%
    summarise(totalConversions = n())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment