# Gedevan-Aleksizde/RFM_stan_exec.R

## RFM_stan_exec.R
library(dplyr)
library(tidyr)
library(ggplot2)
library(rstan)
library(loo)

# working directory (placeholder path: point it at the folder that holds the
# input CSV files and the Stan model file)
work.dir <- "HOGEHOGE/20160406_RFM"

# Main table; presumably one row per customer (its row count is passed to
# Stan as N). The first column is renamed to "ID" whatever its header was.
df <- read.csv(file.path(work.dir, "rfm.csv"), stringsAsFactors = FALSE)
colnames(df)[1] <- "ID"
# Spending table; its column count is passed to Stan as K and the table
# itself as Spend.
df.spending <- read.csv(file.path(work.dir, "spending_mat.csv"),
                        stringsAsFactors = FALSE)
# Header-less file, so the two columns are named explicitly after reading.
spending.num <- read.csv(file.path(work.dir, "spending_freq.csv"),
                         stringsAsFactors = FALSE, header = FALSE)
colnames(spending.num) <- c("ID", "num")

# make TEST datasets
#sample.ID <- sample(df$ID, size=100, replace = F)
#df.test <- df[df$ID %in% sample.ID,]
#df.spending.test <- df.spending[df.spending$CustomerID %in% sample.ID, ]
#spending.num.test <- spending.num[spending.num$ID %in% sample.ID,]

# rstan session settings: keep the compiled model on disk (auto_write) and
# set mc.cores to the number of cores detected on this machine.
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
# Compile the Stan program stored in the working directory.
model <- stan_model(file = file.path(work.dir, "rfm_hierarchical.stan"))

# Annual discount = 15 %. delta is presumably the matching per-period
# continuous rate (log(1.15) / 52 is roughly 0.0027) -- TODO confirm the time
# unit against rfm_hierarchical.stan.

# Every chain starts tau at the midpoint of each row's `time` and `Time`.
tau.init <- rowMeans(df[, c("time", "Time")])

# NOTE: the sampler argument used to be spelled `alogorithm`, which is not
# matched to `algorithm`, so the requested "HMC" sampler was never selected.
# `iter` includes the warmup draws: 17000 - 15000 = 2000 kept draws per chain.
RFM.res <- sampling(
  model,
  data = list(
    N = nrow(df),
    Time = df$Time,
    time = df$time,
    x = df$Freq,
    K = ncol(df.spending),
    NSpend = spending.num$num,
    Spend = df.spending,
    delta = .0027
  ),
  init = list(
    chain1 = list(tau = tau.init),
    chain2 = list(tau = tau.init),
    chain3 = list(tau = tau.init),
    chain4 = list(tau = tau.init)
  ),
  algorithm = "HMC",
  warmup = 15000, iter = 17000, chains = 4
)

# Attach posterior means of the per-row parameters to the input table.
#
# Args:
#   stan.result: fitted object accepted by rstan::extract(); each of lambda,
#                mu, eta, CLV, zeta and tau must extract to a draws-by-rows
#                matrix with one column per row of `df`.
#   df:          data frame with columns ID, time, Time, Recency, Freq and
#                Monetary.
# Returns:
#   `df` reduced to ID, the six posterior means, time, Time, Recency, Freq,
#   Monetary and the logical flag tau_well.
res.descript <- function(stan.result, df) {
  for (col in c("lambda", "mu", "eta", "CLV", "zeta", "tau")) {
    # Column means of the draws matrix = posterior mean for each row of df.
    df[[col]] <- colMeans(rstan::extract(stan.result, col)[[col]])
  }
  # tau_well: mean tau is not earlier than `time`; for rows whose mean zeta is
  # below 1 it must additionally not exceed `Time`.
  # Elementwise `&` / `|` are required here: scalar `&&` on vectors is an
  # error in R >= 4.3 and only looked at the first element before that.
  df$tau_well <- (df$time <= df$tau) & (df$zeta >= 1 | df$tau <= df$Time)
  dplyr::select(df, ID, lambda, mu, eta, CLV, zeta, tau, time, Time,
                Recency, Freq, Monetary, tau_well)
}
df.res <- res.descript(RFM.res, df)

# Save only after df.res exists; saving before it was created fails with
# "object 'df.res' not found".
save(df.res, RFM.res, file = file.path(work.dir, "result.RData"))

# Convergence checks and posterior summaries.
traceplot(RFM.res, pars = "theta0")
traceplot(RFM.res, pars = "Gamma0")
print(RFM.res, pars = "theta0")
print(RFM.res, pars = "Gamma0")
print(RFM.res, pars = "pzeta")
print(RFM.res, pars = "CLV")
print(RFM.res, pars = "tau")
	library(dplyr)
	library(tidyr)
	library(ggplot2)
	library(rstan)
	library(loo)

	# working directory (placeholder path: point it at the folder that holds the
	# input CSV files and the Stan model file)
	work.dir <- "HOGEHOGE/20160406_RFM"

	# Main table; presumably one row per customer (its row count is passed to
	# Stan as N). The first column is renamed to "ID" whatever its header was.
	df <- read.csv(file.path(work.dir, "rfm.csv"), stringsAsFactors = FALSE)
	colnames(df)[1] <- "ID"
	# Spending table; its column count is passed to Stan as K and the table
	# itself as Spend.
	df.spending <- read.csv(file.path(work.dir, "spending_mat.csv"),
	                        stringsAsFactors = FALSE)
	# Header-less file, so the two columns are named explicitly after reading.
	spending.num <- read.csv(file.path(work.dir, "spending_freq.csv"),
	                         stringsAsFactors = FALSE, header = FALSE)
	colnames(spending.num) <- c("ID", "num")

	# make TEST datasets
	#sample.ID <- sample(df$ID, size=100, replace = F)
	#df.test <- df[df$ID %in% sample.ID,]
	#df.spending.test <- df.spending[df.spending$CustomerID %in% sample.ID, ]
	#spending.num.test <- spending.num[spending.num$ID %in% sample.ID,]

	# rstan session settings: keep the compiled model on disk (auto_write) and
	# set mc.cores to the number of cores detected on this machine.
	rstan_options(auto_write = TRUE)
	options(mc.cores = parallel::detectCores())
	# Compile the Stan program stored in the working directory.
	model <- stan_model(file = file.path(work.dir, "rfm_hierarchical.stan"))

	# Annual discount = 15 %. delta is presumably the matching per-period
	# continuous rate (log(1.15) / 52 is roughly 0.0027) -- TODO confirm the time
	# unit against rfm_hierarchical.stan.

	# Every chain starts tau at the midpoint of each row's `time` and `Time`.
	tau.init <- rowMeans(df[, c("time", "Time")])

	# NOTE: the sampler argument used to be spelled `alogorithm`, which is not
	# matched to `algorithm`, so the requested "HMC" sampler was never selected.
	# `iter` includes the warmup draws: 17000 - 15000 = 2000 kept draws per chain.
	RFM.res <- sampling(
	  model,
	  data = list(
	    N = nrow(df),
	    Time = df$Time,
	    time = df$time,
	    x = df$Freq,
	    K = ncol(df.spending),
	    NSpend = spending.num$num,
	    Spend = df.spending,
	    delta = .0027
	  ),
	  init = list(
	    chain1 = list(tau = tau.init),
	    chain2 = list(tau = tau.init),
	    chain3 = list(tau = tau.init),
	    chain4 = list(tau = tau.init)
	  ),
	  algorithm = "HMC",
	  warmup = 15000, iter = 17000, chains = 4
	)

	# Attach posterior means of the per-row parameters to the input table.
	#
	# Args:
	#   stan.result: fitted object accepted by rstan::extract(); each of lambda,
	#                mu, eta, CLV, zeta and tau must extract to a draws-by-rows
	#                matrix with one column per row of `df`.
	#   df:          data frame with columns ID, time, Time, Recency, Freq and
	#                Monetary.
	# Returns:
	#   `df` reduced to ID, the six posterior means, time, Time, Recency, Freq,
	#   Monetary and the logical flag tau_well.
	res.descript <- function(stan.result, df) {
	  for (col in c("lambda", "mu", "eta", "CLV", "zeta", "tau")) {
	    # Column means of the draws matrix = posterior mean for each row of df.
	    df[[col]] <- colMeans(rstan::extract(stan.result, col)[[col]])
	  }
	  # tau_well: mean tau is not earlier than `time`; for rows whose mean zeta is
	  # below 1 it must additionally not exceed `Time`.
	  # Elementwise `&` / `|` are required here: scalar `&&` on vectors is an
	  # error in R >= 4.3 and only looked at the first element before that.
	  df$tau_well <- (df$time <= df$tau) & (df$zeta >= 1 | df$tau <= df$Time)
	  dplyr::select(df, ID, lambda, mu, eta, CLV, zeta, tau, time, Time,
	                Recency, Freq, Monetary, tau_well)
	}
	df.res <- res.descript(RFM.res, df)

	# Save only after df.res exists; saving before it was created fails with
	# "object 'df.res' not found".
	save(df.res, RFM.res, file = file.path(work.dir, "result.RData"))

	# Convergence checks and posterior summaries.
	traceplot(RFM.res, pars = "theta0")
	traceplot(RFM.res, pars = "Gamma0")
	print(RFM.res, pars = "theta0")
	print(RFM.res, pars = "Gamma0")
	print(RFM.res, pars = "pzeta")
	print(RFM.res, pars = "CLV")
	print(RFM.res, pars = "tau")