Skip to content

Instantly share code, notes, and snippets.

> n_simu <- 10000
> ptc <- proc.time()
> MC_pullAll <- get_sample(n_simu, no_sample = n_simu)
> proc.time() - ptc
user system elapsed
0.065 0.004 0.072
> ptc <- proc.time()
> MC_pullOne <- get_sample(n_simu, no_sample = 1)
> proc.time() - ptc
#################
# MC_OilInPlace.R
# Monte Carlo simulation for oil in place
#
# http://petrowiki.org/Monte_Carlo_simulation
# http://www.statvision.com/webinars/Monte%20Carlo%20Simulation.pdf
#
# Author: Yang Cong
# Created Date: 7/7/2018
# Modified Date: 7/8/2018
# Build a customer-by-item indicator table (1 = purchased, 0 = not) for the
# target item and the items TblRule lists alongside it.
targetItem <- 'PINK REGENCY TEACUP AND SAUCER'
# Distinct (CustomerID, Description) pairs, restricted to customers that have
# at least one row whose Description equals the target item.
dfPre <- unique(
  subdata[
    subdata$CustomerID %in% subdata[subdata$Description == targetItem, ]$CustomerID,
    c("CustomerID", "Description")
  ]
)
# Keep only the target item itself plus the AlsoPurchased values of the
# TblRule rows whose Purchased column equals the target item.
dfPre <- dfPre[
  dfPre$Description %in% c(
    targetItem,
    TblRule[TblRule$Purchased == targetItem, ]$AlsoPurchased
  ),
]
# Constant flag that becomes the cell value once the table is widened.
dfPre$Purchased <- 1
# One row per customer, one column per item description.
dfSummary <- spread(dfPre, key = Description, value = Purchased)
# Customer/item combinations that never occurred come out as NA; mark them 0.
dfSummary[is.na(dfSummary)] <- 0
@DSMom
DSMom / MBA
Created August 21, 2018 11:27
#### MBA analysis ####
library(arules)
library(tidyr)
library(dplyr)
# Keep only the rows whose CustomerID also appears in the RFM table.
subdata <- data[data$CustomerID %in% RFM$CustomerID, ]
# Reduce to the two columns used for the transactions: invoice and item.
trandata <- subdata[, c('InvoiceNo', 'Description')]
# Strip leading and trailing whitespace from the item descriptions.
trandata$Description <- trimws(trandata$Description, which = "both")
# Drop rows whose description is exactly 'Manual'.
trandata <- trandata[trandata$Description != 'Manual', ]
# Collapse repeated (invoice, item) rows, keeping the first occurrence.
trandata <- unique(trandata)
@DSMom
DSMom / RFM
Created August 21, 2018 11:25
#### RFM analysis #####
library(dplyr)
# filter to only United Kingdom
data <- data[data$Country == "United Kingdom", ]
# Per-row amount: Quantity times UnitPrice.
data$PxQ <- data$Quantity * data$UnitPrice
# Recency: number of days between each invoice date and the latest invoice
# date in the data. The column is converted to Date BEFORE taking the maximum:
# when InvoiceDate is a character column, max() on the raw strings compares
# them alphabetically rather than chronologically. Converting once also avoids
# parsing the column twice.
# NOTE(review): as.Date() on character input only recognises ISO-like formats
# by default - confirm the format of InvoiceDate or pass `format =` explicitly.
data$Recency <- local({
  invoiceDay <- as.Date(data$InvoiceDate)
  difftime(max(invoiceDay), invoiceDay, units = "days")
})
RFMresult <- data %>%
select(InvoiceNo, CustomerID, Recency, PxQ) %>%
group_by(CustomerID) %>%
#### data preparation ####
# Load the raw records; text columns stay character instead of factor.
data <- read.csv('data.csv', stringsAsFactors = FALSE)
# Remove incomplete records: a row is kept only when every column other than
# Description is non-missing (a missing Description alone is tolerated).
QC_logic <- complete.cases(data[, names(data) != 'Description'])
data <- data[QC_logic, ]
# Remove duplicated records, keeping the first occurrence of each row.
data <- unique(data)
# Remove returned records: rows whose InvoiceNo contains the letter "c"
# in either case.
data <- data[!grepl('c', tolower(data$InvoiceNo), fixed = TRUE), ]
# Compare the distribution obtained when drawing one sample at a time with the
# one obtained when drawing multiple samples in a single call.
seed <- 790256
n_sample <- 10000
# 2 x 2 grid of panels for the plots.
par(mfrow = c(2, 2))
# normal distribution #
# Fix the RNG state so the draw below is reproducible.
set.seed(seed)
# All n_sample standard-normal values are generated by one rnorm() call.
output_AllAtOnce <- rnorm(n = n_sample, mean = 0, sd = 1)
hist(
  output_AllAtOnce,
  freq = TRUE,
  main = 'Normal Distribution \n (draw multiple samples at a time)'
)