This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> n_simu <- 10000 | |
> ptc <- proc.time() | |
> MC_pullAll <- get_sample(n_simu, no_sample = n_simu) | |
> proc.time() - ptc | |
user system elapsed | |
0.065 0.004 0.072 | |
> ptc <- proc.time() | |
> MC_pullOne <- get_sample(n_simu, no_sample = 1) | |
> proc.time() - ptc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################# | |
# MC_OilInPlace.R | |
# Monte Carlo simulation for oil in place | |
# | |
# http://petrowiki.org/Monte_Carlo_simulation | |
# http://www.statvision.com/webinars/Monte%20Carlo%20Simulation.pdf | |
# | |
# Author: Yang Cong | |
# Created Date: 7/7/2018 | |
# Modified Date: 7/8/2018 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a customer-by-item 0/1 purchase matrix for one target item and the
# items that TblRule associates with it.
targetItem <- "PINK REGENCY TEACUP AND SAUCER"
# Customers with at least one row for the target item. %in% never yields NA,
# so a missing Description cannot inject an NA CustomerID into the set.
targetCustomers <- subdata$CustomerID[subdata$Description %in% targetItem]
dfPre <- subdata[subdata$CustomerID %in% targetCustomers, c("CustomerID", "Description")]
# One row per (customer, item) pair.
dfPre <- unique(dfPre)
# Keep only the target item and the AlsoPurchased entries of the TblRule rows
# whose Purchased column equals the target item.
relatedItems <- c(targetItem, TblRule$AlsoPurchased[TblRule$Purchased %in% targetItem])
dfPre <- dfPre[dfPre$Description %in% relatedItems, ]
# Flag every remaining pair as purchased, then widen to one column per item.
dfPre$Purchased <- 1
dfSummary <- spread(dfPre, Description, Purchased)
# Pairs that never occurred come out of spread() as NA; record them as 0.
dfSummary[is.na(dfSummary)] <- 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### MBA analysis ####
library(arules)
library(tidyr)
library(dplyr)
# Keep only the rows whose CustomerID also appears in the RFM table.
subdata <- data[data$CustomerID %in% RFM$CustomerID, ]
# Transaction table: one row per (invoice, item description) pair.
trandata <- subdata[, c("InvoiceNo", "Description")]
trandata$Description <- trimws(trandata$Description, which = "both")
# Drop the 'Manual' entries. `!=` returns NA for a missing Description, which
# would turn that row into an all-NA row when used as a row index; %in% never
# returns NA, so such rows pass through unchanged.
trandata <- trandata[!(trandata$Description %in% "Manual"), ]
# Remove repeated (invoice, item) pairs.
trandata <- unique(trandata)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### RFM analysis #####
library(dplyr)
# filter to only United Kingdom
data <- data[data$Country == "United Kingdom", ]
# Line total: quantity times unit price.
data$PxQ <- data$Quantity * data$UnitPrice
# Recency: days between each invoice date and the latest invoice date.
# Convert to Date BEFORE taking the maximum; max() on a character column
# compares strings lexicographically, which need not pick the latest date.
# NOTE(review): as.Date() is called without an explicit format, as in the
# original -- confirm the InvoiceDate strings are in a format it parses.
invoice_date <- as.Date(data$InvoiceDate)
data$Recency <- difftime(max(invoice_date), invoice_date, units = "days")
RFMresult <- data %>% | |
select(InvoiceNo, CustomerID, Recency, PxQ) %>% | |
group_by(CustomerID) %>% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### data preparation ####
# load data
data <- read.csv("data.csv", stringsAsFactors = FALSE)
# remove incomplete records; the Description column is left out of the
# completeness check, so a row missing only its Description is kept
QC_logic <- complete.cases(data[, !(names(data) %in% "Description")])
data <- data[QC_logic, ]
# remove duplicated records
data <- unique(data)
# remove returned records: rows whose InvoiceNo contains the letter "c"
# (either case) are dropped
data <- data[!grepl("c", data$InvoiceNo, ignore.case = TRUE), ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# compare distribution of drawing 1 sample at a time with drawing multiple samples at a time
seed <- 790256
n_sample <- 10000
# Lay plots out on a 2 x 2 grid.
par(mfrow = c(2, 2))
# normal distribution #
# Reset the RNG from the fixed seed so the draw below is reproducible.
set.seed(seed)
# Draw all n_sample standard-normal values in a single rnorm() call.
output_AllAtOnce <- rnorm(n_sample, mean = 0, sd = 1)
hist(output_AllAtOnce, freq = TRUE, main = "Normal Distribution \n (draw multiple samples at a time)")