This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(idbr) # devtools::install_github('walkerke/idbr') | |
library(ggplot2) | |
library(animation) | |
library(dplyr) | |
library(ggthemes) | |
idb_api_key("Your Census API key goes here") | |
male <- idb1('JA', 2010:2050, sex = 'male') %>% | |
mutate(POP = POP * -1, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function uploadData() { | |
var accountId = "xxxxxxxx"; | |
var webPropertyId = "UA-xxxxxxxx-x"; | |
var customDataSourceId = "xxxxxxxx"; | |
var ss = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet(); | |
var maxRows = ss.getLastRow(); | |
var maxColumns = ss.getLastColumn(); | |
var data = []; | |
for (var i = 1; i < maxRows;i++) { | |
data.push(ss.getRange([i], 1,1, maxColumns).getValues()); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggraph) | |
library(gganimate) | |
library(igraph) | |
# Source data: http://konect.uni-koblenz.de/networks/sociopatterns-infectious
# The first two lines of the file are skipped; fields are separated by a
# single space.
infect <- read.table(
  "out.sociopatterns-infectious",
  skip = 2,
  sep = " ",
  stringsAsFactors = FALSE
)
# Discard the third column, then label the remaining ones.
infect[["V3"]] <- NULL
names(infect) <- c("from", "to", "time")
# Cut the range of `time` into 100 bins and keep each row's bin index.
infect$timebins <- as.numeric(cut(infect$time, breaks = 100))
# We want that nice fading effect so we need to add extra data for the trailing |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach each observation's k-means cluster assignment to the clustering data.
kResults <- data.frame(k_data, cluster = k$cluster)
## Build a table with one column per cluster and one row per feature column
## (the Sku columns, per the original note), holding that feature's mean
## within the cluster.
## Cluster ids come from the assignments instead of a hard-coded 1:4, so the
## table follows however many centers `k` was fitted with.
cluster_ids <- sort(unique(kResults$cluster))
feature_cols <- setdiff(names(kResults), "cluster")
rl <- as.data.frame(lapply(cluster_ids, function(x) {
  # drop = FALSE keeps a data frame even when there is only one feature
  # column; without it the subset collapses to a vector and colMeans() fails.
  r3 <- kResults[kResults$cluster == x, feature_cols, drop = FALSE]
  colMeans(r3)
}))
names(rl) <- paste("cluster", cluster_ids)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Choose the number of clusters
## Within-groups sum of squares for 1 to 15 clusters: the single-cluster
## value comes straight from the column variances, the rest from kmeans fits.
wss <- numeric(15)
wss[1] <- (nrow(comp) - 1) * sum(apply(comp, 2, var))
for (i in 2:15) {
  wss[i] <- sum(kmeans(comp, centers = i)$withinss)
}
plot(
  1:15, wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
# From scree plot elbow occurs at k = 4-6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Decide how many principal components to keep
pc <- princomp(model_data)
# Scree plot of the component variances, drawn as a line.
screeplot(pc, type = "lines")
# Look for the number of dimensions that reaches roughly 85% of the variance.
summary(pc)
loadings(pc)
# Refit with prcomp, which is more convenient for the k-means step.
# NOTE(review): this fit uses k_data while the one above uses model_data;
# confirm that is intended.
pc <- prcomp(k_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## function to get plot data format | |
getCompareTable <- function (test_data, prediction) { | |
require(dplyr) | |
## plot real vs model bought Sku | |
actual_freq <- table(model_data$boughtSku) | |
predicted_freq <- table(prediction) | |
actual_freq <- actual_freq[order(actual_freq)] | |
predicted_freq <- predicted_freq[order(predicted_freq)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(randomForest) | |
## Fit the random forest. Warning: this can take a long time (30mins).
rf <- randomForest(x = predictors, y = response)
## With the model fitted, separate the test data into predictor columns and
## the observed response so predictions can be compared with reality.
predictor_test <- test[, !(names(test) %in% c("dimension1", "boughtSku"))]
response_test <- as.factor(test[, "boughtSku"])
## Predict on the test set.
prediction <- predict(rf, predictor_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Target shape: 30049 x 187
## Columns: userId, product1_view, product2_view, ..., productN_view,
## productBought
pv <- reshape2::recast(
  product_views,
  dimension1 ~ productSku + variable,
  fun.aggregate = sum
)
library(dplyr)
## A user who buys more than once appears on more than one row.
pt <- select(product_trans, productSku, dimension1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(googleAnalyticsR_public) | |
# Authenticate with new_user set (presumably forces a fresh login; confirm
# against the package docs). TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved word.
gar_auth(new_user = TRUE)
## your profile view Id
id <- "XXXXXX"
## 61607 results | |
## 30049 unique Ids | |
## 185 Sku's |