Skip to content

Instantly share code, notes, and snippets.

View MarkEdmondson1234's full-sized avatar
🦑
Tappity tap tap

Mark Edmondson MarkEdmondson1234

🦑
Tappity tap tap
View GitHub Profile
library(idbr) # devtools::install_github('walkerke/idbr')
library(ggplot2)
library(animation)
library(dplyr)
library(ggthemes)
idb_api_key("Your Census API key goes here")
male <- idb1('JA', 2010:2050, sex = 'male') %>%
mutate(POP = POP * -1,
@MarkEdmondson1234
MarkEdmondson1234 / costdata.gs
Created February 15, 2016 14:15 — forked from chipoglesby/costdata.gs
Cost Data Upload via the Google Analytics Management API with Google Sheets
function uploadData() {
var accountId = "xxxxxxxx";
var webPropertyId = "UA-xxxxxxxx-x";
var customDataSourceId = "xxxxxxxx";
var ss = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
var maxRows = ss.getLastRow();
var maxColumns = ss.getLastColumn();
var data = [];
for (var i = 1; i < maxRows;i++) {
data.push(ss.getRange([i], 1,1, maxColumns).getValues());
@MarkEdmondson1234
MarkEdmondson1234 / animate.R
Created February 8, 2016 22:34 — forked from thomasp85/animate.R
Animating graph over time
library(ggraph)
library(gganimate)
library(igraph)
# Data from http://konect.uni-koblenz.de/networks/sociopatterns-infectious
# Read the space-separated file, ignoring its first two lines.
infect <- read.table(
  file = "out.sociopatterns-infectious",
  skip = 2,
  sep = " ",
  stringsAsFactors = FALSE
)
# Discard column V3 and label the remaining columns.
infect <- infect[setdiff(names(infect), "V3")]
names(infect) <- c("from", "to", "time")
# Split the observed time range into 100 intervals and keep the interval index.
time_bin <- cut(infect$time, breaks = 100)
infect$timebins <- as.numeric(time_bin)
# We want that nice fading effect so we need to add extra data for the trailing
# Attach each row's cluster assignment (k$cluster) to the clustered data.
kResults <- data.frame(k_data, cluster = k$cluster)
## Transform data for columns of cluster, rows of Sku with value of mean total for each
# Cluster ids come from the data instead of a hard-coded 1:4, so the summary
# still covers every cluster if the number of centres changes.
cluster_ids <- sort(unique(kResults$cluster))
feature_cols <- setdiff(names(kResults), "cluster")
rl <- as.data.frame(lapply(cluster_ids, function(id) {
  # drop = FALSE keeps a data frame even when only one feature column exists;
  # without it the subset collapses to a vector and the column mean fails.
  members <- kResults[kResults$cluster == id, feature_cols, drop = FALSE]
  colMeans(members)
}))
names(rl) <- paste("cluster", cluster_ids)
# Determine number of clusters
## run kmeans for varying number of clusters 1 to 15
max_k <- 15
wss <- numeric(max_k)
# One cluster: within-group sum of squares is the total sum of squares,
# i.e. (n - 1) times the summed per-column variances.
wss[1] <- (nrow(comp) - 1) * sum(apply(comp, 2, var))
for (n_centers in 2:max_k) {
  fit <- kmeans(comp, centers = n_centers)
  wss[n_centers] <- sum(fit$withinss)
}
plot(
  1:max_k, wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
# From scree plot elbow occurs at k = 4-6
## Finding number of components
# Fit a principal components analysis on model_data.
pc <- princomp(model_data)
# Draw the component variances as a line chart.
plot(pc, type="l")
# look for dimension that is ~ 85% variance
summary(pc)
# Print the loadings of the fitted components.
loadings(pc)
# run more convenient pca needed for k-means
# This overwrites the princomp fit stored in `pc` above.
# NOTE(review): this call uses k_data whereas princomp above used model_data -
# confirm the two inputs are meant to differ.
pc <- prcomp(k_data)
## function to get plot data format
getCompareTable <- function (test_data, prediction) {
require(dplyr)
## plot real vs model bought Sku
actual_freq <- table(model_data$boughtSku)
predicted_freq <- table(prediction)
actual_freq <- actual_freq[order(actual_freq)]
predicted_freq <- predicted_freq[order(predicted_freq)]
library(randomForest)
## warning - can take a long time (30mins)
# Fit a random forest of `response` on `predictors`.
rf <- randomForest(
  x = predictors,
  y = response
)
## once model done, we run it using test data and compare results to reality
# Everything except the id and the outcome column is used as a predictor.
excluded_cols <- c("dimension1", "boughtSku")
is_predictor <- !(names(test) %in% excluded_cols)
predictor_test <- test[, is_predictor]
response_test <- as.factor(test[, "boughtSku"])
## check result on test set
prediction <- predict(rf, newdata = predictor_test)
## want: 30049 x 187
## userId, product1_view, product2_view, ...., productN_view, productBought
# Reshape product_views to wide form: one row per dimension1 value and one
# column per productSku/variable combination, summing the values that fall
# into the same cell.
pv <- reshape2::recast(product_views,
dimension1 ~ productSku + variable,
fun.aggregate=sum)
library(dplyr)
## if a user buys more than once, the row will be duplicated
# Keep only the purchased SKU and the user id column from the transactions.
pt <- select(product_trans, productSku, dimension1)
library(googleAnalyticsR_public)
# Authenticate with new_user switched on. TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned, which would silently change
# the argument.
gar_auth(new_user = TRUE)
## your profile view Id
id <- "XXXXXX"
## 61607 results
## 30049 unique Ids
## 185 Sku's