Last active
September 15, 2017 22:04
-
-
Save justinjm/bbd05c85aee2ae39de635f1a7a460ec7 to your computer and use it in GitHub Desktop.
A function to export multiple datasets from the Google Analytics Core Reporting API v4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(googleAuthR) | |
library(googleAnalyticsR) | |
library(dplyr) | |
#' Get Google Analytics Data for Dashboards
#'
#' For every GA view in `gaViewIds`, query session-level and hit-level data
#' with `google_analytics_4()`, inner-join the two results and save the
#' joined data frame as `data/viewId_<viewId>.rds`.
#'
#' For each view, an object named `lookup<viewId>` and a helper `set_names()`
#' must be reachable when this function runs (the inline comments of the
#' original script say they come from config.R). The lookup is converted to
#' a one-column data frame whose row names are used as GA API field names
#' and whose values are used as display names -- presumably a named
#' character vector; confirm against config.R.
#'
#' @param dates date range (from and to dates), passed unchanged as
#'   `date_range` to `google_analytics_4()`
#' @param gaViewIds list of ga profile ids as strings
#' @param sessionMetrics display names (lookup values) of the metrics to
#'   request in the session-level query
#' @param sessionDimensions display names (lookup values) of the dimensions
#'   to request in the session-level query
#' @param hitMetrics display names (lookup values) of the metrics to request
#'   in the hit-level query
#' @param hitDimensions display names (lookup values) of the dimensions to
#'   request in the hit-level query
#'
#' @return Called for its side effects: one rds file per view is written.
#'   The value is that of the final `print()` call (the completion message).
get_data <- function(dates,
                     gaViewIds,
                     sessionMetrics,
                     sessionDimensions,
                     hitMetrics,
                     hitDimensions){
  # Keep the rows of `lookups` whose display name is in `wanted`, exposing
  # the row names as column `gaName`; the result is also printed as a log.
  pick_fields <- function(lookups, wanted) {
    lookups %>%
      tibble::rownames_to_column(var = "gaName") %>%
      dplyr::filter(displayName %in% wanted) %>%
      print()
  }

  print("[-] starting to get data for dashboards...")

  # The account list does not depend on the view, so fetch it only once
  # instead of once per view.
  ga_accounts <- ga_account_list()

  for (view_id in gaViewIds) {
    # Get GA metadata for printing and timezone setting
    ga_account_info <- ga_accounts %>%
      dplyr::filter(viewId == view_id)
    if (nrow(ga_account_info) == 0) {
      stop("GA view ", view_id, " not found in ga_account_list()",
           call. = FALSE)
    }

    ## save GA view metadata for ease of reference
    view <- ga_view(ga_account_info$accountId,
                    webPropertyId = ga_account_info$webPropertyId,
                    profileId = ga_account_info$viewId)

    # load lookup names from config.R (fetched once, reused for renaming)
    lookup_names <- get(paste0("lookup", view_id))
    lookups <- as.data.frame(lookup_names, col.names = c("x"))
    # set dataframe column name for ease of reference in parameterizing
    # queries
    names(lookups) <- "displayName"

    print(paste0("[?] getting sesssion data from GA view: ", view$name , " | ", view_id))
    # session metrics and dimensions for the parametrized query
    session_metrics <- pick_fields(lookups, sessionMetrics)
    session_dimensions <- pick_fields(lookups, sessionDimensions)
    # `dates` is the caller-supplied range; the previous code read an
    # undefined `date_range` here and ignored the `dates` argument.
    sessions <- google_analytics_4(view_id,
                                   date_range = dates,
                                   metrics = session_metrics[["gaName"]],
                                   dimensions = session_dimensions[["gaName"]],
                                   order = order_type("sessions",
                                                      sort_order = "DESCENDING"),
                                   max = -1,
                                   anti_sample = TRUE)
    # rename dataframe columns from lookup names in config.R
    colnames(sessions) <- set_names(sessions, lookup_names = lookup_names)

    print(paste0("[?] getting hit data from GA view: ", view$name , " | ", view_id))
    # hit metrics and dimensions for the parametrized query
    hit_metrics <- pick_fields(lookups, hitMetrics)
    hit_dimensions <- pick_fields(lookups, hitDimensions)
    # NOTE(review): the sort field is spelled "uniquePageViews" while the
    # column selected below is `uniquePageviews` -- confirm the casing the
    # API expects.
    hits <- google_analytics_4(view_id,
                               date_range = dates,
                               metrics = hit_metrics[["gaName"]],
                               dimensions = hit_dimensions[["gaName"]],
                               order = order_type("uniquePageViews",
                                                  sort_order = "DESCENDING"),
                               max = -1,
                               anti_sample = TRUE)
    # rename dataframe columns from lookup names in config.R
    colnames(hits) <- set_names(hits, lookup_names = lookup_names)

    # join session and hit data into a single data frame
    ga_all <- inner_join(sessions, hits,
                         by = c(date = "date",
                                sessionFirstArticlePublishDate = "hitArticlePublishDate",
                                sessionFirstArticleTitle = "hitArticleTitle",
                                sessionFirstArticleAuthor = "hitArticleAuthor"))

    ### format columns
    ### view timezone set in ga view
    print(view$timezone)
    ## date conversions
    ga_all$sessionFirstArticlePublishDate <- as.Date(ga_all$sessionFirstArticlePublishDate,
                                                     format="%Y %m %d",
                                                     tz=view$timezone)

    ## order columns and write out to rds object
    ga_all %>%
      dplyr::select(date,
                    sessionFirstArticlePublishDate,
                    sessionFirstArticleTitle,
                    sessionFirstArticleAuthor,
                    uniquePageviews,
                    sessions,
                    events,
                    eventConversionRate,
                    transactions,
                    revenue) %>%
      saveRDS(file = paste0("data/viewId_", view_id, ".rds"))
  }

  print("[X] gathering of data for dashboards complete.")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment