Skip to content

Instantly share code, notes, and snippets.

@justinjm
Last active September 15, 2017 22:04
Show Gist options
  • Save justinjm/bbd05c85aee2ae39de635f1a7a460ec7 to your computer and use it in GitHub Desktop.
Save justinjm/bbd05c85aee2ae39de635f1a7a460ec7 to your computer and use it in GitHub Desktop.
A function to export multiple datasets from the Google Analytics Core Reporting API v4
library(googleAuthR)
library(googleAnalyticsR)
library(dplyr)
#' Get Google Analytics Data for Dashboards
#'
#' For each GA view, query session-level and hit-level data from the
#' Google Analytics Reporting API v4, join the two result sets, and save
#' the joined data as `data/viewId_<viewId>.rds`.
#'
#' Relies on objects defined outside this function (per the original
#' comments, in config.R): one `lookup<viewId>` object per view and a
#' `set_names()` helper that maps GA column names to display names.
#'
#' @param dates date range passed unchanged as `date_range` to
#'   `google_analytics_4()` (from and to dates).
#' @param gaViewIds list of ga profile ids as strings
#' @param sessionMetrics display names (values of the view's lookup) of the
#'   metrics to request in the session-level query.
#' @param sessionDimensions display names of the dimensions to request in
#'   the session-level query.
#' @param hitMetrics display names of the metrics to request in the
#'   hit-level query.
#' @param hitDimensions display names of the dimensions to request in the
#'   hit-level query.
#'
#' @return Invisibly, the final status message string. The function is
#'   called for its side effect: one rds file is written per view.
get_data <- function(dates,
                     gaViewIds,
                     sessionMetrics,
                     sessionDimensions,
                     hitMetrics,
                     hitDimensions) {
  print("[-] starting to get data for dashboards...")

  # Keep only the lookup rows whose display name was requested; the lookup's
  # row names (the GA API field names) are exposed as column `gaName`.
  # The result is printed so the selection is visible in the run log.
  select_lookup_fields <- function(lookups, display_names) {
    lookups %>%
      tibble::rownames_to_column(var = "gaName") %>%
      dplyr::filter(displayName %in% display_names) %>%
      print()
  }

  # The account list does not depend on the view, so fetch it once instead
  # of once per view.
  account_list <- ga_account_list()

  # saveRDS() does not create missing directories.
  if (!dir.exists("data")) {
    dir.create("data", recursive = TRUE)
  }

  for (view_id in gaViewIds) {
    # Get GA metadata for printing and timezone setting
    ga_account_info <- account_list %>%
      dplyr::filter(viewId == view_id)
    if (nrow(ga_account_info) == 0) {
      stop("GA view id not found in ga_account_list(): ", view_id,
           call. = FALSE)
    }

    ## save GA view metadata for ease of reference
    view <- ga_view(ga_account_info$accountId,
                    webPropertyId = ga_account_info$webPropertyId,
                    profileId = ga_account_info$viewId)

    # load lookup names from config.R
    # presumably a named vector: names = GA field names, values = display
    # names -- TODO confirm against config.R
    lookup_names <- get(paste0("lookup", view_id))
    lookups <- as.data.frame(lookup_names, col.names = c("x"))
    # set dataframe column name for ease of reference in parameterizing
    # queries
    names(lookups) <- "displayName"

    print(paste0("[?] getting sesssion data from GA view: ", view$name, " | ", view_id))
    session_metrics <- select_lookup_fields(lookups, sessionMetrics)
    session_dimensions <- select_lookup_fields(lookups, sessionDimensions)

    sessions <- google_analytics_4(view_id,
                                   date_range = dates,
                                   metrics = session_metrics[["gaName"]],
                                   dimensions = session_dimensions[["gaName"]],
                                   order = order_type("sessions",
                                                      sort_order = "DESCENDING"),
                                   max = -1,
                                   anti_sample = TRUE)
    # rename dataframe columns from lookup names in config.R
    colnames(sessions) <- set_names(sessions, lookup_names = lookup_names)

    print(paste0("[?] getting hit data from GA view: ", view$name, " | ", view_id))
    hit_metrics <- select_lookup_fields(lookups, hitMetrics)
    hit_dimensions <- select_lookup_fields(lookups, hitDimensions)

    # NOTE(review): "uniquePageViews" differs in case from the
    # `uniquePageviews` column selected below -- confirm the intended GA
    # field name for ordering.
    hits <- google_analytics_4(view_id,
                               date_range = dates,
                               metrics = hit_metrics[["gaName"]],
                               dimensions = hit_dimensions[["gaName"]],
                               order = order_type("uniquePageViews",
                                                  sort_order = "DESCENDING"),
                               max = -1,
                               anti_sample = TRUE)
    # rename dataframe columns from lookup names in config.R
    colnames(hits) <- set_names(hits, lookup_names = lookup_names)

    # join session and hit data into a single data frame
    ga_all <- dplyr::inner_join(
      sessions, hits,
      by = c(date = "date",
             sessionFirstArticlePublishDate = "hitArticlePublishDate",
             sessionFirstArticleTitle = "hitArticleTitle",
             sessionFirstArticleAuthor = "hitArticleAuthor")
    )

    ### format columns
    ### view timezone set in ga view
    print(view$timezone)
    ## date conversions
    ga_all$sessionFirstArticlePublishDate <- as.Date(
      ga_all$sessionFirstArticlePublishDate,
      format = "%Y %m %d",
      tz = view$timezone
    )

    ## order columns and write out to rds object
    ga_all %>%
      dplyr::select(date,
                    sessionFirstArticlePublishDate,
                    sessionFirstArticleTitle,
                    sessionFirstArticleAuthor,
                    uniquePageviews,
                    sessions,
                    events,
                    eventConversionRate,
                    transactions,
                    revenue) %>%
      saveRDS(file = paste0("data/viewId_", view_id, ".rds"))
  }

  print("[X] gathering of data for dashboards complete.")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment