Last active
May 23, 2021 18:47
-
-
Save Leszek-Sieminski/e8bb1f930ee70bd934e1d34a4295c910 to your computer and use it in GitHub Desktop.
Downloading GSC data with R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Description: The aim of this Gist is to show a simple way of downloading Google Search Console API data through R | |
# Created: 2020-04-28 | |
# Author: Leszek Sieminski | |
# | |
# Goals: | |
# 1. Full - no data inconsistencies allowed | |
# 2. Stable - not ruining Google API limits | |
# 3. Functional - based on functions that encapsulate specific tasks. | |
# | |
# The idea: download all the data for every single day and loop this process for all dates in a date range | |
# | |
# The execution: there are two custom functions: | |
# 1) gsc_date_range() that creates date range (optional but handy) | |
# 2) the gsc_download() function which is a wrapper for searchConsoleR's package search_analytics() function | |
# which downloads up to 100k rows for a single date
# Those two functions are combined in purrr's package map_dfr() loop which repeats the download for every date | |
# and returns a data frame with all the results. | |
# To ensure that the API limits are not abused, R waits a specified amount of time between the calls.
# libraries ------------------------------------------------------------------------------------------------ | |
library("purrr")
library("searchConsoleR")
# authentication of Google Search Console ------------------------------------------------------------------ | |
# depends on your searchConsoleR and gargle packages' versions, so find out what is working for you | |
# searchConsoleR::scr_auth() # simple example | |
# declare a function that creates a date range -------------------------------------------------------------
# start - first date of the range; a Date or anything as.Date() can convert
# end   - last date of the range; same accepted inputs as `start`
# returns a Date vector with one element per day, `start` and `end` included
# errors if either bound is missing / not a single value, or if `end` precedes `start`
gsc_date_range <- function(start, end) {
  first_day <- as.Date(start)
  last_day <- as.Date(end)

  # Fail early with a readable message instead of seq.Date()'s internal errors.
  if (length(first_day) != 1L || length(last_day) != 1L ||
      is.na(first_day) || is.na(last_day)) {
    stop("`start` and `end` must each be a single, non-missing date.",
         call. = FALSE)
  }
  if (last_day < first_day) {
    stop("`end` must not be earlier than `start`.", call. = FALSE)
  }

  seq.Date(from = first_day, to = last_day, by = "day")
}
# declare a function that downloads GSC data from a single date with chosen dimensions ----------------------
# single_date - a single date, like in as.Date("2020-01-01"); used as both startDate and endDate
# url         - the URL of the website with GSC API project (passed on as siteURL)
# dim1        - first (mandatory) dimension: page, query etc. "date" and "device" are always requested
# dim2        - second (optional) dimension: page, query etc. The default NULL is dropped by c()
# wait        - seconds to pause after the API call; defaults to the previously hard-coded 3
# returns the result of searchConsoleR::search_analytics() for that single day
gsc_download <- function(single_date, url, dim1, dim2 = NULL, wait = 3) {
  stopifnot(is.numeric(wait), length(wait) == 1L, !is.na(wait), wait >= 0)

  gsc_data <- searchConsoleR::search_analytics(
    siteURL = url,
    startDate = single_date,
    endDate = single_date,
    dimensions = c("date", "device", dim1, dim2),
    walk_data = "byBatch",
    searchType = "web",
    rowLimit = 100000
  )

  # We're going to make a lot of API calls, so pause between them to stay
  # within the API limits.
  Sys.sleep(wait)

  gsc_data
}
# download the data -----------------------------------------------------------------------------------------
# purrr::map_dfr() calls gsc_download() once per date in the vector passed as .x
# and row-binds the per-day results into a single data frame.
# NOTE: `site` is not defined anywhere in this script - assign your GSC property
# URL to it before running this step.
# The window runs from 93 to 3 days before today (presumably because recent GSC
# data arrives with a delay - adjust as needed).
df_page_query <- purrr::map_dfr(
  .x = gsc_date_range(
    start = Sys.Date() - 93,
    end = Sys.Date() - 3
  ),
  .f = gsc_download,
  url = site,
  dim1 = "page",
  dim2 = "query"
)
# finish!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment