Skip to content

Instantly share code, notes, and snippets.

@MartijnSch
Last active November 12, 2020 14:24
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save MartijnSch/6fb6757e660669bd8e606a1457f482ed to your computer and use it in GitHub Desktop.
Save MartijnSch/6fb6757e660669bd8e606a1457f482ed to your computer and use it in GitHub Desktop.
R Script to Calculate CTR Curves with Google Search Console data
## A script to retrieve Google Search Console data for the last 90 days and plot a graph for CTR%
## It will retrieve the data from the Google Search Console API and make sure that it can be plotted for a visual graph
## Version 2: March 1, 2019
## Author: Martijn Scheijbeler (https://www.martijnscheijbeler.com)
## All credits go to Mark Edmondson for creating: https://github.com/MarkEdmondson1234/searchConsoleR & https://code.markedmondson.me/search-console-google-analytics-r-keyword-research/
## Load the required libraries: googleAuthR, searchConsoleR, dplyr, ggplot2
## (Download them with install.packages("googleAuthR"), install.packages("searchConsoleR"), install.packages("dplyr") and install.packages("ggplot2") if necessary.)
library(searchConsoleR)
library(dplyr)
library(ggplot2)
## ---- Authentication ----
## Start the OAuth flow for a Google account with access to the Search Console
## property queried below.
scr_auth()

## ---- Report settings ----
## chart_positions: upper limit of the x-axis (position) in both charts
## chart_title:     title shown above both charts
## filter_clicks:   rows are kept only when their clicks exceed this value
## dimensions:      dimensions requested from the API (default: page and query)
## website:         the Search Console siteURL property, including the
##                  http:// or https:// prefix
chart_positions <- 20
chart_title <- "Click Through Rate (CTR%) from Google Search Results"
filter_clicks <- 0
dimensions <- c("page", "query")
website <- "FULL DOMAIN PATH"

## ---- Reporting window ----
## Search Console data only becomes available after roughly 3 days, so the
## window ends 3 days ago and starts 92 days ago (90 days, both ends included).
end <- as.character(Sys.Date() - 3)
start <- as.character(Sys.Date() - 92)

## Make a call to the API up front to avoid working with a stale OAuth token:
## https://github.com/MarkEdmondson1234/searchConsoleR/issues/31
list_websites()
## Retrieve the data for the reporting window from the Google Search Console
## API, one row per combination of the requested dimensions. The filter
## expression excludes queries containing "share"; at most 50,000 rows are
## requested.
gsc_data <- search_analytics(
  siteURL = website,
  startDate = start,
  endDate = end,
  dimensions = dimensions,
  dimensionFilterExp = "query!~share",
  rowLimit = 50000
)
## Assign every row to a search results page (SERP) based on its position:
## positions 1-10 are page 1, 11-20 are page 2, ..., 91-100 are page 10.
## - right = FALSE makes the intervals [1, 11), [11, 21), ... so a position of
##   exactly 11, 21, ... belongs to the next page rather than the previous one.
## - The breaks run up to 101 (ten intervals) so positions above 91 get a page
##   instead of NA; include.lowest = TRUE closes the last interval, [91, 101].
gsc_data$serp <- cut(gsc_data$position,
                     breaks = seq(1, 101, 10),
                     labels = as.character(1:10),
                     right = FALSE,
                     include.lowest = TRUE,
                     ordered_result = TRUE)
## Add two columns, computed within each page:
##   positionRound - the position rounded to the nearest whole number
##   clickP        - the row's clicks as a fraction of all clicks for its page
gsc_data <- gsc_data %>%
  group_by(page) %>%
  mutate(
    positionRound = round(position),
    clickP = clicks / sum(clicks)
  ) %>%
  ungroup()
## Keep only the rows with more clicks than the configured minimum, then drop
## the page column, which the charts below do not use.
tidy_data <- select(
  filter(gsc_data, clicks > filter_clicks),
  -page
)
## Scatter plot of CTR against position, one point per row of tidy_data.
## Points are coloured by SERP page and sized by clicks, with transparency
## driven by log(clicks); a click-weighted smoother is drawn on top.
## The view is limited to positions 1..chart_positions and CTR 0-100%.
ctr_plot <- ggplot(tidy_data, aes(x = position, y = ctr)) +
  theme_minimal() +
  coord_cartesian(xlim = c(1, chart_positions), ylim = c(0, 1)) +
  geom_point(aes(alpha = log(clicks), color = serp, size = clicks)) +
  geom_smooth(aes(weight = clicks), size = 0.2) +
  scale_y_continuous(labels = scales::percent) +
  ggtitle(chart_title) +
  theme(plot.title = element_text(lineheight = 0.8, face = "bold"))
ctr_plot
## Build the click curve: one row per rounded position.
##   CTRmean         - mean clicks divided by mean impressions
##   n               - number of rows at this position
##   click.sum       - total clicks
##   impressions.sum - total impressions
##   sd              - standard deviation of the per-row CTR
##   E               - upper Poisson confidence bound of the click total
##                     divided by the lower bound of the impression total
##   lower / upper   - CTRmean minus / plus half of E
## NOTE(review): CTRmean +/- E/2 is not a standard confidence interval;
## confirm this is the intended band before relying on it.
##
## The CTR1* columns divide the first row's value by each row's value.
## NOTE(review): this presumes the first row is position 1; verify that
## position 1 survives the click filter for your data.
click_curve <- tidy_data %>%
  group_by(positionRound) %>%
  summarise(
    CTRmean = mean(clicks) / mean(impressions),
    n = n(),
    click.sum = sum(clicks),
    impressions.sum = sum(impressions),
    sd = sd(ctr),
    E = poisson.test(click.sum)$conf.int[2] /
      poisson.test(impressions.sum)$conf.int[1],
    lower = CTRmean - E / 2,
    upper = CTRmean + E / 2
  ) %>%
  ungroup() %>%
  mutate(
    CTR1 = CTRmean[1] / CTRmean,
    CTR1.upper = upper[1] / upper,
    CTR1.lower = lower[1] / lower
  )
## Line chart of the average CTR per rounded position: a dashed line with a
## labelled point per position, and an orange ribbon spanning lower..upper.
## The view is limited to positions 1..chart_positions and CTR 0-50%.
hh <- ggplot(click_curve, aes(positionRound, CTRmean)) +
  theme_minimal() +
  geom_line(linetype = 2) +
  coord_cartesian(xlim = c(1, chart_positions), ylim = c(0, 0.5)) +
  geom_ribbon(
    aes(positionRound, ymin = lower, ymax = upper),
    alpha = 0.2,
    fill = "orange"
  ) +
  scale_y_continuous(labels = scales::percent) +
  geom_point() +
  geom_label(aes(label = scales::percent(CTRmean))) +
  ggtitle(chart_title) +
  theme(plot.title = element_text(lineheight = 0.8, face = "bold"))
hh
## Write the data of the Click Curve to a CSV for later use.
# write.csv(click_curve, file = "~/ClickCurve.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment