Last active
November 12, 2020 14:24
-
-
Save MartijnSch/6fb6757e660669bd8e606a1457f482ed to your computer and use it in GitHub Desktop.
R Script to Calculate CTR Curves with Google Search Console data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## A script to retrieve Google Search Console data for the last 90 days and plot a graph for CTR% | |
## It will retrieve the data from the Google Search Console API and make sure that it can be plotted for a visual graph | |
## Version 2: March 1, 2019 | |
## Author: Martijn Scheijbeler (https://www.martijnscheijbeler.com) | |
## All credits go to Mark Edmondson for creating: https://github.com/MarkEdmondson1234/searchConsoleR & https://code.markedmondson.me/search-console-google-analytics-r-keyword-research/ | |
## Load the required libraries: searchConsoleR, dplyr and ggplot2.
## googleAuthR is listed as a requirement but is not attached with library() here; scales is used via scales::percent.
## (Install any missing package with install.packages("googleAuthR"), install.packages("searchConsoleR"), install.packages("dplyr") and install.packages("ggplot2").)
library(searchConsoleR) | |
library(dplyr) | |
library(ggplot2) | |
## Authenticate the user that has access to Google Search Console.
scr_auth()

## ---- Configuration ----
## website:         the Search Console siteURL property, including http:// or https://
## dimensions:      the dimensions to request, default: page and query
## filter_clicks:   keep only rows with more clicks than this value, default: 0
## chart_title:     title shown on both charts
## chart_positions: highest position shown on the x-axis of both charts
website <- "FULL DOMAIN PATH"
dimensions <- c("page", "query")
filter_clicks <- 0
chart_title <- "Click Through Rate (CTR%) from Google Search Results"
chart_positions <- 20

## Search Console data can only be retrieved once it is ~3 days old, so the
## window ends 3 days ago and starts 92 days ago (90 days, both ends included).
start <- format(Sys.Date() - 92)
end <- format(Sys.Date() - 3)

# Listing the websites first avoids working with a stale OAuth token, see
# https://github.com/MarkEdmondson1234/searchConsoleR/issues/31
list_websites()
## Retrieve the data from the Google Search Console API for the configured
## site, date window and dimensions. The filter expression "query!~share" is
## passed through as-is and up to 50000 rows are requested.
gsc_data <- search_analytics(
  siteURL = website,
  startDate = start,
  endDate = end,
  dimensions = dimensions,
  dimensionFilterExp = "query!~share",
  rowLimit = 50000
)
## Assign each row to a search result page (SERP) based on its position.
## Page 1 covers positions [1, 11), page 2 covers [11, 21), and so on up to
## page 10 for positions [91, 101]. Intervals are left-closed so that a whole
## position such as 11 lands on page 2 rather than page 1, and the breaks run
## up to 101 so that positions above 91 are no longer turned into NA.
gsc_data$serp <- cut(
  gsc_data$position,
  breaks = seq(1, 101, by = 10),
  labels = as.character(1:10),
  right = FALSE,
  include.lowest = TRUE, # with right = FALSE this closes the last interval at 101
  ordered_result = TRUE
)
## Round each position to the nearest whole number, then calculate which
## share of a page's total clicks every row received.
gsc_data <- gsc_data %>%
  mutate(positionRound = round(position)) %>%
  group_by(page) %>%
  mutate(clickP = clicks / sum(clicks)) %>%
  ungroup()
## Keep only the rows with more clicks than `filter_clicks` and drop the
## page column from the result.
tidy_data <- select(
  filter(gsc_data, clicks > filter_clicks),
  -page
)
## Scatter plot of CTR against position, one point per row of tidy_data.
## Points are colored by SERP page, sized by clicks and faded by log(clicks);
## a smoothed curve weighted by clicks is drawn on top. The x-axis is limited
## to positions 1..chart_positions and the y-axis is shown as a percentage.
ctr_plot <- ggplot(tidy_data, aes(x = position, y = ctr)) +
  theme_minimal() +
  coord_cartesian(xlim = c(1, chart_positions), ylim = c(0, 1)) +
  geom_point(aes(alpha = log(clicks), color = serp, size = clicks)) +
  geom_smooth(aes(weight = clicks), size = 0.2) +
  scale_y_continuous(labels = scales::percent) +
  ggtitle(chart_title) +
  theme(plot.title = element_text(lineheight = 0.8, face = "bold"))
ctr_plot
## Build the click curve used by the second chart: one summary row per
## rounded position.
##   CTRmean          mean clicks divided by mean impressions
##   n                number of rows on that position
##   click.sum        total clicks
##   impressions.sum  total impressions
##   sd               standard deviation of the per-row ctr
##   E                upper Poisson confidence limit of click.sum divided by
##                    the lower Poisson confidence limit of impressions.sum
##   lower / upper    CTRmean minus / plus half of E (the ribbon bounds)
## NOTE(review): E is a ratio of confidence limits rather than an interval
## width, so the ribbon may not be a true confidence interval -- confirm.
##
## CTR1, CTR1.upper and CTR1.lower then relate the first summary row (assumed
## to be position 1 -- verify the data contains it) to every position.
click_curve <- tidy_data %>%
  group_by(positionRound) %>%
  summarise(
    CTRmean = mean(clicks) / mean(impressions),
    n = n(),
    click.sum = sum(clicks),
    impressions.sum = sum(impressions),
    sd = sd(ctr),
    E = poisson.test(click.sum)$conf.int[2] /
      poisson.test(impressions.sum)$conf.int[1],
    lower = CTRmean - E / 2,
    upper = CTRmean + E / 2
  ) %>%
  ungroup() %>%
  mutate(
    CTR1 = CTRmean[1] / CTRmean,
    CTR1.upper = upper[1] / upper,
    CTR1.lower = lower[1] / lower
  )
## Line chart of the average CTR per rounded position, with an orange ribbon
## between the `lower` and `upper` columns and a percentage label on every
## point. The x-axis is limited to positions 1..chart_positions and the
## y-axis to 0%..50%.
hh <- ggplot(click_curve, aes(positionRound, CTRmean)) +
  theme_minimal() +
  geom_line(linetype = 2) +
  coord_cartesian(xlim = c(1, chart_positions), ylim = c(0, 0.5)) +
  geom_ribbon(
    aes(positionRound, ymin = lower, ymax = upper),
    alpha = 0.2,
    fill = "orange"
  ) +
  scale_y_continuous(labels = scales::percent) +
  geom_point() +
  geom_label(aes(label = scales::percent(CTRmean))) +
  ggtitle(chart_title) +
  theme(plot.title = element_text(lineheight = 0.8, face = "bold"))
hh
## Write the data of the Click Curve to a CSV for later use. | |
# write.csv(click_curve, file = "~/ClickCurve.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment