Skip to content

Instantly share code, notes, and snippets.

@IronistM
Created September 28, 2016 07:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save IronistM/ca4e94788ade7146d40aa89c2f0f4acb to your computer and use it in GitHub Desktop.
Save IronistM/ca4e94788ade7146d40aa89c2f0f4acb to your computer and use it in GitHub Desktop.
Check a list of URLs for existence of the Google Tag Manager Container code (via Donal Phipps)
#' Check a list of URLs for existence of the Google Tag Manager Container code
#'
#' Fetches every page listed in `urlFile`, parses the returned HTML and, for
#' each container ID, reports whether the ID appears in a Google Tag Manager
#' `ns.html` iframe and whether it appears in the text of any `<script>` tag.
#' The `httr` and `XML` packages are installed on demand if they are missing.
#'
#' @param containers
#' A character vector of the container IDs for Google Tag Manager
#' @param urlFile
#' A character vector providing the path to a csv file with 2 columns: url, the full url to the page to be checked; country, the country where the URL is hosted.
#'
#' @param outputdir
#' A character vector providing the path to the desired output directory for the csv of results. If not provided, the csv will be output to the current working directory. The directory is created if it does not exist.
#'
#' @return
#' Writes a csv named `GTM check Full Results-<date>.csv` to outputdir with several columns: url, the url provided in the urlFile csv; country, the country provided in the urlFile csv. After this, there are 2 columns for each GTM container ID provided in containers: `<ID>-iFrame`, confirming if the snippet is present in an iframe, and `<ID>-script`, confirming if the ID is present in a script tag. Each status is "OK", "Not found", or "Error" when the page could not be fetched or parsed. The same results are returned invisibly as a data frame (`NULL` when urlFile lists no URLs).
#' @export
#'
#' @examples
#' \dontrun{
#' check_gtm(containers = c("GTM-TT98938", "GTM-FFD99D"), urlFile = "downloads/urlFile.csv")
#' }
check_gtm <- function(containers, urlFile, outputdir = getwd()) {
  # Install missing dependencies, then call them through their namespace.
  # `require()` followed by `install.packages()` never loads a package that
  # was just installed, so the first run would fail on the first `GET()`.
  for (pkg in c("httr", "XML")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg)
    }
  }

  # TRUE when any iframe points at GTM's ns.html with exactly this container
  # ID. The scheme is ignored so that "//www...", "http://" and "https://"
  # variants of the snippet are all recognised.
  has_gtm_iframe <- function(doc, tag_id) {
    srcs <- as.character(unlist(
      XML::xpathApply(doc, "//iframe[@src]", XML::xmlGetAttr, "src")
    ))
    if (length(srcs) == 0) {
      return(FALSE)
    }
    is_gtm <- grepl("googletagmanager.com/ns.html", srcs, fixed = TRUE)
    # Pull out the value of the `id` query parameter; when there is none,
    # `sub()` returns the input unchanged, which never equals a container ID.
    ids <- sub("^[^?]*\\?(.*&)?id=([^&#]*).*$", "\\2", srcs)
    any(is_gtm & ids == tag_id)
  }

  # TRUE when the container ID occurs in the text of any <script> element.
  # `fixed = TRUE` matches the ID literally rather than as a regex.
  has_gtm_script <- function(doc, tag_id) {
    scripts <- as.character(XML::xpathApply(doc, "//script", XML::xmlValue))
    any(grepl(tag_id, scripts, fixed = TRUE))
  }

  status_label <- function(found) {
    if (found) "OK" else "Not found"
  }

  url_list <- read.csv(urlFile, stringsAsFactors = FALSE)
  if (!"url" %in% names(url_list)) {
    stop("`urlFile` must contain a column named 'url'.", call. = FALSE)
  }

  n_urls <- nrow(url_list)
  # Collect one data frame per URL and bind once at the end instead of
  # growing the result with rbind() on every iteration.
  rows <- vector("list", n_urls)

  for (i in seq_len(n_urls)) {
    message(paste0("Checking url number ", i, " of ", n_urls))
    url <- url_list$url[i]

    # A single unreachable or unparsable page must not abort the whole run:
    # warn, and record "Error" for every container on that row.
    parsed_html <- tryCatch(
      {
        response <- httr::GET(url)
        page_text <- httr::content(response, as = "text")
        XML::htmlParse(page_text, asText = TRUE)
      },
      error = function(e) {
        warning(
          "Could not check ", url, ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )

    check_results <- data.frame(
      url = url,
      country = url_list$country[i],
      stringsAsFactors = FALSE
    )

    # Append an iFrame and a script status column for each container.
    for (container in containers) {
      if (is.null(parsed_html)) {
        iframe_status <- "Error"
        script_status <- "Error"
      } else {
        iframe_status <- status_label(has_gtm_iframe(parsed_html, container))
        script_status <- status_label(has_gtm_script(parsed_html, container))
      }
      results <- data.frame(
        iframe_status,
        script_status,
        stringsAsFactors = FALSE
      )
      names(results) <- c(
        paste0(container, "-iFrame"),
        paste0(container, "-script")
      )
      check_results <- cbind(check_results, results)
    }

    rows[[i]] <- check_results
  }

  # NULL when there were no URLs, mirroring an empty result set.
  results_frame <- do.call(rbind, rows)

  if (!dir.exists(outputdir)) {
    dir.create(outputdir, recursive = TRUE)
  }
  output_file <- file.path(
    outputdir,
    paste0("GTM check Full Results-", Sys.Date(), ".csv")
  )
  write.csv(results_frame, file = output_file)
  message(paste0(
    "GTM check complete. Results have been written to:\n",
    output_file
  ))

  invisible(results_frame)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment