Created
September 28, 2016 07:36
-
-
Save IronistM/ca4e94788ade7146d40aa89c2f0f4acb to your computer and use it in GitHub Desktop.
Check a list of URLs for existence of the Google Tag Manager Container code (via Donal Phipps)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Check a list of URLs for existence of the Google Tag Manager Container code
#'
#' Downloads every page listed in `urlFile`, parses its HTML and, for each
#' container ID, reports whether the ID appears in the `src` of an iframe
#' pointing at `googletagmanager.com/ns.html` and whether it appears in the
#' text of a script tag. The combined results are written to a csv file.
#'
#' @param containers
#' A character vector of the container IDs for Google Tag Manager.
#' @param urlFile
#' A character vector providing the path to a csv file with 2 columns: url,
#' the full url to the page to be checked; country, the country where the URL
#' is hosted.
#' @param outputdir
#' A character vector providing the path to the desired output directory for
#' the csv of results. It is created if it does not exist. If not provided,
#' the csv will be output to the current working directory.
#'
#' @return
#' Writes a csv to outputdir with several columns: url, the url provided in
#' the urlFile csv; country, the country provided in the urlFile csv. After
#' these come 2 columns for each GTM container ID given in containers:
#' `<ID>-iFrame`, confirming if the snippet is present in an iframe, and
#' `<ID>-script`, confirming if the snippet is present in a script tag. Each
#' status is "OK", "Not found", or "Request failed" when the page could not
#' be downloaded or parsed. The same data frame is returned invisibly.
#' @export
#'
#' @examples
#' \dontrun{
#' check_gtm(
#'   containers = c("GTM-TT98938", "GTM-FFD99D"),
#'   urlFile = "downloads/urlFile.csv"
#' )
#' }
check_gtm <- function(containers, urlFile, outputdir = getwd()) {
  # Fail early with a clear message instead of installing packages as a side
  # effect; require() + install.packages() never attached a fresh install.
  for (pkg in c("httr", "XML")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(
        "Package '", pkg, "' is required. Install it with install.packages(\"",
        pkg, "\").",
        call. = FALSE
      )
    }
  }

  urlList <- utils::read.csv(urlFile, stringsAsFactors = FALSE)
  urls <- urlList[["url"]]
  if (is.null(urls)) {
    stop("`urlFile` must contain a column named 'url'.", call. = FALSE)
  }

  # TRUE when some iframe src points at the GTM ns.html endpoint for tag_id.
  # Accepts protocol-relative, http and https sources, and tolerates extra
  # query parameters after the container ID.
  has_iframe <- function(doc, tag_id) {
    srcs <- as.character(unlist(
      XML::xpathApply(doc, "//iframe[@src]", XML::xmlGetAttr, "src")
    ))
    marker <- "googletagmanager.com/ns.html?id="
    pos <- regexpr(marker, srcs, fixed = TRUE)
    found <- pos > 0
    ids <- substring(srcs[found], pos[found] + nchar(marker))
    # Keep only the ID itself: drop any further parameters or fragment.
    ids <- sub("[&#].*$", "", ids)
    tag_id %in% ids
  }

  # TRUE when the text of any script tag mentions tag_id (matched literally).
  has_script <- function(doc, tag_id) {
    scripts <- as.character(XML::xpathApply(doc, "//script", XML::xmlValue))
    any(grepl(tag_id, scripts, fixed = TRUE))
  }

  status <- function(found) {
    if (found) "OK" else "Not found"
  }

  # Builds the one-row result for the i-th url.
  check_url <- function(i) {
    url <- urls[i]
    message("Checking url number ", i, " of ", length(urls))

    # One unreachable page must not discard the results gathered so far.
    doc <- tryCatch(
      {
        response <- httr::GET(url)
        XML::htmlParse(httr::content(response, as = "text"), asText = TRUE)
      },
      error = function(e) {
        warning(
          "Could not fetch or parse ", url, ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )

    row <- data.frame(
      url = url,
      country = urlList$country[i],
      stringsAsFactors = FALSE
    )
    for (container in containers) {
      if (is.null(doc)) {
        iframe_status <- "Request failed"
        script_status <- "Request failed"
      } else {
        iframe_status <- status(has_iframe(doc, container))
        script_status <- status(has_script(doc, container))
      }
      # `[[<-` keeps the hyphenated column names exactly as given.
      row[[paste0(container, "-iFrame")]] <- iframe_status
      row[[paste0(container, "-script")]] <- script_status
    }
    row
  }

  # Collect all rows first and bind once rather than growing in the loop.
  resultsFrame <- do.call(rbind, lapply(seq_along(urls), check_url))

  if (!dir.exists(outputdir)) {
    dir.create(outputdir, recursive = TRUE)
  }
  outfile <- file.path(
    outputdir,
    paste0("GTM check Full Results-", Sys.Date(), ".csv")
  )
  utils::write.csv(resultsFrame, file = outfile)
  message("GTM check complete. Results have been written to:\n", outfile)

  invisible(resultsFrame)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment