Last active
April 22, 2022 07:11
-
-
Save rossturk/1223ae5d57fbcbb4bc32da0d49137ef2 to your computer and use it in GitHub Desktop.
Replace GA tags with Matomo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# SPDX-License-Identifier: Apache-2.0
#
# Algorithm borrowed from https://github.com/merobi-hub/SPDXUpdater
import os
import re
import sys
# OLD_TAG_1: the analytics.js snippet written with double-quoted arguments.
#
#   <script type="application/javascript">
#   window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
#   ga("create", "UA-XXXXXXXXX-1", "auto");
#   ga("send", "pageview");
#   </script>
#   <script async src="https://www.google-analytics.com/analytics.js"></script>
#
# Written as a raw string so that \s and \n reach the regex engine untouched
# instead of relying on Python passing unknown string escapes through (which
# is a SyntaxWarning on Python 3.12+).
# The leading \s* also consumes the whitespace in front of the tag, and each
# ".*\n" consumes exactly one line of the snippet.
OLD_TAG_1_REGEX = (
    r'\s*<script\s+type="application/javascript">\s*window.ga.*'
    r'\n.*\n.*\n.*\n.*analytics.js"></script>'
)
# OLD_TAG_2: the legacy ga.js "_gaq" snippet.
#
#   <script type="text/javascript">
#   var _gaq = _gaq || [];
#   _gaq.push(['_setAccount', 'UA-XXXXXXXXX-1']);
#   _gaq.push(['_trackPageview']);
#   </script>
#
# Raw string: the backslash sequences are regex escapes, not Python string
# escapes. \S+ after "_trackPageview" absorbs the closing "']);" and the
# leading \s* consumes the whitespace in front of the tag.
OLD_TAG_2_REGEX = (
    r'\s*<script\s+type="text/javascript">\s*var\s*_gaq.*'
    r'\n.*\n.*_trackPageview\S+\s*</script>'
)
# OLD_TAG_3: the analytics.js snippet with a doNotTrack line and
# single-quoted arguments.
#
#   <script type="application/javascript">
#     var doNotTrack = false;
#     window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
#     ga('create', 'UA-XXXXXXXXX-1', 'auto');
#     ga('send', 'pageview');
#   </script>
#   <script async src='https://www.google-analytics.com/analytics.js'></script>
#
# Raw string: the backslash sequences are regex escapes, not Python string
# escapes. Note the \s+ in front of "window.ga": the line must start with at
# least one whitespace character (i.e. be indented) for the pattern to match.
OLD_TAG_3_REGEX = (
    r'\s*<script\s+type="application/javascript">\s*\n.*\n\s+window.ga.*'
    r"\n.*\n.*\n.*\n.*analytics.js'></script>"
)
# OLD_FOOTER: the footer notice about Google Analytics, including the inline
# script that loads ga.js.
#
#   <div class="footer">This page uses <a href="https://analytics.google.com/">
#     Google Analytics</a> to collect statistics. You can disable it by blocking
#     the JavaScript coming from www.google-analytics.com. Check our
#     <a href="privacy_notice.html">Privacy Policy</a>
#     for more details.
#     <script type="text/javascript">
#       (function() {
#         var ga = document.createElement('script');
#         ga.src = ('https:' == document.location.protocol ?
#                   'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
#         ga.setAttribute('async', 'true');
#         var nodes = document.documentElement.childNodes;
#         var i = -1;
#         var node;
#         do {
#           i++;
#           node = nodes[i]
#         } while(node.nodeType !== Node.ELEMENT_NODE);
#         node.appendChild(ga);
#       })();
#     </script>
#   </div>
#
# Raw string: the backslash sequences are regex escapes, not Python string
# escapes. [\w\W\n]* matches any character including newlines and is greedy,
# so the match runs from the first footer <div> to the LAST "appendChild(ga)"
# in the text, then two more lines and the closing </div>.
OLD_FOOTER_REGEX = (
    r'\s*<div\s+class="footer">[\w\W\n]*appendChild\(ga\).*'
    r'\n.*\n.*\s*</div>'
)
# Replacement snippet substituted for each matched Google Analytics tag.
# It begins with a newline because the OLD_TAG_* patterns start with \s* and
# therefore also remove the whitespace in front of the tag they match.
# NOTE(review): the "[...]" line looks like a placeholder for the actual
# Matomo tracking code -- confirm and fill it in before running the script.
NEW_TAG = '''
<!-- Matomo -->
[...]
</script>
<!-- End Matomo -->'''
def process_file(contents):
    """Return *contents* with Google Analytics markup swapped for Matomo.

    Every match of OLD_TAG_1_REGEX, OLD_TAG_2_REGEX and OLD_TAG_3_REGEX is
    replaced by NEW_TAG, and every match of OLD_FOOTER_REGEX is removed.

    Args:
        contents: The full text of one HTML file.

    Returns:
        The rewritten text; identical to the input when no pattern matches.
    """
    for pattern in (OLD_TAG_1_REGEX, OLD_TAG_2_REGEX, OLD_TAG_3_REGEX):
        # A callable replacement inserts NEW_TAG verbatim. Passing the string
        # itself would make re.sub treat it as a template, so any backslash
        # in the snippet would be read as an escape or group reference.
        contents = re.sub(pattern, lambda _match: NEW_TAG, contents)
    return re.sub(OLD_FOOTER_REGEX, '', contents)
def main():
    """Rewrite analytics tags in the HTML files below a directory.

    The directory comes from the first command-line argument. Every file
    whose name contains '.html' and whose text contains the tracking ID
    'UA-XXXXXXXXX-1' is reported, passed through process_file(), and written
    back in place if the text changed.
    """
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: " + sys.argv[0] + " <directory>")
    top_dir = sys.argv[1]

    for root, _dirs, files in os.walk(top_dir, topdown=False):
        for name in files:
            # Substring test: this also picks up names such as
            # 'page.html.bak', not only names ending in '.html'.
            if '.html' not in name:
                continue
            path = os.path.join(root, name)
            with open(path, 'r') as src:
                str_contents = src.read()
            # Only files that carry the tracking ID are reported and processed.
            if 'UA-XXXXXXXXX-1' not in str_contents:
                continue
            print("found in: " + path)
            new_contents = process_file(str_contents)
            # Leave the file untouched when no pattern matched, so it is not
            # rewritten for nothing.
            if new_contents != str_contents:
                with open(path, 'w') as dst:
                    dst.write(new_contents)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment