Skip to content

Instantly share code, notes, and snippets.

@rossturk
Last active April 22, 2022 07:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rossturk/1223ae5d57fbcbb4bc32da0d49137ef2 to your computer and use it in GitHub Desktop.
Save rossturk/1223ae5d57fbcbb4bc32da0d49137ef2 to your computer and use it in GitHub Desktop.
Replace GA tags with Matomo
#!/usr/bin/env python
# SPDX-License-Identifier: Apache-2.0
#
# Algorithm borrowed from https://github.com/merobi-hub/SPDXUpdater
import sys
import os
import re
# OLD_TAG_1:
#
# <script type="application/javascript">
# window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
# ga("create", "UA-XXXXXXXXX-1", "auto");
# ga("send", "pageview");
# </script>
# <script async src="https://www.google-analytics.com/analytics.js"></script>
#
# Written as a raw string so the regex escapes (\s, \n) reach `re` verbatim
# rather than relying on Python passing unknown string escapes through.
# The dots in "window.ga" and "analytics.js" are escaped so they only match
# a literal '.'. The leading \s* also consumes the whitespace before the tag.
OLD_TAG_1_REGEX = r'\s*<script\s+type="application/javascript">\s*window\.ga.*\n.*\n.*\n.*\n.*analytics\.js"></script>'
# OLD_TAG_2:
#
# <script type="text/javascript">
# var _gaq = _gaq || [];
# _gaq.push(['_setAccount', 'UA-XXXXXXXXX-1']);
# _gaq.push(['_trackPageview']);
# </script>
# Written as a raw string so the regex escapes (\s, \S, \n) reach `re`
# verbatim rather than relying on Python passing unknown string escapes
# through. The leading \s* also consumes the whitespace before the tag.
OLD_TAG_2_REGEX = r'\s*<script\s+type="text/javascript">\s*var\s*_gaq.*\n.*\n.*_trackPageview\S+\s*</script>'
# OLD_TAG_3:
#
# <script type="application/javascript">
# var doNotTrack = false;
# window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
# ga('create', 'UA-XXXXXXXXX-1', 'auto');
# ga('send', 'pageview');
# </script>
# <script async src='https://www.google-analytics.com/analytics.js'></script>
# Written as a raw, triple-quoted string so the regex escapes reach `re`
# verbatim and both quote characters can appear unescaped. The dots in
# "window.ga" and "analytics.js" are escaped so they only match a literal '.'.
# Note the \s+ after the second newline: at least one whitespace character
# must separate that newline from "window.ga".
OLD_TAG_3_REGEX = r"""\s*<script\s+type="application/javascript">\s*\n.*\n\s+window\.ga.*\n.*\n.*\n.*\n.*analytics\.js'></script>"""
# OLD_FOOTER:
#
# <div class="footer">This page uses <a href="https://analytics.google.com/">
# Google Analytics</a> to collect statistics. You can disable it by blocking
# the JavaScript coming from www.google-analytics.com. Check our
# <a href="privacy_notice.html">Privacy Policy</a>
# for more details.
# <script type="text/javascript">
# (function() {
# var ga = document.createElement('script');
# ga.src = ('https:' == document.location.protocol ?
# 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
# ga.setAttribute('async', 'true');
# var nodes = document.documentElement.childNodes;
# var i = -1;
# var node;
# do {
# i++;
# node = nodes[i]
# } while(node.nodeType !== Node.ELEMENT_NODE);
# node.appendChild(ga);
# })();
# </script>
# </div>
# Written as a raw string so the regex escapes reach `re` verbatim.
# [\w\W]*? is deliberately lazy: it stops at the nearest "appendChild(ga)"
# that is followed by the closing lines, so a document containing more than
# one footer does not lose the content between them.
OLD_FOOTER_REGEX = r'\s*<div\s+class="footer">[\w\W]*?appendChild\(ga\).*\n.*\n.*\s*</div>'
# Replacement markup that process_file() substitutes for each old Google
# Analytics tag. The literal starts with a newline, so the snippet begins on
# its own line after the whitespace the OLD_TAG_* patterns consume.
# NOTE(review): the "[...]" line looks like an elided placeholder for the real
# Matomo tracking snippet -- confirm and fill in before running.
NEW_TAG = '''
<!-- Matomo -->
[...]
</script>
<!-- End Matomo -->'''
def process_file(contents):
    """Return *contents* with the old analytics markup replaced.

    Every match of the three Google Analytics tag patterns is replaced by
    NEW_TAG, and every match of the Google Analytics footer pattern is
    removed.

    Args:
        contents: Full text of one HTML document.

    Returns:
        The rewritten text; identical to the input when nothing matches.
    """
    def insert_new_tag(_match):
        # A callable replacement is inserted verbatim. Passing NEW_TAG as a
        # plain string would make re.sub treat any backslash in the snippet
        # as an escape sequence or group reference.
        return NEW_TAG

    for old_tag in (OLD_TAG_1_REGEX, OLD_TAG_2_REGEX, OLD_TAG_3_REGEX):
        contents = re.sub(old_tag, insert_new_tag, contents)
    return re.sub(OLD_FOOTER_REGEX, '', contents)
def main():
    """Rewrite analytics tags in the HTML files below the directory in argv[1].

    Walks the tree bottom-up, reads every file whose name ends in ".html",
    and passes those containing the 'UA-XXXXXXXXX-1' tracking id through
    process_file(). A file is written back only when its text changed.

    Exits with a usage message when no directory argument is given.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " DIRECTORY")
    top = sys.argv[1]

    for root, _dirs, files in os.walk(top, topdown=False):
        for name in files:
            # Match on the suffix: a substring test would also pick up
            # names such as "index.html.bak".
            if not name.endswith('.html'):
                continue
            path = os.path.join(root, name)
            with open(path, 'r') as handle:
                original = handle.read()
            if 'UA-XXXXXXXXX-1' not in original:
                continue
            print("found in: " + path)
            updated = process_file(original)
            # Leave the file untouched when no pattern matched.
            if updated != original:
                with open(path, 'w') as handle:
                    handle.write(updated)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment