import urllib.error
import urllib.request

from bs4 import BeautifulSoup
# Key of the Google Sheets document that tracks customer accounts.
ACCOUNT_SHEET_KEY = "1SS9UxMkH8Pagpcnsz537CdFBsqo15vRsdWJ13-_lpkc"

# CloudWright modules used by this application: gspread to talk to
# Google Sheets, gmail to send alerts when necessary.
sheets = CloudWright.get_module("gspread")
gmail = CloudWright.get_module("gmail")

# Open the customer account tracking doc and keep a handle on its
# `sheet1` worksheet, which the scan below reads and updates.
account_doc = sheets.open_by_key(ACCOUNT_SHEET_KEY)
sheet = account_doc.sheet1
def contains_tag(soup, tag_url='https://www.google-analytics.com'):
    """Return the first ``<link>`` element whose ``href`` equals *tag_url*.

    Performs a simple scan over the parsed page to see whether it links to
    our analytics tag (by default, Google Analytics).

    Args:
        soup: Parsed document exposing ``find_all('link')``; each returned
            element must support ``.get('href')``.
        tag_url: Exact ``href`` value that identifies the analytics tag.

    Returns:
        The first matching element (truthy), or ``None`` when no ``<link>``
        points at *tag_url*.
    """
    for link in soup.find_all('link'):
        # .get() instead of ['href']: a <link> without an href attribute
        # must count as "no match" rather than raise KeyError.
        if link.get('href') == tag_url:
            return link
    return None
# Upper bound (seconds) on how long a single site may take to respond, so one
# hung server cannot stall the whole scan.
FETCH_TIMEOUT_SECONDS = 30

# For each row in the doc (skipping the header)...
values = sheet.get_all_values()
# Row 1 of the sheet is the header and sheet rows are 1-indexed, so the first
# data row is sheet row 2.
for sheet_row, row in enumerate(values[1:], start=2):
    # Only the first three columns are used (URL, owner, previously recorded
    # status). Pad short rows and ignore extra columns so unpacking cannot fail.
    url, owner, old_status = (list(row) + ["", "", ""])[:3]

    # Load the URL with the urllib library; the `with` block closes the
    # connection as soon as the body has been read.
    try:
        request = urllib.request.Request(url)
        with urllib.request.urlopen(
            request, timeout=FETCH_TIMEOUT_SECONDS
        ) as response:
            html = response.read()
    except (urllib.error.URLError, OSError, ValueError) as exc:
        # URLError: HTTP errors and connection failures; OSError: timeouts or
        # resets while reading; ValueError: blank or malformed URL.
        # The recorded status is left untouched, because an unreachable site
        # says nothing about whether the tag is still present.
        print(f"Skipping {url!r}: page could not be fetched ({exc})")
        continue

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    if contains_tag(BeautifulSoup(html, "html.parser")):
        status = "YES"
    else:
        status = "NO"
        # Send an email to the account owner if the tag is newly missing,
        # i.e. it was not already recorded as missing on a previous run.
        if old_status != "NO":
            gmail.send_email(f"{url} tag removed", "Site analytics tag removed!", owner)

    # Update the account status (column 3) in the source spreadsheet.
    sheet.update_cell(sheet_row, 3, status)
# (GitHub page footer captured along with the snippet; not part of the script.)
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment