Skip to content

Instantly share code, notes, and snippets.

@ryantuck
Last active October 31, 2019 18:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryantuck/9723b96ad2e33a2a0fe3e7a2767b5256 to your computer and use it in GitHub Desktop.
Save ryantuck/9723b96ad2e33a2a0fe3e7a2767b5256 to your computer and use it in GitHub Desktop.
Script to extract content from Looker Content Validator HTML table
# this is probably less elegant than just using the API to do this
# https://docs.looker.com/reference/api-and-integration/api-reference/v3.1/content#validate_content
# largely copied from https://stackoverflow.com/a/44275458
import csv
from bs4 import BeautifulSoup
# Find the table where all the data lives via 'inspect element' or whatever
# and save it into a file that we read here.
# It should look like a <table class="table table-striped">...</table>
#
# The file is decoded as UTF-8 explicitly so the result does not depend on the
# platform's default locale encoding.
# NOTE(review): assumes the saved snippet is UTF-8 -- confirm for your browser.
with open('content_validator_table.html', encoding='utf-8') as f:
    html = f.read()

soup = BeautifulSoup(html, 'html.parser')

# find() returns None when nothing matches; stop with a readable message
# rather than an AttributeError on the heading lookup below.
table = soup.find('table')
if table is None:
    raise SystemExit(
        'no <table> element found in content_validator_table.html'
    )

# Column names are the stripped text of the <th> cells in the first <tr>.
headings = [th.get_text().strip() for th in table.find("tr").find_all("th")]
# Pre-parse the table into a list of dicts: one dict per row after the first
# (header) row, mapping each heading to that row's <td> element. The cells are
# kept as parsed elements (not text) because later steps look inside them.
results = [
    dict(zip(headings, tr.find_all('td')))
    for tr in table.find_all('tr')[1:]
]
# These headings contain lists of items (a <ul> of <li> per cell), so we
# define them here so we can break them out later.
headings_with_lists = ['Content', 'Folder', 'Model', 'Explore']

# Transpose the various column lists into their corresponding rows, for one
# output row per piece of content. Each output row is
# [error text, content link, folder, model, explore].
final_results = []
for result in results:
    error = result['Error'].get_text()
    lists = [
        [
            # get link if content, name for everything else
            li.find('a').get('href') if heading == 'Content' else li.get_text()
            for li in result[heading].find('ul').find_all('li')
        ]
        for heading in headings_with_lists
    ]
    # zip(*lists) groups the i-th entry of every column list together.
    # NOTE(review): assumes the four lists of a row are parallel (same
    # length); zip stops at the shortest list if they are not.
    final_results.extend([error, *items] for items in zip(*lists))
# Report how many rows (one per affected piece of content) were collected.
print(f'{len(final_results)} content validator issues exist')

# Write them out to a CSV: a header row followed by one row per issue.
# newline='' lets the csv module control line endings itself (otherwise extra
# blank rows appear on Windows); the explicit encoding keeps non-ASCII names
# from failing under a non-UTF-8 default locale.
with open(
    'looker_content_validator_results.csv', 'w', newline='', encoding='utf-8'
) as f:
    writer = csv.writer(f)
    csv_headers = ['Error'] + headings_with_lists
    writer.writerow(csv_headers)
    writer.writerows(final_results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment