Skip to content

Instantly share code, notes, and snippets.

@DanielWeitzenfeld
Created July 24, 2015 16:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DanielWeitzenfeld/f13bd8892dbe091eaa38 to your computer and use it in GitHub Desktop.
Save DanielWeitzenfeld/f13bd8892dbe091eaa38 to your computer and use it in GitHub Desktop.
Script to calculate annotation coverage on a LookML repo
import os
import sys
import yaml
import pandas as pd
# Measures annotation coverage in a LookML repo and writes the results as CSVs:
#   - share of non-hidden dimensions and measures, per view, that have a description
#   - share of dashboard elements, per dashboard, that have a note
# The first argument should be the path to your LookML repo, e.g.:
#   python looker_annotation_coverage.py ~/PycharmProjects/looker-q
# Keys copied out of every dimension/measure entry of a view file.
# A plain string names a root-level key of the entry.
VIEW_COLUMNS = [
    'hidden',
    'dimension',
    'measure',
    'description',
    'type',
]
# Keys copied out of every dashboard element.
# A tuple is a nested path: ('note', 'text') means element['note']['text'].
DASHBOARD_COLUMNS = [
    'model',
    'name',
    'title',
    ('note', 'text'),
]
def flatten_nested_dict(dict, list_of_paths):
    """Build a flat dict holding only the requested entries of ``dict``.

    Each item of ``list_of_paths`` is either a str (a root-level key, used
    unchanged as the output key) or a tuple of keys (a nested path, whose
    output key is the path components joined with '_').  The values come
    from extract_value_from_dict, so a path that is absent maps to None.
    """
    return {
        (path if isinstance(path, str) else '_'.join(path)):
            extract_value_from_dict(dict, path)
        for path in list_of_paths
    }
def extract_value_from_dict(dict, key_or_keys):
    """Look up a root-level key or a nested key path and return its value.

    Args:
        dict: Mapping to search.  (The name shadows the builtin; it is kept
            so that existing keyword callers continue to work.)
        key_or_keys: A str naming a root-level key, or a tuple/list of keys
            that is followed one nesting level at a time.

    Returns:
        The value stored at the end of the path.  None is returned when any
        key along the path is missing, when a level that still has to be
        descended into is not a mapping (e.g. ``note:`` left empty or given
        as a plain string), when the path is empty, or when ``key_or_keys``
        is neither a str nor a tuple/list.
    """
    if isinstance(key_or_keys, str):
        path = (key_or_keys,)
    elif isinstance(key_or_keys, (tuple, list)):
        path = key_or_keys
    else:
        return None

    if not path:
        return None

    current = dict
    for key in path:
        # Duck-type the mapping check: the builtin ``dict`` is shadowed by
        # the parameter, and str/list/None must not be indexed by key.
        if not hasattr(current, 'keys') or key not in current:
            return None
        current = current[key]
    return current
if __name__ == "__main__":
    # Scans a LookML repo directory (first CLI argument), collects one row
    # per view field and per dashboard element, and writes four CSVs to the
    # current working directory.
    if len(sys.argv) < 2:
        sys.exit('usage: python {0} <path to LookML repo>'.format(sys.argv[0]))
    dirname = sys.argv[1]

    view_suffix = '.view.lookml'
    dash_suffix = '.dashboard.lookml'

    # Match on the full suffix so editor backups/swap files such as
    # "x.view.lookml.swp" are not parsed; sort for a stable row order.
    filenames = sorted(os.listdir(dirname))
    views = [f for f in filenames if f.endswith(view_suffix)]
    dashboards = [f for f in filenames if f.endswith(dash_suffix)]

    # Output column names, derived the same way the rows are, so that empty
    # inputs still produce frames with every expected column.
    view_columns = list(flatten_nested_dict({}, VIEW_COLUMNS))
    dash_columns = list(flatten_nested_dict({}, DASHBOARD_COLUMNS))

    # Views
    view_frames = []
    for filename in views:
        with open(os.path.join(dirname, filename)) as handle:
            # safe_load only builds plain data and never instantiates
            # arbitrary Python objects; a bare yaml.load(stream) is also
            # rejected by recent PyYAML releases (Loader is mandatory).
            lookml = yaml.safe_load(handle)[0]
        fields = lookml.get('fields') or []
        dimensions = [f for f in fields if 'dimension' in f]
        measures = [f for f in fields if 'measure' in f]
        rows = [flatten_nested_dict(f, VIEW_COLUMNS)
                for f in dimensions + measures]
        frame = pd.DataFrame(rows, columns=view_columns)
        frame['view_name'] = filename[:-len(view_suffix)]
        view_frames.append(frame)

    # pd.concat raises ValueError on an empty list, so fall back to an
    # empty frame when the repo has no view files.
    if view_frames:
        all_views = pd.concat(view_frames)
    else:
        all_views = pd.DataFrame(columns=view_columns + ['view_name'])
    all_views['is_dimension'] = all_views.dimension.notnull()
    # NOTE(review): any non-null `hidden` value counts as hidden, including
    # an explicit `hidden: false` -- confirm that this is intended.
    all_views['is_hidden'] = all_views.hidden.notnull()
    all_views['has_description'] = all_views.description.notnull()

    # Dashboards
    dash_frames = []
    for filename in dashboards:
        with open(os.path.join(dirname, filename)) as handle:
            lookml = yaml.safe_load(handle)[0]
        elements = lookml.get('elements') or []
        rows = [flatten_nested_dict(e, DASHBOARD_COLUMNS) for e in elements]
        frame = pd.DataFrame(rows, columns=dash_columns)
        frame['dash_name'] = filename[:-len(dash_suffix)]
        dash_frames.append(frame)

    if dash_frames:
        all_dash = pd.concat(dash_frames)
    else:
        all_dash = pd.DataFrame(columns=dash_columns + ['dash_name'])
    all_dash['has_note_text'] = all_dash.note_text.notnull()
    # Drop non-ASCII characters from the free-text columns.  Decoding back
    # to text keeps the CSV free of b'...' literals on Python 3.
    for column in ('title', 'note_text'):
        ascii_bytes = all_dash[column].str.encode('ascii', errors='ignore')
        all_dash[column] = ascii_bytes.str.decode('ascii')

    # Aggregate Stats
    # Hidden fields are excluded from the per-view description coverage.
    g = all_views[~all_views.is_hidden].groupby('view_name')
    view_stats = pd.DataFrame({'n_unhidden_fields': g.size(),
                               'percent_described': g.has_description.mean()})
    g = all_dash.groupby('dash_name')
    dash_stats = pd.DataFrame({'n_elements': g.size(),
                               'percent_noted': g.has_note_text.mean()})

    # Write CSVs
    all_views.to_csv('all_looker_view_fields.csv', index=False)
    all_dash.to_csv('all_looker_dashboard_elements.csv', index=False)
    view_stats.to_csv('looker_view_stats.csv')
    dash_stats.to_csv('looker_dash_stats.csv')

    # Single parenthesised string: valid as both the Python 2 print
    # statement and the Python 3 print function.
    print('Files written: \n'
          ' all_looker_view_fields.csv,\n'
          ' all_looker_dashboard_elements.csv,\n'
          ' looker_view_stats.csv,\n'
          ' looker_dash_stats.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment