Skip to content

Instantly share code, notes, and snippets.

@matepaavo
Created June 14, 2020 14:50
Show Gist options
  • Save matepaavo/89c0b61262a25fa983d24a73be4ab376 to your computer and use it in GitHub Desktop.
Save matepaavo/89c0b61262a25fa983d24a73be4ab376 to your computer and use it in GitHub Desktop.
Utility to check and score your dbt documentation
'''
dbt Documentation Checker
'''
import argparse
import json
import logging
import sys
import urllib.parse as url_parse
def description_checker(description):
    '''
    Checks whether the given description is valid or not.

    A description is considered valid when it is a string that contains at
    least one non-whitespace character. ``None``, empty strings and strings
    made only of whitespace are all treated as a missing description.

    :param description: Description of a table or a column
    :return: True if the description is present, False otherwise
    '''
    # Return a real boolean (not the description itself) so callers that
    # combine or return the result never leak the raw text.
    return isinstance(description, str) and bool(description.strip())
def node_checker(node):
    '''
    Checks whether the node's descriptions are valid.

    The node's own description and the description of every column are
    passed through description_checker. Each missing description is reported
    at debug level, so all columns are always inspected even after the first
    failure.

    :param node: node parsed from dbt manifest.json
    :return: truthy when the table and all of its columns have a
        description, falsy otherwise
    '''
    table_ok = description_checker(node['description'])
    if not table_ok:
        logging.debug("Table %s is missing its description", node['unique_id'])

    columns_ok = True
    for col in node['columns'].values():
        if description_checker(col['description']):
            continue
        # Keep going after a failure so every undocumented column is logged.
        columns_ok = False
        logging.debug("Column %s.%s is missing its description",
                      node['unique_id'], col['name'])

    return table_ok and columns_ok
def get_documentation_score(manifest_path, node_ids):
    '''
    Returns the documentation score for the selected nodes.

    The score is the percentage, rounded to the nearest integer, of the
    selected models and sources for which node_checker passes. Nodes whose
    resource_type is 'test' are never counted.

    :param manifest_path: Path to the dbt manifest.json file
    :param node_ids: List of node_ids coming from dbt ls; None is treated as
        an empty selection
    :return: Integer score between 0 and 100. When the selection matches no
        node there is nothing left undocumented, so 100 is returned.
    '''
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(manifest_path, encoding='utf-8') as json_file:
        manifest = json.load(json_file)

    node_ids = list(node_ids or ())
    logging.debug('%s provided', len(node_ids))
    # A set makes each membership test O(1) instead of scanning the list
    # once per manifest entry.
    selected = set(node_ids)

    models = [n for n in manifest['nodes'].values()
              if n['resource_type'] != 'test' and '.'.join(n['fqn']) in selected]
    # Sources are matched with a 'source:' prefix in front of their fqn.
    sources = [s for s in manifest['sources'].values()
               if 'source:' + '.'.join(s['fqn']) in selected]
    nodes = models + sources
    logging.debug('%d nodes found', len(nodes))

    if not nodes:
        # Avoid dividing by zero when the selection matches nothing.
        logging.warning('No nodes matched the selection, nothing to score')
        return 100

    well_documented = [n for n in nodes if node_checker(n)]
    logging.debug('%d well documented', len(well_documented))
    return round(len(well_documented) / len(nodes) * 100)
def check_documentation_scores(manifest_path, node_ids, score_threshold):
    '''
    Checks whether the documentation score is above the threshold or not.

    Prints a confirmation line with the score when the check passes.

    :param manifest_path: Path to the dbt manifest.json file
    :param node_ids: List of node_ids coming from dbt ls
    :param score_threshold: Score threshold(0-100)
    :return: None
    :raises ValueError: if the score is lower than score_threshold
    '''
    score = get_documentation_score(manifest_path, node_ids)
    if score < score_threshold:
        raise ValueError(f'{score} is under the threshold {score_threshold}')
    print(f'DOCUMENTATION CHECK OK, SCORE: {score}')
def update_gitlab_badge(manifest_path, node_ids, project_id, private_token, badge_text):
    '''
    Creates or updates the GitLab badge for the given badge text.

    The documentation score is computed from the manifest and rendered as a
    shields.io static badge (green from 90, yellow from 60, red below). Any
    existing project badge named ``dbt_doc_<badge_text>`` is deleted before
    the new one is created.

    :param manifest_path: Path to the dbt manifest.json file
    :param node_ids: List of node_ids coming from dbt ls
    :param project_id: GitLab project id
    :param private_token: Gitlab private token for API calls
    :param badge_text: GitLab badge text
    '''
    # Imported here rather than at module level, so python-gitlab is only
    # needed when this function actually runs.
    import gitlab

    gl_api = gitlab.Gitlab('https://gitlab.com', private_token=private_token)
    gl_api.auth()
    score = get_documentation_score(manifest_path, node_ids)
    project = gl_api.projects.get(project_id)
    badge_name = f'dbt_doc_{badge_text}'

    if score >= 90:
        shield_color = 'green'
    elif score >= 60:
        shield_color = 'yellow'
    else:
        shield_color = 'red'

    # shields.io splits the badge path on single dashes into
    # label-message-color; a literal dash in the label must be doubled.
    label = str(badge_text).replace('-', '--')
    badge_link = 'https://img.shields.io/badge/' + url_parse.quote(
        f'{label}-{score}%-{shield_color}')

    # Drop any previous badge with the same name so only one remains.
    for badge in project.badges.list():
        if badge.name == badge_name:
            badge.delete()
    project.badges.create({'name': badge_name, 'link_url': badge_link, 'image_url': badge_link})
def main():
    '''
    If used as the main module, it parses the arguments and calls check or update badge.

    Exits with a usage error when the manifest path is missing, or when
    'set_badge' mode is selected without badge text, project id or token.
    '''
    parser = argparse.ArgumentParser(description='dbt documentation checker')
    parser.add_argument('mode', type=str, choices=['check', 'set_badge'])
    # Both modes read the manifest, so a missing path is a usage error
    # rather than a TypeError raised later by open().
    parser.add_argument('--manifest-path', action='store', required=True,
                        help='path to the dbt\'s manifest.json')
    parser.add_argument('--threshold', action='store', type=int, default=0, help='check threshold')
    parser.add_argument('--badge-text', action='store', help='GitLab badge text', required=False)
    parser.add_argument('--project-id', action='store', help='GitLab project id', required=False)
    parser.add_argument('--gitlab-token', action='store', help='GitLab API token', required=False)
    parser.add_argument('--debug', action='store_true', help='set debug mode on, default is false')
    parser.add_argument('--nodes', nargs='+', help='list of model/source/snapshot node ids')
    args = parser.parse_args()

    if args.mode == 'set_badge':
        # These options are only mandatory for the badge mode.
        badge_requirements = (
            (args.badge_text, 'badge text is missing'),
            (args.project_id, 'project id is missing'),
            (args.gitlab_token, 'gitlab token is missing'),
        )
        for value, message in badge_requirements:
            if not value:
                parser.error(message)

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(stream=sys.stdout, level=log_level,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    if args.mode == 'check':
        # argparse already converted --threshold to int.
        check_documentation_scores(args.manifest_path, args.nodes, args.threshold)
    elif args.mode == 'set_badge':
        update_gitlab_badge(args.manifest_path, args.nodes, args.project_id,
                            args.gitlab_token, args.badge_text)
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment