# matepaavo/dbt-docs-check.py

## dbt-docs-check.py
'''
dbt Documentation Checker
'''
import argparse
import json
import logging
import sys
import urllib.parse as url_parse


def description_checker(description):
    '''
    Checks whether the given description is valid or not.

    A description is valid when it is truthy and contains at least one
    non-whitespace character, so ``None``, ``''`` and whitespace-only text
    are all reported as missing.

    :param description: Description of a table or a column
    :return: True if the description is present, False otherwise
    '''
    # bool(...) first keeps every originally-falsy value falsy; the strip()
    # additionally rejects descriptions made only of blanks/newlines.
    return bool(description) and bool(str(description).strip())


def node_checker(node):
    '''
    Validates the table-level and column-level descriptions of a node.

    Every missing description is reported through a debug log message.

    :param node: node parsed from dbt manifest.json
    :return: a truthy value only when the table description and all column
             descriptions pass description_checker
    '''
    table_ok = description_checker(node['description'])
    if not table_ok:
        logging.debug("Table %s is missing its description", node['unique_id'])

    columns_ok = True
    for col in node['columns'].values():
        if description_checker(col['description']):
            continue
        # keep scanning so every undocumented column gets logged
        columns_ok = False
        logging.debug("Column %s.%s is missing its description",
                      node['unique_id'], col['name'])

    return table_ok and columns_ok


def get_documentation_score(manifest_path, node_ids):
    '''
    Returns the documentation score for the selected nodes.

    Models are matched on their dot-joined ``fqn`` (nodes with resource type
    ``test`` are skipped), sources on ``source:`` followed by their dot-joined
    ``fqn``. The score is the percentage of matched nodes accepted by
    node_checker, rounded to an integer.

    :param manifest_path: Path to the dbt manifest.json file
    :param node_ids: List of node_ids coming from dbt ls (None is treated as empty)
    :return: documentation score as an integer percentage (0-100)
    :raises ValueError: if none of the node ids match a model or source
    '''
    node_ids = node_ids or ()
    logging.debug('%s provided', len(node_ids))
    # Set membership is O(1); the comprehensions below test it once per manifest entry.
    selected = set(node_ids)

    # Only hold the file open while parsing it.
    with open(manifest_path, encoding='utf-8') as json_file:
        manifest = json.load(json_file)

    models = [n for n in manifest['nodes'].values()
              if n['resource_type'] != 'test' and '.'.join(n['fqn']) in selected]
    sources = [s for s in manifest['sources'].values()
               if 'source:' + '.'.join(s['fqn']) in selected]
    nodes = models + sources
    logging.debug('%d nodes found', len(nodes))

    if not nodes:
        # Without this guard the percentage below divides by zero.
        raise ValueError('none of the given node ids match a model or source in the manifest')

    well_documented = [n for n in nodes if node_checker(n)]
    logging.debug('%d well documented', len(well_documented))
    return round(len(well_documented) / len(nodes) * 100)


def check_documentation_scores(manifest_path, node_ids, score_threshold):
    '''
    Checks whether the documentation score is above the threshold or not.

    Prints a confirmation line containing the score when the check passes.

    :param manifest_path: Path to the dbt manifest.json file
    :param node_ids: List of node_ids coming from dbt ls
    :param score_threshold: Score threshold(0-100)
    :return: None
    :raises ValueError: if the score is below the threshold
    '''
    score = get_documentation_score(manifest_path, node_ids)
    if score < score_threshold:
        raise ValueError(f'{score} is under the threshold {score_threshold}')
    print(f'DOCUMENTATION CHECK OK, SCORE: {score}')

def update_gitlab_badge(manifest_path, node_ids, project_id, private_token, badge_text):
    '''
    Creates or updates the GitLab badge for the given badge text.

    The documentation score is computed from the manifest and rendered as a
    shields.io static badge: green for a score >= 90, yellow for >= 60, red
    otherwise. Badges returned by ``project.badges.list()`` that already carry
    the name ``dbt_doc_<badge_text>`` are deleted before the new one is created.

    :param manifest_path: Path to the dbt manifest.json file
    :param node_ids: List of node_ids coming from dbt ls
    :param project_id: GitLab project id
    :param private_token: Gitlab private token for API calls
    :param badge_text: GitLab badge text
    '''
    # Imported here rather than at module level, so python-gitlab is only
    # needed when this function is actually called.
    import gitlab
    gl_api = gitlab.Gitlab('https://gitlab.com', private_token=private_token)
    gl_api.auth()
    score = get_documentation_score(manifest_path, node_ids)
    project = gl_api.projects.get(project_id)
    badges = project.badges.list()
    badge_name = f'dbt_doc_{badge_text}'
    if score >= 90:
        shield_color = 'green'
    elif score >= 60:
        shield_color = 'yellow'
    else:
        shield_color = 'red'
    # shields.io splits the path on single dashes (label-message-color); a
    # literal dash inside the label has to be written as a double dash.
    shield_label = badge_text.replace('-', '--')
    badge_link = 'https://img.shields.io/badge/' + url_parse.quote(
        f'{shield_label}-{score}%-{shield_color}')
    for badge in badges:
        if badge.name == badge_name:
            badge.delete()
    project.badges.create({'name': badge_name, 'link_url': badge_link, 'image_url': badge_link})


def main(argv=None):
    '''
    If used as the main module, it parses the arguments and calls check or update badge.

    :param argv: optional list of command line arguments; when None,
                 argparse reads them from sys.argv
    '''
    parser = argparse.ArgumentParser(description='dbt documentation checker')

    parser.add_argument('mode', type=str, choices=['check', 'set_badge'])

    # Both modes read the manifest and the node list, so reject a call without
    # them up front instead of failing later with a TypeError.
    parser.add_argument('--manifest-path', action='store', required=True,
                        help='path to the dbt\'s manifest.json')
    parser.add_argument('--threshold', action='store', type=int, default=0, help='check threshold')
    parser.add_argument('--badge-text', action='store', help='GitLab badge text', required=False)
    parser.add_argument('--project-id', action='store', help='GitLab project id', required=False)
    parser.add_argument('--gitlab-token', action='store', help='GitLab API token', required=False)
    parser.add_argument('--debug', action='store_true', help='set debug mode on, default is false')
    parser.add_argument('--nodes', nargs='+', required=True,
                        help='list of model/source/snapshot node ids')
    args = parser.parse_args(argv)

    if args.mode == 'set_badge':
        # These options are only mandatory for the badge mode; parser.error
        # exits on the first missing one.
        badge_options = (
            (args.badge_text, 'badge text'),
            (args.project_id, 'project id'),
            (args.gitlab_token, 'gitlab token'),
        )
        for value, label in badge_options:
            if not value:
                parser.error(f'{label} is missing')

    log_level = logging.DEBUG if args.debug else logging.INFO

    logging.basicConfig(stream=sys.stdout, level=log_level,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    if args.mode == 'check':
        # --threshold is declared with type=int, so no further conversion is needed
        check_documentation_scores(args.manifest_path, args.nodes, args.threshold)
    elif args.mode == 'set_badge':
        update_gitlab_badge(args.manifest_path, args.nodes, args.project_id,
                            args.gitlab_token, args.badge_text)


# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
	'''
	dbt Documentation Checker
	'''
	import argparse
	import json
	import logging
	import sys
	import urllib.parse as url_parse


	def description_checker(description):
	    '''
	    Checks whether the given description is valid or not.

	    A description is valid when it is truthy and contains at least one
	    non-whitespace character, so ``None``, ``''`` and whitespace-only text
	    are all reported as missing.

	    :param description: Description of a table or a column
	    :return: True if the description is present, False otherwise
	    '''
	    # bool(...) first keeps every originally-falsy value falsy; the strip()
	    # additionally rejects descriptions made only of blanks/newlines.
	    return bool(description) and bool(str(description).strip())


	def node_checker(node):
	    '''
	    Validates the table-level and column-level descriptions of a node.

	    Every missing description is reported through a debug log message.

	    :param node: node parsed from dbt manifest.json
	    :return: a truthy value only when the table description and all column
	             descriptions pass description_checker
	    '''
	    table_ok = description_checker(node['description'])
	    if not table_ok:
	        logging.debug("Table %s is missing its description", node['unique_id'])

	    columns_ok = True
	    for col in node['columns'].values():
	        if description_checker(col['description']):
	            continue
	        # keep scanning so every undocumented column gets logged
	        columns_ok = False
	        logging.debug("Column %s.%s is missing its description",
	                      node['unique_id'], col['name'])

	    return table_ok and columns_ok


	def get_documentation_score(manifest_path, node_ids):
	    '''
	    Returns the documentation score for the selected nodes.

	    Models are matched on their dot-joined ``fqn`` (nodes with resource type
	    ``test`` are skipped), sources on ``source:`` followed by their dot-joined
	    ``fqn``. The score is the percentage of matched nodes accepted by
	    node_checker, rounded to an integer.

	    :param manifest_path: Path to the dbt manifest.json file
	    :param node_ids: List of node_ids coming from dbt ls (None is treated as empty)
	    :return: documentation score as an integer percentage (0-100)
	    :raises ValueError: if none of the node ids match a model or source
	    '''
	    node_ids = node_ids or ()
	    logging.debug('%s provided', len(node_ids))
	    # Set membership is O(1); the comprehensions below test it once per manifest entry.
	    selected = set(node_ids)

	    # Only hold the file open while parsing it.
	    with open(manifest_path, encoding='utf-8') as json_file:
	        manifest = json.load(json_file)

	    models = [n for n in manifest['nodes'].values()
	              if n['resource_type'] != 'test' and '.'.join(n['fqn']) in selected]
	    sources = [s for s in manifest['sources'].values()
	               if 'source:' + '.'.join(s['fqn']) in selected]
	    nodes = models + sources
	    logging.debug('%d nodes found', len(nodes))

	    if not nodes:
	        # Without this guard the percentage below divides by zero.
	        raise ValueError('none of the given node ids match a model or source in the manifest')

	    well_documented = [n for n in nodes if node_checker(n)]
	    logging.debug('%d well documented', len(well_documented))
	    return round(len(well_documented) / len(nodes) * 100)


	def check_documentation_scores(manifest_path, node_ids, score_threshold):
	    '''
	    Checks whether the documentation score is above the threshold or not.

	    Prints a confirmation line containing the score when the check passes.

	    :param manifest_path: Path to the dbt manifest.json file
	    :param node_ids: List of node_ids coming from dbt ls
	    :param score_threshold: Score threshold(0-100)
	    :return: None
	    :raises ValueError: if the score is below the threshold
	    '''
	    score = get_documentation_score(manifest_path, node_ids)
	    if score < score_threshold:
	        raise ValueError(f'{score} is under the threshold {score_threshold}')
	    print(f'DOCUMENTATION CHECK OK, SCORE: {score}')


	def update_gitlab_badge(manifest_path, node_ids, project_id, private_token, badge_text):
	    '''
	    Creates or updates the GitLab badge for the given badge text.

	    The documentation score is computed from the manifest and rendered as a
	    shields.io static badge: green for a score >= 90, yellow for >= 60, red
	    otherwise. Badges returned by ``project.badges.list()`` that already carry
	    the name ``dbt_doc_<badge_text>`` are deleted before the new one is created.

	    :param manifest_path: Path to the dbt manifest.json file
	    :param node_ids: List of node_ids coming from dbt ls
	    :param project_id: GitLab project id
	    :param private_token: Gitlab private token for API calls
	    :param badge_text: GitLab badge text
	    '''
	    # Imported here rather than at module level, so python-gitlab is only
	    # needed when this function is actually called.
	    import gitlab
	    gl_api = gitlab.Gitlab('https://gitlab.com', private_token=private_token)
	    gl_api.auth()
	    score = get_documentation_score(manifest_path, node_ids)
	    project = gl_api.projects.get(project_id)
	    badges = project.badges.list()
	    badge_name = f'dbt_doc_{badge_text}'
	    if score >= 90:
	        shield_color = 'green'
	    elif score >= 60:
	        shield_color = 'yellow'
	    else:
	        shield_color = 'red'
	    # shields.io splits the path on single dashes (label-message-color); a
	    # literal dash inside the label has to be written as a double dash.
	    shield_label = badge_text.replace('-', '--')
	    badge_link = 'https://img.shields.io/badge/' + url_parse.quote(
	        f'{shield_label}-{score}%-{shield_color}')
	    for badge in badges:
	        if badge.name == badge_name:
	            badge.delete()
	    project.badges.create({'name': badge_name, 'link_url': badge_link, 'image_url': badge_link})


	def main(argv=None):
	    '''
	    If used as the main module, it parses the arguments and calls check or update badge.

	    :param argv: optional list of command line arguments; when None,
	                 argparse reads them from sys.argv
	    '''
	    parser = argparse.ArgumentParser(description='dbt documentation checker')

	    parser.add_argument('mode', type=str, choices=['check', 'set_badge'])

	    # Both modes read the manifest and the node list, so reject a call without
	    # them up front instead of failing later with a TypeError.
	    parser.add_argument('--manifest-path', action='store', required=True,
	                        help='path to the dbt\'s manifest.json')
	    parser.add_argument('--threshold', action='store', type=int, default=0, help='check threshold')
	    parser.add_argument('--badge-text', action='store', help='GitLab badge text', required=False)
	    parser.add_argument('--project-id', action='store', help='GitLab project id', required=False)
	    parser.add_argument('--gitlab-token', action='store', help='GitLab API token', required=False)
	    parser.add_argument('--debug', action='store_true', help='set debug mode on, default is false')
	    parser.add_argument('--nodes', nargs='+', required=True,
	                        help='list of model/source/snapshot node ids')
	    args = parser.parse_args(argv)

	    if args.mode == 'set_badge':
	        # These options are only mandatory for the badge mode; parser.error
	        # exits on the first missing one.
	        badge_options = (
	            (args.badge_text, 'badge text'),
	            (args.project_id, 'project id'),
	            (args.gitlab_token, 'gitlab token'),
	        )
	        for value, label in badge_options:
	            if not value:
	                parser.error(f'{label} is missing')

	    log_level = logging.DEBUG if args.debug else logging.INFO

	    logging.basicConfig(stream=sys.stdout, level=log_level,
	                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

	    if args.mode == 'check':
	        # --threshold is declared with type=int, so no further conversion is needed
	        check_documentation_scores(args.manifest_path, args.nodes, args.threshold)
	    elif args.mode == 'set_badge':
	        update_gitlab_badge(args.manifest_path, args.nodes, args.project_id,
	                            args.gitlab_token, args.badge_text)


	# Run the command line interface only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == '__main__':
	    main()