Python3 script to summarise ioos checker report on multiple files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# This script creates a summary of CF/ACDD tests run by the IOOS checker on multiple files
# https://github.com/ioos/compliance-checker | |
# First run the checker generating a json_new output
# NB the json format only works on one file; for multiple files you have to use json_new
# i.e. cchecker.py -t=cf -f json_new -o test.json <files>
# then pass the json file as input to this script
# python parse_checker.py test.json
# | |
# Copyright 2019 ARC Centre of Excellence for Climate Extremes | |
# author: Paola Petrelli <paola.petrelli@utas.edu.au> | |
# Last updated: 2021-06-02 | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import json | |
import sys | |
from collections import defaultdict | |
def read_json(infile):
    """Read an IOOS checker report from a json file.

    Input:
        infile - file name (string)
    Return:
        data - the decoded json content of the file
    NB if the file cannot be read or does not hold valid json a warning
       is printed and the script exits with a non-zero status
    """
    try:
        with open(infile, 'r') as f:
            data = json.load(f)
    # OSError covers a missing/unreadable file; ValueError covers invalid
    # json (json.JSONDecodeError) and undecodable bytes (UnicodeDecodeError).
    # A bare except would also trap KeyboardInterrupt and SystemExit.
    except (OSError, ValueError):
        print(f"Failed to open file: {infile} .")
        print("Check that file exists, it is in json format and not corrupted")
        # Exit with a failure status so a calling shell can detect the error
        sys.exit(1)
    return data
def get_keys(data):
    """Get lists of files, tests from json data keys and define priority levels.

    Input:
        data - json data from checker report as read from file
    Return:
        tests - list of tests performed by checker
                (empty if the report contains no files)
        files - list of files checked
        levels - list of priority levels
    """
    files = list(data)
    # The test names are taken from the first file only; an empty report
    # yields no tests rather than failing on files[0].
    tests = list(data[files[0]]) if files else []
    levels = ['high', 'medium', 'low']
    # These keys are ignored currently by this code:
    # ['all_priorities','scored_points', 'possible_points', 'high_count',
    #  'medium_count', 'low_count', 'testname', 'source_name',
    #  'cc_spec_version', 'cc_url']
    return tests, files, levels
def report_test(data, test, files, levels, scores):
    """Report results for one kind of test.

    Prints, for each priority level, every distinct failure message with
    the number of files reporting it; the file names are listed only when
    the message does not affect all the files.

    Input:
        data - json data from checker report as read from file
        test - kind of test to write a report for
        files - list of the checked files, used as keys into data
        levels - list of priority levels
        scores - a dictionary to collect each file scores
    Return:
        scores - same as input after updating with the test scores
    """
    print(f"Results for {test} checks")
    print(f"{len(files)} files were checked\n")
    # failures[level][message] -> names of the files reporting that message
    failures = {level: {} for level in levels}
    for fpath in files:
        fname = fpath.split("/")[-1]
        result = data[fpath][test]
        # Build a new dictionary so the report held in data is left untouched
        scores[fpath][test] = {
            **result['scoreheader'],
            'scored_points': result['scored_points'],
            'possible_points': result['possible_points'],
        }
        for level in levels:
            for check in result[level + "_priorities"]:
                prefix = check['name'] + ": "
                for msg in check['msgs']:
                    failures[level].setdefault(prefix + msg, []).append(fname)
    for level in levels:
        print(f'{level.capitalize()} priority results\n')
        for msg, failed in failures[level].items():
            print(f'{len(failed)} files failed:\n{msg}')
            if len(failed) < len(files):
                print(f'{failed}\n')
            else:
                print()
    return scores
def print_scores(scores, tests):
    """Print the scores of every test, one file at a time.

    Input:
        scores - nested dictionary: file path -> test name -> dictionary
                 of the score entries recorded for that test
        tests - list of tests performed by checker
    """
    for fpath, per_test in scores.items():
        print(f"\nSummary for {fpath}")
        for test in tests:
            print(f"{test} scores:")
            for key, value in per_test[test].items():
                print(f'{key}: {value}')
def main():
    """Summarise a checker report named on the command line.

    Command line arguments:
        sys.argv[1] - json report file to summarise
        sys.argv[2] - optional; pass 'scores' to also print each file scores
    Exits with a usage message if the report file name is missing.
    """
    # scores detail how many checks a file passed and for the failed ones
    # if check failed with error or warning
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status 1
        sys.exit("Usage: python parse_checker.py <report.json> [scores]")
    infile = sys.argv[1]
    show_scores = len(sys.argv) >= 3 and sys.argv[2] == 'scores'
    # read data from json file, get keys defining tests, files and levels
    # and initialise dictionary to hold scores
    data = read_json(infile)
    tests, files, levels = get_keys(data)
    scores = defaultdict(dict)
    # for each test performed print warnings and add scores to dictionary
    for t in tests:
        scores = report_test(data, t, files, levels, scores)
        # NOTE(review): prints the scores collected so far for the first
        # file; looks like leftover debug output - confirm before removing
        print(scores[files[0]])
    # if user selected so print also scores
    if show_scores:
        print_scores(scores, tests)
# Run the summary only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment