Skip to content

Instantly share code, notes, and snippets.

Last active Jun 2, 2021
What would you like to do?
Python3 script to summarise ioos checker report on multiple files
#!/usr/bin/env python
# This script creates a summary of CF/ACDD tests run by the IOOS checker on multiple files
# First run the checker generating a json_new output
# NB the json format only works on one file; for multiple files you have to use json_new
# i.e. -t=cf -f json_new -o test.json <files>
# then pass the json file as input to this script
# python <this_script>.py test.json [scores]
# Copyright 2019 ARC Centre of Excellence for Climate Extremes
# author: Paola Petrelli <>
# Last updated: 2021-06-02
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import sys
from collections import defaultdict
def read_json(infile):
    """Read IOOS checker output from a json file report.

    Input:
        infile - file name (string)
    Output:
        data - the parsed json content of the report

    NB the file should be in json format; if it cannot be opened or parsed
    a warning is printed and the script exits with status 1.
    """
    try:
        with open(infile, 'r') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid json
        # (json.JSONDecodeError) or bytes that cannot be decoded as text.
        print(f"Failed to open file: {infile} .")
        print("Check that file exists, it is in json format and not corrupted")
        # Without a parsed report there is nothing to summarise, and
        # falling through would fail on the unbound `data` below.
        sys.exit(1)
    return data
def get_keys(data):
    """Get lists of files, tests from json data keys and define priority levels.

    Input:
        data - json data from checker report as read from file
    Output:
        tests - list of tests performed by checker
        files - list of files checked
        levels - list of priority levels

    The list of tests is taken from the first file in the report only.
    An empty report yields empty lists of files and tests.
    """
    files = list(data)
    # Guard against an empty report: there is no first file to inspect.
    tests = list(data[files[0]]) if files else []
    levels = ['high', 'medium', 'low']
    # These keys are ignored currently by this code:
    #['all_priorities','scored_points', 'possible_points', 'high_count',
    # 'medium_count', 'low_count', 'testname', 'source_name',
    # 'cc_spec_version', 'cc_url']
    return tests, files, levels
def report_test(data, test, files, levels, scores):
    """Report results for one kind of test.

    Input:
        data - json data from checker report as read from file
        test - kind of test to write a report for
        files - list of file paths checked (used as keys into data)
        levels - list of priority levels
        scores - a dictionary to collect each file scores
    Output:
        scores - same as input after updating with the test scores

    For every priority level the distinct warning messages are printed
    together with the number of files that raised them; the file names are
    listed only when the warning does not apply to all files.
    """
    # For each file collect scores in scores dictionary
    # and group warnings by priority levels in another dictionary
    print(f"Results for {test} checks")
    print(f"{len(files)} files were checked\n")
    warnings = {level: {} for level in levels}
    for fpath in files:
        fname = fpath.split("/")[-1]
        result = data[fpath][test]
        # Copy the header so the input report is not modified in place.
        header = dict(result['scoreheader'])
        header['scored_points'] = result['scored_points']
        header['possible_points'] = result['possible_points']
        scores[fpath][test] = header
        for level in levels:
            # Count each message at most once per file, even if repeated.
            seen = set()
            for w in result[level + "_priorities"]:
                name = w['name'] + ": "
                for m in w['msgs']:
                    key = name + m
                    if key in seen:
                        continue
                    seen.add(key)
                    # Accumulate file names instead of overwriting the list,
                    # so the per-message file count is correct.
                    warnings[level].setdefault(key, []).append(fname)
    for level in levels:
        print(f'{level.capitalize()} priority results\n')
        for m, failed in warnings[level].items():
            nfiles = len(failed)
            print(f'{nfiles} files failed:\n{m}')
            if nfiles < len(files):
                print(f'{failed}\n')
    return scores
def print_scores(scores, tests):
    """Print all tests scores for each file.

    Input:
        scores - dictionary collecting all the files scores, for each tests
                 the main dictionary has fpath as keys, the values
                 are dictionary of the scores with tests as keys
        tests - list of tests performed by checker

    A test with no recorded scores for a file prints its heading only.
    """
    for fpath, file_scores in scores.items():
        print(f"\nSummary for {fpath}")
        for test in tests:
            print(f"{test} scores:")
            for k, v in file_scores.get(test, {}).items():
                print(f'{k}: {v}')
def main():
    """Summarise a checker report given on the command line.

    Usage: <script> <report.json> [scores]

    The first argument is the json report file; passing the literal word
    'scores' as second argument also prints the scores of each file.
    """
    # read from input filename and if to show scores
    # scores detail how many checks a file passed and for the failed ones
    # if check failed with error or warning
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <report.json> [scores]")
    infile = sys.argv[1]
    show_scores = len(sys.argv) >= 3 and sys.argv[2] == 'scores'
    # read data from json file, get keys defining tests, files and levels
    # and initialise dictionary to hold scores
    data = read_json(infile)
    tests, files, levels = get_keys(data)
    scores = defaultdict(dict)
    # for each test performed print warnings and add scores to dictionary
    for t in tests:
        scores = report_test(data, t, files, levels, scores)
    # if user selected so print also scores, once every test is collected
    if show_scores:
        print_scores(scores, tests)
# Run the summary only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment