Last active
July 9, 2022 00:37
-
-
Save jspeed-meyers/0f761f413ae1d20e6b54bce8a33ea8a9 to your computer and use it in GitHub Desktop.
Parse scorecard-derived JSON files and store in a csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse JSON files created by the scorecard tool and store the results in
# a csv
#
# Usage:
#
# python parse_scorecard_json.py
#
#
# Note: Results are stored in the csv/ folder in a timestamped csv file
#
import csv
import json
import os
from datetime import datetime

# assign directory that stores the json files
directory = 'data'

# create a timestamped and thus unique name for results
# NOTE: isoformat() puts ':' characters in the file name; kept as-is so
# existing output paths do not change, but such names may be rejected on
# file systems that disallow ':' (e.g. Windows) -- TODO confirm target OS.
csv_output_name = "csv/results-" + \
    datetime.now().isoformat(timespec='seconds') + ".csv"

# column names of the output csv
FIELDNAMES = ["repo_name", "score"]


def _read_scorecard_row(path):
    """Load one scorecard JSON file and return its csv row as a dict.

    Raises OSError if the file cannot be read, ValueError if it is not
    valid UTF-8 JSON, and KeyError/TypeError if the document lacks the
    expected ``repo.name`` / ``score`` entries.
    """
    # JSON is read explicitly as UTF-8 rather than with the platform
    # default encoding, which differs between systems and is one possible
    # reason why some documents failed to parse.
    with open(path, "r", encoding="utf-8") as f:
        results = json.load(f)
    return {
        "repo_name": results["repo"]["name"],
        "score": results["score"],
    }


def write_scorecard_csv(json_directory, output_path):
    """Parse every file in ``json_directory`` and append rows to a csv.

    Sub-directories are skipped. A file that cannot be read or does not
    have the expected structure is reported on stdout together with the
    reason and skipped, so one bad document does not abort the run.

    Args:
        json_directory: directory holding the scorecard JSON files.
        output_path: csv file to write; its parent directory is created
            if it does not exist yet.

    Returns:
        List of paths of the files that could not be parsed.
    """
    output_dir = os.path.dirname(output_path)
    if output_dir:
        # without this, a fresh checkout without the output folder crashes
        os.makedirs(output_dir, exist_ok=True)

    # scandir is used as a context manager so the directory handle is
    # released; sorting makes the row order reproducible between runs
    with os.scandir(json_directory) as entries:
        paths = sorted(entry.path for entry in entries if entry.is_file())

    failed = []
    with open(output_path, "a", encoding="utf-8", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
        # write field names to csv for easy analysis later
        writer.writeheader()
        for path in paths:
            print("Parsing " + path)
            try:
                row = _read_scorecard_row(path)
            except (OSError, ValueError, KeyError, TypeError) as err:
                # report the cause instead of silently swallowing it, so
                # unparseable documents can be diagnosed
                print("Couldn't parse: " + path +
                      " (" + type(err).__name__ + ": " + str(err) + ")")
                failed.append(path)
            else:
                writer.writerow(row)
    return failed


def main():
    """Parse the files in ``directory`` into ``csv_output_name``."""
    write_scorecard_csv(directory, csv_output_name)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment