Skip to content

Instantly share code, notes, and snippets.

@davidfischer-ch
Last active January 12, 2024 10:53
Show Gist options
  • Save davidfischer-ch/cdfede27ac053a8332b2127becc07608 to your computer and use it in GitHub Desktop.
Save davidfischer-ch/cdfede27ac053a8332b2127becc07608 to your computer and use it in GitHub Desktop.
sonar_sarif_to_generic.py
"""
Convert SARIF to Generic SonarQube issues import format.
Links:
* https://community.sonarsource.com/t/import-sarif-results-as-security-hotspots/83223
* https://docs.sonarqube.org/9.8/analyzing-source-code/importing-external-issues/generic-issue-import-format
* https://gist.github.com/davidfischer-ch/cdfede27ac053a8332b2127becc07608
Author: David Fischer <david@fisch3r.net>
"""
from __future__ import annotations
from pathlib import Path
from typing import Final
import collections
import json
import os
import sys
# SARIF result levels are described here:
# https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317648
# SonarQube severity can be one of BLOCKER, CRITICAL, MAJOR, MINOR, INFO.
# NOTE: the constant's name carries a historical typo ("SERVERITY"); it is kept as-is
# because other code in this module refers to it by that name.
LEVEL_TO_SERVERITY: Final[dict[str, str]] = {
    'warning': 'MAJOR',
    'error': 'CRITICAL',
    'note': 'MINOR',
    'none': 'INFO',
}

# Issue type used for any engine that has no dedicated entry below.
DEFAULT_REPORT_TYPE: Final[str] = 'CODE_SMELL'

# Issue type per analysis engine, keyed by the lower-cased SARIF driver name.
REPORT_TYPE_BY_ENGINE: Final[dict[str, str]] = {
    'ansible-lint': 'CODE_SMELL',
    'robocop': 'CODE_SMELL',
    'tflint': 'CODE_SMELL',
}

# A (line, column) pair locating a position inside a text file.
Position = collections.namedtuple('Position', ('line', 'column'))
def main(source: Path | str, target: Path | str) -> None:  # pylint:disable=too-many-locals
    """
    Convert the SARIF report `source` to a SonarQube Generic Issue report written to `target`.

    Every result of every run becomes one issue. The issue message is made of the result's
    message followed by a blank line, the rule's help text, its help URL and (if any) its tags.

    Args:
        source: Path of the SARIF (JSON) file to read.
        target: Path of the JSON file to create, it must not exist yet.

    Raises:
        IOError: If `target` already exists.
        ValueError: If the document's `$schema` is missing or does not mention "sarif".
        NotImplementedError: If a result does not have exactly one location.
        KeyError: If a mandatory property is missing or a result level is unknown.
    """
    source = Path(source).resolve()
    target = Path(target).resolve()
    if target.exists():
        raise IOError(f'Target file "{target}" already exist.')

    sarif_data: dict = json.loads(source.read_text(encoding='utf-8'))
    # A missing $schema is reported with the same error as an unrelated schema
    if 'sarif' not in sarif_data.get('$schema', ''):
        raise ValueError('Source is (probably) not a valid sarif file.')

    issues: list[dict] = []
    # Lines of the analyzed files, cached to avoid reading a file once per result
    lines_by_file: dict[str, list[str]] = {}

    for run_index, run_data in enumerate(sarif_data['runs'], 1):
        driver_data = run_data['tool']['driver']
        engine_id = driver_data['name']
        engine_key = engine_id.lower()
        rules: dict[str, dict] = {rule['id']: rule for rule in driver_data.get('rules', ())}

        # The results property is optional in SARIF (absent or null), then nothing to convert
        for result_index, result_data in enumerate(run_data.get('results') or [], 1):

            # Handling multiple locations is not implemented yet (work in progress)
            if (num_locations := len(result_data['locations'])) != 1:
                raise NotImplementedError(
                    f'File {source} : run[{run_index}].results[{result_index}].locations[] '
                    f'size expected 1, actual {num_locations}')

            rule_id = result_data['ruleId']
            # The driver may declare no rules at all, or not this one in particular
            rule_data = rules.get(rule_id, {})
            location_data = result_data['locations'][0]['physicalLocation']
            file_path = location_data['artifactLocation']['uri']

            message_lines = [
                result_data['message']['text'],
                '',  # Blank line between the message and the rule's details
                f"Help: {rule_data.get('help', {}).get('text') or '<empty>'}",
                f"URL: {rule_data.get('helpUri') or '<empty>'}"
            ]
            if tags := rule_data.get('properties', {}).get('tags', []):
                message_lines.append(f"Tags: {', '.join(clean_tag(tag) for tag in tags)}")

            # The level property is optional in SARIF: fall back to the rule's default
            # configuration then to "warning"
            level = (
                result_data.get('level')
                or rule_data.get('defaultConfiguration', {}).get('level')
                or 'warning')

            issue = {
                'engineId': engine_id,
                'primaryLocation': {
                    'filePath': file_path,
                    'message': '\n'.join(message_lines)
                },
                'ruleId': rule_id,
                'severity': LEVEL_TO_SERVERITY[level],
                'type': REPORT_TYPE_BY_ENGINE.get(engine_key, DEFAULT_REPORT_TYPE)
            }

            # Converting location data, both positions are kept 0-indexed (line and column)
            region = location_data['region']
            start = Position(region['startLine'] - 1, region.get('startColumn', 1) - 1)
            end = Position(
                region.get('endLine', start.line + 1) - 1,
                region.get('endColumn', start.column + 1) - 1)

            # Fix location data for some tools (data is wrong or missing)
            if engine_key in {'ansible-lint', 'robocop'}:
                # Ensure the end position makes sense or fix it
                if file_path not in lines_by_file:
                    # Text mode already translates any line ending to "\n" on every platform,
                    # so splitting on os.linesep would not split anything on Windows
                    lines_by_file[file_path] = \
                        Path(file_path).read_text(encoding='utf-8').split('\n')
                lines = lines_by_file[file_path]

                if start == end or (end.column and end.column > len(lines[end.line])):
                    prev_start, prev_end = start, end
                    if end.line + 1 < len(lines):
                        # Move end position to next line at column 0
                        end = Position(end.line + 1, 0)
                    else:
                        # Move start to previous line at same column
                        # Move end position to same line at column 0
                        start = Position(start.line - 1, start.column)
                        end = Position(end.line, 0)
                    assert start.line >= 0, (start, end)
                    print(
                        f"Wrong indexation (0-indexed) {file_path}: "
                        f"(start={tuple(prev_start)} end={tuple(prev_end)}), "
                        f"fix it by setting start={tuple(start)} end={tuple(end)}")

            # Lines are 1-indexed both in SARIF and Sonar Generic
            # Columns are 1-indexed in SARIF 0-indexed in Sonar Generic
            issue['primaryLocation']['textRange'] = {
                'startLine': start.line + 1,
                'startColumn': start.column,
                'endLine': end.line + 1,
                'endColumn': end.column
            }
            issues.append(issue)

    target.write_text(json.dumps({'issues': issues}, indent=2), encoding='utf-8')
def clean_tag(value: str) -> str:
    """
    Return the tag `value` ready to be listed in an issue message.

    A tag containing a space is wrapped in single quotes, any other tag is returned unchanged.
    """
    return f"'{value}'" if ' ' in value else value
# Script entry point: convert the SARIF file given as first argument to the Generic Issue
# report given as second argument.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        # Report the expected arguments instead of failing with a bare IndexError
        sys.exit(f'Usage: {sys.argv[0]} <source.sarif> <target.json>')
    main(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment