Extend the clang -fdiagnostics-format
option's possible values
to include an option that asks clang to emit JSON diagnostics that are easily
consumed by machines.
Machine-consumable diagnostics are important for writing generic static analysis wrappers and harnesses that want to interact with codebases through clang. There are two options to consider for the diagnostic format to use in clang:
- Mimic gcc-9 -fdiagnostics-format=json, covered in the previous work section
- Emit SARIF diagnostic information, a cross-language standardized format that is already supported in clang/lib/StaticAnalyzer (through --analyzer-output=sarif)
We propose the second option (SARIF), as it is a standardized format, which should make it easier for tools to implement support for it.
GCC recently implemented serializing diagnostics to JSON. This option
would be implemented as -fdiagnostics-format=json-gcc
in clang to signal to
users its intended interoperability with the corresponding gcc option.
The schema for this format may be inferred from current gcc code.
While not a community standard, it can be expected to be reasonably stable, as the original patch states the flag emits machine-readable diagnostics.
SARIF (Static Analysis Results Interchange Format) is a standard format for the output for static analysis tools.
LLVM/Clang StaticAnalyzer already implements a SARIF diagnostic consumer in D53814; this should allow us to add any necessary extra fields to the diagnostics output.
This section assumes the typical compiler diagnostic, which looks like what is provided in the expressive diagnostics page.
In SARIF, the attributes can be mapped to the results
property as follows:
- File name where the diagnostic occurs is relocated to the physicalLocation property
- Line/Column of the caret marking the error can be stored in the region property; this can also encode the source range to which an error corresponds
- The error message can be transferred to the message property
- Each of the locations can store the rendered caret & snippet from clang using the snippet property for that region
- Nested diagnostics (typically note level items) can be represented using the locationRelationship object
- Fixit hints can be communicated through the fixes property
We propose the following interface changes:
- Extend the -fdiagnostics-format flag to recognize: -fdiagnostics-format=sarif
Clang will emit SARIF formatted diagnostics when -fdiagnostics-format=sarif
is provided.
Otherwise, there are no changes proposed to the output interface.
Here we look at some examples from clang's expressive diagnostic page.
$ clang -fsyntax-only t.c
t.c:7:39: error: invalid operands to binary expression ('int' and 'struct a')
return y + func(y ? ((SomeA.X + 40) + SomeA) / 42 + SomeA.X : SomeA.X);
~~~~~~~~~~~~~~ ^ ~~~~~
Encoded to SARIF:
$ clang -fsyntax-only -fdiagnostics-format=sarif t.c
{
"version": "2.1.0",
"$schema": "http://json.schemastore.org/sarif-2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "clang",
"version": "9.0.0",
"language": "en-US"
}
},
"files": {
"file:t.c": {
"filelocation": {
"uri": "file:t.c"
},
"length": 1000,
"mimetype": "text/plain",
"roles": [
"resultfile"
]
}
},
"results": [
{
"ruleId": "syntax",
"level": "error",
"message": {
"text": "invalid operands to binary expression ('int' and 'struct a')"
},
"locations": [
{
"physicalLocation": {
"artifactLocation": {
"uri": "file:t.c"
},
"region": {
"startLine": 7,
"startColumn": 39,
"endColumn": 60,
"snippet": {
"rendered": {
"text": "return y + func(y ? ((SomeA.X + 40) + SomeA) / 42 + SomeA.X : SomeA.X);\n ~~~~~~~~~~~~~~ ^ ~~~~~"
}
}
}
}
}
]
}
]
}
]
}
$ clang t.c
t.c:5:28: warning: use of GNU old-style field designator extension
struct point origin = { x: 0.0, .y = 0.0 };
~~ ^
.x =
$ clang -fdiagnostics-format=sarif t.c
{
"version": "2.1.0",
"$schema": "http://json.schemastore.org/sarif-2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "clang",
"version": "9.0.0",
"language": "en-US"
}
},
"files": {
"file:t.c": {
"filelocation": {
"uri": "file:t.c"
},
"length": 1000,
"mimetype": "text/plain",
"roles": [
"resultfile"
]
}
},
"results": [
{
"ruleId": "syntax",
"level": "warning",
"message": {
"text": "use of GNU old-style field designator extension"
},
"locations": [
{
"physicalLocation": {
"artifactLocation": {
"uri": "file:t.c"
},
"region": {
"startLine": 5,
"startColumn": 28,
"endColumn": 30,
"snippet": {
"rendered": {
"text": "struct point origin = { x: 0.0, .y = 0.0 };\n ~~ ^\n .x = "
}
}
}
}
}
],
"fixes": [
{
"description": {
"text": "use of GNU old-style field designator extension"
},
"artifactChanges": [
{
"artifactLocation": {
"uri": "file:t.c"
},
"replacements": [
{
"deletedRegion": {
"startLine": 5,
"startColumn": 28,
"endColumn": 30
},
"insertedContent": {
"text": ".x = "
}
}
]
}
]
}
]
}
]
}
]
}
$ clang -fsyntax-only t.c
t.c:80:3: error: invalid operands to binary expression ('typeof(P)' (aka 'struct mystruct') and 'typeof(F)' (aka 'float'))
X = MYMAX(P, F);
^~~~~~~~~~~
t.c:76:94: note: expanded from:
#define MYMAX(A,B) __extension__ ({ __typeof__(A) __a = (A); __typeof__(B) __b = (B); __a < __b ? __b : __a; })
~~~ ^ ~~~
$ clang -fsyntax-only -fdiagnostics-format=sarif t.c
{
"version": "2.1.0",
"$schema": "http://json.schemastore.org/sarif-2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "clang",
"version": "9.0.0",
"language": "en-US"
}
},
"files": {
"file:t.c": {
"filelocation": {
"uri": "file:t.c"
},
"length": 1000,
"mimetype": "text/plain",
"roles": [
"resultfile"
]
}
},
"results": [
{
"ruleId": "syntax",
"level": "error",
"message": {
"text": "invalid operands to binary expression ('typeof(P)' (aka 'struct mystruct') and 'typeof(F)' (aka 'float'))"
},
"locations": [
{
"id": 0,
"physicalLocation": {
"artifactLocation": {
"uri": "file:t.c"
},
"region": {
"startLine": 80,
"startColumn": 3,
"endColumn": 30,
"snippet": {
"rendered": {
"text": "X = MYMAX(P, F);\n ^~~~~~~~~~~"
}
}
}
},
"relationships": [
{
"target": 1,
"kinds": ["macro-expansion"]
}
]
}
],
"relatedLocations": [
{
"id": 1,
"physicalLocation": {
"artifactLocation": {
"uri": "file:t.c"
},
"region": {
"startLine": 76,
"startColumn": 94,
"snippet": {
"rendered": {
"text": "#define MYMAX(A,B) __extension__ ({ __typeof__(A) __a = (A); __typeof__(B) __b = (B); __a < __b ? __b : __a; })\n ~~~ ^ ~~~"
}
}
}
}
}
]
}
]
}
]
}
cfe-dev thread: https://lists.llvm.org/pipermail/cfe-dev/2021-March/067907.html