Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save alejandro-alvarez-sonarsource/48edec4debc8912a6485f989b2a6f0db to your computer and use it in GitHub Desktop.
Save alejandro-alvarez-sonarsource/48edec4debc8912a6485f989b2a6f0db to your computer and use it in GitHub Desktop.
From 2e194ac8b902b340e1c07d8416e7e7cb5b3f64a4 Mon Sep 17 00:00:00 2001
From: Marco Antognini
<89914223+marco-antognini-sonarsource@users.noreply.github.com>
Date: Mon, 9 May 2022 08:47:41 +0200
Subject: [PATCH] [clang][analyzer] Improve fread model
Address false-positives related to fread.
The fix is twofold: the taint analysis and the modeling of fread are
improved.
Before this change, given
char c; // uninitialized
if (1 == fread(&c, 1, 1, fp)) {
char p = c;
malloc(p);
}
"p = c" used to emit a warning about assigning garbage or undefined
value. Now, a warning is raised on "malloc(p)" because the data is
tainted and can result in arbitrary allocations.
When an error occurs, fread might have modified some values of the
buffer but not necessarily all of them. This distinction is not modeled
however: all previous knowledge about the buffer is invalidated.
As best effort, the model tries to invalidate only the memory regions
altered by fread and not the whole buffer, unless too many elements are
accessed by fread.
---
.../StaticAnalyzer/Checkers/StreamChecker.cpp | 82 +++++
clang/test/Analysis/fread.cpp | 331 ++++++++++++++++++
2 files changed, 413 insertions(+)
create mode 100644 clang/test/Analysis/fread.cpp
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index a070f451694a..3633e9361036 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -819,6 +819,86 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
return;
}
+ auto UpdateBufferRegionForFread = [&](ProgramStateRef NextState) {
+ if (!IsFread)
+ return NextState;
+
+ const auto *BufferOpRegion = Call.getArgSVal(0).getAsRegion();
+ if (!BufferOpRegion)
+ return NextState;
+
+ // If possible, invalidate knowledge only for the elements that were
+ // written.
+ //
+ // In a failure state, ideally, we would have three ranges of indexes for
+ // the buffer output parameter (without loss of generality, consider 0 as
+ // the first index):
+ // - [0, R) are all valid elements,
+ // - [R, R + 1) is indeterminate,
+ // - [R + 1, NMembVal) are unmodified,
+ // where R is the value returned by fread.
+ //
+ // In a success state, R === NMembVal and all elements are valid.
+ //
+ // However, as highlighted in RegionStore.rst, we cannot represent
+ // dependencies between the return value of fread and the elements of
+ // an array.
+ //
+ // Therefore, we invalidate all we know about these elements of the buffer.
+ using IK = RegionAndSymbolInvalidationTraits::InvalidationKinds;
+ if (const auto *ER = BufferOpRegion->getAs<ElementRegion>()) {
+ auto &B = C.getSValBuilder();
+ const auto &IndexVal = ER->getIndex();
+ const llvm::APSInt *StartIndex = B.getKnownValue(NextState, IndexVal);
+ const llvm::APSInt *ElementCount = B.getKnownValue(NextState, *NMembVal);
+ const auto *BufferRegion =
+ dyn_cast_or_null<SubRegion>(ER->getSuperRegion());
+ if (StartIndex && ElementCount && BufferRegion) {
+ assert(StartIndex->isNonNegative());
+ assert(ElementCount->isStrictlyPositive());
+ const auto SI = StartIndex->getZExtValue();
+ const auto EC = ElementCount->getZExtValue();
+
+ // However, we do not want to generate a significant overhead for this
+ // particular check.
+ // It is intentionally hard-coded here, as a better design would be
+ // necessary to assess the "weight" of memory regions in general.
+ constexpr unsigned MaxInvalidatedElementRegion = 64;
+ if (EC <= MaxInvalidatedElementRegion) {
+ auto &RegionManager = BufferRegion->getMemRegionManager();
+ const auto ElementType = ER->getElementType();
+
+ SmallVector<const MemRegion *> UnknownElements;
+ RegionAndSymbolInvalidationTraits ITraits;
+ for (size_t Idx = SI; Idx < SI + EC; ++Idx) {
+ const NonLoc Index = C.getSValBuilder().makeArrayIndex(Idx);
+ const auto *Element = RegionManager.getElementRegion(
+ ElementType, Index, BufferRegion, C.getASTContext());
+ UnknownElements.push_back(Element);
+ ITraits.setTrait(Element, IK::TK_DoNotInvalidateSuperRegion);
+ }
+
+ return NextState->invalidateRegions(
+ UnknownElements, E.CE, C.blockCount(), C.getLocationContext(),
+ /* CausedByPointerEscape */ false,
+ /* InvalidatedSymbols */ nullptr, &Call, &ITraits);
+ }
+ }
+ }
+
+ // Invalidate knowledge only for this var/field, and not its super regions.
+ // Otherwise, invalid any knowledge about this memory region as a whole.
+ RegionAndSymbolInvalidationTraits ITraits;
+ if (isa<VarRegion, FieldRegion>(BufferOpRegion)) {
+ ITraits.setTrait(BufferOpRegion, IK::TK_DoNotInvalidateSuperRegion);
+ }
+
+ return NextState->invalidateRegions(
+ {BufferOpRegion}, E.CE, C.blockCount(), C.getLocationContext(),
+ /* CausedByPointerEscape */ false, /* InvalidatedSymbols */ nullptr,
+ &Call, &ITraits);
+ };
+
// Generate a transition for the success state.
// If we know the state to be FEOF at fread, do not add a success state.
if (!IsFread || !E.isStreamEof()) {
@@ -826,6 +906,7 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
State->BindExpr(E.CE, C.getLocationContext(), *NMembVal);
StateNotFailed =
E.setStreamState(StateNotFailed, StreamState::getOpened(Desc));
+ StateNotFailed = UpdateBufferRegionForFread(StateNotFailed);
C.addTransition(StateNotFailed);
}
@@ -837,6 +918,7 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
if (!StateFailed)
return;
+ StateFailed = UpdateBufferRegionForFread(StateFailed);
StreamErrorState NewES;
if (IsFread)
NewES = E.isStreamEof() ? ErrorFEof : ErrorFEof | ErrorFError;
diff --git a/clang/test/Analysis/fread.cpp b/clang/test/Analysis/fread.cpp
new file mode 100644
index 000000000000..1bd3af77b009
--- /dev/null
+++ b/clang/test/Analysis/fread.cpp
@@ -0,0 +1,331 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN: -analyzer-checker=core.uninitialized.Assign \
+// RUN: -analyzer-checker=alpha.security.taint \
+// RUN: -analyzer-checker=alpha.unix.Stream \
+// RUN: -analyzer-checker=debug.ExprInspection
+
+extern "C" {
+typedef __typeof(sizeof(int)) size_t;
+typedef struct _FILE FILE;
+
+FILE *fopen(const char *filename, const char *mode);
+int fclose(FILE *stream);
+size_t fread(void *buffer, size_t size, size_t count, FILE *stream);
+int fgetc(FILE *stream);
+int feof(FILE *stream);
+}
+
+template <typename T>
+void clang_analyzer_isTainted(T);
+void clang_analyzer_warnIfReached();
+
+// A stream is only tracked by StreamChecker if it results from a call to "fopen".
+// Otherwise, there is no specific modelling of "fread".
+void untrackedStream(FILE *fp) {
+ char c;
+ if (1 == fread(&c, 1, 1, fp)) {
+ char p = c; // Unknown value but not garbage and not modeled by checker.
+ } else {
+ char p = c; // Possibly indeterminate value but not modeled by checker.
+ }
+}
+
+void tainted1() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ int c = fgetc(fp); // c is tainted.
+ if (c > 0)
+ clang_analyzer_isTainted(c); // expected-warning{{YES}}
+ fclose(fp);
+ }
+}
+
+void tainted2() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ char buffer[10];
+ int c = fread(buffer, 1, 10, fp); // c is tainted.
+ if (feof(fp))
+ clang_analyzer_isTainted(c); // expected-warning{{YES}}
+ fclose(fp);
+ }
+}
+
+void readOneByte1() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ char c;
+ if (1 == fread(&c, 1, 1, fp)) {
+ char p = c; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = c; // Possibly indeterminate value but not modeled by checker.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void readOneByte2(char *buffer) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ if (1 == fread(buffer, 1, 1, fp)) {
+ char p = buffer[0]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = buffer[0]; // Possibly indeterminate value but not modeled by checker.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void readOneByte3(char *buffer) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ // buffer[1] is not mutated by fread and remains unknown.
+ fread(buffer, 1, 1, fp);
+ char p = buffer[1];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ fclose(fp);
+ }
+}
+
+void readManyBytes(char *buffer) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ if (42 == fread(buffer, 1, 42, fp)) {
+ char p = buffer[0]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = buffer[0]; // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void randomAccessWrite1(int index) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ long c[4];
+ bool success = 2 == fread(c + 1, sizeof(long), 2, fp);
+
+ switch (index) {
+ case 0:
+ // c[0] is not mutated by fread.
+ if (success) {
+ long p = c[0]; // expected-warning{{Assigned value is garbage or undefined}}
+ } else {
+ long p = c[0]; // expected-warning{{Assigned value is garbage or undefined}}
+ }
+ break;
+
+ case 1:
+ if (success) {
+ long p = c[1]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ long p = c[1]; // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ break;
+
+ case 2:
+ if (success) {
+ long p = c[2]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ // FIXME: raise issue about tainted value used for allocation.
+ // No issue is raised here because the taint analysis only marks
+ // the first byte of a memory region.
+ // See getPointeeOf in GenericTaintChecker.cpp.
+ } else {
+ long p = c[2]; // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ // FIXME: see above.
+ }
+ break;
+
+ case 3:
+ // c[3] is not mutated by fread.
+ if (success) {
+ long p = c[3]; // expected-warning{{Assigned value is garbage or undefined}}
+ } else {
+ long p = c[3]; // expected-warning{{Assigned value is garbage or undefined}}
+ }
+ break;
+ }
+
+ fclose(fp);
+ }
+}
+
+void randomAccessWrite2(bool b) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ int buffer[10];
+ int *ptr = buffer + 2;
+ if (5 == fread(ptr - 1, sizeof(int), 5, fp)) {
+ int p = buffer[1]; // Unknown value but not garbage.
+ if (b) {
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ p = buffer[0]; // expected-warning{{Assigned value is garbage or undefined}}
+ }
+ } else {
+ int p = buffer[0]; // expected-warning{{Assigned value is garbage or undefined}}
+ }
+ fclose(fp);
+ }
+}
+
+void randomAccessWriteSymbolicCount(size_t count) {
+ // Cover a case that used to crash (symbolic count).
+ if (count > 2)
+ return;
+
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ long c[4];
+ fread(c + 1, sizeof(long), count, fp);
+
+ // c[0] and c[3] are never mutated by fread, but because "count" is a symbolic value,
+ // the checker doesn't know that.
+ long p = c[0];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ p = c[3];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+
+ p = c[1];
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+
+ fclose(fp);
+ }
+}
+
+void dynamicRandomAccessWrite(int startIndex) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ long buffer[10];
+ // Cannot reason about index.
+ size_t res = fread(buffer + startIndex, sizeof(long), 5, fp);
+ if (5 == res) {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ } else if (res == 4) {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ p = buffer[startIndex + 1];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ p = buffer[startIndex + 2];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ p = buffer[startIndex + 3];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ p = buffer[startIndex + 4];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ p = buffer[startIndex + 5];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ p = buffer[0];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ } else {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ p = buffer[0];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ }
+ fclose(fp);
+ }
+}
+
+struct S {
+ int a;
+ long b;
+};
+
+void comopundWrite1() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ S s; // s.a is not touched by fread.
+ if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+ long p = s.b;
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ long p = s.b;
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+extern void clang_analyzer_dump(long);
+
+void comopundWrite2() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ S s; // s.a is not touched by fread.
+ if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+ long p = s.a; // expected-warning{{Assigned value is garbage or undefined}}
+ clang_analyzer_warnIfReached();
+ } else {
+ long p = s.a; // expected-warning{{Assigned value is garbage or undefined}}
+ clang_analyzer_warnIfReached();
+ }
+ fclose(fp);
+ }
+}
+
+void varWrite() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ int a, b; // a is not touched by fread.
+ if (1 == fread(&b, sizeof(b), 1, fp)) {
+ long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+ clang_analyzer_warnIfReached();
+ } else {
+ long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+ clang_analyzer_warnIfReached();
+ }
+ fclose(fp);
+ }
+}
+
+// When reading a lot of data, invalidating all elements is too time-consuming.
+// Instead, the knowledge of the whole array is lost.
+#define MaxInvalidatedElementRegion 64 // See StreamChecker::evalFreadFwrite in StreamChecker.cpp.
+#define PastMaxComplexity MaxInvalidatedElementRegion + 1
+void testLargeRead() {
+ int buffer[PastMaxComplexity + 1];
+ buffer[PastMaxComplexity] = 42;
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ if (buffer[PastMaxComplexity] != 42) {
+ clang_analyzer_warnIfReached(); // Unreachable.
+ }
+ if (1 == fread(buffer, sizeof(int), PastMaxComplexity, fp)) {
+ if (buffer[PastMaxComplexity] != 42) {
+ clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+ }
+ }
+ fclose(fp);
+ }
+}
+
+void testSmallRead() {
+ int buffer[10];
+ buffer[5] = 42;
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ if (buffer[5] != 42) {
+ clang_analyzer_warnIfReached(); // Unreachable.
+ }
+ if (1 == fread(buffer, sizeof(int), 5, fp)) {
+ if (buffer[5] != 42) {
+ clang_analyzer_warnIfReached(); // Unreachable.
+ }
+ }
+ fclose(fp);
+ }
+}
+
+void testUnknownRegion(void) {
+ FILE *fp = fopen("file", "r");
+ if (!fp) {
+ return;
+ }
+
+ union {
+ int idx;
+ char *ptr;
+ } u;
+ char buffer[10];
+ u.ptr = buffer;
+ u.idx += 10;
+
+ fread(u.ptr, 1, 10, fp);
+ fclose(fp);
+}
--
2.43.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment