From 222e2a7620e6520ffaf4fc4e69d79c18da31542e Mon Sep 17 00:00:00 2001 From: "Zancanaro; Carlo" Date: Mon, 24 Sep 2012 09:58:17 +1000 Subject: Add the clang library to the repo (with some of my changes, too). --- clang/lib/StaticAnalyzer/CMakeLists.txt | 3 + .../Checkers/.#ArrayBoundCheckerV2_flymake.cpp | 1 + clang/lib/StaticAnalyzer/Checkers/.#CMakeLists.txt | 1 + clang/lib/StaticAnalyzer/Checkers/.#Checkers.td | 1 + .../StaticAnalyzer/Checkers/.#DebugCheckers.cpp | 1 + .../Checkers/.#DebugCheckers_flymake.cpp | 1 + .../Checkers/.#DivZeroChecker_flymake.cpp | 1 + .../lib/StaticAnalyzer/Checkers/.#IntervalTest.cpp | 1 + .../Checkers/.#IntervalTest_flymake.cpp | 1 + .../Checkers/AdjustedReturnValueChecker.cpp | 92 + .../Checkers/AnalyzerStatsChecker.cpp | 140 + .../StaticAnalyzer/Checkers/ArrayBoundChecker.cpp | 92 + .../Checkers/ArrayBoundCheckerV2.cpp | 318 ++ .../StaticAnalyzer/Checkers/AttrNonNullChecker.cpp | 134 + .../Checkers/BasicObjCFoundationChecks.cpp | 672 ++++ .../Checkers/BoolAssignmentChecker.cpp | 157 + .../Checkers/BuiltinFunctionChecker.cpp | 82 + clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt | 81 + .../lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 1981 +++++++++++ .../Checkers/CStringSyntaxChecker.cpp | 191 + .../Checkers/CallAndMessageChecker.cpp | 385 ++ .../StaticAnalyzer/Checkers/CastSizeChecker.cpp | 86 + .../Checkers/CastToStructChecker.cpp | 74 + .../StaticAnalyzer/Checkers/CheckObjCDealloc.cpp | 291 ++ .../Checkers/CheckObjCInstMethSignature.cpp | 146 + .../Checkers/CheckSecuritySyntaxOnly.cpp | 786 +++++ .../StaticAnalyzer/Checkers/CheckSizeofPointer.cpp | 92 + .../Checkers/CheckerDocumentation.cpp | 233 ++ clang/lib/StaticAnalyzer/Checkers/Checkers.td | 491 +++ .../lib/StaticAnalyzer/Checkers/ChrootChecker.cpp | 158 + .../lib/StaticAnalyzer/Checkers/ClangCheckers.cpp | 32 + .../lib/StaticAnalyzer/Checkers/ClangSACheckers.h | 37 + .../Checkers/CommonBugCategories.cpp | 18 + .../StaticAnalyzer/Checkers/DeadStoresChecker.cpp | 386 ++ .../lib/StaticAnalyzer/Checkers/DebugCheckers.cpp | 146 + .../StaticAnalyzer/Checkers/DereferenceChecker.cpp | 216 ++ .../lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp | 96 + .../Checkers/FixedAddressChecker.cpp | 67 + .../Checkers/GenericTaintChecker.cpp | 740 ++++ .../Checkers/IdempotentOperationChecker.cpp | 747 ++++ .../lib/StaticAnalyzer/Checkers/InterCheckerAPI.h | 22 + clang/lib/StaticAnalyzer/Checkers/IntervalTest.cpp | 25 + .../StaticAnalyzer/Checkers/IteratorsChecker.cpp | 603 ++++ .../Checkers/LLVMConventionsChecker.cpp | 314 ++ .../Checkers/MacOSKeychainAPIChecker.cpp | 681 ++++ .../StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp | 116 + clang/lib/StaticAnalyzer/Checkers/Makefile | 24 + .../lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 1463 ++++++++ .../Checkers/MallocOverflowSecurityChecker.cpp | 267 ++ .../Checkers/MallocSizeofChecker.cpp | 211 ++ .../Checkers/NSAutoreleasePoolChecker.cpp | 89 + .../lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp | 334 ++ .../Checkers/NoReturnFunctionChecker.cpp | 146 + .../StaticAnalyzer/Checkers/OSAtomicChecker.cpp | 218 ++ .../StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp | 96 + .../Checkers/ObjCContainersASTChecker.cpp | 174 + .../Checkers/ObjCContainersChecker.cpp | 159 + .../Checkers/ObjCSelfInitChecker.cpp | 396 +++ .../Checkers/ObjCUnusedIVarsChecker.cpp | 186 + .../Checkers/PointerArithChecker.cpp | 69 + .../StaticAnalyzer/Checkers/PointerSubChecker.cpp | 76 + .../StaticAnalyzer/Checkers/PthreadLockChecker.cpp | 198 ++ .../StaticAnalyzer/Checkers/RetainCountChecker.cpp | 3702 ++++++++++++++++++++ .../Checkers/ReturnPointerRangeChecker.cpp | 91 + .../StaticAnalyzer/Checkers/ReturnUndefChecker.cpp | 65 + .../Checkers/StackAddrEscapeChecker.cpp | 230 ++ .../lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 475 +++ .../StaticAnalyzer/Checkers/TaintTesterChecker.cpp | 62 + .../StaticAnalyzer/Checkers/UndefBranchChecker.cpp | 112 + .../Checkers/UndefCapturedBlockVarChecker.cpp | 105 + .../StaticAnalyzer/Checkers/UndefResultChecker.cpp | 91 + .../Checkers/UndefinedArraySubscriptChecker.cpp | 55 + .../Checkers/UndefinedAssignmentChecker.cpp | 88 + .../lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp | 353 ++ .../Checkers/UnreachableCodeChecker.cpp | 247 ++ .../lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp | 162 + .../StaticAnalyzer/Checkers/VirtualCallChecker.cpp | 241 ++ clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp | 78 + .../StaticAnalyzer/Core/BasicConstraintManager.cpp | 367 ++ .../lib/StaticAnalyzer/Core/BasicValueFactory.cpp | 291 ++ clang/lib/StaticAnalyzer/Core/BlockCounter.cpp | 86 + clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 2056 +++++++++++ .../StaticAnalyzer/Core/BugReporterVisitors.cpp | 784 +++++ clang/lib/StaticAnalyzer/Core/CMakeLists.txt | 45 + clang/lib/StaticAnalyzer/Core/Checker.cpp | 31 + clang/lib/StaticAnalyzer/Core/CheckerContext.cpp | 83 + clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp | 80 + clang/lib/StaticAnalyzer/Core/CheckerManager.cpp | 678 ++++ clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp | 150 + clang/lib/StaticAnalyzer/Core/CoreEngine.cpp | 688 ++++ clang/lib/StaticAnalyzer/Core/Environment.cpp | 295 ++ clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp | 405 +++ clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 2076 +++++++++++ clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 811 +++++ clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 300 ++ .../Core/ExprEngineCallAndReturn.cpp | 487 +++ clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp | 273 ++ clang/lib/StaticAnalyzer/Core/FunctionSummary.cpp | 38 + clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 578 +++ clang/lib/StaticAnalyzer/Core/Makefile | 17 + clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 1101 ++++++ clang/lib/StaticAnalyzer/Core/ObjCMessage.cpp | 90 + clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp | 755 ++++ clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp | 513 +++ clang/lib/StaticAnalyzer/Core/ProgramState.cpp | 709 ++++ .../StaticAnalyzer/Core/RangeConstraintManager.cpp | 442 +++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 2009 +++++++++++ clang/lib/StaticAnalyzer/Core/SValBuilder.cpp | 386 ++ clang/lib/StaticAnalyzer/Core/SVals.cpp | 331 ++ .../Core/SimpleConstraintManager.cpp | 307 ++ .../StaticAnalyzer/Core/SimpleConstraintManager.h | 101 + .../lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp | 973 +++++ clang/lib/StaticAnalyzer/Core/Store.cpp | 362 ++ clang/lib/StaticAnalyzer/Core/SubEngine.cpp | 14 + clang/lib/StaticAnalyzer/Core/SymbolManager.cpp | 540 +++ .../StaticAnalyzer/Core/TextPathDiagnostics.cpp | 69 + .../StaticAnalyzer/Frontend/AnalysisConsumer.cpp | 680 ++++ .../lib/StaticAnalyzer/Frontend/AnalysisConsumer.h | 43 + clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt | 21 + .../Frontend/CheckerRegistration.cpp | 133 + .../StaticAnalyzer/Frontend/FrontendActions.cpp | 23 + clang/lib/StaticAnalyzer/Frontend/Makefile | 19 + clang/lib/StaticAnalyzer/Makefile | 18 + clang/lib/StaticAnalyzer/README.txt | 139 + 124 files changed, 41599 insertions(+) create mode 100644 clang/lib/StaticAnalyzer/CMakeLists.txt create mode 120000 clang/lib/StaticAnalyzer/Checkers/.#ArrayBoundCheckerV2_flymake.cpp create mode 120000 clang/lib/StaticAnalyzer/Checkers/.#CMakeLists.txt create mode 120000 clang/lib/StaticAnalyzer/Checkers/.#Checkers.td create mode 120000 clang/lib/StaticAnalyzer/Checkers/.#DebugCheckers.cpp create mode 120000 clang/lib/StaticAnalyzer/Checkers/.#DebugCheckers_flymake.cpp create mode 120000 clang/lib/StaticAnalyzer/Checkers/.#DivZeroChecker_flymake.cpp create mode 120000 clang/lib/StaticAnalyzer/Checkers/.#IntervalTest.cpp create mode 120000 clang/lib/StaticAnalyzer/Checkers/.#IntervalTest_flymake.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/AdjustedReturnValueChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/AttrNonNullChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt create mode 100644 clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/Checkers.td create mode 100644 clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h create mode 100644 clang/lib/StaticAnalyzer/Checkers/CommonBugCategories.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/IdempotentOperationChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h create mode 100644 clang/lib/StaticAnalyzer/Checkers/IntervalTest.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/IteratorsChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/Makefile create mode 100644 clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/OSAtomicChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/BasicConstraintManager.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/BlockCounter.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/BugReporter.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/CMakeLists.txt create mode 100644 clang/lib/StaticAnalyzer/Core/Checker.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/CheckerContext.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/CheckerManager.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/CoreEngine.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/Environment.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/FunctionSummary.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/Makefile create mode 100644 clang/lib/StaticAnalyzer/Core/MemRegion.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/ObjCMessage.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/ProgramState.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/RegionStore.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/SValBuilder.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/SVals.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.h create mode 100644 clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/Store.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/SubEngine.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/SymbolManager.cpp create mode 100644 clang/lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp create mode 100644 clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp create mode 100644 clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.h create mode 100644 clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt create mode 100644 clang/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp create mode 100644 clang/lib/StaticAnalyzer/Frontend/FrontendActions.cpp create mode 100644 clang/lib/StaticAnalyzer/Frontend/Makefile create mode 100644 clang/lib/StaticAnalyzer/Makefile create mode 100644 clang/lib/StaticAnalyzer/README.txt (limited to 'clang/lib/StaticAnalyzer') diff --git a/clang/lib/StaticAnalyzer/CMakeLists.txt b/clang/lib/StaticAnalyzer/CMakeLists.txt new file mode 100644 index 0000000..3d15092 --- /dev/null +++ b/clang/lib/StaticAnalyzer/CMakeLists.txt @@ -0,0 +1,3 @@ +add_subdirectory(Core) +add_subdirectory(Checkers) +add_subdirectory(Frontend) diff --git a/clang/lib/StaticAnalyzer/Checkers/.#ArrayBoundCheckerV2_flymake.cpp b/clang/lib/StaticAnalyzer/Checkers/.#ArrayBoundCheckerV2_flymake.cpp new file mode 120000 index 0000000..235903b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/.#ArrayBoundCheckerV2_flymake.cpp @@ -0,0 +1 @@ +carlo@pc-4w14-0.cs.usyd.edu.au.1585:1347012043 \ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/.#CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/.#CMakeLists.txt new file mode 120000 index 0000000..235903b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/.#CMakeLists.txt @@ -0,0 +1 @@ +carlo@pc-4w14-0.cs.usyd.edu.au.1585:1347012043 \ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/.#Checkers.td b/clang/lib/StaticAnalyzer/Checkers/.#Checkers.td new file mode 120000 index 0000000..235903b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/.#Checkers.td @@ -0,0 +1 @@ +carlo@pc-4w14-0.cs.usyd.edu.au.1585:1347012043 \ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/.#DebugCheckers.cpp b/clang/lib/StaticAnalyzer/Checkers/.#DebugCheckers.cpp new file mode 120000 index 0000000..235903b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/.#DebugCheckers.cpp @@ -0,0 +1 @@ +carlo@pc-4w14-0.cs.usyd.edu.au.1585:1347012043 \ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/.#DebugCheckers_flymake.cpp b/clang/lib/StaticAnalyzer/Checkers/.#DebugCheckers_flymake.cpp new file mode 120000 index 0000000..235903b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/.#DebugCheckers_flymake.cpp @@ -0,0 +1 @@ +carlo@pc-4w14-0.cs.usyd.edu.au.1585:1347012043 \ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/.#DivZeroChecker_flymake.cpp b/clang/lib/StaticAnalyzer/Checkers/.#DivZeroChecker_flymake.cpp new file mode 120000 index 0000000..235903b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/.#DivZeroChecker_flymake.cpp @@ -0,0 +1 @@ +carlo@pc-4w14-0.cs.usyd.edu.au.1585:1347012043 \ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/.#IntervalTest.cpp b/clang/lib/StaticAnalyzer/Checkers/.#IntervalTest.cpp new file mode 120000 index 0000000..235903b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/.#IntervalTest.cpp @@ -0,0 +1 @@ +carlo@pc-4w14-0.cs.usyd.edu.au.1585:1347012043 \ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/.#IntervalTest_flymake.cpp b/clang/lib/StaticAnalyzer/Checkers/.#IntervalTest_flymake.cpp new file mode 120000 index 0000000..235903b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/.#IntervalTest_flymake.cpp @@ -0,0 +1 @@ +carlo@pc-4w14-0.cs.usyd.edu.au.1585:1347012043 \ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/AdjustedReturnValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AdjustedReturnValueChecker.cpp new file mode 100644 index 0000000..84ea8c7 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/AdjustedReturnValueChecker.cpp @@ -0,0 +1,92 @@ +//== AdjustedReturnValueChecker.cpp -----------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines AdjustedReturnValueChecker, a simple check to see if the +// return value of a function call is different than the one the caller thinks +// it is. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" + +using namespace clang; +using namespace ento; + +namespace { +class AdjustedReturnValueChecker : + public Checker< check::PostStmt > { +public: + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; +}; +} + +void AdjustedReturnValueChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + + // Get the result type of the call. + QualType expectedResultTy = CE->getType(); + + // Fetch the signature of the called function. + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + SVal V = state->getSVal(CE, LCtx); + + if (V.isUnknown()) + return; + + // Casting to void? Discard the value. + if (expectedResultTy->isVoidType()) { + C.addTransition(state->BindExpr(CE, LCtx, UnknownVal())); + return; + } + + const MemRegion *callee = state->getSVal(CE->getCallee(), LCtx).getAsRegion(); + if (!callee) + return; + + QualType actualResultTy; + + if (const FunctionTextRegion *FT = dyn_cast(callee)) { + const FunctionDecl *FD = FT->getDecl(); + actualResultTy = FD->getResultType(); + } + else if (const BlockDataRegion *BD = dyn_cast(callee)) { + const BlockTextRegion *BR = BD->getCodeRegion(); + const BlockPointerType *BT=BR->getLocationType()->getAs(); + const FunctionType *FT = BT->getPointeeType()->getAs(); + actualResultTy = FT->getResultType(); + } + + // Can this happen? + if (actualResultTy.isNull()) + return; + + // For now, ignore references. + if (actualResultTy->getAs()) + return; + + + // Are they the same? + if (expectedResultTy != actualResultTy) { + // FIXME: Do more checking and actual emit an error. At least performing + // the cast avoids some assertion failures elsewhere. + SValBuilder &svalBuilder = C.getSValBuilder(); + V = svalBuilder.evalCast(V, expectedResultTy, actualResultTy); + C.addTransition(state->BindExpr(CE, LCtx, V)); + } +} + +void ento::registerAdjustedReturnValueChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp new file mode 100644 index 0000000..aa6f97b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp @@ -0,0 +1,140 @@ +//==--AnalyzerStatsChecker.cpp - Analyzer visitation statistics --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file reports various statistics about analyzer visitation. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "StatsChecker" + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" + +#include "clang/AST/DeclObjC.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" + +using namespace clang; +using namespace ento; + +STATISTIC(NumBlocks, + "The # of blocks in top level functions"); +STATISTIC(NumBlocksUnreachable, + "The # of unreachable blocks in analyzing top level functions"); + +namespace { +class AnalyzerStatsChecker : public Checker { +public: + void checkEndAnalysis(ExplodedGraph &G, BugReporter &B,ExprEngine &Eng) const; +}; +} + +void AnalyzerStatsChecker::checkEndAnalysis(ExplodedGraph &G, + BugReporter &B, + ExprEngine &Eng) const { + const CFG *C = 0; + const SourceManager &SM = B.getSourceManager(); + llvm::SmallPtrSet reachable; + + // Root node should have the location context of the top most function. + const ExplodedNode *GraphRoot = *G.roots_begin(); + const LocationContext *LC = GraphRoot->getLocation().getLocationContext(); + + const Decl *D = LC->getDecl(); + + // Iterate over the exploded graph. + for (ExplodedGraph::node_iterator I = G.nodes_begin(); + I != G.nodes_end(); ++I) { + const ProgramPoint &P = I->getLocation(); + + // Only check the coverage in the top level function (optimization). + if (D != P.getLocationContext()->getDecl()) + continue; + + if (const BlockEntrance *BE = dyn_cast(&P)) { + const CFGBlock *CB = BE->getBlock(); + reachable.insert(CB); + } + } + + // Get the CFG and the Decl of this block. + C = LC->getCFG(); + + unsigned total = 0, unreachable = 0; + + // Find CFGBlocks that were not covered by any node + for (CFG::const_iterator I = C->begin(); I != C->end(); ++I) { + const CFGBlock *CB = *I; + ++total; + // Check if the block is unreachable + if (!reachable.count(CB)) { + ++unreachable; + } + } + + // We never 'reach' the entry block, so correct the unreachable count + unreachable--; + // There is no BlockEntrance corresponding to the exit block as well, so + // assume it is reached as well. + unreachable--; + + // Generate the warning string + SmallString<128> buf; + llvm::raw_svector_ostream output(buf); + PresumedLoc Loc = SM.getPresumedLoc(D->getLocation()); + if (!Loc.isValid()) + return; + + if (isa(D) || isa(D)) { + const NamedDecl *ND = cast(D); + output << *ND; + } + else if (isa(D)) { + output << "block(line:" << Loc.getLine() << ":col:" << Loc.getColumn(); + } + + NumBlocksUnreachable += unreachable; + NumBlocks += total; + std::string NameOfRootFunction = output.str(); + + output << " -> Total CFGBlocks: " << total << " | Unreachable CFGBlocks: " + << unreachable << " | Exhausted Block: " + << (Eng.wasBlocksExhausted() ? "yes" : "no") + << " | Empty WorkList: " + << (Eng.hasEmptyWorkList() ? "yes" : "no"); + + B.EmitBasicReport(D, "Analyzer Statistics", "Internal Statistics", + output.str(), PathDiagnosticLocation(D, SM)); + + // Emit warning for each block we bailed out on. + typedef CoreEngine::BlocksExhausted::const_iterator ExhaustedIterator; + const CoreEngine &CE = Eng.getCoreEngine(); + for (ExhaustedIterator I = CE.blocks_exhausted_begin(), + E = CE.blocks_exhausted_end(); I != E; ++I) { + const BlockEdge &BE = I->first; + const CFGBlock *Exit = BE.getDst(); + const CFGElement &CE = Exit->front(); + if (const CFGStmt *CS = dyn_cast(&CE)) { + SmallString<128> bufI; + llvm::raw_svector_ostream outputI(bufI); + outputI << "(" << NameOfRootFunction << ")" << + ": The analyzer generated a sink at this point"; + B.EmitBasicReport(D, "Sink Point", "Internal Statistics", outputI.str(), + PathDiagnosticLocation::createBegin(CS->getStmt(), + SM, LC)); + } + } +} + +void ento::registerAnalyzerStatsChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp new file mode 100644 index 0000000..b2ad184 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp @@ -0,0 +1,92 @@ +//== ArrayBoundChecker.cpp ------------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ArrayBoundChecker, which is a path-sensitive check +// which looks for an out-of-bound array element access. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; + +namespace { +class ArrayBoundChecker : + public Checker { + mutable OwningPtr BT; +public: + void checkLocation(SVal l, bool isLoad, const Stmt* S, + CheckerContext &C) const; +}; +} + +void ArrayBoundChecker::checkLocation(SVal l, bool isLoad, const Stmt* LoadS, + CheckerContext &C) const { + // Check for out of bound array element access. + const MemRegion *R = l.getAsRegion(); + if (!R) + return; + + const ElementRegion *ER = dyn_cast(R); + if (!ER) + return; + + // Get the index of the accessed element. + DefinedOrUnknownSVal Idx = cast(ER->getIndex()); + + // Zero index is always in bound, this also passes ElementRegions created for + // pointer casts. + if (Idx.isZeroConstant()) + return; + + ProgramStateRef state = C.getState(); + + // Get the size of the array. + DefinedOrUnknownSVal NumElements + = C.getStoreManager().getSizeInElements(state, ER->getSuperRegion(), + ER->getValueType()); + + ProgramStateRef StInBound = state->assumeInBound(Idx, NumElements, true); + ProgramStateRef StOutBound = state->assumeInBound(Idx, NumElements, false); + if (StOutBound && !StInBound) { + ExplodedNode *N = C.generateSink(StOutBound); + if (!N) + return; + + if (!BT) + BT.reset(new BuiltinBug("Out-of-bound array access", + "Access out-of-bound array element (buffer overflow)")); + + // FIXME: It would be nice to eventually make this diagnostic more clear, + // e.g., by referencing the original declaration or by saying *why* this + // reference is outside the range. + + // Generate a report for this bug. + BugReport *report = + new BugReport(*BT, BT->getDescription(), N); + + report->addRange(LoadS->getSourceRange()); + C.EmitReport(report); + return; + } + + // Array bound check succeeded. From this point forward the array bound + // should always succeed. + C.addTransition(StInBound); +} + +void ento::registerArrayBoundChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp new file mode 100644 index 0000000..c6efe94 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -0,0 +1,318 @@ +//== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ArrayBoundCheckerV2, which is a path-sensitive check +// which looks for an out-of-bound array element access. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/AST/CharUnits.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" + +using namespace clang; +using namespace ento; + +namespace { +class ArrayBoundCheckerV2 : + public Checker { + mutable OwningPtr BT; + + enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted }; + + void reportOOB(CheckerContext &C, ProgramStateRef errorState, + OOB_Kind kind) const; + +public: + void checkLocation(SVal l, bool isLoad, const Stmt*S, + CheckerContext &C) const; +}; + +// FIXME: Eventually replace RegionRawOffset with this class. +class RegionRawOffsetV2 { +private: + const SubRegion *baseRegion; + SVal byteOffset; + + RegionRawOffsetV2() + : baseRegion(0), byteOffset(UnknownVal()) {} + +public: + RegionRawOffsetV2(const SubRegion* base, SVal offset) + : baseRegion(base), byteOffset(offset) {} + + NonLoc getByteOffset() const { return cast(byteOffset); } + const SubRegion *getRegion() const { return baseRegion; } + + static RegionRawOffsetV2 computeOffset(ProgramStateRef state, + SValBuilder &svalBuilder, + SVal location); + + void dump() const; + void dumpToStream(raw_ostream &os) const; +}; +} + +static SVal computeExtentBegin(SValBuilder &svalBuilder, + const MemRegion *region) { + while (true) + switch (region->getKind()) { + default: + return svalBuilder.makeZeroArrayIndex(); + case MemRegion::SymbolicRegionKind: + // FIXME: improve this later by tracking symbolic lower bounds + // for symbolic regions. + return UnknownVal(); + case MemRegion::ElementRegionKind: + region = cast(region)->getSuperRegion(); + continue; + } +} + +void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, + const Stmt* LoadS, + CheckerContext &checkerContext) const { + + // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping + // some new logic here that reasons directly about memory region extents. + // Once that logic is more mature, we can bring it back to assumeInBound() + // for all clients to use. + // + // The algorithm we are using here for bounds checking is to see if the + // memory access is within the extent of the base region. Since we + // have some flexibility in defining the base region, we can achieve + // various levels of conservatism in our buffer overflow checking. + ProgramStateRef state = checkerContext.getState(); + ProgramStateRef originalState = state; + + SValBuilder &svalBuilder = checkerContext.getSValBuilder(); + const RegionRawOffsetV2 &rawOffset = + RegionRawOffsetV2::computeOffset(state, svalBuilder, location); + + if (!rawOffset.getRegion()) + return; + + // CHECK LOWER BOUND: Is byteOffset < extent begin? + // If so, we are doing a load/store + // before the first valid offset in the memory region. + + SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion()); + + if (isa(extentBegin)) { + SVal lowerBound + = svalBuilder.evalBinOpNN(state, BO_LT, rawOffset.getByteOffset(), + cast(extentBegin), + svalBuilder.getConditionType()); + + NonLoc *lowerBoundToCheck = dyn_cast(&lowerBound); + if (!lowerBoundToCheck) + return; + + ProgramStateRef state_precedesLowerBound, state_withinLowerBound; + llvm::tie(state_precedesLowerBound, state_withinLowerBound) = + state->assume(*lowerBoundToCheck); + + // Are we constrained enough to definitely precede the lower bound? + if (state_precedesLowerBound && !state_withinLowerBound) { + reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); + return; + } + + // Otherwise, assume the constraint of the lower bound. + assert(state_withinLowerBound); + state = state_withinLowerBound; + } + + do { + // CHECK UPPER BOUND: Is byteOffset >= extent(baseRegion)? If so, + // we are doing a load/store after the last valid offset. + DefinedOrUnknownSVal extentVal = + rawOffset.getRegion()->getExtent(svalBuilder); + if (!isa(extentVal)) + break; + + SVal upperbound + = svalBuilder.evalBinOpNN(state, BO_GE, rawOffset.getByteOffset(), + cast(extentVal), + svalBuilder.getConditionType()); + + NonLoc *upperboundToCheck = dyn_cast(&upperbound); + if (!upperboundToCheck) + break; + + ProgramStateRef state_exceedsUpperBound, state_withinUpperBound; + llvm::tie(state_exceedsUpperBound, state_withinUpperBound) = + state->assume(*upperboundToCheck); + + // If we are under constrained and the index variables are tainted, report. + if (state_exceedsUpperBound && state_withinUpperBound) { + if (state->isTainted(rawOffset.getByteOffset())) + reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted); + return; + } + + // If we are constrained enough to definitely exceed the upper bound, report. + if (state_exceedsUpperBound) { + assert(!state_withinUpperBound); + reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); + return; + } + + assert(state_withinUpperBound); + state = state_withinUpperBound; + } + while (false); + + if (state != originalState) + checkerContext.addTransition(state); +} + +void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext, + ProgramStateRef errorState, + OOB_Kind kind) const { + + ExplodedNode *errorNode = checkerContext.generateSink(errorState); + if (!errorNode) + return; + + if (!BT) + BT.reset(new BuiltinBug("Out-of-bound access")); + + // FIXME: This diagnostics are preliminary. We should get far better + // diagnostics for explaining buffer overruns. + + SmallString<256> buf; + llvm::raw_svector_ostream os(buf); + os << "Out of bound memory access "; + switch (kind) { + case OOB_Precedes: + os << "(accessed memory precedes memory block)"; + break; + case OOB_Excedes: + os << "(access exceeds upper limit of memory block)"; + break; + case OOB_Tainted: + os << "(index is tainted)"; + break; + } + + checkerContext.EmitReport(new BugReport(*BT, os.str(), errorNode)); +} + +void RegionRawOffsetV2::dump() const { + dumpToStream(llvm::errs()); +} + +void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const { + os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}'; +} + +// FIXME: Merge with the implementation of the same method in Store.cpp +static bool IsCompleteType(ASTContext &Ctx, QualType Ty) { + if (const RecordType *RT = Ty->getAs()) { + const RecordDecl *D = RT->getDecl(); + if (!D->getDefinition()) + return false; + } + + return true; +} + + +// Lazily computes a value to be used by 'computeOffset'. If 'val' +// is unknown or undefined, we lazily substitute '0'. Otherwise, +// return 'val'. +static inline SVal getValue(SVal val, SValBuilder &svalBuilder) { + return isa(val) ? svalBuilder.makeArrayIndex(0) : val; +} + +// Scale a base value by a scaling factor, and return the scaled +// value as an SVal. Used by 'computeOffset'. +static inline SVal scaleValue(ProgramStateRef state, + NonLoc baseVal, CharUnits scaling, + SValBuilder &sb) { + return sb.evalBinOpNN(state, BO_Mul, baseVal, + sb.makeArrayIndex(scaling.getQuantity()), + sb.getArrayIndexType()); +} + +// Add an SVal to another, treating unknown and undefined values as +// summing to UnknownVal. Used by 'computeOffset'. +static SVal addValue(ProgramStateRef state, SVal x, SVal y, + SValBuilder &svalBuilder) { + // We treat UnknownVals and UndefinedVals the same here because we + // only care about computing offsets. + if (x.isUnknownOrUndef() || y.isUnknownOrUndef()) + return UnknownVal(); + + return svalBuilder.evalBinOpNN(state, BO_Add, + cast(x), cast(y), + svalBuilder.getArrayIndexType()); +} + +/// Compute a raw byte offset from a base region. Used for array bounds +/// checking. +RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state, + SValBuilder &svalBuilder, + SVal location) +{ + const MemRegion *region = location.getAsRegion(); + SVal offset = UndefinedVal(); + + while (region) { + switch (region->getKind()) { + default: { + if (const SubRegion *subReg = dyn_cast(region)) { + offset = getValue(offset, svalBuilder); + if (!offset.isUnknownOrUndef()) + return RegionRawOffsetV2(subReg, offset); + } + return RegionRawOffsetV2(); + } + case MemRegion::ElementRegionKind: { + const ElementRegion *elemReg = cast(region); + SVal index = elemReg->getIndex(); + if (!isa(index)) + return RegionRawOffsetV2(); + QualType elemType = elemReg->getElementType(); + // If the element is an incomplete type, go no further. + ASTContext &astContext = svalBuilder.getContext(); + if (!IsCompleteType(astContext, elemType)) + return RegionRawOffsetV2(); + + // Update the offset. + offset = addValue(state, + getValue(offset, svalBuilder), + scaleValue(state, + cast(index), + astContext.getTypeSizeInChars(elemType), + svalBuilder), + svalBuilder); + + if (offset.isUnknownOrUndef()) + return RegionRawOffsetV2(); + + region = elemReg->getSuperRegion(); + continue; + } + } + } + return RegionRawOffsetV2(); +} + + +void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/AttrNonNullChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AttrNonNullChecker.cpp new file mode 100644 index 0000000..ab66e98 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/AttrNonNullChecker.cpp @@ -0,0 +1,134 @@ +//===--- AttrNonNullChecker.h - Undefined arguments checker ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines AttrNonNullChecker, a builtin check in ExprEngine that +// performs checks for arguments declared to have nonnull attribute. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class AttrNonNullChecker + : public Checker< check::PreStmt > { + mutable OwningPtr BT; +public: + + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; +}; +} // end anonymous namespace + +void AttrNonNullChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + // Check if the callee has a 'nonnull' attribute. + SVal X = state->getSVal(CE->getCallee(), LCtx); + + const FunctionDecl *FD = X.getAsFunctionDecl(); + if (!FD) + return; + + const NonNullAttr* Att = FD->getAttr(); + if (!Att) + return; + + // Iterate through the arguments of CE and check them for null. + unsigned idx = 0; + + for (CallExpr::const_arg_iterator I=CE->arg_begin(), E=CE->arg_end(); I!=E; + ++I, ++idx) { + + if (!Att->isNonNull(idx)) + continue; + + SVal V = state->getSVal(*I, LCtx); + DefinedSVal *DV = dyn_cast(&V); + + // If the value is unknown or undefined, we can't perform this check. + if (!DV) + continue; + + if (!isa(*DV)) { + // If the argument is a union type, we want to handle a potential + // transparent_unoin GCC extension. + QualType T = (*I)->getType(); + const RecordType *UT = T->getAsUnionType(); + if (!UT || !UT->getDecl()->hasAttr()) + continue; + if (nonloc::CompoundVal *CSV = dyn_cast(DV)) { + nonloc::CompoundVal::iterator CSV_I = CSV->begin(); + assert(CSV_I != CSV->end()); + V = *CSV_I; + DV = dyn_cast(&V); + assert(++CSV_I == CSV->end()); + if (!DV) + continue; + } + else { + // FIXME: Handle LazyCompoundVals? + continue; + } + } + + ConstraintManager &CM = C.getConstraintManager(); + ProgramStateRef stateNotNull, stateNull; + llvm::tie(stateNotNull, stateNull) = CM.assumeDual(state, *DV); + + if (stateNull && !stateNotNull) { + // Generate an error node. Check for a null node in case + // we cache out. + if (ExplodedNode *errorNode = C.generateSink(stateNull)) { + + // Lazily allocate the BugType object if it hasn't already been + // created. Ownership is transferred to the BugReporter object once + // the BugReport is passed to 'EmitWarning'. + if (!BT) + BT.reset(new BugType("Argument with 'nonnull' attribute passed null", + "API")); + + BugReport *R = + new BugReport(*BT, "Null pointer passed as an argument to a " + "'nonnull' parameter", errorNode); + + // Highlight the range of the argument that was null. + const Expr *arg = *I; + R->addRange(arg->getSourceRange()); + R->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(errorNode, + arg, R)); + // Emit the bug report. + C.EmitReport(R); + } + + // Always return. Either we cached out or we just emitted an error. + return; + } + + // If a pointer value passed the check we should assume that it is + // indeed not null from this point forward. + assert(stateNotNull); + state = stateNotNull; + } + + // If we reach here all of the arguments passed the nonnull check. + // If 'state' has been updated generated a new node. + C.addTransition(state); +} + +void ento::registerAttrNonNullChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp new file mode 100644 index 0000000..6dd0a8c --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp @@ -0,0 +1,672 @@ +//== BasicObjCFoundationChecks.cpp - Simple Apple-Foundation checks -*- C++ -*-- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines BasicObjCFoundationChecks, a class that encapsulates +// a set of simple checks to run on Objective-C code using Apple's Foundation +// classes. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/Analysis/DomainSpecific/CocoaConventions.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/ASTContext.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { +class APIMisuse : public BugType { +public: + APIMisuse(const char* name) : BugType(name, "API Misuse (Apple)") {} +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +static const char* GetReceiverNameType(const ObjCMessage &msg) { + if (const ObjCInterfaceDecl *ID = msg.getReceiverInterface()) + return ID->getIdentifier()->getNameStart(); + return 0; +} + +static bool isReceiverClassOrSuperclass(const ObjCInterfaceDecl *ID, + StringRef ClassName) { + if (ID->getIdentifier()->getName() == ClassName) + return true; + + if (const ObjCInterfaceDecl *Super = ID->getSuperClass()) + return isReceiverClassOrSuperclass(Super, ClassName); + + return false; +} + +static inline bool isNil(SVal X) { + return isa(X); +} + +//===----------------------------------------------------------------------===// +// NilArgChecker - Check for prohibited nil arguments to ObjC method calls. +//===----------------------------------------------------------------------===// + +namespace { + class NilArgChecker : public Checker { + mutable OwningPtr BT; + + void WarnNilArg(CheckerContext &C, + const ObjCMessage &msg, unsigned Arg) const; + + public: + void checkPreObjCMessage(ObjCMessage msg, CheckerContext &C) const; + }; +} + +void NilArgChecker::WarnNilArg(CheckerContext &C, + const ObjCMessage &msg, + unsigned int Arg) const +{ + if (!BT) + BT.reset(new APIMisuse("nil argument")); + + if (ExplodedNode *N = C.generateSink()) { + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + os << "Argument to '" << GetReceiverNameType(msg) << "' method '" + << msg.getSelector().getAsString() << "' cannot be nil"; + + BugReport *R = new BugReport(*BT, os.str(), N); + R->addRange(msg.getArgSourceRange(Arg)); + C.EmitReport(R); + } +} + +void NilArgChecker::checkPreObjCMessage(ObjCMessage msg, + CheckerContext &C) const { + const ObjCInterfaceDecl *ID = msg.getReceiverInterface(); + if (!ID) + return; + + if (isReceiverClassOrSuperclass(ID, "NSString")) { + Selector S = msg.getSelector(); + + if (S.isUnarySelector()) + return; + + // FIXME: This is going to be really slow doing these checks with + // lexical comparisons. + + std::string NameStr = S.getAsString(); + StringRef Name(NameStr); + assert(!Name.empty()); + + // FIXME: Checking for initWithFormat: will not work in most cases + // yet because [NSString alloc] returns id, not NSString*. We will + // need support for tracking expected-type information in the analyzer + // to find these errors. + if (Name == "caseInsensitiveCompare:" || + Name == "compare:" || + Name == "compare:options:" || + Name == "compare:options:range:" || + Name == "compare:options:range:locale:" || + Name == "componentsSeparatedByCharactersInSet:" || + Name == "initWithFormat:") { + if (isNil(msg.getArgSVal(0, C.getLocationContext(), C.getState()))) + WarnNilArg(C, msg, 0); + } + } +} + +//===----------------------------------------------------------------------===// +// Error reporting. +//===----------------------------------------------------------------------===// + +namespace { +class CFNumberCreateChecker : public Checker< check::PreStmt > { + mutable OwningPtr BT; + mutable IdentifierInfo* II; +public: + CFNumberCreateChecker() : II(0) {} + + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + +private: + void EmitError(const TypedRegion* R, const Expr *Ex, + uint64_t SourceSize, uint64_t TargetSize, uint64_t NumberKind); +}; +} // end anonymous namespace + +enum CFNumberType { + kCFNumberSInt8Type = 1, + kCFNumberSInt16Type = 2, + kCFNumberSInt32Type = 3, + kCFNumberSInt64Type = 4, + kCFNumberFloat32Type = 5, + kCFNumberFloat64Type = 6, + kCFNumberCharType = 7, + kCFNumberShortType = 8, + kCFNumberIntType = 9, + kCFNumberLongType = 10, + kCFNumberLongLongType = 11, + kCFNumberFloatType = 12, + kCFNumberDoubleType = 13, + kCFNumberCFIndexType = 14, + kCFNumberNSIntegerType = 15, + kCFNumberCGFloatType = 16 +}; + +namespace { + template + class Optional { + bool IsKnown; + T Val; + public: + Optional() : IsKnown(false), Val(0) {} + Optional(const T& val) : IsKnown(true), Val(val) {} + + bool isKnown() const { return IsKnown; } + + const T& getValue() const { + assert (isKnown()); + return Val; + } + + operator const T&() const { + return getValue(); + } + }; +} + +static Optional GetCFNumberSize(ASTContext &Ctx, uint64_t i) { + static const unsigned char FixedSize[] = { 8, 16, 32, 64, 32, 64 }; + + if (i < kCFNumberCharType) + return FixedSize[i-1]; + + QualType T; + + switch (i) { + case kCFNumberCharType: T = Ctx.CharTy; break; + case kCFNumberShortType: T = Ctx.ShortTy; break; + case kCFNumberIntType: T = Ctx.IntTy; break; + case kCFNumberLongType: T = Ctx.LongTy; break; + case kCFNumberLongLongType: T = Ctx.LongLongTy; break; + case kCFNumberFloatType: T = Ctx.FloatTy; break; + case kCFNumberDoubleType: T = Ctx.DoubleTy; break; + case kCFNumberCFIndexType: + case kCFNumberNSIntegerType: + case kCFNumberCGFloatType: + // FIXME: We need a way to map from names to Type*. + default: + return Optional(); + } + + return Ctx.getTypeSize(T); +} + +#if 0 +static const char* GetCFNumberTypeStr(uint64_t i) { + static const char* Names[] = { + "kCFNumberSInt8Type", + "kCFNumberSInt16Type", + "kCFNumberSInt32Type", + "kCFNumberSInt64Type", + "kCFNumberFloat32Type", + "kCFNumberFloat64Type", + "kCFNumberCharType", + "kCFNumberShortType", + "kCFNumberIntType", + "kCFNumberLongType", + "kCFNumberLongLongType", + "kCFNumberFloatType", + "kCFNumberDoubleType", + "kCFNumberCFIndexType", + "kCFNumberNSIntegerType", + "kCFNumberCGFloatType" + }; + + return i <= kCFNumberCGFloatType ? Names[i-1] : "Invalid CFNumberType"; +} +#endif + +void CFNumberCreateChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return; + + ASTContext &Ctx = C.getASTContext(); + if (!II) + II = &Ctx.Idents.get("CFNumberCreate"); + + if (FD->getIdentifier() != II || CE->getNumArgs() != 3) + return; + + // Get the value of the "theType" argument. + const LocationContext *LCtx = C.getLocationContext(); + SVal TheTypeVal = state->getSVal(CE->getArg(1), LCtx); + + // FIXME: We really should allow ranges of valid theType values, and + // bifurcate the state appropriately. + nonloc::ConcreteInt* V = dyn_cast(&TheTypeVal); + if (!V) + return; + + uint64_t NumberKind = V->getValue().getLimitedValue(); + Optional TargetSize = GetCFNumberSize(Ctx, NumberKind); + + // FIXME: In some cases we can emit an error. + if (!TargetSize.isKnown()) + return; + + // Look at the value of the integer being passed by reference. Essentially + // we want to catch cases where the value passed in is not equal to the + // size of the type being created. + SVal TheValueExpr = state->getSVal(CE->getArg(2), LCtx); + + // FIXME: Eventually we should handle arbitrary locations. We can do this + // by having an enhanced memory model that does low-level typing. + loc::MemRegionVal* LV = dyn_cast(&TheValueExpr); + if (!LV) + return; + + const TypedValueRegion* R = dyn_cast(LV->stripCasts()); + if (!R) + return; + + QualType T = Ctx.getCanonicalType(R->getValueType()); + + // FIXME: If the pointee isn't an integer type, should we flag a warning? + // People can do weird stuff with pointers. + + if (!T->isIntegerType()) + return; + + uint64_t SourceSize = Ctx.getTypeSize(T); + + // CHECK: is SourceSize == TargetSize + if (SourceSize == TargetSize) + return; + + // Generate an error. Only generate a sink if 'SourceSize < TargetSize'; + // otherwise generate a regular node. + // + // FIXME: We can actually create an abstract "CFNumber" object that has + // the bits initialized to the provided values. + // + if (ExplodedNode *N = SourceSize < TargetSize ? C.generateSink() + : C.addTransition()) { + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + + os << (SourceSize == 8 ? "An " : "A ") + << SourceSize << " bit integer is used to initialize a CFNumber " + "object that represents " + << (TargetSize == 8 ? "an " : "a ") + << TargetSize << " bit integer. "; + + if (SourceSize < TargetSize) + os << (TargetSize - SourceSize) + << " bits of the CFNumber value will be garbage." ; + else + os << (SourceSize - TargetSize) + << " bits of the input integer will be lost."; + + if (!BT) + BT.reset(new APIMisuse("Bad use of CFNumberCreate")); + + BugReport *report = new BugReport(*BT, os.str(), N); + report->addRange(CE->getArg(2)->getSourceRange()); + C.EmitReport(report); + } +} + +//===----------------------------------------------------------------------===// +// CFRetain/CFRelease checking for null arguments. +//===----------------------------------------------------------------------===// + +namespace { +class CFRetainReleaseChecker : public Checker< check::PreStmt > { + mutable OwningPtr BT; + mutable IdentifierInfo *Retain, *Release; +public: + CFRetainReleaseChecker(): Retain(0), Release(0) {} + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; +}; +} // end anonymous namespace + + +void CFRetainReleaseChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + // If the CallExpr doesn't have exactly 1 argument just give up checking. + if (CE->getNumArgs() != 1) + return; + + ProgramStateRef state = C.getState(); + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return; + + if (!BT) { + ASTContext &Ctx = C.getASTContext(); + Retain = &Ctx.Idents.get("CFRetain"); + Release = &Ctx.Idents.get("CFRelease"); + BT.reset(new APIMisuse("null passed to CFRetain/CFRelease")); + } + + // Check if we called CFRetain/CFRelease. + const IdentifierInfo *FuncII = FD->getIdentifier(); + if (!(FuncII == Retain || FuncII == Release)) + return; + + // FIXME: The rest of this just checks that the argument is non-null. + // It should probably be refactored and combined with AttrNonNullChecker. + + // Get the argument's value. + const Expr *Arg = CE->getArg(0); + SVal ArgVal = state->getSVal(Arg, C.getLocationContext()); + DefinedSVal *DefArgVal = dyn_cast(&ArgVal); + if (!DefArgVal) + return; + + // Get a NULL value. + SValBuilder &svalBuilder = C.getSValBuilder(); + DefinedSVal zero = cast(svalBuilder.makeZeroVal(Arg->getType())); + + // Make an expression asserting that they're equal. + DefinedOrUnknownSVal ArgIsNull = svalBuilder.evalEQ(state, zero, *DefArgVal); + + // Are they equal? + ProgramStateRef stateTrue, stateFalse; + llvm::tie(stateTrue, stateFalse) = state->assume(ArgIsNull); + + if (stateTrue && !stateFalse) { + ExplodedNode *N = C.generateSink(stateTrue); + if (!N) + return; + + const char *description = (FuncII == Retain) + ? "Null pointer argument in call to CFRetain" + : "Null pointer argument in call to CFRelease"; + + BugReport *report = new BugReport(*BT, description, N); + report->addRange(Arg->getSourceRange()); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, Arg, + report)); + C.EmitReport(report); + return; + } + + // From here on, we know the argument is non-null. + C.addTransition(stateFalse); +} + +//===----------------------------------------------------------------------===// +// Check for sending 'retain', 'release', or 'autorelease' directly to a Class. +//===----------------------------------------------------------------------===// + +namespace { +class ClassReleaseChecker : public Checker { + mutable Selector releaseS; + mutable Selector retainS; + mutable Selector autoreleaseS; + mutable Selector drainS; + mutable OwningPtr BT; + +public: + void checkPreObjCMessage(ObjCMessage msg, CheckerContext &C) const; +}; +} + +void ClassReleaseChecker::checkPreObjCMessage(ObjCMessage msg, + CheckerContext &C) const { + + if (!BT) { + BT.reset(new APIMisuse("message incorrectly sent to class instead of class " + "instance")); + + ASTContext &Ctx = C.getASTContext(); + releaseS = GetNullarySelector("release", Ctx); + retainS = GetNullarySelector("retain", Ctx); + autoreleaseS = GetNullarySelector("autorelease", Ctx); + drainS = GetNullarySelector("drain", Ctx); + } + + if (msg.isInstanceMessage()) + return; + const ObjCInterfaceDecl *Class = msg.getReceiverInterface(); + assert(Class); + + Selector S = msg.getSelector(); + if (!(S == releaseS || S == retainS || S == autoreleaseS || S == drainS)) + return; + + if (ExplodedNode *N = C.addTransition()) { + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + + os << "The '" << S.getAsString() << "' message should be sent to instances " + "of class '" << Class->getName() + << "' and not the class directly"; + + BugReport *report = new BugReport(*BT, os.str(), N); + report->addRange(msg.getSourceRange()); + C.EmitReport(report); + } +} + +//===----------------------------------------------------------------------===// +// Check for passing non-Objective-C types to variadic methods that expect +// only Objective-C types. +//===----------------------------------------------------------------------===// + +namespace { +class VariadicMethodTypeChecker : public Checker { + mutable Selector arrayWithObjectsS; + mutable Selector dictionaryWithObjectsAndKeysS; + mutable Selector setWithObjectsS; + mutable Selector orderedSetWithObjectsS; + mutable Selector initWithObjectsS; + mutable Selector initWithObjectsAndKeysS; + mutable OwningPtr BT; + + bool isVariadicMessage(const ObjCMessage &msg) const; + +public: + void checkPreObjCMessage(ObjCMessage msg, CheckerContext &C) const; +}; +} + +/// isVariadicMessage - Returns whether the given message is a variadic message, +/// where all arguments must be Objective-C types. +bool +VariadicMethodTypeChecker::isVariadicMessage(const ObjCMessage &msg) const { + const ObjCMethodDecl *MD = msg.getMethodDecl(); + + if (!MD || !MD->isVariadic() || isa(MD->getDeclContext())) + return false; + + Selector S = msg.getSelector(); + + if (msg.isInstanceMessage()) { + // FIXME: Ideally we'd look at the receiver interface here, but that's not + // useful for init, because alloc returns 'id'. In theory, this could lead + // to false positives, for example if there existed a class that had an + // initWithObjects: implementation that does accept non-Objective-C pointer + // types, but the chance of that happening is pretty small compared to the + // gains that this analysis gives. + const ObjCInterfaceDecl *Class = MD->getClassInterface(); + + // -[NSArray initWithObjects:] + if (isReceiverClassOrSuperclass(Class, "NSArray") && + S == initWithObjectsS) + return true; + + // -[NSDictionary initWithObjectsAndKeys:] + if (isReceiverClassOrSuperclass(Class, "NSDictionary") && + S == initWithObjectsAndKeysS) + return true; + + // -[NSSet initWithObjects:] + if (isReceiverClassOrSuperclass(Class, "NSSet") && + S == initWithObjectsS) + return true; + + // -[NSOrderedSet initWithObjects:] + if (isReceiverClassOrSuperclass(Class, "NSOrderedSet") && + S == initWithObjectsS) + return true; + } else { + const ObjCInterfaceDecl *Class = msg.getReceiverInterface(); + + // -[NSArray arrayWithObjects:] + if (isReceiverClassOrSuperclass(Class, "NSArray") && + S == arrayWithObjectsS) + return true; + + // -[NSDictionary dictionaryWithObjectsAndKeys:] + if (isReceiverClassOrSuperclass(Class, "NSDictionary") && + S == dictionaryWithObjectsAndKeysS) + return true; + + // -[NSSet setWithObjects:] + if (isReceiverClassOrSuperclass(Class, "NSSet") && + S == setWithObjectsS) + return true; + + // -[NSOrderedSet orderedSetWithObjects:] + if (isReceiverClassOrSuperclass(Class, "NSOrderedSet") && + S == orderedSetWithObjectsS) + return true; + } + + return false; +} + +void VariadicMethodTypeChecker::checkPreObjCMessage(ObjCMessage msg, + CheckerContext &C) const { + if (!BT) { + BT.reset(new APIMisuse("Arguments passed to variadic method aren't all " + "Objective-C pointer types")); + + ASTContext &Ctx = C.getASTContext(); + arrayWithObjectsS = GetUnarySelector("arrayWithObjects", Ctx); + dictionaryWithObjectsAndKeysS = + GetUnarySelector("dictionaryWithObjectsAndKeys", Ctx); + setWithObjectsS = GetUnarySelector("setWithObjects", Ctx); + orderedSetWithObjectsS = GetUnarySelector("orderedSetWithObjects", Ctx); + + initWithObjectsS = GetUnarySelector("initWithObjects", Ctx); + initWithObjectsAndKeysS = GetUnarySelector("initWithObjectsAndKeys", Ctx); + } + + if (!isVariadicMessage(msg)) + return; + + // We are not interested in the selector arguments since they have + // well-defined types, so the compiler will issue a warning for them. + unsigned variadicArgsBegin = msg.getSelector().getNumArgs(); + + // We're not interested in the last argument since it has to be nil or the + // compiler would have issued a warning for it elsewhere. + unsigned variadicArgsEnd = msg.getNumArgs() - 1; + + if (variadicArgsEnd <= variadicArgsBegin) + return; + + // Verify that all arguments have Objective-C types. + llvm::Optional errorNode; + ProgramStateRef state = C.getState(); + + for (unsigned I = variadicArgsBegin; I != variadicArgsEnd; ++I) { + QualType ArgTy = msg.getArgType(I); + if (ArgTy->isObjCObjectPointerType()) + continue; + + // Block pointers are treaded as Objective-C pointers. + if (ArgTy->isBlockPointerType()) + continue; + + // Ignore pointer constants. + if (isa(msg.getArgSVal(I, C.getLocationContext(), + state))) + continue; + + // Ignore pointer types annotated with 'NSObject' attribute. + if (C.getASTContext().isObjCNSObjectType(ArgTy)) + continue; + + // Ignore CF references, which can be toll-free bridged. + if (coreFoundation::isCFObjectRef(ArgTy)) + continue; + + // Generate only one error node to use for all bug reports. + if (!errorNode.hasValue()) { + errorNode = C.addTransition(); + } + + if (!errorNode.getValue()) + continue; + + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + + if (const char *TypeName = GetReceiverNameType(msg)) + os << "Argument to '" << TypeName << "' method '"; + else + os << "Argument to method '"; + + os << msg.getSelector().getAsString() + << "' should be an Objective-C pointer type, not '" + << ArgTy.getAsString() << "'"; + + BugReport *R = new BugReport(*BT, os.str(), + errorNode.getValue()); + R->addRange(msg.getArgSourceRange(I)); + C.EmitReport(R); + } +} + +//===----------------------------------------------------------------------===// +// Check registration. +//===----------------------------------------------------------------------===// + +void ento::registerNilArgChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} + +void ento::registerCFNumberCreateChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} + +void ento::registerCFRetainReleaseChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} + +void ento::registerClassReleaseChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} + +void ento::registerVariadicMethodTypeChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp new file mode 100644 index 0000000..a4fc396 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp @@ -0,0 +1,157 @@ +//== BoolAssignmentChecker.cpp - Boolean assignment checker -----*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines BoolAssignmentChecker, a builtin check in ExprEngine that +// performs checks for assignment of non-Boolean values to Boolean variables. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { + class BoolAssignmentChecker : public Checker< check::Bind > { + mutable llvm::OwningPtr BT; + void emitReport(ProgramStateRef state, CheckerContext &C) const; + public: + void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const; + }; +} // end anonymous namespace + +void BoolAssignmentChecker::emitReport(ProgramStateRef state, + CheckerContext &C) const { + if (ExplodedNode *N = C.addTransition(state)) { + if (!BT) + BT.reset(new BuiltinBug("Assignment of a non-Boolean value")); + C.EmitReport(new BugReport(*BT, BT->getDescription(), N)); + } +} + +static bool isBooleanType(QualType Ty) { + if (Ty->isBooleanType()) // C++ or C99 + return true; + + if (const TypedefType *TT = Ty->getAs()) + return TT->getDecl()->getName() == "BOOL" || // Objective-C + TT->getDecl()->getName() == "_Bool" || // stdbool.h < C99 + TT->getDecl()->getName() == "Boolean"; // MacTypes.h + + return false; +} + +void BoolAssignmentChecker::checkBind(SVal loc, SVal val, const Stmt *S, + CheckerContext &C) const { + + // We are only interested in stores into Booleans. + const TypedValueRegion *TR = + dyn_cast_or_null(loc.getAsRegion()); + + if (!TR) + return; + + QualType valTy = TR->getValueType(); + + if (!isBooleanType(valTy)) + return; + + // Get the value of the right-hand side. We only care about values + // that are defined (UnknownVals and UndefinedVals are handled by other + // checkers). + const DefinedSVal *DV = dyn_cast(&val); + if (!DV) + return; + + // Check if the assigned value meets our criteria for correctness. It must + // be a value that is either 0 or 1. One way to check this is to see if + // the value is possibly < 0 (for a negative value) or greater than 1. + ProgramStateRef state = C.getState(); + SValBuilder &svalBuilder = C.getSValBuilder(); + ConstraintManager &CM = C.getConstraintManager(); + + // First, ensure that the value is >= 0. + DefinedSVal zeroVal = svalBuilder.makeIntVal(0, valTy); + SVal greaterThanOrEqualToZeroVal = + svalBuilder.evalBinOp(state, BO_GE, *DV, zeroVal, + svalBuilder.getConditionType()); + + DefinedSVal *greaterThanEqualToZero = + dyn_cast(&greaterThanOrEqualToZeroVal); + + if (!greaterThanEqualToZero) { + // The SValBuilder cannot construct a valid SVal for this condition. + // This means we cannot properly reason about it. + return; + } + + ProgramStateRef stateLT, stateGE; + llvm::tie(stateGE, stateLT) = CM.assumeDual(state, *greaterThanEqualToZero); + + // Is it possible for the value to be less than zero? + if (stateLT) { + // It is possible for the value to be less than zero. We only + // want to emit a warning, however, if that value is fully constrained. + // If it it possible for the value to be >= 0, then essentially the + // value is underconstrained and there is nothing left to be done. + if (!stateGE) + emitReport(stateLT, C); + + // In either case, we are done. + return; + } + + // If we reach here, it must be the case that the value is constrained + // to only be >= 0. + assert(stateGE == state); + + // At this point we know that the value is >= 0. + // Now check to ensure that the value is <= 1. + DefinedSVal OneVal = svalBuilder.makeIntVal(1, valTy); + SVal lessThanEqToOneVal = + svalBuilder.evalBinOp(state, BO_LE, *DV, OneVal, + svalBuilder.getConditionType()); + + DefinedSVal *lessThanEqToOne = + dyn_cast(&lessThanEqToOneVal); + + if (!lessThanEqToOne) { + // The SValBuilder cannot construct a valid SVal for this condition. + // This means we cannot properly reason about it. + return; + } + + ProgramStateRef stateGT, stateLE; + llvm::tie(stateLE, stateGT) = CM.assumeDual(state, *lessThanEqToOne); + + // Is it possible for the value to be greater than one? + if (stateGT) { + // It is possible for the value to be greater than one. We only + // want to emit a warning, however, if that value is fully constrained. + // If it is possible for the value to be <= 1, then essentially the + // value is underconstrained and there is nothing left to be done. + if (!stateLE) + emitReport(stateGT, C); + + // In either case, we are done. + return; + } + + // If we reach here, it must be the case that the value is constrained + // to only be <= 1. + assert(stateLE == state); +} + +void ento::registerBoolAssignmentChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp new file mode 100644 index 0000000..509bc79 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp @@ -0,0 +1,82 @@ +//=== BuiltinFunctionChecker.cpp --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This checker evaluates clang builtin functions. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/Basic/Builtins.h" + +using namespace clang; +using namespace ento; + +namespace { + +class BuiltinFunctionChecker : public Checker { +public: + bool evalCall(const CallExpr *CE, CheckerContext &C) const; +}; + +} + +bool BuiltinFunctionChecker::evalCall(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + const FunctionDecl *FD = C.getCalleeDecl(CE); + const LocationContext *LCtx = C.getLocationContext(); + if (!FD) + return false; + + unsigned id = FD->getBuiltinID(); + + if (!id) + return false; + + switch (id) { + case Builtin::BI__builtin_expect: { + // For __builtin_expect, just return the value of the subexpression. + assert (CE->arg_begin() != CE->arg_end()); + SVal X = state->getSVal(*(CE->arg_begin()), LCtx); + C.addTransition(state->BindExpr(CE, LCtx, X)); + return true; + } + + case Builtin::BI__builtin_alloca: { + // FIXME: Refactor into StoreManager itself? + MemRegionManager& RM = C.getStoreManager().getRegionManager(); + const AllocaRegion* R = + RM.getAllocaRegion(CE, C.getCurrentBlockCount(), C.getLocationContext()); + + // Set the extent of the region in bytes. This enables us to use the + // SVal of the argument directly. If we save the extent in bits, we + // cannot represent values like symbol*8. + DefinedOrUnknownSVal Size = + cast(state->getSVal(*(CE->arg_begin()), LCtx)); + + SValBuilder& svalBuilder = C.getSValBuilder(); + DefinedOrUnknownSVal Extent = R->getExtent(svalBuilder); + DefinedOrUnknownSVal extentMatchesSizeArg = + svalBuilder.evalEQ(state, Extent, Size); + state = state->assume(extentMatchesSizeArg, true); + + C.addTransition(state->BindExpr(CE, LCtx, loc::MemRegionVal(R))); + return true; + } + } + + return false; +} + +void ento::registerBuiltinFunctionChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt new file mode 100644 index 0000000..75d5448 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -0,0 +1,81 @@ +clang_tablegen(Checkers.inc -gen-clang-sa-checkers + -I ${CMAKE_CURRENT_SOURCE_DIR}/../../../include + SOURCE Checkers.td + TARGET ClangSACheckers) + +set(LLVM_USED_LIBS clangBasic clangAST clangStaticAnalyzerCore) + +add_clang_library(clangStaticAnalyzerCheckers + AdjustedReturnValueChecker.cpp + AnalyzerStatsChecker.cpp + ArrayBoundChecker.cpp + ArrayBoundCheckerV2.cpp + AttrNonNullChecker.cpp + BasicObjCFoundationChecks.cpp + BoolAssignmentChecker.cpp + BuiltinFunctionChecker.cpp + CStringChecker.cpp + CStringSyntaxChecker.cpp + CallAndMessageChecker.cpp + CastSizeChecker.cpp + CastToStructChecker.cpp + CheckObjCDealloc.cpp + CheckObjCInstMethSignature.cpp + CheckSecuritySyntaxOnly.cpp + CheckSizeofPointer.cpp + CheckerDocumentation.cpp + ChrootChecker.cpp + ClangCheckers.cpp + CommonBugCategories.cpp + DeadStoresChecker.cpp + DebugCheckers.cpp + DereferenceChecker.cpp + DivZeroChecker.cpp + FixedAddressChecker.cpp + GenericTaintChecker.cpp + IdempotentOperationChecker.cpp + IteratorsChecker.cpp + IntervalTest.cpp + LLVMConventionsChecker.cpp + MacOSKeychainAPIChecker.cpp + MacOSXAPIChecker.cpp + MallocChecker.cpp + MallocOverflowSecurityChecker.cpp + MallocSizeofChecker.cpp + NSAutoreleasePoolChecker.cpp + NSErrorChecker.cpp + NoReturnFunctionChecker.cpp + OSAtomicChecker.cpp + ObjCAtSyncChecker.cpp + ObjCContainersASTChecker.cpp + ObjCContainersChecker.cpp + ObjCSelfInitChecker.cpp + ObjCUnusedIVarsChecker.cpp + PointerArithChecker.cpp + PointerSubChecker.cpp + PthreadLockChecker.cpp + RetainCountChecker.cpp + ReturnPointerRangeChecker.cpp + ReturnUndefChecker.cpp + StackAddrEscapeChecker.cpp + StreamChecker.cpp + TaintTesterChecker.cpp + UndefBranchChecker.cpp + UndefCapturedBlockVarChecker.cpp + UndefResultChecker.cpp + UndefinedArraySubscriptChecker.cpp + UndefinedAssignmentChecker.cpp + UnixAPIChecker.cpp + UnreachableCodeChecker.cpp + VLASizeChecker.cpp + VirtualCallChecker.cpp + ) + +add_dependencies(clangStaticAnalyzerCheckers + clangStaticAnalyzerCore + ClangAttrClasses + ClangAttrList + ClangDeclNodes + ClangStmtNodes + ClangSACheckers + ) diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp new file mode 100644 index 0000000..9eb7edf --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -0,0 +1,1981 @@ +//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines CStringChecker, which is an assortment of checks on calls +// to functions in . +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "InterCheckerAPI.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace clang; +using namespace ento; + +namespace { +class CStringChecker : public Checker< eval::Call, + check::PreStmt, + check::LiveSymbols, + check::DeadSymbols, + check::RegionChanges + > { + mutable OwningPtr BT_Null, + BT_Bounds, + BT_Overlap, + BT_NotCString, + BT_AdditionOverflow; + + mutable const char *CurrentFunctionDescription; + +public: + /// The filter is used to filter out the diagnostics which are not enabled by + /// the user. + struct CStringChecksFilter { + DefaultBool CheckCStringNullArg; + DefaultBool CheckCStringOutOfBounds; + DefaultBool CheckCStringBufferOverlap; + DefaultBool CheckCStringNotNullTerm; + }; + + CStringChecksFilter Filter; + + static void *getTag() { static int tag; return &tag; } + + bool evalCall(const CallExpr *CE, CheckerContext &C) const; + void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; + void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; + bool wantsRegionChangeUpdate(ProgramStateRef state) const; + + ProgramStateRef + checkRegionChanges(ProgramStateRef state, + const StoreManager::InvalidatedSymbols *, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const CallOrObjCMessage *Call) const; + + typedef void (CStringChecker::*FnCheck)(CheckerContext &, + const CallExpr *) const; + + void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; + void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; + void evalMemmove(CheckerContext &C, const CallExpr *CE) const; + void evalBcopy(CheckerContext &C, const CallExpr *CE) const; + void evalCopyCommon(CheckerContext &C, const CallExpr *CE, + ProgramStateRef state, + const Expr *Size, + const Expr *Source, + const Expr *Dest, + bool Restricted = false, + bool IsMempcpy = false) const; + + void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; + + void evalstrLength(CheckerContext &C, const CallExpr *CE) const; + void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; + void evalstrLengthCommon(CheckerContext &C, + const CallExpr *CE, + bool IsStrnlen = false) const; + + void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; + void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; + void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; + void evalStrcpyCommon(CheckerContext &C, + const CallExpr *CE, + bool returnEnd, + bool isBounded, + bool isAppending) const; + + void evalStrcat(CheckerContext &C, const CallExpr *CE) const; + void evalStrncat(CheckerContext &C, const CallExpr *CE) const; + + void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; + void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; + void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; + void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; + void evalStrcmpCommon(CheckerContext &C, + const CallExpr *CE, + bool isBounded = false, + bool ignoreCase = false) const; + + // Utility methods + std::pair + static assumeZero(CheckerContext &C, + ProgramStateRef state, SVal V, QualType Ty); + + static ProgramStateRef setCStringLength(ProgramStateRef state, + const MemRegion *MR, + SVal strLength); + static SVal getCStringLengthForRegion(CheckerContext &C, + ProgramStateRef &state, + const Expr *Ex, + const MemRegion *MR, + bool hypothetical); + SVal getCStringLength(CheckerContext &C, + ProgramStateRef &state, + const Expr *Ex, + SVal Buf, + bool hypothetical = false) const; + + const StringLiteral *getCStringLiteral(CheckerContext &C, + ProgramStateRef &state, + const Expr *expr, + SVal val) const; + + static ProgramStateRef InvalidateBuffer(CheckerContext &C, + ProgramStateRef state, + const Expr *Ex, SVal V); + + static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, + const MemRegion *MR); + + // Re-usable checks + ProgramStateRef checkNonNull(CheckerContext &C, + ProgramStateRef state, + const Expr *S, + SVal l) const; + ProgramStateRef CheckLocation(CheckerContext &C, + ProgramStateRef state, + const Expr *S, + SVal l, + const char *message = NULL) const; + ProgramStateRef CheckBufferAccess(CheckerContext &C, + ProgramStateRef state, + const Expr *Size, + const Expr *FirstBuf, + const Expr *SecondBuf, + const char *firstMessage = NULL, + const char *secondMessage = NULL, + bool WarnAboutSize = false) const; + + ProgramStateRef CheckBufferAccess(CheckerContext &C, + ProgramStateRef state, + const Expr *Size, + const Expr *Buf, + const char *message = NULL, + bool WarnAboutSize = false) const { + // This is a convenience override. + return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL, + WarnAboutSize); + } + ProgramStateRef CheckOverlap(CheckerContext &C, + ProgramStateRef state, + const Expr *Size, + const Expr *First, + const Expr *Second) const; + void emitOverlapBug(CheckerContext &C, + ProgramStateRef state, + const Stmt *First, + const Stmt *Second) const; + + ProgramStateRef checkAdditionOverflow(CheckerContext &C, + ProgramStateRef state, + NonLoc left, + NonLoc right) const; +}; + +class CStringLength { +public: + typedef llvm::ImmutableMap EntryMap; +}; +} //end anonymous namespace + +namespace clang { +namespace ento { + template <> + struct ProgramStateTrait + : public ProgramStatePartialTrait { + static void *GDMIndex() { return CStringChecker::getTag(); } + }; +} +} + +//===----------------------------------------------------------------------===// +// Individual checks and utility methods. +//===----------------------------------------------------------------------===// + +std::pair +CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, + QualType Ty) { + DefinedSVal *val = dyn_cast(&V); + if (!val) + return std::pair(state, state); + + SValBuilder &svalBuilder = C.getSValBuilder(); + DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); + return state->assume(svalBuilder.evalEQ(state, *val, zero)); +} + +ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, + ProgramStateRef state, + const Expr *S, SVal l) const { + // If a previous check has failed, propagate the failure. + if (!state) + return NULL; + + ProgramStateRef stateNull, stateNonNull; + llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); + + if (stateNull && !stateNonNull) { + if (!Filter.CheckCStringNullArg) + return NULL; + + ExplodedNode *N = C.generateSink(stateNull); + if (!N) + return NULL; + + if (!BT_Null) + BT_Null.reset(new BuiltinBug("Unix API", + "Null pointer argument in call to byte string function")); + + SmallString<80> buf; + llvm::raw_svector_ostream os(buf); + assert(CurrentFunctionDescription); + os << "Null pointer argument in call to " << CurrentFunctionDescription; + + // Generate a report for this bug. + BuiltinBug *BT = static_cast(BT_Null.get()); + BugReport *report = new BugReport(*BT, os.str(), N); + + report->addRange(S->getSourceRange()); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, S, + report)); + C.EmitReport(report); + return NULL; + } + + // From here on, assume that the value is non-null. + assert(stateNonNull); + return stateNonNull; +} + +// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? +ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, + ProgramStateRef state, + const Expr *S, SVal l, + const char *warningMsg) const { + // If a previous check has failed, propagate the failure. + if (!state) + return NULL; + + // Check for out of bound array element access. + const MemRegion *R = l.getAsRegion(); + if (!R) + return state; + + const ElementRegion *ER = dyn_cast(R); + if (!ER) + return state; + + assert(ER->getValueType() == C.getASTContext().CharTy && + "CheckLocation should only be called with char* ElementRegions"); + + // Get the size of the array. + const SubRegion *superReg = cast(ER->getSuperRegion()); + SValBuilder &svalBuilder = C.getSValBuilder(); + SVal Extent = + svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); + DefinedOrUnknownSVal Size = cast(Extent); + + // Get the index of the accessed element. + DefinedOrUnknownSVal Idx = cast(ER->getIndex()); + + ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true); + ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false); + if (StOutBound && !StInBound) { + ExplodedNode *N = C.generateSink(StOutBound); + if (!N) + return NULL; + + if (!BT_Bounds) { + BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", + "Byte string function accesses out-of-bound array element")); + } + BuiltinBug *BT = static_cast(BT_Bounds.get()); + + // Generate a report for this bug. + BugReport *report; + if (warningMsg) { + report = new BugReport(*BT, warningMsg, N); + } else { + assert(CurrentFunctionDescription); + assert(CurrentFunctionDescription[0] != '\0'); + + SmallString<80> buf; + llvm::raw_svector_ostream os(buf); + os << (char)toupper(CurrentFunctionDescription[0]) + << &CurrentFunctionDescription[1] + << " accesses out-of-bound array element"; + report = new BugReport(*BT, os.str(), N); + } + + // FIXME: It would be nice to eventually make this diagnostic more clear, + // e.g., by referencing the original declaration or by saying *why* this + // reference is outside the range. + + report->addRange(S->getSourceRange()); + C.EmitReport(report); + return NULL; + } + + // Array bound check succeeded. From this point forward the array bound + // should always succeed. + return StInBound; +} + +ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C, + ProgramStateRef state, + const Expr *Size, + const Expr *FirstBuf, + const Expr *SecondBuf, + const char *firstMessage, + const char *secondMessage, + bool WarnAboutSize) const { + // If a previous check has failed, propagate the failure. + if (!state) + return NULL; + + SValBuilder &svalBuilder = C.getSValBuilder(); + ASTContext &Ctx = svalBuilder.getContext(); + const LocationContext *LCtx = C.getLocationContext(); + + QualType sizeTy = Size->getType(); + QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); + + // Check that the first buffer is non-null. + SVal BufVal = state->getSVal(FirstBuf, LCtx); + state = checkNonNull(C, state, FirstBuf, BufVal); + if (!state) + return NULL; + + // If out-of-bounds checking is turned off, skip the rest. + if (!Filter.CheckCStringOutOfBounds) + return state; + + // Get the access length and make sure it is known. + // FIXME: This assumes the caller has already checked that the access length + // is positive. And that it's unsigned. + SVal LengthVal = state->getSVal(Size, LCtx); + NonLoc *Length = dyn_cast(&LengthVal); + if (!Length) + return state; + + // Compute the offset of the last element to be accessed: size-1. + NonLoc One = cast(svalBuilder.makeIntVal(1, sizeTy)); + NonLoc LastOffset = cast(svalBuilder.evalBinOpNN(state, BO_Sub, + *Length, One, sizeTy)); + + // Check that the first buffer is sufficiently long. + SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); + if (Loc *BufLoc = dyn_cast(&BufStart)) { + const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); + + SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, + LastOffset, PtrTy); + state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); + + // If the buffer isn't large enough, abort. + if (!state) + return NULL; + } + + // If there's a second buffer, check it as well. + if (SecondBuf) { + BufVal = state->getSVal(SecondBuf, LCtx); + state = checkNonNull(C, state, SecondBuf, BufVal); + if (!state) + return NULL; + + BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); + if (Loc *BufLoc = dyn_cast(&BufStart)) { + const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); + + SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, + LastOffset, PtrTy); + state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); + } + } + + // Large enough or not, return this state! + return state; +} + +ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, + ProgramStateRef state, + const Expr *Size, + const Expr *First, + const Expr *Second) const { + if (!Filter.CheckCStringBufferOverlap) + return state; + + // Do a simple check for overlap: if the two arguments are from the same + // buffer, see if the end of the first is greater than the start of the second + // or vice versa. + + // If a previous check has failed, propagate the failure. + if (!state) + return NULL; + + ProgramStateRef stateTrue, stateFalse; + + // Get the buffer values and make sure they're known locations. + const LocationContext *LCtx = C.getLocationContext(); + SVal firstVal = state->getSVal(First, LCtx); + SVal secondVal = state->getSVal(Second, LCtx); + + Loc *firstLoc = dyn_cast(&firstVal); + if (!firstLoc) + return state; + + Loc *secondLoc = dyn_cast(&secondVal); + if (!secondLoc) + return state; + + // Are the two values the same? + SValBuilder &svalBuilder = C.getSValBuilder(); + llvm::tie(stateTrue, stateFalse) = + state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); + + if (stateTrue && !stateFalse) { + // If the values are known to be equal, that's automatically an overlap. + emitOverlapBug(C, stateTrue, First, Second); + return NULL; + } + + // assume the two expressions are not equal. + assert(stateFalse); + state = stateFalse; + + // Which value comes first? + QualType cmpTy = svalBuilder.getConditionType(); + SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, + *firstLoc, *secondLoc, cmpTy); + DefinedOrUnknownSVal *reverseTest = dyn_cast(&reverse); + if (!reverseTest) + return state; + + llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); + if (stateTrue) { + if (stateFalse) { + // If we don't know which one comes first, we can't perform this test. + return state; + } else { + // Switch the values so that firstVal is before secondVal. + Loc *tmpLoc = firstLoc; + firstLoc = secondLoc; + secondLoc = tmpLoc; + + // Switch the Exprs as well, so that they still correspond. + const Expr *tmpExpr = First; + First = Second; + Second = tmpExpr; + } + } + + // Get the length, and make sure it too is known. + SVal LengthVal = state->getSVal(Size, LCtx); + NonLoc *Length = dyn_cast(&LengthVal); + if (!Length) + return state; + + // Convert the first buffer's start address to char*. + // Bail out if the cast fails. + ASTContext &Ctx = svalBuilder.getContext(); + QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); + SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, + First->getType()); + Loc *FirstStartLoc = dyn_cast(&FirstStart); + if (!FirstStartLoc) + return state; + + // Compute the end of the first buffer. Bail out if THAT fails. + SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, + *FirstStartLoc, *Length, CharPtrTy); + Loc *FirstEndLoc = dyn_cast(&FirstEnd); + if (!FirstEndLoc) + return state; + + // Is the end of the first buffer past the start of the second buffer? + SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, + *FirstEndLoc, *secondLoc, cmpTy); + DefinedOrUnknownSVal *OverlapTest = dyn_cast(&Overlap); + if (!OverlapTest) + return state; + + llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); + + if (stateTrue && !stateFalse) { + // Overlap! + emitOverlapBug(C, stateTrue, First, Second); + return NULL; + } + + // assume the two expressions don't overlap. + assert(stateFalse); + return stateFalse; +} + +void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, + const Stmt *First, const Stmt *Second) const { + ExplodedNode *N = C.generateSink(state); + if (!N) + return; + + if (!BT_Overlap) + BT_Overlap.reset(new BugType("Unix API", "Improper arguments")); + + // Generate a report for this bug. + BugReport *report = + new BugReport(*BT_Overlap, + "Arguments must not be overlapping buffers", N); + report->addRange(First->getSourceRange()); + report->addRange(Second->getSourceRange()); + + C.EmitReport(report); +} + +ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, + ProgramStateRef state, + NonLoc left, + NonLoc right) const { + // If out-of-bounds checking is turned off, skip the rest. + if (!Filter.CheckCStringOutOfBounds) + return state; + + // If a previous check has failed, propagate the failure. + if (!state) + return NULL; + + SValBuilder &svalBuilder = C.getSValBuilder(); + BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); + + QualType sizeTy = svalBuilder.getContext().getSizeType(); + const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); + NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); + + SVal maxMinusRight; + if (isa(right)) { + maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, + sizeTy); + } else { + // Try switching the operands. (The order of these two assignments is + // important!) + maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, + sizeTy); + left = right; + } + + if (NonLoc *maxMinusRightNL = dyn_cast(&maxMinusRight)) { + QualType cmpTy = svalBuilder.getConditionType(); + // If left > max - right, we have an overflow. + SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, + *maxMinusRightNL, cmpTy); + + ProgramStateRef stateOverflow, stateOkay; + llvm::tie(stateOverflow, stateOkay) = + state->assume(cast(willOverflow)); + + if (stateOverflow && !stateOkay) { + // We have an overflow. Emit a bug report. + ExplodedNode *N = C.generateSink(stateOverflow); + if (!N) + return NULL; + + if (!BT_AdditionOverflow) + BT_AdditionOverflow.reset(new BuiltinBug("API", + "Sum of expressions causes overflow")); + + // This isn't a great error message, but this should never occur in real + // code anyway -- you'd have to create a buffer longer than a size_t can + // represent, which is sort of a contradiction. + const char *warning = + "This expression will create a string whose length is too big to " + "be represented as a size_t"; + + // Generate a report for this bug. + BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N); + C.EmitReport(report); + + return NULL; + } + + // From now on, assume an overflow didn't occur. + assert(stateOkay); + state = stateOkay; + } + + return state; +} + +ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, + const MemRegion *MR, + SVal strLength) { + assert(!strLength.isUndef() && "Attempt to set an undefined string length"); + + MR = MR->StripCasts(); + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: + // FIXME: This can happen if we strcpy() into a string region. This is + // undefined [C99 6.4.5p6], but we should still warn about it. + return state; + + case MemRegion::SymbolicRegionKind: + case MemRegion::AllocaRegionKind: + case MemRegion::VarRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + // These are the types we can currently track string lengths for. + break; + + case MemRegion::ElementRegionKind: + // FIXME: Handle element regions by upper-bounding the parent region's + // string length. + return state; + + default: + // Other regions (mostly non-data) can't have a reliable C string length. + // For now, just ignore the change. + // FIXME: These are rare but not impossible. We should output some kind of + // warning for things like strcpy((char[]){'a', 0}, "b"); + return state; + } + + if (strLength.isUnknown()) + return state->remove(MR); + + return state->set(MR, strLength); +} + +SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, + ProgramStateRef &state, + const Expr *Ex, + const MemRegion *MR, + bool hypothetical) { + if (!hypothetical) { + // If there's a recorded length, go ahead and return it. + const SVal *Recorded = state->get(MR); + if (Recorded) + return *Recorded; + } + + // Otherwise, get a new symbol and update the state. + unsigned Count = C.getCurrentBlockCount(); + SValBuilder &svalBuilder = C.getSValBuilder(); + QualType sizeTy = svalBuilder.getContext().getSizeType(); + SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), + MR, Ex, sizeTy, Count); + + if (!hypothetical) + state = state->set(MR, strLength); + + return strLength; +} + +SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, + const Expr *Ex, SVal Buf, + bool hypothetical) const { + const MemRegion *MR = Buf.getAsRegion(); + if (!MR) { + // If we can't get a region, see if it's something we /know/ isn't a + // C string. In the context of locations, the only time we can issue such + // a warning is for labels. + if (loc::GotoLabel *Label = dyn_cast(&Buf)) { + if (!Filter.CheckCStringNotNullTerm) + return UndefinedVal(); + + if (ExplodedNode *N = C.addTransition(state)) { + if (!BT_NotCString) + BT_NotCString.reset(new BuiltinBug("Unix API", + "Argument is not a null-terminated string.")); + + SmallString<120> buf; + llvm::raw_svector_ostream os(buf); + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription + << " is the address of the label '" << Label->getLabel()->getName() + << "', which is not a null-terminated string"; + + // Generate a report for this bug. + BugReport *report = new BugReport(*BT_NotCString, + os.str(), N); + + report->addRange(Ex->getSourceRange()); + C.EmitReport(report); + } + return UndefinedVal(); + + } + + // If it's not a region and not a label, give up. + return UnknownVal(); + } + + // If we have a region, strip casts from it and see if we can figure out + // its length. For anything we can't figure out, just return UnknownVal. + MR = MR->StripCasts(); + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: { + // Modifying the contents of string regions is undefined [C99 6.4.5p6], + // so we can assume that the byte length is the correct C string length. + SValBuilder &svalBuilder = C.getSValBuilder(); + QualType sizeTy = svalBuilder.getContext().getSizeType(); + const StringLiteral *strLit = cast(MR)->getStringLiteral(); + return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); + } + case MemRegion::SymbolicRegionKind: + case MemRegion::AllocaRegionKind: + case MemRegion::VarRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); + case MemRegion::CompoundLiteralRegionKind: + // FIXME: Can we track this? Is it necessary? + return UnknownVal(); + case MemRegion::ElementRegionKind: + // FIXME: How can we handle this? It's not good enough to subtract the + // offset from the base string length; consider "123\x00567" and &a[5]. + return UnknownVal(); + default: + // Other regions (mostly non-data) can't have a reliable C string length. + // In this case, an error is emitted and UndefinedVal is returned. + // The caller should always be prepared to handle this case. + if (!Filter.CheckCStringNotNullTerm) + return UndefinedVal(); + + if (ExplodedNode *N = C.addTransition(state)) { + if (!BT_NotCString) + BT_NotCString.reset(new BuiltinBug("Unix API", + "Argument is not a null-terminated string.")); + + SmallString<120> buf; + llvm::raw_svector_ostream os(buf); + + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription << " is "; + + if (SummarizeRegion(os, C.getASTContext(), MR)) + os << ", which is not a null-terminated string"; + else + os << "not a null-terminated string"; + + // Generate a report for this bug. + BugReport *report = new BugReport(*BT_NotCString, + os.str(), N); + + report->addRange(Ex->getSourceRange()); + C.EmitReport(report); + } + + return UndefinedVal(); + } +} + +const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, + ProgramStateRef &state, const Expr *expr, SVal val) const { + + // Get the memory region pointed to by the val. + const MemRegion *bufRegion = val.getAsRegion(); + if (!bufRegion) + return NULL; + + // Strip casts off the memory region. + bufRegion = bufRegion->StripCasts(); + + // Cast the memory region to a string region. + const StringRegion *strRegion= dyn_cast(bufRegion); + if (!strRegion) + return NULL; + + // Return the actual string in the string region. + return strRegion->getStringLiteral(); +} + +ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, + ProgramStateRef state, + const Expr *E, SVal V) { + Loc *L = dyn_cast(&V); + if (!L) + return state; + + // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes + // some assumptions about the value that CFRefCount can't. Even so, it should + // probably be refactored. + if (loc::MemRegionVal* MR = dyn_cast(L)) { + const MemRegion *R = MR->getRegion()->StripCasts(); + + // Are we dealing with an ElementRegion? If so, we should be invalidating + // the super-region. + if (const ElementRegion *ER = dyn_cast(R)) { + R = ER->getSuperRegion(); + // FIXME: What about layers of ElementRegions? + } + + // Invalidate this region. + unsigned Count = C.getCurrentBlockCount(); + const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); + return state->invalidateRegions(R, E, Count, LCtx); + } + + // If we have a non-region value by chance, just remove the binding. + // FIXME: is this necessary or correct? This handles the non-Region + // cases. Is it ever valid to store to these? + return state->unbindLoc(*L); +} + +bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, + const MemRegion *MR) { + const TypedValueRegion *TVR = dyn_cast(MR); + + switch (MR->getKind()) { + case MemRegion::FunctionTextRegionKind: { + const FunctionDecl *FD = cast(MR)->getDecl(); + if (FD) + os << "the address of the function '" << *FD << '\''; + else + os << "the address of a function"; + return true; + } + case MemRegion::BlockTextRegionKind: + os << "block text"; + return true; + case MemRegion::BlockDataRegionKind: + os << "a block"; + return true; + case MemRegion::CXXThisRegionKind: + case MemRegion::CXXTempObjectRegionKind: + os << "a C++ temp object of type " << TVR->getValueType().getAsString(); + return true; + case MemRegion::VarRegionKind: + os << "a variable of type" << TVR->getValueType().getAsString(); + return true; + case MemRegion::FieldRegionKind: + os << "a field of type " << TVR->getValueType().getAsString(); + return true; + case MemRegion::ObjCIvarRegionKind: + os << "an instance variable of type " << TVR->getValueType().getAsString(); + return true; + default: + return false; + } +} + +//===----------------------------------------------------------------------===// +// evaluation of individual function calls. +//===----------------------------------------------------------------------===// + +void CStringChecker::evalCopyCommon(CheckerContext &C, + const CallExpr *CE, + ProgramStateRef state, + const Expr *Size, const Expr *Dest, + const Expr *Source, bool Restricted, + bool IsMempcpy) const { + CurrentFunctionDescription = "memory copy function"; + + // See if the size argument is zero. + const LocationContext *LCtx = C.getLocationContext(); + SVal sizeVal = state->getSVal(Size, LCtx); + QualType sizeTy = Size->getType(); + + ProgramStateRef stateZeroSize, stateNonZeroSize; + llvm::tie(stateZeroSize, stateNonZeroSize) = + assumeZero(C, state, sizeVal, sizeTy); + + // Get the value of the Dest. + SVal destVal = state->getSVal(Dest, LCtx); + + // If the size is zero, there won't be any actual memory access, so + // just bind the return value to the destination buffer and return. + if (stateZeroSize) { + stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); + C.addTransition(stateZeroSize); + } + + // If the size can be nonzero, we have to check the other arguments. + if (stateNonZeroSize) { + state = stateNonZeroSize; + + // Ensure the destination is not null. If it is NULL there will be a + // NULL pointer dereference. + state = checkNonNull(C, state, Dest, destVal); + if (!state) + return; + + // Get the value of the Src. + SVal srcVal = state->getSVal(Source, LCtx); + + // Ensure the source is not null. If it is NULL there will be a + // NULL pointer dereference. + state = checkNonNull(C, state, Source, srcVal); + if (!state) + return; + + // Ensure the accesses are valid and that the buffers do not overlap. + const char * const writeWarning = + "Memory copy function overflows destination buffer"; + state = CheckBufferAccess(C, state, Size, Dest, Source, + writeWarning, /* sourceWarning = */ NULL); + if (Restricted) + state = CheckOverlap(C, state, Size, Dest, Source); + + if (!state) + return; + + // If this is mempcpy, get the byte after the last byte copied and + // bind the expr. + if (IsMempcpy) { + loc::MemRegionVal *destRegVal = dyn_cast(&destVal); + assert(destRegVal && "Destination should be a known MemRegionVal here"); + + // Get the length to copy. + NonLoc *lenValNonLoc = dyn_cast(&sizeVal); + + if (lenValNonLoc) { + // Get the byte after the last byte copied. + SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, + *destRegVal, + *lenValNonLoc, + Dest->getType()); + + // The byte after the last byte copied is the return value. + state = state->BindExpr(CE, LCtx, lastElement); + } else { + // If we don't know how much we copied, we can at least + // conjure a return value for later. + unsigned Count = C.getCurrentBlockCount(); + SVal result = + C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count); + state = state->BindExpr(CE, LCtx, result); + } + + } else { + // All other copies return the destination buffer. + // (Well, bcopy() has a void return type, but this won't hurt.) + state = state->BindExpr(CE, LCtx, destVal); + } + + // Invalidate the destination. + // FIXME: Even if we can't perfectly model the copy, we should see if we + // can use LazyCompoundVals to copy the source values into the destination. + // This would probably remove any existing bindings past the end of the + // copied region, but that's still an improvement over blank invalidation. + state = InvalidateBuffer(C, state, Dest, + state->getSVal(Dest, C.getLocationContext())); + C.addTransition(state); + } +} + + +void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + // void *memcpy(void *restrict dst, const void *restrict src, size_t n); + // The return value is the address of the destination buffer. + const Expr *Dest = CE->getArg(0); + ProgramStateRef state = C.getState(); + + evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); +} + +void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); + // The return value is a pointer to the byte following the last written byte. + const Expr *Dest = CE->getArg(0); + ProgramStateRef state = C.getState(); + + evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); +} + +void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + // void *memmove(void *dst, const void *src, size_t n); + // The return value is the address of the destination buffer. + const Expr *Dest = CE->getArg(0); + ProgramStateRef state = C.getState(); + + evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); +} + +void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + // void bcopy(const void *src, void *dst, size_t n); + evalCopyCommon(C, CE, C.getState(), + CE->getArg(2), CE->getArg(1), CE->getArg(0)); +} + +void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + // int memcmp(const void *s1, const void *s2, size_t n); + CurrentFunctionDescription = "memory comparison function"; + + const Expr *Left = CE->getArg(0); + const Expr *Right = CE->getArg(1); + const Expr *Size = CE->getArg(2); + + ProgramStateRef state = C.getState(); + SValBuilder &svalBuilder = C.getSValBuilder(); + + // See if the size argument is zero. + const LocationContext *LCtx = C.getLocationContext(); + SVal sizeVal = state->getSVal(Size, LCtx); + QualType sizeTy = Size->getType(); + + ProgramStateRef stateZeroSize, stateNonZeroSize; + llvm::tie(stateZeroSize, stateNonZeroSize) = + assumeZero(C, state, sizeVal, sizeTy); + + // If the size can be zero, the result will be 0 in that case, and we don't + // have to check either of the buffers. + if (stateZeroSize) { + state = stateZeroSize; + state = state->BindExpr(CE, LCtx, + svalBuilder.makeZeroVal(CE->getType())); + C.addTransition(state); + } + + // If the size can be nonzero, we have to check the other arguments. + if (stateNonZeroSize) { + state = stateNonZeroSize; + // If we know the two buffers are the same, we know the result is 0. + // First, get the two buffers' addresses. Another checker will have already + // made sure they're not undefined. + DefinedOrUnknownSVal LV = + cast(state->getSVal(Left, LCtx)); + DefinedOrUnknownSVal RV = + cast(state->getSVal(Right, LCtx)); + + // See if they are the same. + DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); + ProgramStateRef StSameBuf, StNotSameBuf; + llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); + + // If the two arguments might be the same buffer, we know the result is 0, + // and we only need to check one size. + if (StSameBuf) { + state = StSameBuf; + state = CheckBufferAccess(C, state, Size, Left); + if (state) { + state = StSameBuf->BindExpr(CE, LCtx, + svalBuilder.makeZeroVal(CE->getType())); + C.addTransition(state); + } + } + + // If the two arguments might be different buffers, we have to check the + // size of both of them. + if (StNotSameBuf) { + state = StNotSameBuf; + state = CheckBufferAccess(C, state, Size, Left, Right); + if (state) { + // The return value is the comparison result, which we don't know. + unsigned Count = C.getCurrentBlockCount(); + SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count); + state = state->BindExpr(CE, LCtx, CmpV); + C.addTransition(state); + } + } + } +} + +void CStringChecker::evalstrLength(CheckerContext &C, + const CallExpr *CE) const { + if (CE->getNumArgs() < 1) + return; + + // size_t strlen(const char *s); + evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); +} + +void CStringChecker::evalstrnLength(CheckerContext &C, + const CallExpr *CE) const { + if (CE->getNumArgs() < 2) + return; + + // size_t strnlen(const char *s, size_t maxlen); + evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); +} + +void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, + bool IsStrnlen) const { + CurrentFunctionDescription = "string length function"; + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + if (IsStrnlen) { + const Expr *maxlenExpr = CE->getArg(1); + SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); + + ProgramStateRef stateZeroSize, stateNonZeroSize; + llvm::tie(stateZeroSize, stateNonZeroSize) = + assumeZero(C, state, maxlenVal, maxlenExpr->getType()); + + // If the size can be zero, the result will be 0 in that case, and we don't + // have to check the string itself. + if (stateZeroSize) { + SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); + stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); + C.addTransition(stateZeroSize); + } + + // If the size is GUARANTEED to be zero, we're done! + if (!stateNonZeroSize) + return; + + // Otherwise, record the assumption that the size is nonzero. + state = stateNonZeroSize; + } + + // Check that the string argument is non-null. + const Expr *Arg = CE->getArg(0); + SVal ArgVal = state->getSVal(Arg, LCtx); + + state = checkNonNull(C, state, Arg, ArgVal); + + if (!state) + return; + + SVal strLength = getCStringLength(C, state, Arg, ArgVal); + + // If the argument isn't a valid C string, there's no valid state to + // transition to. + if (strLength.isUndef()) + return; + + DefinedOrUnknownSVal result = UnknownVal(); + + // If the check is for strnlen() then bind the return value to no more than + // the maxlen value. + if (IsStrnlen) { + QualType cmpTy = C.getSValBuilder().getConditionType(); + + // It's a little unfortunate to be getting this again, + // but it's not that expensive... + const Expr *maxlenExpr = CE->getArg(1); + SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); + + NonLoc *strLengthNL = dyn_cast(&strLength); + NonLoc *maxlenValNL = dyn_cast(&maxlenVal); + + if (strLengthNL && maxlenValNL) { + ProgramStateRef stateStringTooLong, stateStringNotTooLong; + + // Check if the strLength is greater than the maxlen. + llvm::tie(stateStringTooLong, stateStringNotTooLong) = + state->assume(cast + (C.getSValBuilder().evalBinOpNN(state, BO_GT, + *strLengthNL, + *maxlenValNL, + cmpTy))); + + if (stateStringTooLong && !stateStringNotTooLong) { + // If the string is longer than maxlen, return maxlen. + result = *maxlenValNL; + } else if (stateStringNotTooLong && !stateStringTooLong) { + // If the string is shorter than maxlen, return its length. + result = *strLengthNL; + } + } + + if (result.isUnknown()) { + // If we don't have enough information for a comparison, there's + // no guarantee the full string length will actually be returned. + // All we know is the return value is the min of the string length + // and the limit. This is better than nothing. + unsigned Count = C.getCurrentBlockCount(); + result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count); + NonLoc *resultNL = cast(&result); + + if (strLengthNL) { + state = state->assume(cast + (C.getSValBuilder().evalBinOpNN(state, BO_LE, + *resultNL, + *strLengthNL, + cmpTy)), true); + } + + if (maxlenValNL) { + state = state->assume(cast + (C.getSValBuilder().evalBinOpNN(state, BO_LE, + *resultNL, + *maxlenValNL, + cmpTy)), true); + } + } + + } else { + // This is a plain strlen(), not strnlen(). + result = cast(strLength); + + // If we don't know the length of the string, conjure a return + // value, so it can be used in constraints, at least. + if (result.isUnknown()) { + unsigned Count = C.getCurrentBlockCount(); + result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count); + } + } + + // Bind the return value. + assert(!result.isUnknown() && "Should have conjured a value by now"); + state = state->BindExpr(CE, LCtx, result); + C.addTransition(state); +} + +void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 2) + return; + + // char *strcpy(char *restrict dst, const char *restrict src); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ false, + /* isAppending = */ false); +} + +void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + // char *strncpy(char *restrict dst, const char *restrict src, size_t n); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ true, + /* isAppending = */ false); +} + +void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 2) + return; + + // char *stpcpy(char *restrict dst, const char *restrict src); + evalStrcpyCommon(C, CE, + /* returnEnd = */ true, + /* isBounded = */ false, + /* isAppending = */ false); +} + +void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 2) + return; + + //char *strcat(char *restrict s1, const char *restrict s2); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ false, + /* isAppending = */ true); +} + +void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + //char *strncat(char *restrict s1, const char *restrict s2, size_t n); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ true, + /* isAppending = */ true); +} + +void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, + bool returnEnd, bool isBounded, + bool isAppending) const { + CurrentFunctionDescription = "string copy function"; + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + // Check that the destination is non-null. + const Expr *Dst = CE->getArg(0); + SVal DstVal = state->getSVal(Dst, LCtx); + + state = checkNonNull(C, state, Dst, DstVal); + if (!state) + return; + + // Check that the source is non-null. + const Expr *srcExpr = CE->getArg(1); + SVal srcVal = state->getSVal(srcExpr, LCtx); + state = checkNonNull(C, state, srcExpr, srcVal); + if (!state) + return; + + // Get the string length of the source. + SVal strLength = getCStringLength(C, state, srcExpr, srcVal); + + // If the source isn't a valid C string, give up. + if (strLength.isUndef()) + return; + + SValBuilder &svalBuilder = C.getSValBuilder(); + QualType cmpTy = svalBuilder.getConditionType(); + QualType sizeTy = svalBuilder.getContext().getSizeType(); + + // These two values allow checking two kinds of errors: + // - actual overflows caused by a source that doesn't fit in the destination + // - potential overflows caused by a bound that could exceed the destination + SVal amountCopied = UnknownVal(); + SVal maxLastElementIndex = UnknownVal(); + const char *boundWarning = NULL; + + // If the function is strncpy, strncat, etc... it is bounded. + if (isBounded) { + // Get the max number of characters to copy. + const Expr *lenExpr = CE->getArg(2); + SVal lenVal = state->getSVal(lenExpr, LCtx); + + // Protect against misdeclared strncpy(). + lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType()); + + NonLoc *strLengthNL = dyn_cast(&strLength); + NonLoc *lenValNL = dyn_cast(&lenVal); + + // If we know both values, we might be able to figure out how much + // we're copying. + if (strLengthNL && lenValNL) { + ProgramStateRef stateSourceTooLong, stateSourceNotTooLong; + + // Check if the max number to copy is less than the length of the src. + // If the bound is equal to the source length, strncpy won't null- + // terminate the result! + llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = + state->assume(cast + (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, + *lenValNL, cmpTy))); + + if (stateSourceTooLong && !stateSourceNotTooLong) { + // Max number to copy is less than the length of the src, so the actual + // strLength copied is the max number arg. + state = stateSourceTooLong; + amountCopied = lenVal; + + } else if (!stateSourceTooLong && stateSourceNotTooLong) { + // The source buffer entirely fits in the bound. + state = stateSourceNotTooLong; + amountCopied = strLength; + } + } + + // We still want to know if the bound is known to be too large. + if (lenValNL) { + if (isAppending) { + // For strncat, the check is strlen(dst) + lenVal < sizeof(dst) + + // Get the string length of the destination. If the destination is + // memory that can't have a string length, we shouldn't be copying + // into it anyway. + SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); + if (dstStrLength.isUndef()) + return; + + if (NonLoc *dstStrLengthNL = dyn_cast(&dstStrLength)) { + maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add, + *lenValNL, + *dstStrLengthNL, + sizeTy); + boundWarning = "Size argument is greater than the free space in the " + "destination buffer"; + } + + } else { + // For strncpy, this is just checking that lenVal <= sizeof(dst) + // (Yes, strncpy and strncat differ in how they treat termination. + // strncat ALWAYS terminates, but strncpy doesn't.) + NonLoc one = cast(svalBuilder.makeIntVal(1, sizeTy)); + maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, + one, sizeTy); + boundWarning = "Size argument is greater than the length of the " + "destination buffer"; + } + } + + // If we couldn't pin down the copy length, at least bound it. + // FIXME: We should actually run this code path for append as well, but + // right now it creates problems with constraints (since we can end up + // trying to pass constraints from symbol to symbol). + if (amountCopied.isUnknown() && !isAppending) { + // Try to get a "hypothetical" string length symbol, which we can later + // set as a real value if that turns out to be the case. + amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); + assert(!amountCopied.isUndef()); + + if (NonLoc *amountCopiedNL = dyn_cast(&amountCopied)) { + if (lenValNL) { + // amountCopied <= lenVal + SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, + *amountCopiedNL, + *lenValNL, + cmpTy); + state = state->assume(cast(copiedLessThanBound), + true); + if (!state) + return; + } + + if (strLengthNL) { + // amountCopied <= strlen(source) + SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, + *amountCopiedNL, + *strLengthNL, + cmpTy); + state = state->assume(cast(copiedLessThanSrc), + true); + if (!state) + return; + } + } + } + + } else { + // The function isn't bounded. The amount copied should match the length + // of the source buffer. + amountCopied = strLength; + } + + assert(state); + + // This represents the number of characters copied into the destination + // buffer. (It may not actually be the strlen if the destination buffer + // is not terminated.) + SVal finalStrLength = UnknownVal(); + + // If this is an appending function (strcat, strncat...) then set the + // string length to strlen(src) + strlen(dst) since the buffer will + // ultimately contain both. + if (isAppending) { + // Get the string length of the destination. If the destination is memory + // that can't have a string length, we shouldn't be copying into it anyway. + SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); + if (dstStrLength.isUndef()) + return; + + NonLoc *srcStrLengthNL = dyn_cast(&amountCopied); + NonLoc *dstStrLengthNL = dyn_cast(&dstStrLength); + + // If we know both string lengths, we might know the final string length. + if (srcStrLengthNL && dstStrLengthNL) { + // Make sure the two lengths together don't overflow a size_t. + state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); + if (!state) + return; + + finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, + *dstStrLengthNL, sizeTy); + } + + // If we couldn't get a single value for the final string length, + // we can at least bound it by the individual lengths. + if (finalStrLength.isUnknown()) { + // Try to get a "hypothetical" string length symbol, which we can later + // set as a real value if that turns out to be the case. + finalStrLength = getCStringLength(C, state, CE, DstVal, true); + assert(!finalStrLength.isUndef()); + + if (NonLoc *finalStrLengthNL = dyn_cast(&finalStrLength)) { + if (srcStrLengthNL) { + // finalStrLength >= srcStrLength + SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, + *finalStrLengthNL, + *srcStrLengthNL, + cmpTy); + state = state->assume(cast(sourceInResult), + true); + if (!state) + return; + } + + if (dstStrLengthNL) { + // finalStrLength >= dstStrLength + SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, + *finalStrLengthNL, + *dstStrLengthNL, + cmpTy); + state = state->assume(cast(destInResult), + true); + if (!state) + return; + } + } + } + + } else { + // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and + // the final string length will match the input string length. + finalStrLength = amountCopied; + } + + // The final result of the function will either be a pointer past the last + // copied element, or a pointer to the start of the destination buffer. + SVal Result = (returnEnd ? UnknownVal() : DstVal); + + assert(state); + + // If the destination is a MemRegion, try to check for a buffer overflow and + // record the new string length. + if (loc::MemRegionVal *dstRegVal = dyn_cast(&DstVal)) { + QualType ptrTy = Dst->getType(); + + // If we have an exact value on a bounded copy, use that to check for + // overflows, rather than our estimate about how much is actually copied. + if (boundWarning) { + if (NonLoc *maxLastNL = dyn_cast(&maxLastElementIndex)) { + SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, + *maxLastNL, ptrTy); + state = CheckLocation(C, state, CE->getArg(2), maxLastElement, + boundWarning); + if (!state) + return; + } + } + + // Then, if the final length is known... + if (NonLoc *knownStrLength = dyn_cast(&finalStrLength)) { + SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, + *knownStrLength, ptrTy); + + // ...and we haven't checked the bound, we'll check the actual copy. + if (!boundWarning) { + const char * const warningMsg = + "String copy function overflows destination buffer"; + state = CheckLocation(C, state, Dst, lastElement, warningMsg); + if (!state) + return; + } + + // If this is a stpcpy-style copy, the last element is the return value. + if (returnEnd) + Result = lastElement; + } + + // Invalidate the destination. This must happen before we set the C string + // length because invalidation will clear the length. + // FIXME: Even if we can't perfectly model the copy, we should see if we + // can use LazyCompoundVals to copy the source values into the destination. + // This would probably remove any existing bindings past the end of the + // string, but that's still an improvement over blank invalidation. + state = InvalidateBuffer(C, state, Dst, *dstRegVal); + + // Set the C string length of the destination, if we know it. + if (isBounded && !isAppending) { + // strncpy is annoying in that it doesn't guarantee to null-terminate + // the result string. If the original string didn't fit entirely inside + // the bound (including the null-terminator), we don't know how long the + // result is. + if (amountCopied != strLength) + finalStrLength = UnknownVal(); + } + state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); + } + + assert(state); + + // If this is a stpcpy-style copy, but we were unable to check for a buffer + // overflow, we still need a result. Conjure a return value. + if (returnEnd && Result.isUnknown()) { + unsigned Count = C.getCurrentBlockCount(); + Result = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count); + } + + // Set the return value. + state = state->BindExpr(CE, LCtx, Result); + C.addTransition(state); +} + +void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 2) + return; + + //int strcmp(const char *s1, const char *s2); + evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); +} + +void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + //int strncmp(const char *s1, const char *s2, size_t n); + evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); +} + +void CStringChecker::evalStrcasecmp(CheckerContext &C, + const CallExpr *CE) const { + if (CE->getNumArgs() < 2) + return; + + //int strcasecmp(const char *s1, const char *s2); + evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); +} + +void CStringChecker::evalStrncasecmp(CheckerContext &C, + const CallExpr *CE) const { + if (CE->getNumArgs() < 3) + return; + + //int strncasecmp(const char *s1, const char *s2, size_t n); + evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); +} + +void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, + bool isBounded, bool ignoreCase) const { + CurrentFunctionDescription = "string comparison function"; + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + // Check that the first string is non-null + const Expr *s1 = CE->getArg(0); + SVal s1Val = state->getSVal(s1, LCtx); + state = checkNonNull(C, state, s1, s1Val); + if (!state) + return; + + // Check that the second string is non-null. + const Expr *s2 = CE->getArg(1); + SVal s2Val = state->getSVal(s2, LCtx); + state = checkNonNull(C, state, s2, s2Val); + if (!state) + return; + + // Get the string length of the first string or give up. + SVal s1Length = getCStringLength(C, state, s1, s1Val); + if (s1Length.isUndef()) + return; + + // Get the string length of the second string or give up. + SVal s2Length = getCStringLength(C, state, s2, s2Val); + if (s2Length.isUndef()) + return; + + // If we know the two buffers are the same, we know the result is 0. + // First, get the two buffers' addresses. Another checker will have already + // made sure they're not undefined. + DefinedOrUnknownSVal LV = cast(s1Val); + DefinedOrUnknownSVal RV = cast(s2Val); + + // See if they are the same. + SValBuilder &svalBuilder = C.getSValBuilder(); + DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); + ProgramStateRef StSameBuf, StNotSameBuf; + llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); + + // If the two arguments might be the same buffer, we know the result is 0, + // and we only need to check one size. + if (StSameBuf) { + StSameBuf = StSameBuf->BindExpr(CE, LCtx, + svalBuilder.makeZeroVal(CE->getType())); + C.addTransition(StSameBuf); + + // If the two arguments are GUARANTEED to be the same, we're done! + if (!StNotSameBuf) + return; + } + + assert(StNotSameBuf); + state = StNotSameBuf; + + // At this point we can go about comparing the two buffers. + // For now, we only do this if they're both known string literals. + + // Attempt to extract string literals from both expressions. + const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); + const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); + bool canComputeResult = false; + + if (s1StrLiteral && s2StrLiteral) { + StringRef s1StrRef = s1StrLiteral->getString(); + StringRef s2StrRef = s2StrLiteral->getString(); + + if (isBounded) { + // Get the max number of characters to compare. + const Expr *lenExpr = CE->getArg(2); + SVal lenVal = state->getSVal(lenExpr, LCtx); + + // If the length is known, we can get the right substrings. + if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { + // Create substrings of each to compare the prefix. + s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); + s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); + canComputeResult = true; + } + } else { + // This is a normal, unbounded strcmp. + canComputeResult = true; + } + + if (canComputeResult) { + // Real strcmp stops at null characters. + size_t s1Term = s1StrRef.find('\0'); + if (s1Term != StringRef::npos) + s1StrRef = s1StrRef.substr(0, s1Term); + + size_t s2Term = s2StrRef.find('\0'); + if (s2Term != StringRef::npos) + s2StrRef = s2StrRef.substr(0, s2Term); + + // Use StringRef's comparison methods to compute the actual result. + int result; + + if (ignoreCase) { + // Compare string 1 to string 2 the same way strcasecmp() does. + result = s1StrRef.compare_lower(s2StrRef); + } else { + // Compare string 1 to string 2 the same way strcmp() does. + result = s1StrRef.compare(s2StrRef); + } + + // Build the SVal of the comparison and bind the return value. + SVal resultVal = svalBuilder.makeIntVal(result, CE->getType()); + state = state->BindExpr(CE, LCtx, resultVal); + } + } + + if (!canComputeResult) { + // Conjure a symbolic value. It's the best we can do. + unsigned Count = C.getCurrentBlockCount(); + SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count); + state = state->BindExpr(CE, LCtx, resultVal); + } + + // Record this as a possible path. + C.addTransition(state); +} + +//===----------------------------------------------------------------------===// +// The driver method, and other Checker callbacks. +//===----------------------------------------------------------------------===// + +bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + + if (!FDecl) + return false; + + FnCheck evalFunction = 0; + if (C.isCLibraryFunction(FDecl, "memcpy")) + evalFunction = &CStringChecker::evalMemcpy; + else if (C.isCLibraryFunction(FDecl, "mempcpy")) + evalFunction = &CStringChecker::evalMempcpy; + else if (C.isCLibraryFunction(FDecl, "memcmp")) + evalFunction = &CStringChecker::evalMemcmp; + else if (C.isCLibraryFunction(FDecl, "memmove")) + evalFunction = &CStringChecker::evalMemmove; + else if (C.isCLibraryFunction(FDecl, "strcpy")) + evalFunction = &CStringChecker::evalStrcpy; + else if (C.isCLibraryFunction(FDecl, "strncpy")) + evalFunction = &CStringChecker::evalStrncpy; + else if (C.isCLibraryFunction(FDecl, "stpcpy")) + evalFunction = &CStringChecker::evalStpcpy; + else if (C.isCLibraryFunction(FDecl, "strcat")) + evalFunction = &CStringChecker::evalStrcat; + else if (C.isCLibraryFunction(FDecl, "strncat")) + evalFunction = &CStringChecker::evalStrncat; + else if (C.isCLibraryFunction(FDecl, "strlen")) + evalFunction = &CStringChecker::evalstrLength; + else if (C.isCLibraryFunction(FDecl, "strnlen")) + evalFunction = &CStringChecker::evalstrnLength; + else if (C.isCLibraryFunction(FDecl, "strcmp")) + evalFunction = &CStringChecker::evalStrcmp; + else if (C.isCLibraryFunction(FDecl, "strncmp")) + evalFunction = &CStringChecker::evalStrncmp; + else if (C.isCLibraryFunction(FDecl, "strcasecmp")) + evalFunction = &CStringChecker::evalStrcasecmp; + else if (C.isCLibraryFunction(FDecl, "strncasecmp")) + evalFunction = &CStringChecker::evalStrncasecmp; + else if (C.isCLibraryFunction(FDecl, "bcopy")) + evalFunction = &CStringChecker::evalBcopy; + else if (C.isCLibraryFunction(FDecl, "bcmp")) + evalFunction = &CStringChecker::evalMemcmp; + + // If the callee isn't a string function, let another checker handle it. + if (!evalFunction) + return false; + + // Make sure each function sets its own description. + // (But don't bother in a release build.) + assert(!(CurrentFunctionDescription = NULL)); + + // Check and evaluate the call. + (this->*evalFunction)(C, CE); + + // If the evaluate call resulted in no change, chain to the next eval call + // handler. + // Note, the custom CString evaluation calls assume that basic safety + // properties are held. However, if the user chooses to turn off some of these + // checks, we ignore the issues and leave the call evaluation to a generic + // handler. + if (!C.isDifferent()) + return false; + + return true; +} + +void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { + // Record string length for char a[] = "abc"; + ProgramStateRef state = C.getState(); + + for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); + I != E; ++I) { + const VarDecl *D = dyn_cast(*I); + if (!D) + continue; + + // FIXME: Handle array fields of structs. + if (!D->getType()->isArrayType()) + continue; + + const Expr *Init = D->getInit(); + if (!Init) + continue; + if (!isa(Init)) + continue; + + Loc VarLoc = state->getLValue(D, C.getLocationContext()); + const MemRegion *MR = VarLoc.getAsRegion(); + if (!MR) + continue; + + SVal StrVal = state->getSVal(Init, C.getLocationContext()); + assert(StrVal.isValid() && "Initializer string is unknown or undefined"); + DefinedOrUnknownSVal strLength + = cast(getCStringLength(C, state, Init, StrVal)); + + state = state->set(MR, strLength); + } + + C.addTransition(state); +} + +bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const { + CStringLength::EntryMap Entries = state->get(); + return !Entries.isEmpty(); +} + +ProgramStateRef +CStringChecker::checkRegionChanges(ProgramStateRef state, + const StoreManager::InvalidatedSymbols *, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const CallOrObjCMessage *Call) const { + CStringLength::EntryMap Entries = state->get(); + if (Entries.isEmpty()) + return state; + + llvm::SmallPtrSet Invalidated; + llvm::SmallPtrSet SuperRegions; + + // First build sets for the changed regions and their super-regions. + for (ArrayRef::iterator + I = Regions.begin(), E = Regions.end(); I != E; ++I) { + const MemRegion *MR = *I; + Invalidated.insert(MR); + + SuperRegions.insert(MR); + while (const SubRegion *SR = dyn_cast(MR)) { + MR = SR->getSuperRegion(); + SuperRegions.insert(MR); + } + } + + CStringLength::EntryMap::Factory &F = state->get_context(); + + // Then loop over the entries in the current state. + for (CStringLength::EntryMap::iterator I = Entries.begin(), + E = Entries.end(); I != E; ++I) { + const MemRegion *MR = I.getKey(); + + // Is this entry for a super-region of a changed region? + if (SuperRegions.count(MR)) { + Entries = F.remove(Entries, MR); + continue; + } + + // Is this entry for a sub-region of a changed region? + const MemRegion *Super = MR; + while (const SubRegion *SR = dyn_cast(Super)) { + Super = SR->getSuperRegion(); + if (Invalidated.count(Super)) { + Entries = F.remove(Entries, MR); + break; + } + } + } + + return state->set(Entries); +} + +void CStringChecker::checkLiveSymbols(ProgramStateRef state, + SymbolReaper &SR) const { + // Mark all symbols in our string length map as valid. + CStringLength::EntryMap Entries = state->get(); + + for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + SVal Len = I.getData(); + + for (SymExpr::symbol_iterator si = Len.symbol_begin(), + se = Len.symbol_end(); si != se; ++si) + SR.markInUse(*si); + } +} + +void CStringChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + if (!SR.hasDeadSymbols()) + return; + + ProgramStateRef state = C.getState(); + CStringLength::EntryMap Entries = state->get(); + if (Entries.isEmpty()) + return; + + CStringLength::EntryMap::Factory &F = state->get_context(); + for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + SVal Len = I.getData(); + if (SymbolRef Sym = Len.getAsSymbol()) { + if (SR.isDead(Sym)) + Entries = F.remove(Entries, I.getKey()); + } + } + + state = state->set(Entries); + C.addTransition(state); +} + +#define REGISTER_CHECKER(name) \ +void ento::register##name(CheckerManager &mgr) {\ + static CStringChecker *TheChecker = 0; \ + if (TheChecker == 0) \ + TheChecker = mgr.registerChecker(); \ + TheChecker->Filter.Check##name = true; \ +} + +REGISTER_CHECKER(CStringNullArg) +REGISTER_CHECKER(CStringOutOfBounds) +REGISTER_CHECKER(CStringBufferOverlap) +REGISTER_CHECKER(CStringNotNullTerm) + +void ento::registerCStringCheckerBasic(CheckerManager &Mgr) { + registerCStringNullArg(Mgr); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp new file mode 100644 index 0000000..befc935 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp @@ -0,0 +1,191 @@ +//== CStringSyntaxChecker.cpp - CoreFoundation containers API *- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// An AST checker that looks for common pitfalls when using C string APIs. +// - Identifies erroneous patterns in the last argument to strncat - the number +// of bytes to copy. +// +//===----------------------------------------------------------------------===// +#include "ClangSACheckers.h" +#include "clang/Analysis/AnalysisContext.h" +#include "clang/AST/Expr.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TypeTraits.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class WalkAST: public StmtVisitor { + BugReporter &BR; + AnalysisDeclContext* AC; + ASTContext &ASTC; + + /// Check if two expressions refer to the same declaration. + inline bool sameDecl(const Expr *A1, const Expr *A2) { + if (const DeclRefExpr *D1 = dyn_cast(A1->IgnoreParenCasts())) + if (const DeclRefExpr *D2 = dyn_cast(A2->IgnoreParenCasts())) + return D1->getDecl() == D2->getDecl(); + return false; + } + + /// Check if the expression E is a sizeof(WithArg). + inline bool isSizeof(const Expr *E, const Expr *WithArg) { + if (const UnaryExprOrTypeTraitExpr *UE = + dyn_cast(E)) + if (UE->getKind() == UETT_SizeOf) + return sameDecl(UE->getArgumentExpr(), WithArg); + return false; + } + + /// Check if the expression E is a strlen(WithArg). + inline bool isStrlen(const Expr *E, const Expr *WithArg) { + if (const CallExpr *CE = dyn_cast(E)) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return false; + return (CheckerContext::isCLibraryFunction(FD, "strlen", ASTC) + && sameDecl(CE->getArg(0), WithArg)); + } + return false; + } + + /// Check if the expression is an integer literal with value 1. + inline bool isOne(const Expr *E) { + if (const IntegerLiteral *IL = dyn_cast(E)) + return (IL->getValue().isIntN(1)); + return false; + } + + inline StringRef getPrintableName(const Expr *E) { + if (const DeclRefExpr *D = dyn_cast(E->IgnoreParenCasts())) + return D->getDecl()->getName(); + return StringRef(); + } + + /// Identify erroneous patterns in the last argument to strncat - the number + /// of bytes to copy. + bool containsBadStrncatPattern(const CallExpr *CE); + +public: + WalkAST(BugReporter &br, AnalysisDeclContext* ac) : + BR(br), AC(ac), ASTC(AC->getASTContext()) { + } + + // Statement visitor methods. + void VisitChildren(Stmt *S); + void VisitStmt(Stmt *S) { + VisitChildren(S); + } + void VisitCallExpr(CallExpr *CE); +}; +} // end anonymous namespace + +// The correct size argument should look like following: +// strncat(dst, src, sizeof(dst) - strlen(dest) - 1); +// We look for the following anti-patterns: +// - strncat(dst, src, sizeof(dst) - strlen(dst)); +// - strncat(dst, src, sizeof(dst) - 1); +// - strncat(dst, src, sizeof(dst)); +bool WalkAST::containsBadStrncatPattern(const CallExpr *CE) { + const Expr *DstArg = CE->getArg(0); + const Expr *SrcArg = CE->getArg(1); + const Expr *LenArg = CE->getArg(2); + + // Identify wrong size expressions, which are commonly used instead. + if (const BinaryOperator *BE = + dyn_cast(LenArg->IgnoreParenCasts())) { + // - sizeof(dst) - strlen(dst) + if (BE->getOpcode() == BO_Sub) { + const Expr *L = BE->getLHS(); + const Expr *R = BE->getRHS(); + if (isSizeof(L, DstArg) && isStrlen(R, DstArg)) + return true; + + // - sizeof(dst) - 1 + if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts())) + return true; + } + } + // - sizeof(dst) + if (isSizeof(LenArg, DstArg)) + return true; + + // - sizeof(src) + if (isSizeof(LenArg, SrcArg)) + return true; + return false; +} + +void WalkAST::VisitCallExpr(CallExpr *CE) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return; + + if (CheckerContext::isCLibraryFunction(FD, "strncat", ASTC)) { + if (containsBadStrncatPattern(CE)) { + const Expr *DstArg = CE->getArg(0); + const Expr *LenArg = CE->getArg(2); + SourceRange R = LenArg->getSourceRange(); + PathDiagnosticLocation Loc = + PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC); + + StringRef DstName = getPrintableName(DstArg); + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + os << "Potential buffer overflow. "; + if (!DstName.empty()) { + os << "Replace with 'sizeof(" << DstName << ") " + "- strlen(" << DstName <<") - 1'"; + os << " or u"; + } else + os << "U"; + os << "se a safer 'strlcat' API"; + + BR.EmitBasicReport(FD, "Anti-pattern in the argument", "C String API", + os.str(), Loc, &R, 1); + } + } + + // Recurse and check children. + VisitChildren(CE); +} + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I != E; + ++I) + if (Stmt *child = *I) + Visit(child); +} + +namespace { +class CStringSyntaxChecker: public Checker { +public: + + void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr, + BugReporter &BR) const { + WalkAST walker(BR, Mgr.getAnalysisDeclContext(D)); + walker.Visit(D->getBody()); + } +}; +} + +void ento::registerCStringSyntaxChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} + diff --git a/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp new file mode 100644 index 0000000..f601431 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp @@ -0,0 +1,385 @@ +//===--- CallAndMessageChecker.cpp ------------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines CallAndMessageChecker, a builtin checker that checks for various +// errors of call and objc message expressions. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/AST/ParentMap.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { +class CallAndMessageChecker + : public Checker< check::PreStmt, check::PreObjCMessage > { + mutable OwningPtr BT_call_null; + mutable OwningPtr BT_call_undef; + mutable OwningPtr BT_call_arg; + mutable OwningPtr BT_msg_undef; + mutable OwningPtr BT_objc_prop_undef; + mutable OwningPtr BT_msg_arg; + mutable OwningPtr BT_msg_ret; +public: + + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + void checkPreObjCMessage(ObjCMessage msg, CheckerContext &C) const; + +private: + static void PreVisitProcessArgs(CheckerContext &C,CallOrObjCMessage callOrMsg, + const char *BT_desc, OwningPtr &BT); + static bool PreVisitProcessArg(CheckerContext &C, SVal V,SourceRange argRange, + const Expr *argEx, + const bool checkUninitFields, + const char *BT_desc, + OwningPtr &BT); + + static void EmitBadCall(BugType *BT, CheckerContext &C, const CallExpr *CE); + void emitNilReceiverBug(CheckerContext &C, const ObjCMessage &msg, + ExplodedNode *N) const; + + void HandleNilReceiver(CheckerContext &C, + ProgramStateRef state, + ObjCMessage msg) const; + + static void LazyInit_BT(const char *desc, OwningPtr &BT) { + if (!BT) + BT.reset(new BuiltinBug(desc)); + } +}; +} // end anonymous namespace + +void CallAndMessageChecker::EmitBadCall(BugType *BT, CheckerContext &C, + const CallExpr *CE) { + ExplodedNode *N = C.generateSink(); + if (!N) + return; + + BugReport *R = new BugReport(*BT, BT->getName(), N); + R->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, + bugreporter::GetCalleeExpr(N), R)); + C.EmitReport(R); +} + +void CallAndMessageChecker::PreVisitProcessArgs(CheckerContext &C, + CallOrObjCMessage callOrMsg, + const char *BT_desc, + OwningPtr &BT) { + // Don't check for uninitialized field values in arguments if the + // caller has a body that is available and we have the chance to inline it. + // This is a hack, but is a reasonable compromise betweens sometimes warning + // and sometimes not depending on if we decide to inline a function. + const Decl *D = callOrMsg.getDecl(); + const bool checkUninitFields = + !(C.getAnalysisManager().shouldInlineCall() && + (D && D->getBody())); + + for (unsigned i = 0, e = callOrMsg.getNumArgs(); i != e; ++i) + if (PreVisitProcessArg(C, callOrMsg.getArgSVal(i), + callOrMsg.getArgSourceRange(i), callOrMsg.getArg(i), + checkUninitFields, + BT_desc, BT)) + return; +} + +bool CallAndMessageChecker::PreVisitProcessArg(CheckerContext &C, + SVal V, SourceRange argRange, + const Expr *argEx, + const bool checkUninitFields, + const char *BT_desc, + OwningPtr &BT) { + if (V.isUndef()) { + if (ExplodedNode *N = C.generateSink()) { + LazyInit_BT(BT_desc, BT); + + // Generate a report for this bug. + BugReport *R = new BugReport(*BT, BT->getName(), N); + R->addRange(argRange); + if (argEx) + R->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, argEx, + R)); + C.EmitReport(R); + } + return true; + } + + if (!checkUninitFields) + return false; + + if (const nonloc::LazyCompoundVal *LV = + dyn_cast(&V)) { + + class FindUninitializedField { + public: + SmallVector FieldChain; + private: + ASTContext &C; + StoreManager &StoreMgr; + MemRegionManager &MrMgr; + Store store; + public: + FindUninitializedField(ASTContext &c, StoreManager &storeMgr, + MemRegionManager &mrMgr, Store s) + : C(c), StoreMgr(storeMgr), MrMgr(mrMgr), store(s) {} + + bool Find(const TypedValueRegion *R) { + QualType T = R->getValueType(); + if (const RecordType *RT = T->getAsStructureType()) { + const RecordDecl *RD = RT->getDecl()->getDefinition(); + assert(RD && "Referred record has no definition"); + for (RecordDecl::field_iterator I = + RD->field_begin(), E = RD->field_end(); I!=E; ++I) { + const FieldRegion *FR = MrMgr.getFieldRegion(*I, R); + FieldChain.push_back(*I); + T = (*I)->getType(); + if (T->getAsStructureType()) { + if (Find(FR)) + return true; + } + else { + const SVal &V = StoreMgr.getBinding(store, loc::MemRegionVal(FR)); + if (V.isUndef()) + return true; + } + FieldChain.pop_back(); + } + } + + return false; + } + }; + + const LazyCompoundValData *D = LV->getCVData(); + FindUninitializedField F(C.getASTContext(), + C.getState()->getStateManager().getStoreManager(), + C.getSValBuilder().getRegionManager(), + D->getStore()); + + if (F.Find(D->getRegion())) { + if (ExplodedNode *N = C.generateSink()) { + LazyInit_BT(BT_desc, BT); + SmallString<512> Str; + llvm::raw_svector_ostream os(Str); + os << "Passed-by-value struct argument contains uninitialized data"; + + if (F.FieldChain.size() == 1) + os << " (e.g., field: '" << *F.FieldChain[0] << "')"; + else { + os << " (e.g., via the field chain: '"; + bool first = true; + for (SmallVectorImpl::iterator + DI = F.FieldChain.begin(), DE = F.FieldChain.end(); DI!=DE;++DI){ + if (first) + first = false; + else + os << '.'; + os << **DI; + } + os << "')"; + } + + // Generate a report for this bug. + BugReport *R = new BugReport(*BT, os.str(), N); + R->addRange(argRange); + + // FIXME: enhance track back for uninitialized value for arbitrary + // memregions + C.EmitReport(R); + } + return true; + } + } + + return false; +} + +void CallAndMessageChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const{ + + const Expr *Callee = CE->getCallee()->IgnoreParens(); + const LocationContext *LCtx = C.getLocationContext(); + SVal L = C.getState()->getSVal(Callee, LCtx); + + if (L.isUndef()) { + if (!BT_call_undef) + BT_call_undef.reset(new BuiltinBug("Called function pointer is an " + "uninitalized pointer value")); + EmitBadCall(BT_call_undef.get(), C, CE); + return; + } + + if (isa(L)) { + if (!BT_call_null) + BT_call_null.reset( + new BuiltinBug("Called function pointer is null (null dereference)")); + EmitBadCall(BT_call_null.get(), C, CE); + } + + PreVisitProcessArgs(C, CallOrObjCMessage(CE, C.getState(), LCtx), + "Function call argument is an uninitialized value", + BT_call_arg); +} + +void CallAndMessageChecker::checkPreObjCMessage(ObjCMessage msg, + CheckerContext &C) const { + + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + // FIXME: Handle 'super'? + if (const Expr *receiver = msg.getInstanceReceiver()) { + SVal recVal = state->getSVal(receiver, LCtx); + if (recVal.isUndef()) { + if (ExplodedNode *N = C.generateSink()) { + BugType *BT = 0; + if (msg.isPureMessageExpr()) { + if (!BT_msg_undef) + BT_msg_undef.reset(new BuiltinBug("Receiver in message expression " + "is an uninitialized value")); + BT = BT_msg_undef.get(); + } + else { + if (!BT_objc_prop_undef) + BT_objc_prop_undef.reset(new BuiltinBug("Property access on an " + "uninitialized object pointer")); + BT = BT_objc_prop_undef.get(); + } + BugReport *R = + new BugReport(*BT, BT->getName(), N); + R->addRange(receiver->getSourceRange()); + R->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, + receiver, + R)); + C.EmitReport(R); + } + return; + } else { + // Bifurcate the state into nil and non-nil ones. + DefinedOrUnknownSVal receiverVal = cast(recVal); + + ProgramStateRef notNilState, nilState; + llvm::tie(notNilState, nilState) = state->assume(receiverVal); + + // Handle receiver must be nil. + if (nilState && !notNilState) { + HandleNilReceiver(C, state, msg); + return; + } + } + } + + const char *bugDesc = msg.isPropertySetter() ? + "Argument for property setter is an uninitialized value" + : "Argument in message expression is an uninitialized value"; + // Check for any arguments that are uninitialized/undefined. + PreVisitProcessArgs(C, CallOrObjCMessage(msg, state, LCtx), + bugDesc, BT_msg_arg); +} + +void CallAndMessageChecker::emitNilReceiverBug(CheckerContext &C, + const ObjCMessage &msg, + ExplodedNode *N) const { + + if (!BT_msg_ret) + BT_msg_ret.reset( + new BuiltinBug("Receiver in message expression is " + "'nil' and returns a garbage value")); + + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + os << "The receiver of message '" << msg.getSelector().getAsString() + << "' is nil and returns a value of type '" + << msg.getType(C.getASTContext()).getAsString() << "' that will be garbage"; + + BugReport *report = new BugReport(*BT_msg_ret, os.str(), N); + if (const Expr *receiver = msg.getInstanceReceiver()) { + report->addRange(receiver->getSourceRange()); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, + receiver, + report)); + } + C.EmitReport(report); +} + +static bool supportsNilWithFloatRet(const llvm::Triple &triple) { + return (triple.getVendor() == llvm::Triple::Apple && + (triple.getOS() == llvm::Triple::IOS || + !triple.isMacOSXVersionLT(10,5))); +} + +void CallAndMessageChecker::HandleNilReceiver(CheckerContext &C, + ProgramStateRef state, + ObjCMessage msg) const { + ASTContext &Ctx = C.getASTContext(); + + // Check the return type of the message expression. A message to nil will + // return different values depending on the return type and the architecture. + QualType RetTy = msg.getType(Ctx); + CanQualType CanRetTy = Ctx.getCanonicalType(RetTy); + const LocationContext *LCtx = C.getLocationContext(); + + if (CanRetTy->isStructureOrClassType()) { + // Structure returns are safe since the compiler zeroes them out. + SVal V = C.getSValBuilder().makeZeroVal(msg.getType(Ctx)); + C.addTransition(state->BindExpr(msg.getMessageExpr(), LCtx, V)); + return; + } + + // Other cases: check if sizeof(return type) > sizeof(void*) + if (CanRetTy != Ctx.VoidTy && C.getLocationContext()->getParentMap() + .isConsumedExpr(msg.getMessageExpr())) { + // Compute: sizeof(void *) and sizeof(return type) + const uint64_t voidPtrSize = Ctx.getTypeSize(Ctx.VoidPtrTy); + const uint64_t returnTypeSize = Ctx.getTypeSize(CanRetTy); + + if (voidPtrSize < returnTypeSize && + !(supportsNilWithFloatRet(Ctx.getTargetInfo().getTriple()) && + (Ctx.FloatTy == CanRetTy || + Ctx.DoubleTy == CanRetTy || + Ctx.LongDoubleTy == CanRetTy || + Ctx.LongLongTy == CanRetTy || + Ctx.UnsignedLongLongTy == CanRetTy))) { + if (ExplodedNode *N = C.generateSink(state)) + emitNilReceiverBug(C, msg, N); + return; + } + + // Handle the safe cases where the return value is 0 if the + // receiver is nil. + // + // FIXME: For now take the conservative approach that we only + // return null values if we *know* that the receiver is nil. + // This is because we can have surprises like: + // + // ... = [[NSScreens screens] objectAtIndex:0]; + // + // What can happen is that [... screens] could return nil, but + // it most likely isn't nil. We should assume the semantics + // of this case unless we have *a lot* more knowledge. + // + SVal V = C.getSValBuilder().makeZeroVal(msg.getType(Ctx)); + C.addTransition(state->BindExpr(msg.getMessageExpr(), LCtx, V)); + return; + } + + C.addTransition(state); +} + +void ento::registerCallAndMessageChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp new file mode 100644 index 0000000..2e184fb --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp @@ -0,0 +1,86 @@ +//=== CastSizeChecker.cpp ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// CastSizeChecker checks when casting a malloc'ed symbolic region to type T, +// whether the size of the symbolic region is a multiple of the size of T. +// +//===----------------------------------------------------------------------===// +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/AST/CharUnits.h" + +using namespace clang; +using namespace ento; + +namespace { +class CastSizeChecker : public Checker< check::PreStmt > { + mutable OwningPtr BT; +public: + void checkPreStmt(const CastExpr *CE, CheckerContext &C) const; +}; +} + +void CastSizeChecker::checkPreStmt(const CastExpr *CE,CheckerContext &C) const { + const Expr *E = CE->getSubExpr(); + ASTContext &Ctx = C.getASTContext(); + QualType ToTy = Ctx.getCanonicalType(CE->getType()); + const PointerType *ToPTy = dyn_cast(ToTy.getTypePtr()); + + if (!ToPTy) + return; + + QualType ToPointeeTy = ToPTy->getPointeeType(); + + // Only perform the check if 'ToPointeeTy' is a complete type. + if (ToPointeeTy->isIncompleteType()) + return; + + ProgramStateRef state = C.getState(); + const MemRegion *R = state->getSVal(E, C.getLocationContext()).getAsRegion(); + if (R == 0) + return; + + const SymbolicRegion *SR = dyn_cast(R); + if (SR == 0) + return; + + SValBuilder &svalBuilder = C.getSValBuilder(); + SVal extent = SR->getExtent(svalBuilder); + const llvm::APSInt *extentInt = svalBuilder.getKnownValue(state, extent); + if (!extentInt) + return; + + CharUnits regionSize = CharUnits::fromQuantity(extentInt->getSExtValue()); + CharUnits typeSize = C.getASTContext().getTypeSizeInChars(ToPointeeTy); + + // Ignore void, and a few other un-sizeable types. + if (typeSize.isZero()) + return; + + if (regionSize % typeSize != 0) { + if (ExplodedNode *errorNode = C.generateSink()) { + if (!BT) + BT.reset(new BuiltinBug("Cast region with wrong size.", + "Cast a region whose size is not a multiple of the" + " destination type size.")); + BugReport *R = new BugReport(*BT, BT->getDescription(), + errorNode); + R->addRange(CE->getSourceRange()); + C.EmitReport(R); + } + } +} + + +void ento::registerCastSizeChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp new file mode 100644 index 0000000..1407638 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp @@ -0,0 +1,74 @@ +//=== CastToStructChecker.cpp - Fixed address usage checker ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This files defines CastToStructChecker, a builtin checker that checks for +// cast from non-struct pointer to struct pointer. +// This check corresponds to CWE-588. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class CastToStructChecker : public Checker< check::PreStmt > { + mutable OwningPtr BT; + +public: + void checkPreStmt(const CastExpr *CE, CheckerContext &C) const; +}; +} + +void CastToStructChecker::checkPreStmt(const CastExpr *CE, + CheckerContext &C) const { + const Expr *E = CE->getSubExpr(); + ASTContext &Ctx = C.getASTContext(); + QualType OrigTy = Ctx.getCanonicalType(E->getType()); + QualType ToTy = Ctx.getCanonicalType(CE->getType()); + + const PointerType *OrigPTy = dyn_cast(OrigTy.getTypePtr()); + const PointerType *ToPTy = dyn_cast(ToTy.getTypePtr()); + + if (!ToPTy || !OrigPTy) + return; + + QualType OrigPointeeTy = OrigPTy->getPointeeType(); + QualType ToPointeeTy = ToPTy->getPointeeType(); + + if (!ToPointeeTy->isStructureOrClassType()) + return; + + // We allow cast from void*. + if (OrigPointeeTy->isVoidType()) + return; + + // Now the cast-to-type is struct pointer, the original type is not void*. + if (!OrigPointeeTy->isRecordType()) { + if (ExplodedNode *N = C.addTransition()) { + if (!BT) + BT.reset(new BuiltinBug("Cast from non-struct type to struct type", + "Casting a non-structure type to a structure type " + "and accessing a field can lead to memory access " + "errors or data corruption.")); + BugReport *R = new BugReport(*BT,BT->getDescription(), N); + R->addRange(CE->getSourceRange()); + C.EmitReport(R); + } + } +} + +void ento::registerCastToStructChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp new file mode 100644 index 0000000..133204a --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp @@ -0,0 +1,291 @@ +//==- CheckObjCDealloc.cpp - Check ObjC -dealloc implementation --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a CheckObjCDealloc, a checker that +// analyzes an Objective-C class's implementation to determine if it +// correctly implements -dealloc. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/DeclObjC.h" +#include "clang/Basic/LangOptions.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +static bool scan_dealloc(Stmt *S, Selector Dealloc) { + + if (ObjCMessageExpr *ME = dyn_cast(S)) + if (ME->getSelector() == Dealloc) { + switch (ME->getReceiverKind()) { + case ObjCMessageExpr::Instance: return false; + case ObjCMessageExpr::SuperInstance: return true; + case ObjCMessageExpr::Class: break; + case ObjCMessageExpr::SuperClass: break; + } + } + + // Recurse to children. + + for (Stmt::child_iterator I = S->child_begin(), E= S->child_end(); I!=E; ++I) + if (*I && scan_dealloc(*I, Dealloc)) + return true; + + return false; +} + +static bool scan_ivar_release(Stmt *S, ObjCIvarDecl *ID, + const ObjCPropertyDecl *PD, + Selector Release, + IdentifierInfo* SelfII, + ASTContext &Ctx) { + + // [mMyIvar release] + if (ObjCMessageExpr *ME = dyn_cast(S)) + if (ME->getSelector() == Release) + if (ME->getInstanceReceiver()) + if (Expr *Receiver = ME->getInstanceReceiver()->IgnoreParenCasts()) + if (ObjCIvarRefExpr *E = dyn_cast(Receiver)) + if (E->getDecl() == ID) + return true; + + // [self setMyIvar:nil]; + if (ObjCMessageExpr *ME = dyn_cast(S)) + if (ME->getInstanceReceiver()) + if (Expr *Receiver = ME->getInstanceReceiver()->IgnoreParenCasts()) + if (DeclRefExpr *E = dyn_cast(Receiver)) + if (E->getDecl()->getIdentifier() == SelfII) + if (ME->getMethodDecl() == PD->getSetterMethodDecl() && + ME->getNumArgs() == 1 && + ME->getArg(0)->isNullPointerConstant(Ctx, + Expr::NPC_ValueDependentIsNull)) + return true; + + // self.myIvar = nil; + if (BinaryOperator* BO = dyn_cast(S)) + if (BO->isAssignmentOp()) + if (ObjCPropertyRefExpr *PRE = + dyn_cast(BO->getLHS()->IgnoreParenCasts())) + if (PRE->isExplicitProperty() && PRE->getExplicitProperty() == PD) + if (BO->getRHS()->isNullPointerConstant(Ctx, + Expr::NPC_ValueDependentIsNull)) { + // This is only a 'release' if the property kind is not + // 'assign'. + return PD->getSetterKind() != ObjCPropertyDecl::Assign;; + } + + // Recurse to children. + for (Stmt::child_iterator I = S->child_begin(), E= S->child_end(); I!=E; ++I) + if (*I && scan_ivar_release(*I, ID, PD, Release, SelfII, Ctx)) + return true; + + return false; +} + +static void checkObjCDealloc(const ObjCImplementationDecl *D, + const LangOptions& LOpts, BugReporter& BR) { + + assert (LOpts.getGC() != LangOptions::GCOnly); + + ASTContext &Ctx = BR.getContext(); + const ObjCInterfaceDecl *ID = D->getClassInterface(); + + // Does the class contain any ivars that are pointers (or id<...>)? + // If not, skip the check entirely. + // NOTE: This is motivated by PR 2517: + // http://llvm.org/bugs/show_bug.cgi?id=2517 + + bool containsPointerIvar = false; + + for (ObjCInterfaceDecl::ivar_iterator I=ID->ivar_begin(), E=ID->ivar_end(); + I!=E; ++I) { + + ObjCIvarDecl *ID = *I; + QualType T = ID->getType(); + + if (!T->isObjCObjectPointerType() || + ID->getAttr() || // Skip IBOutlets. + ID->getAttr()) // Skip IBOutletCollections. + continue; + + containsPointerIvar = true; + break; + } + + if (!containsPointerIvar) + return; + + // Determine if the class subclasses NSObject. + IdentifierInfo* NSObjectII = &Ctx.Idents.get("NSObject"); + IdentifierInfo* SenTestCaseII = &Ctx.Idents.get("SenTestCase"); + + + for ( ; ID ; ID = ID->getSuperClass()) { + IdentifierInfo *II = ID->getIdentifier(); + + if (II == NSObjectII) + break; + + // FIXME: For now, ignore classes that subclass SenTestCase, as these don't + // need to implement -dealloc. They implement tear down in another way, + // which we should try and catch later. + // http://llvm.org/bugs/show_bug.cgi?id=3187 + if (II == SenTestCaseII) + return; + } + + if (!ID) + return; + + // Get the "dealloc" selector. + IdentifierInfo* II = &Ctx.Idents.get("dealloc"); + Selector S = Ctx.Selectors.getSelector(0, &II); + ObjCMethodDecl *MD = 0; + + // Scan the instance methods for "dealloc". + for (ObjCImplementationDecl::instmeth_iterator I = D->instmeth_begin(), + E = D->instmeth_end(); I!=E; ++I) { + + if ((*I)->getSelector() == S) { + MD = *I; + break; + } + } + + PathDiagnosticLocation DLoc = + PathDiagnosticLocation::createBegin(D, BR.getSourceManager()); + + if (!MD) { // No dealloc found. + + const char* name = LOpts.getGC() == LangOptions::NonGC + ? "missing -dealloc" + : "missing -dealloc (Hybrid MM, non-GC)"; + + std::string buf; + llvm::raw_string_ostream os(buf); + os << "Objective-C class '" << *D << "' lacks a 'dealloc' instance method"; + + BR.EmitBasicReport(D, name, categories::CoreFoundationObjectiveC, + os.str(), DLoc); + return; + } + + // dealloc found. Scan for missing [super dealloc]. + if (MD->getBody() && !scan_dealloc(MD->getBody(), S)) { + + const char* name = LOpts.getGC() == LangOptions::NonGC + ? "missing [super dealloc]" + : "missing [super dealloc] (Hybrid MM, non-GC)"; + + std::string buf; + llvm::raw_string_ostream os(buf); + os << "The 'dealloc' instance method in Objective-C class '" << *D + << "' does not send a 'dealloc' message to its super class" + " (missing [super dealloc])"; + + BR.EmitBasicReport(MD, name, categories::CoreFoundationObjectiveC, + os.str(), DLoc); + return; + } + + // Get the "release" selector. + IdentifierInfo* RII = &Ctx.Idents.get("release"); + Selector RS = Ctx.Selectors.getSelector(0, &RII); + + // Get the "self" identifier + IdentifierInfo* SelfII = &Ctx.Idents.get("self"); + + // Scan for missing and extra releases of ivars used by implementations + // of synthesized properties + for (ObjCImplementationDecl::propimpl_iterator I = D->propimpl_begin(), + E = D->propimpl_end(); I!=E; ++I) { + + // We can only check the synthesized properties + if ((*I)->getPropertyImplementation() != ObjCPropertyImplDecl::Synthesize) + continue; + + ObjCIvarDecl *ID = (*I)->getPropertyIvarDecl(); + if (!ID) + continue; + + QualType T = ID->getType(); + if (!T->isObjCObjectPointerType()) // Skip non-pointer ivars + continue; + + const ObjCPropertyDecl *PD = (*I)->getPropertyDecl(); + if (!PD) + continue; + + // ivars cannot be set via read-only properties, so we'll skip them + if (PD->isReadOnly()) + continue; + + // ivar must be released if and only if the kind of setter was not 'assign' + bool requiresRelease = PD->getSetterKind() != ObjCPropertyDecl::Assign; + if (scan_ivar_release(MD->getBody(), ID, PD, RS, SelfII, Ctx) + != requiresRelease) { + const char *name = 0; + std::string buf; + llvm::raw_string_ostream os(buf); + + if (requiresRelease) { + name = LOpts.getGC() == LangOptions::NonGC + ? "missing ivar release (leak)" + : "missing ivar release (Hybrid MM, non-GC)"; + + os << "The '" << *ID + << "' instance variable was retained by a synthesized property but " + "wasn't released in 'dealloc'"; + } else { + name = LOpts.getGC() == LangOptions::NonGC + ? "extra ivar release (use-after-release)" + : "extra ivar release (Hybrid MM, non-GC)"; + + os << "The '" << *ID + << "' instance variable was not retained by a synthesized property " + "but was released in 'dealloc'"; + } + + PathDiagnosticLocation SDLoc = + PathDiagnosticLocation::createBegin((*I), BR.getSourceManager()); + + BR.EmitBasicReport(MD, name, categories::CoreFoundationObjectiveC, + os.str(), SDLoc); + } + } +} + +//===----------------------------------------------------------------------===// +// ObjCDeallocChecker +//===----------------------------------------------------------------------===// + +namespace { +class ObjCDeallocChecker : public Checker< + check::ASTDecl > { +public: + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (mgr.getLangOpts().getGC() == LangOptions::GCOnly) + return; + checkObjCDealloc(cast(D), mgr.getLangOpts(), BR); + } +}; +} + +void ento::registerObjCDeallocChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp new file mode 100644 index 0000000..6df47b1 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp @@ -0,0 +1,146 @@ +//=- CheckObjCInstMethodRetTy.cpp - Check ObjC method signatures -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a CheckObjCInstMethSignature, a flow-insenstive check +// that determines if an Objective-C class interface incorrectly redefines +// the method signature in a subclass. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Type.h" +#include "clang/AST/ASTContext.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +static bool AreTypesCompatible(QualType Derived, QualType Ancestor, + ASTContext &C) { + + // Right now don't compare the compatibility of pointers. That involves + // looking at subtyping relationships. FIXME: Future patch. + if (Derived->isAnyPointerType() && Ancestor->isAnyPointerType()) + return true; + + return C.typesAreCompatible(Derived, Ancestor); +} + +static void CompareReturnTypes(const ObjCMethodDecl *MethDerived, + const ObjCMethodDecl *MethAncestor, + BugReporter &BR, ASTContext &Ctx, + const ObjCImplementationDecl *ID) { + + QualType ResDerived = MethDerived->getResultType(); + QualType ResAncestor = MethAncestor->getResultType(); + + if (!AreTypesCompatible(ResDerived, ResAncestor, Ctx)) { + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + os << "The Objective-C class '" + << *MethDerived->getClassInterface() + << "', which is derived from class '" + << *MethAncestor->getClassInterface() + << "', defines the instance method '" + << MethDerived->getSelector().getAsString() + << "' whose return type is '" + << ResDerived.getAsString() + << "'. A method with the same name (same selector) is also defined in " + "class '" + << *MethAncestor->getClassInterface() + << "' and has a return type of '" + << ResAncestor.getAsString() + << "'. These two types are incompatible, and may result in undefined " + "behavior for clients of these classes."; + + PathDiagnosticLocation MethDLoc = + PathDiagnosticLocation::createBegin(MethDerived, + BR.getSourceManager()); + + BR.EmitBasicReport(MethDerived, + "Incompatible instance method return type", + categories::CoreFoundationObjectiveC, + os.str(), MethDLoc); + } +} + +static void CheckObjCInstMethSignature(const ObjCImplementationDecl *ID, + BugReporter& BR) { + + const ObjCInterfaceDecl *D = ID->getClassInterface(); + const ObjCInterfaceDecl *C = D->getSuperClass(); + + if (!C) + return; + + ASTContext &Ctx = BR.getContext(); + + // Build a DenseMap of the methods for quick querying. + typedef llvm::DenseMap MapTy; + MapTy IMeths; + unsigned NumMethods = 0; + + for (ObjCImplementationDecl::instmeth_iterator I=ID->instmeth_begin(), + E=ID->instmeth_end(); I!=E; ++I) { + + ObjCMethodDecl *M = *I; + IMeths[M->getSelector()] = M; + ++NumMethods; + } + + // Now recurse the class hierarchy chain looking for methods with the + // same signatures. + while (C && NumMethods) { + for (ObjCInterfaceDecl::instmeth_iterator I=C->instmeth_begin(), + E=C->instmeth_end(); I!=E; ++I) { + + ObjCMethodDecl *M = *I; + Selector S = M->getSelector(); + + MapTy::iterator MI = IMeths.find(S); + + if (MI == IMeths.end() || MI->second == 0) + continue; + + --NumMethods; + ObjCMethodDecl *MethDerived = MI->second; + MI->second = 0; + + CompareReturnTypes(MethDerived, M, BR, Ctx, ID); + } + + C = C->getSuperClass(); + } +} + +//===----------------------------------------------------------------------===// +// ObjCMethSigsChecker +//===----------------------------------------------------------------------===// + +namespace { +class ObjCMethSigsChecker : public Checker< + check::ASTDecl > { +public: + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& mgr, + BugReporter &BR) const { + CheckObjCInstMethSignature(D, BR); + } +}; +} + +void ento::registerObjCMethSigsChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp new file mode 100644 index 0000000..dde9071 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp @@ -0,0 +1,786 @@ +//==- CheckSecuritySyntaxOnly.cpp - Basic security checks --------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a set of flow-insensitive security checks. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/Analysis/AnalysisContext.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +static bool isArc4RandomAvailable(const ASTContext &Ctx) { + const llvm::Triple &T = Ctx.getTargetInfo().getTriple(); + return T.getVendor() == llvm::Triple::Apple || + T.getOS() == llvm::Triple::FreeBSD || + T.getOS() == llvm::Triple::NetBSD || + T.getOS() == llvm::Triple::OpenBSD || + T.getOS() == llvm::Triple::DragonFly; +} + +namespace { +struct DefaultBool { + bool val; + DefaultBool() : val(false) {} + operator bool() const { return val; } + DefaultBool &operator=(bool b) { val = b; return *this; } +}; + +struct ChecksFilter { + DefaultBool check_gets; + DefaultBool check_getpw; + DefaultBool check_mktemp; + DefaultBool check_mkstemp; + DefaultBool check_strcpy; + DefaultBool check_rand; + DefaultBool check_vfork; + DefaultBool check_FloatLoopCounter; + DefaultBool check_UncheckedReturn; +}; + +class WalkAST : public StmtVisitor { + BugReporter &BR; + AnalysisDeclContext* AC; + enum { num_setids = 6 }; + IdentifierInfo *II_setid[num_setids]; + + const bool CheckRand; + const ChecksFilter &filter; + +public: + WalkAST(BugReporter &br, AnalysisDeclContext* ac, + const ChecksFilter &f) + : BR(br), AC(ac), II_setid(), + CheckRand(isArc4RandomAvailable(BR.getContext())), + filter(f) {} + + // Statement visitor methods. + void VisitCallExpr(CallExpr *CE); + void VisitForStmt(ForStmt *S); + void VisitCompoundStmt (CompoundStmt *S); + void VisitStmt(Stmt *S) { VisitChildren(S); } + + void VisitChildren(Stmt *S); + + // Helpers. + bool checkCall_strCommon(const CallExpr *CE, const FunctionDecl *FD); + + typedef void (WalkAST::*FnCheck)(const CallExpr *, + const FunctionDecl *); + + // Checker-specific methods. + void checkLoopConditionForFloat(const ForStmt *FS); + void checkCall_gets(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_getpw(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_mktemp(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_mkstemp(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_strcpy(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_strcat(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_rand(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_random(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_vfork(const CallExpr *CE, const FunctionDecl *FD); + void checkUncheckedReturnValue(CallExpr *CE); +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// AST walking. +//===----------------------------------------------------------------------===// + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I) + if (Stmt *child = *I) + Visit(child); +} + +void WalkAST::VisitCallExpr(CallExpr *CE) { + // Get the callee. + const FunctionDecl *FD = CE->getDirectCallee(); + + if (!FD) + return; + + // Get the name of the callee. If it's a builtin, strip off the prefix. + IdentifierInfo *II = FD->getIdentifier(); + if (!II) // if no identifier, not a simple C function + return; + StringRef Name = II->getName(); + if (Name.startswith("__builtin_")) + Name = Name.substr(10); + + // Set the evaluation function by switching on the callee name. + FnCheck evalFunction = llvm::StringSwitch(Name) + .Case("gets", &WalkAST::checkCall_gets) + .Case("getpw", &WalkAST::checkCall_getpw) + .Case("mktemp", &WalkAST::checkCall_mktemp) + .Case("mkstemp", &WalkAST::checkCall_mkstemp) + .Case("mkdtemp", &WalkAST::checkCall_mkstemp) + .Case("mkstemps", &WalkAST::checkCall_mkstemp) + .Cases("strcpy", "__strcpy_chk", &WalkAST::checkCall_strcpy) + .Cases("strcat", "__strcat_chk", &WalkAST::checkCall_strcat) + .Case("drand48", &WalkAST::checkCall_rand) + .Case("erand48", &WalkAST::checkCall_rand) + .Case("jrand48", &WalkAST::checkCall_rand) + .Case("lrand48", &WalkAST::checkCall_rand) + .Case("mrand48", &WalkAST::checkCall_rand) + .Case("nrand48", &WalkAST::checkCall_rand) + .Case("lcong48", &WalkAST::checkCall_rand) + .Case("rand", &WalkAST::checkCall_rand) + .Case("rand_r", &WalkAST::checkCall_rand) + .Case("random", &WalkAST::checkCall_random) + .Case("vfork", &WalkAST::checkCall_vfork) + .Default(NULL); + + // If the callee isn't defined, it is not of security concern. + // Check and evaluate the call. + if (evalFunction) + (this->*evalFunction)(CE, FD); + + // Recurse and check children. + VisitChildren(CE); +} + +void WalkAST::VisitCompoundStmt(CompoundStmt *S) { + for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I) + if (Stmt *child = *I) { + if (CallExpr *CE = dyn_cast(child)) + checkUncheckedReturnValue(CE); + Visit(child); + } +} + +void WalkAST::VisitForStmt(ForStmt *FS) { + checkLoopConditionForFloat(FS); + + // Recurse and check children. + VisitChildren(FS); +} + +//===----------------------------------------------------------------------===// +// Check: floating poing variable used as loop counter. +// Originally: +// Implements: CERT security coding advisory FLP-30. +//===----------------------------------------------------------------------===// + +static const DeclRefExpr* +getIncrementedVar(const Expr *expr, const VarDecl *x, const VarDecl *y) { + expr = expr->IgnoreParenCasts(); + + if (const BinaryOperator *B = dyn_cast(expr)) { + if (!(B->isAssignmentOp() || B->isCompoundAssignmentOp() || + B->getOpcode() == BO_Comma)) + return NULL; + + if (const DeclRefExpr *lhs = getIncrementedVar(B->getLHS(), x, y)) + return lhs; + + if (const DeclRefExpr *rhs = getIncrementedVar(B->getRHS(), x, y)) + return rhs; + + return NULL; + } + + if (const DeclRefExpr *DR = dyn_cast(expr)) { + const NamedDecl *ND = DR->getDecl(); + return ND == x || ND == y ? DR : NULL; + } + + if (const UnaryOperator *U = dyn_cast(expr)) + return U->isIncrementDecrementOp() + ? getIncrementedVar(U->getSubExpr(), x, y) : NULL; + + return NULL; +} + +/// CheckLoopConditionForFloat - This check looks for 'for' statements that +/// use a floating point variable as a loop counter. +/// CERT: FLP30-C, FLP30-CPP. +/// +void WalkAST::checkLoopConditionForFloat(const ForStmt *FS) { + if (!filter.check_FloatLoopCounter) + return; + + // Does the loop have a condition? + const Expr *condition = FS->getCond(); + + if (!condition) + return; + + // Does the loop have an increment? + const Expr *increment = FS->getInc(); + + if (!increment) + return; + + // Strip away '()' and casts. + condition = condition->IgnoreParenCasts(); + increment = increment->IgnoreParenCasts(); + + // Is the loop condition a comparison? + const BinaryOperator *B = dyn_cast(condition); + + if (!B) + return; + + // Is this a comparison? + if (!(B->isRelationalOp() || B->isEqualityOp())) + return; + + // Are we comparing variables? + const DeclRefExpr *drLHS = + dyn_cast(B->getLHS()->IgnoreParenLValueCasts()); + const DeclRefExpr *drRHS = + dyn_cast(B->getRHS()->IgnoreParenLValueCasts()); + + // Does at least one of the variables have a floating point type? + drLHS = drLHS && drLHS->getType()->isRealFloatingType() ? drLHS : NULL; + drRHS = drRHS && drRHS->getType()->isRealFloatingType() ? drRHS : NULL; + + if (!drLHS && !drRHS) + return; + + const VarDecl *vdLHS = drLHS ? dyn_cast(drLHS->getDecl()) : NULL; + const VarDecl *vdRHS = drRHS ? dyn_cast(drRHS->getDecl()) : NULL; + + if (!vdLHS && !vdRHS) + return; + + // Does either variable appear in increment? + const DeclRefExpr *drInc = getIncrementedVar(increment, vdLHS, vdRHS); + + if (!drInc) + return; + + // Emit the error. First figure out which DeclRefExpr in the condition + // referenced the compared variable. + const DeclRefExpr *drCond = vdLHS == drInc->getDecl() ? drLHS : drRHS; + + SmallVector ranges; + SmallString<256> sbuf; + llvm::raw_svector_ostream os(sbuf); + + os << "Variable '" << drCond->getDecl()->getName() + << "' with floating point type '" << drCond->getType().getAsString() + << "' should not be used as a loop counter"; + + ranges.push_back(drCond->getSourceRange()); + ranges.push_back(drInc->getSourceRange()); + + const char *bugType = "Floating point variable used as loop counter"; + + PathDiagnosticLocation FSLoc = + PathDiagnosticLocation::createBegin(FS, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + bugType, "Security", os.str(), + FSLoc, ranges.data(), ranges.size()); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'gets' is insecure. +// Originally: +// Implements (part of): 300-BSI (buildsecurityin.us-cert.gov) +// CWE-242: Use of Inherently Dangerous Function +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_gets(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_gets) + return; + + const FunctionProtoType *FPT + = dyn_cast(FD->getType().IgnoreParens()); + if (!FPT) + return; + + // Verify that the function takes a single argument. + if (FPT->getNumArgs() != 1) + return; + + // Is the argument a 'char*'? + const PointerType *PT = dyn_cast(FPT->getArgType(0)); + if (!PT) + return; + + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().CharTy) + return; + + // Issue a warning. + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + "Potential buffer overflow in call to 'gets'", + "Security", + "Call to function 'gets' is extremely insecure as it can " + "always result in a buffer overflow", + CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'getpwd' is insecure. +// CWE-477: Use of Obsolete Functions +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_getpw(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_getpw) + return; + + const FunctionProtoType *FPT + = dyn_cast(FD->getType().IgnoreParens()); + if (!FPT) + return; + + // Verify that the function takes two arguments. + if (FPT->getNumArgs() != 2) + return; + + // Verify the first argument type is integer. + if (!FPT->getArgType(0)->isIntegerType()) + return; + + // Verify the second argument type is char*. + const PointerType *PT = dyn_cast(FPT->getArgType(1)); + if (!PT) + return; + + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().CharTy) + return; + + // Issue a warning. + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + "Potential buffer overflow in call to 'getpw'", + "Security", + "The getpw() function is dangerous as it may overflow the " + "provided buffer. It is obsoleted by getpwuid().", + CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'mktemp' is insecure. It is obsoleted by mkstemp(). +// CWE-377: Insecure Temporary File +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_mktemp(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_mktemp) { + // Fall back to the security check of looking for enough 'X's in the + // format string, since that is a less severe warning. + checkCall_mkstemp(CE, FD); + return; + } + + const FunctionProtoType *FPT + = dyn_cast(FD->getType().IgnoreParens()); + if(!FPT) + return; + + // Verify that the function takes a single argument. + if (FPT->getNumArgs() != 1) + return; + + // Verify that the argument is Pointer Type. + const PointerType *PT = dyn_cast(FPT->getArgType(0)); + if (!PT) + return; + + // Verify that the argument is a 'char*'. + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().CharTy) + return; + + // Issue a waring. + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + "Potential insecure temporary file in call 'mktemp'", + "Security", + "Call to function 'mktemp' is insecure as it always " + "creates or uses insecure temporary file. Use 'mkstemp' " + "instead", + CELoc, &R, 1); +} + + +//===----------------------------------------------------------------------===// +// Check: Use of 'mkstemp', 'mktemp', 'mkdtemp' should contain at least 6 X's. +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_mkstemp(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_mkstemp) + return; + + StringRef Name = FD->getIdentifier()->getName(); + std::pair ArgSuffix = + llvm::StringSwitch >(Name) + .Case("mktemp", std::make_pair(0,-1)) + .Case("mkstemp", std::make_pair(0,-1)) + .Case("mkdtemp", std::make_pair(0,-1)) + .Case("mkstemps", std::make_pair(0,1)) + .Default(std::make_pair(-1, -1)); + + assert(ArgSuffix.first >= 0 && "Unsupported function"); + + // Check if the number of arguments is consistent with out expectations. + unsigned numArgs = CE->getNumArgs(); + if ((signed) numArgs <= ArgSuffix.first) + return; + + const StringLiteral *strArg = + dyn_cast(CE->getArg((unsigned)ArgSuffix.first) + ->IgnoreParenImpCasts()); + + // Currently we only handle string literals. It is possible to do better, + // either by looking at references to const variables, or by doing real + // flow analysis. + if (!strArg || strArg->getCharByteWidth() != 1) + return; + + // Count the number of X's, taking into account a possible cutoff suffix. + StringRef str = strArg->getString(); + unsigned numX = 0; + unsigned n = str.size(); + + // Take into account the suffix. + unsigned suffix = 0; + if (ArgSuffix.second >= 0) { + const Expr *suffixEx = CE->getArg((unsigned)ArgSuffix.second); + llvm::APSInt Result; + if (!suffixEx->EvaluateAsInt(Result, BR.getContext())) + return; + // FIXME: Issue a warning. + if (Result.isNegative()) + return; + suffix = (unsigned) Result.getZExtValue(); + n = (n > suffix) ? n - suffix : 0; + } + + for (unsigned i = 0; i < n; ++i) + if (str[i] == 'X') ++numX; + + if (numX >= 6) + return; + + // Issue a warning. + SourceRange R = strArg->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + SmallString<512> buf; + llvm::raw_svector_ostream out(buf); + out << "Call to '" << Name << "' should have at least 6 'X's in the" + " format string to be secure (" << numX << " 'X'"; + if (numX != 1) + out << 's'; + out << " seen"; + if (suffix) { + out << ", " << suffix << " character"; + if (suffix > 1) + out << 's'; + out << " used as a suffix"; + } + out << ')'; + BR.EmitBasicReport(AC->getDecl(), + "Insecure temporary file creation", "Security", + out.str(), CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'strcpy' is insecure. +// +// CWE-119: Improper Restriction of Operations within +// the Bounds of a Memory Buffer +//===----------------------------------------------------------------------===// +void WalkAST::checkCall_strcpy(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_strcpy) + return; + + if (!checkCall_strCommon(CE, FD)) + return; + + // Issue a warning. + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + "Potential insecure memory buffer bounds restriction in " + "call 'strcpy'", + "Security", + "Call to function 'strcpy' is insecure as it does not " + "provide bounding of the memory buffer. Replace " + "unbounded copy functions with analogous functions that " + "support length arguments such as 'strlcpy'. CWE-119.", + CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'strcat' is insecure. +// +// CWE-119: Improper Restriction of Operations within +// the Bounds of a Memory Buffer +//===----------------------------------------------------------------------===// +void WalkAST::checkCall_strcat(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_strcpy) + return; + + if (!checkCall_strCommon(CE, FD)) + return; + + // Issue a warning. + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + "Potential insecure memory buffer bounds restriction in " + "call 'strcat'", + "Security", + "Call to function 'strcat' is insecure as it does not " + "provide bounding of the memory buffer. Replace " + "unbounded copy functions with analogous functions that " + "support length arguments such as 'strlcat'. CWE-119.", + CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// Common check for str* functions with no bounds parameters. +//===----------------------------------------------------------------------===// +bool WalkAST::checkCall_strCommon(const CallExpr *CE, const FunctionDecl *FD) { + const FunctionProtoType *FPT + = dyn_cast(FD->getType().IgnoreParens()); + if (!FPT) + return false; + + // Verify the function takes two arguments, three in the _chk version. + int numArgs = FPT->getNumArgs(); + if (numArgs != 2 && numArgs != 3) + return false; + + // Verify the type for both arguments. + for (int i = 0; i < 2; i++) { + // Verify that the arguments are pointers. + const PointerType *PT = dyn_cast(FPT->getArgType(i)); + if (!PT) + return false; + + // Verify that the argument is a 'char*'. + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().CharTy) + return false; + } + + return true; +} + +//===----------------------------------------------------------------------===// +// Check: Linear congruent random number generators should not be used +// Originally: +// CWE-338: Use of cryptographically weak prng +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_rand(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_rand || !CheckRand) + return; + + const FunctionProtoType *FTP + = dyn_cast(FD->getType().IgnoreParens()); + if (!FTP) + return; + + if (FTP->getNumArgs() == 1) { + // Is the argument an 'unsigned short *'? + // (Actually any integer type is allowed.) + const PointerType *PT = dyn_cast(FTP->getArgType(0)); + if (!PT) + return; + + if (! PT->getPointeeType()->isIntegerType()) + return; + } + else if (FTP->getNumArgs() != 0) + return; + + // Issue a warning. + SmallString<256> buf1; + llvm::raw_svector_ostream os1(buf1); + os1 << '\'' << *FD << "' is a poor random number generator"; + + SmallString<256> buf2; + llvm::raw_svector_ostream os2(buf2); + os2 << "Function '" << *FD + << "' is obsolete because it implements a poor random number generator." + << " Use 'arc4random' instead"; + + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), os1.str(), "Security", os2.str(), + CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// Check: 'random' should not be used +// Originally: +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_random(const CallExpr *CE, const FunctionDecl *FD) { + if (!CheckRand || !filter.check_rand) + return; + + const FunctionProtoType *FTP + = dyn_cast(FD->getType().IgnoreParens()); + if (!FTP) + return; + + // Verify that the function takes no argument. + if (FTP->getNumArgs() != 0) + return; + + // Issue a warning. + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + "'random' is not a secure random number generator", + "Security", + "The 'random' function produces a sequence of values that " + "an adversary may be able to predict. Use 'arc4random' " + "instead", CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// Check: 'vfork' should not be used. +// POS33-C: Do not use vfork(). +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_vfork(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_vfork) + return; + + // All calls to vfork() are insecure, issue a warning. + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + "Potential insecure implementation-specific behavior in " + "call 'vfork'", + "Security", + "Call to function 'vfork' is insecure as it can lead to " + "denial of service situations in the parent process. " + "Replace calls to vfork with calls to the safer " + "'posix_spawn' function", + CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// Check: Should check whether privileges are dropped successfully. +// Originally: +//===----------------------------------------------------------------------===// + +void WalkAST::checkUncheckedReturnValue(CallExpr *CE) { + if (!filter.check_UncheckedReturn) + return; + + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return; + + if (II_setid[0] == NULL) { + static const char * const identifiers[num_setids] = { + "setuid", "setgid", "seteuid", "setegid", + "setreuid", "setregid" + }; + + for (size_t i = 0; i < num_setids; i++) + II_setid[i] = &BR.getContext().Idents.get(identifiers[i]); + } + + const IdentifierInfo *id = FD->getIdentifier(); + size_t identifierid; + + for (identifierid = 0; identifierid < num_setids; identifierid++) + if (id == II_setid[identifierid]) + break; + + if (identifierid >= num_setids) + return; + + const FunctionProtoType *FTP + = dyn_cast(FD->getType().IgnoreParens()); + if (!FTP) + return; + + // Verify that the function takes one or two arguments (depending on + // the function). + if (FTP->getNumArgs() != (identifierid < 4 ? 1 : 2)) + return; + + // The arguments must be integers. + for (unsigned i = 0; i < FTP->getNumArgs(); i++) + if (! FTP->getArgType(i)->isIntegerType()) + return; + + // Issue a warning. + SmallString<256> buf1; + llvm::raw_svector_ostream os1(buf1); + os1 << "Return value is not checked in call to '" << *FD << '\''; + + SmallString<256> buf2; + llvm::raw_svector_ostream os2(buf2); + os2 << "The return value from the call to '" << *FD + << "' is not checked. If an error occurs in '" << *FD + << "', the following code may execute with unexpected privileges"; + + SourceRange R = CE->getCallee()->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), os1.str(), "Security", os2.str(), + CELoc, &R, 1); +} + +//===----------------------------------------------------------------------===// +// SecuritySyntaxChecker +//===----------------------------------------------------------------------===// + +namespace { +class SecuritySyntaxChecker : public Checker { +public: + ChecksFilter filter; + + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + WalkAST walker(BR, mgr.getAnalysisDeclContext(D), filter); + walker.Visit(D->getBody()); + } +}; +} + +#define REGISTER_CHECKER(name) \ +void ento::register##name(CheckerManager &mgr) {\ + mgr.registerChecker()->filter.check_##name = true;\ +} + +REGISTER_CHECKER(gets) +REGISTER_CHECKER(getpw) +REGISTER_CHECKER(mkstemp) +REGISTER_CHECKER(mktemp) +REGISTER_CHECKER(strcpy) +REGISTER_CHECKER(rand) +REGISTER_CHECKER(vfork) +REGISTER_CHECKER(FloatLoopCounter) +REGISTER_CHECKER(UncheckedReturn) + + diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp new file mode 100644 index 0000000..cc7fd37 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp @@ -0,0 +1,92 @@ +//==- CheckSizeofPointer.cpp - Check for sizeof on pointers ------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a check for unintended use of sizeof() on pointer +// expressions. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" + +using namespace clang; +using namespace ento; + +namespace { +class WalkAST : public StmtVisitor { + BugReporter &BR; + AnalysisDeclContext* AC; + +public: + WalkAST(BugReporter &br, AnalysisDeclContext* ac) : BR(br), AC(ac) {} + void VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E); + void VisitStmt(Stmt *S) { VisitChildren(S); } + void VisitChildren(Stmt *S); +}; +} + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I) + if (Stmt *child = *I) + Visit(child); +} + +// CWE-467: Use of sizeof() on a Pointer Type +void WalkAST::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E) { + if (E->getKind() != UETT_SizeOf) + return; + + // If an explicit type is used in the code, usually the coder knows what he is + // doing. + if (E->isArgumentType()) + return; + + QualType T = E->getTypeOfArgument(); + if (T->isPointerType()) { + + // Many false positives have the form 'sizeof *p'. This is reasonable + // because people know what they are doing when they intentionally + // dereference the pointer. + Expr *ArgEx = E->getArgumentExpr(); + if (!isa(ArgEx->IgnoreParens())) + return; + + SourceRange R = ArgEx->getSourceRange(); + PathDiagnosticLocation ELoc = + PathDiagnosticLocation::createBegin(E, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + "Potential unintended use of sizeof() on pointer type", + "Logic", + "The code calls sizeof() on a pointer type. " + "This can produce an unexpected result.", + ELoc, &R, 1); + } +} + +//===----------------------------------------------------------------------===// +// SizeofPointerChecker +//===----------------------------------------------------------------------===// + +namespace { +class SizeofPointerChecker : public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + WalkAST walker(BR, mgr.getAnalysisDeclContext(D)); + walker.Visit(D->getBody()); + } +}; +} + +void ento::registerSizeofPointerChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp new file mode 100644 index 0000000..843502f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp @@ -0,0 +1,233 @@ +//= CheckerDocumentation.cpp - Documentation checker ---------------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This checker lists all the checker callbacks and provides documentation for +// checker writers. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" + +using namespace clang; +using namespace ento; + +// All checkers should be placed into anonymous namespace. +// We place the CheckerDocumentation inside ento namespace to make the +// it visible in doxygen. +namespace ento { + +/// This checker documents the callback functions checkers can use to implement +/// the custom handling of the specific events during path exploration as well +/// as reporting bugs. Most of the callbacks are targeted at path-sensitive +/// checking. +/// +/// \sa CheckerContext +class CheckerDocumentation : public Checker< check::PreStmt, + check::PostStmt, + check::PreObjCMessage, + check::PostObjCMessage, + check::BranchCondition, + check::Location, + check::Bind, + check::DeadSymbols, + check::EndPath, + check::EndAnalysis, + check::EndOfTranslationUnit, + eval::Call, + eval::Assume, + check::LiveSymbols, + check::RegionChanges, + check::Event, + check::ASTDecl > { +public: + + /// \brief Pre-visit the Statement. + /// + /// The method will be called before the analyzer core processes the + /// statement. The notification is performed for every explored CFGElement, + /// which does not include the control flow statements such as IfStmt. The + /// callback can be specialized to be called with any subclass of Stmt. + /// + /// See checkBranchCondition() callback for performing custom processing of + /// the branching statements. + /// + /// check::PreStmt + void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {} + + /// \brief Post-visit the Statement. + /// + /// The method will be called after the analyzer core processes the + /// statement. The notification is performed for every explored CFGElement, + /// which does not include the control flow statements such as IfStmt. The + /// callback can be specialized to be called with any subclass of Stmt. + /// + /// check::PostStmt + void checkPostStmt(const CallExpr *DS, CheckerContext &C) const; + + /// \brief Pre-visit the Objective C messages. + void checkPreObjCMessage(const ObjCMessage &Msg, CheckerContext &C) const {} + + /// \brief Post-visit the Objective C messages. + void checkPostObjCMessage(const ObjCMessage &Msg, CheckerContext &C) const {} + + /// \brief Pre-visit of the condition statement of a branch (such as IfStmt). + void checkBranchCondition(const Stmt *Condition, CheckerContext &Ctx) const {} + + /// \brief Called on a load from and a store to a location. + /// + /// The method will be called each time a location (pointer) value is + /// accessed. + /// \param Loc The value of the location (pointer). + /// \param IsLoad The flag specifying if the location is a store or a load. + /// \param S The load is performed while processing the statement. + /// + /// check::Location + void checkLocation(SVal Loc, bool IsLoad, const Stmt *S, + CheckerContext &C) const {} + + /// \brief Called on binding of a value to a location. + /// + /// \param Loc The value of the location (pointer). + /// \param Val The value which will be stored at the location Loc. + /// \param S The bind is performed while processing the statement S. + /// + /// check::Bind + void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const {} + + + /// \brief Called whenever a symbol becomes dead. + /// + /// This callback should be used by the checkers to aggressively clean + /// up/reduce the checker state, which is important for reducing the overall + /// memory usage. Specifically, if a checker keeps symbol specific information + /// in the sate, it can and should be dropped after the symbol becomes dead. + /// In addition, reporting a bug as soon as the checker becomes dead leads to + /// more precise diagnostics. (For example, one should report that a malloced + /// variable is not freed right after it goes out of scope.) + /// + /// \param SR The SymbolReaper object can be queried to determine which + /// symbols are dead. + /// + /// check::DeadSymbols + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const {} + + /// \brief Called when an end of path is reached in the ExplodedGraph. + /// + /// This callback should be used to check if the allocated resources are freed. + /// + /// check::EndPath + void checkEndPath(CheckerContext &Ctx) const {} + + /// \brief Called after all the paths in the ExplodedGraph reach end of path + /// - the symbolic execution graph is fully explored. + /// + /// This callback should be used in cases when a checker needs to have a + /// global view of the information generated on all paths. For example, to + /// compare execution summary/result several paths. + /// See IdempotentOperationChecker for a usage example. + /// + /// check::EndAnalysis + void checkEndAnalysis(ExplodedGraph &G, + BugReporter &BR, + ExprEngine &Eng) const {} + + /// \brief Called after analysis of a TranslationUnit is complete. + /// + /// check::EndOfTranslationUnit + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const {} + + + /// \brief Evaluates function call. + /// + /// The analysis core threats all function calls in the same way. However, some + /// functions have special meaning, which should be reflected in the program + /// state. This callback allows a checker to provide domain specific knowledge + /// about the particular functions it knows about. + /// + /// \returns true if the call has been successfully evaluated + /// and false otherwise. Note, that only one checker can evaluate a call. If + /// more then one checker claim that they can evaluate the same call the + /// first one wins. + /// + /// eval::Call + bool evalCall(const CallExpr *CE, CheckerContext &C) const { return true; } + + /// \brief Handles assumptions on symbolic values. + /// + /// This method is called when a symbolic expression is assumed to be true or + /// false. For example, the assumptions are performed when evaluating a + /// condition at a branch. The callback allows checkers track the assumptions + /// performed on the symbols of interest and change the state accordingly. + /// + /// eval::Assume + ProgramStateRef evalAssume(ProgramStateRef State, + SVal Cond, + bool Assumption) const { return State; } + + /// Allows modifying SymbolReaper object. For example, checkers can explicitly + /// register symbols of interest as live. These symbols will not be marked + /// dead and removed. + /// + /// check::LiveSymbols + void checkLiveSymbols(ProgramStateRef State, SymbolReaper &SR) const {} + + + bool wantsRegionChangeUpdate(ProgramStateRef St) const { return true; } + + /// check::RegionChanges + /// Allows tracking regions which get invalidated. + /// \param state The current program state. + /// \param invalidated A set of all symbols potentially touched by the change. + /// \param ExplicitRegions The regions explicitly requested for invalidation. + /// For example, in the case of a function call, these would be arguments. + /// \param Regions The transitive closure of accessible regions, + /// i.e. all regions that may have been touched by this change. + /// \param The call expression wrapper if the regions are invalidated by a + /// call, 0 otherwise. + /// Note, in order to be notified, the checker should also implement + /// wantsRegionChangeUpdate callback. + ProgramStateRef + checkRegionChanges(ProgramStateRef State, + const StoreManager::InvalidatedSymbols *, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const CallOrObjCMessage *Call) const { + return State; + } + + /// check::Event + void checkEvent(ImplicitNullDerefEvent Event) const {} + + /// \brief Check every declaration in the AST. + /// + /// An AST traversal callback, which should only be used when the checker is + /// not path sensitive. It will be called for every Declaration in the AST and + /// can be specialized to only be called on subclasses of Decl, for example, + /// FunctionDecl. + /// + /// check::ASTDecl + void checkASTDecl(const FunctionDecl *D, + AnalysisManager &Mgr, + BugReporter &BR) const {} + +}; + +void CheckerDocumentation::checkPostStmt(const CallExpr *DS, + CheckerContext &C) const { + return; +} + +} // end namespace diff --git a/clang/lib/StaticAnalyzer/Checkers/Checkers.td b/clang/lib/StaticAnalyzer/Checkers/Checkers.td new file mode 100644 index 0000000..f577a89 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/Checkers.td @@ -0,0 +1,491 @@ +//===--- Checkers.td - Static Analyzer Checkers -===-----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +include "clang/StaticAnalyzer/Checkers/CheckerBase.td" + +//===----------------------------------------------------------------------===// +// Packages. +//===----------------------------------------------------------------------===// + +def Experimental : Package<"experimental">; + +def Core : Package<"core">; +def CoreBuiltin : Package<"builtin">, InPackage; +def CoreUninitialized : Package<"uninitialized">, InPackage; +def CoreExperimental : Package<"core">, InPackage, Hidden; + +def Cplusplus : Package<"cplusplus">; +def CplusplusExperimental : Package<"cplusplus">, InPackage, Hidden; + +def DeadCode : Package<"deadcode">; +def DeadCodeExperimental : Package<"deadcode">, InPackage, Hidden; + +def Security : Package <"security">; +def InsecureAPI : Package<"insecureAPI">, InPackage; +def SecurityExperimental : Package<"security">, InPackage, Hidden; +def Taint : Package<"taint">, InPackage, Hidden; + +def Unix : Package<"unix">; +def UnixExperimental : Package<"unix">, InPackage, Hidden; +def CString : Package<"cstring">, InPackage, Hidden; +def CStringExperimental : Package<"cstring">, InPackage, Hidden; + +def OSX : Package<"osx">; +def OSXExperimental : Package<"osx">, InPackage, Hidden; +def Cocoa : Package<"cocoa">, InPackage; +def CocoaExperimental : Package<"cocoa">, InPackage, Hidden; +def CoreFoundation : Package<"coreFoundation">, InPackage; +def Containers : Package<"containers">, InPackage; + +def LLVM : Package<"llvm">; +def Debug : Package<"debug">; + +//===----------------------------------------------------------------------===// +// Core Checkers. +//===----------------------------------------------------------------------===// + +let ParentPackage = Core in { + +def DereferenceChecker : Checker<"NullDereference">, + HelpText<"Check for dereferences of null pointers">, + DescFile<"DereferenceChecker.cpp">; + +def CallAndMessageChecker : Checker<"CallAndMessage">, + HelpText<"Check for logical errors for function calls and Objective-C message expressions (e.g., uninitialized arguments, null function pointers)">, + DescFile<"CallAndMessageChecker.cpp">; + +def AdjustedReturnValueChecker : Checker<"AdjustedReturnValue">, + HelpText<"Check to see if the return value of a function call is different than the caller expects (e.g., from calls through function pointers)">, + DescFile<"AdjustedReturnValueChecker.cpp">; + +def AttrNonNullChecker : Checker<"AttributeNonNull">, + HelpText<"Check for null pointers passed as arguments to a function whose arguments are marked with the 'nonnull' attribute">, + DescFile<"AttrNonNullChecker.cpp">; + +def VLASizeChecker : Checker<"VLASize">, + HelpText<"Check for declarations of VLA of undefined or zero size">, + DescFile<"VLASizeChecker.cpp">; + +def DivZeroChecker : Checker<"DivideZero">, + HelpText<"Check for division by zero">, + DescFile<"DivZeroChecker.cpp">; + +def UndefResultChecker : Checker<"UndefinedBinaryOperatorResult">, + HelpText<"Check for undefined results of binary operators">, + DescFile<"UndefResultChecker.cpp">; + +def StackAddrEscapeChecker : Checker<"StackAddressEscape">, + HelpText<"Check that addresses to stack memory do not escape the function">, + DescFile<"StackAddrEscapeChecker.cpp">; + +} // end "core" + +let ParentPackage = CoreExperimental in { + +def BoolAssignmentChecker : Checker<"BoolAssignment">, + HelpText<"Warn about assigning non-{0,1} values to Boolean variables">, + DescFile<"BoolAssignmentChecker.cpp">; + +def CastSizeChecker : Checker<"CastSize">, + HelpText<"Check when casting a malloc'ed type T, whether the size is a multiple of the size of T">, + DescFile<"CastSizeChecker.cpp">; + +def CastToStructChecker : Checker<"CastToStruct">, + HelpText<"Check for cast from non-struct pointer to struct pointer">, + DescFile<"CastToStructChecker.cpp">; + +def FixedAddressChecker : Checker<"FixedAddr">, + HelpText<"Check for assignment of a fixed address to a pointer">, + DescFile<"FixedAddressChecker.cpp">; + +def PointerArithChecker : Checker<"PointerArithm">, + HelpText<"Check for pointer arithmetic on locations other than array elements">, + DescFile<"PointerArithChecker">; + +def PointerSubChecker : Checker<"PointerSub">, + HelpText<"Check for pointer subtractions on two pointers pointing to different memory chunks">, + DescFile<"PointerSubChecker">; + +def SizeofPointerChecker : Checker<"SizeofPtr">, + HelpText<"Warn about unintended use of sizeof() on pointer expressions">, + DescFile<"CheckSizeofPointer.cpp">; + +} // end "core.experimental" + +//===----------------------------------------------------------------------===// +// Evaluate "builtin" functions. +//===----------------------------------------------------------------------===// + +let ParentPackage = CoreBuiltin in { + +def NoReturnFunctionChecker : Checker<"NoReturnFunctions">, + HelpText<"Evaluate \"panic\" functions that are known to not return to the caller">, + DescFile<"NoReturnFunctionChecker.cpp">; + +def BuiltinFunctionChecker : Checker<"BuiltinFunctions">, + HelpText<"Evaluate compiler builtin functions (e.g., alloca())">, + DescFile<"BuiltinFunctionChecker.cpp">; + +} // end "core.builtin" + +//===----------------------------------------------------------------------===// +// Uninitialized values checkers. +//===----------------------------------------------------------------------===// + +let ParentPackage = CoreUninitialized in { + +def UndefinedArraySubscriptChecker : Checker<"ArraySubscript">, + HelpText<"Check for uninitialized values used as array subscripts">, + DescFile<"UndefinedArraySubscriptChecker.cpp">; + +def UndefinedAssignmentChecker : Checker<"Assign">, + HelpText<"Check for assigning uninitialized values">, + DescFile<"UndefinedAssignmentChecker.cpp">; + +def UndefBranchChecker : Checker<"Branch">, + HelpText<"Check for uninitialized values used as branch conditions">, + DescFile<"UndefBranchChecker.cpp">; + +def UndefCapturedBlockVarChecker : Checker<"CapturedBlockVariable">, + HelpText<"Check for blocks that capture uninitialized values">, + DescFile<"UndefCapturedBlockVarChecker.cpp">; + +def ReturnUndefChecker : Checker<"UndefReturn">, + HelpText<"Check for uninitialized values being returned to the caller">, + DescFile<"ReturnUndefChecker.cpp">; + +} // end "core.uninitialized" + +//===----------------------------------------------------------------------===// +// C++ checkers. +//===----------------------------------------------------------------------===// + +let ParentPackage = CplusplusExperimental in { + +def IteratorsChecker : Checker<"Iterators">, + HelpText<"Check improper uses of STL vector iterators">, + DescFile<"IteratorsChecker.cpp">; + +def VirtualCallChecker : Checker<"VirtualCall">, + HelpText<"Check virtual function calls during construction or destruction">, + DescFile<"VirtualCallChecker.cpp">; + +} // end: "cplusplus.experimental" + +//===----------------------------------------------------------------------===// +// Deadcode checkers. +//===----------------------------------------------------------------------===// + +let ParentPackage = DeadCode in { + +def DeadStoresChecker : Checker<"DeadStores">, + HelpText<"Check for values stored to variables that are never read afterwards">, + DescFile<"DeadStoresChecker.cpp">; +} // end DeadCode + +let ParentPackage = DeadCodeExperimental in { + +def IdempotentOperationChecker : Checker<"IdempotentOperations">, + HelpText<"Warn about idempotent operations">, + DescFile<"IdempotentOperationChecker.cpp">; + +def UnreachableCodeChecker : Checker<"UnreachableCode">, + HelpText<"Check unreachable code">, + DescFile<"UnreachableCodeChecker.cpp">; + +} // end "deadcode.experimental" + +//===----------------------------------------------------------------------===// +// Security checkers. +//===----------------------------------------------------------------------===// + +let ParentPackage = InsecureAPI in { + def gets : Checker<"gets">, + HelpText<"Warn on uses of the 'gets' function">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; + def getpw : Checker<"getpw">, + HelpText<"Warn on uses of the 'getpw' function">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; + def mktemp : Checker<"mktemp">, + HelpText<"Warn on uses of the 'mktemp' function">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; + def mkstemp : Checker<"mkstemp">, + HelpText<"Warn when 'mkstemp' is passed fewer than 6 X's in the format string">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; + def rand : Checker<"rand">, + HelpText<"Warn on uses of the 'rand', 'random', and related functions">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; + def strcpy : Checker<"strcpy">, + HelpText<"Warn on uses of the 'strcpy' and 'strcat' functions">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; + def vfork : Checker<"vfork">, + HelpText<"Warn on uses of the 'vfork' function">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; + def UncheckedReturn : Checker<"UncheckedReturn">, + HelpText<"Warn on uses of functions whose return values must be always checked">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; +} +let ParentPackage = Security in { + def FloatLoopCounter : Checker<"FloatLoopCounter">, + HelpText<"Warn on using a floating point value as a loop counter (CERT: FLP30-C, FLP30-CPP)">, + DescFile<"CheckSecuritySyntaxOnly.cpp">; +} + +let ParentPackage = SecurityExperimental in { + +def ArrayBoundChecker : Checker<"ArrayBound">, + HelpText<"Warn about buffer overflows (older checker)">, + DescFile<"ArrayBoundChecker.cpp">; + +def ArrayBoundCheckerV2 : Checker<"ArrayBoundV2">, + HelpText<"Warn about buffer overflows (newer checker)">, + DescFile<"ArrayBoundCheckerV2.cpp">; + +def ReturnPointerRangeChecker : Checker<"ReturnPtrRange">, + HelpText<"Check for an out-of-bound pointer being returned to callers">, + DescFile<"ReturnPointerRangeChecker.cpp">; + +def MallocOverflowSecurityChecker : Checker<"MallocOverflow">, + HelpText<"Check for overflows in the arguments to malloc()">, + DescFile<"MallocOverflowSecurityChecker.cpp">; + +} // end "security.experimental" + +//===----------------------------------------------------------------------===// +// Taint checkers. +//===----------------------------------------------------------------------===// + +let ParentPackage = Taint in { + +def GenericTaintChecker : Checker<"TaintPropagation">, + HelpText<"Generate taint information used by other checkers">, + DescFile<"GenericTaintChecker.cpp">; + +} // end "experimental.security.taint" + +//===----------------------------------------------------------------------===// +// Unix API checkers. +//===----------------------------------------------------------------------===// + +let ParentPackage = Unix in { + +def UnixAPIChecker : Checker<"API">, + HelpText<"Check calls to various UNIX/Posix functions">, + DescFile<"UnixAPIChecker.cpp">; + +def MallocPessimistic : Checker<"Malloc">, + HelpText<"Check for memory leaks, double free, and use-after-free problems.">, + DescFile<"MallocChecker.cpp">; + +} // end "unix" + +let ParentPackage = UnixExperimental in { + +def ChrootChecker : Checker<"Chroot">, + HelpText<"Check improper use of chroot">, + DescFile<"ChrootChecker.cpp">; + +def MallocOptimistic : Checker<"MallocWithAnnotations">, + HelpText<"Check for memory leaks, double free, and use-after-free problems. Assumes that all user-defined functions which might free a pointer are annotated.">, + DescFile<"MallocChecker.cpp">; + +def MallocSizeofChecker : Checker<"MallocSizeof">, + HelpText<"Check for dubious malloc arguments involving sizeof">, + DescFile<"MallocSizeofChecker.cpp">; + +def PthreadLockChecker : Checker<"PthreadLock">, + HelpText<"Simple lock -> unlock checker">, + DescFile<"PthreadLockChecker.cpp">; + +def StreamChecker : Checker<"Stream">, + HelpText<"Check stream handling functions">, + DescFile<"StreamChecker.cpp">; + +} // end "unix.experimental" + +let ParentPackage = CString in { + +def CStringNullArg : Checker<"NullArg">, + HelpText<"Check for null pointers being passed as arguments to C string functions">, + DescFile<"CStringChecker.cpp">; + +def CStringSyntaxChecker : Checker<"BadSizeArg">, + HelpText<"Check the size argument passed into C string functions for common erroneous patterns">, + DescFile<"CStringSyntaxChecker.cpp">; +} + +let ParentPackage = CStringExperimental in { + +def CStringOutOfBounds : Checker<"OutOfBounds">, + HelpText<"Check for out-of-bounds access in string functions">, + DescFile<"CStringChecker.cpp">; + +def CStringBufferOverlap : Checker<"BufferOverlap">, + HelpText<"Checks for overlap in two buffer arguments">, + DescFile<"CStringChecker.cpp">; + +def CStringNotNullTerm : Checker<"NotNullTerminated">, + HelpText<"Check for arguments which are not null-terminating strings">, + DescFile<"CStringChecker.cpp">; +} + +//===----------------------------------------------------------------------===// +// Mac OS X, Cocoa, and Core Foundation checkers. +//===----------------------------------------------------------------------===// + +let ParentPackage = OSX in { + +def MacOSXAPIChecker : Checker<"API">, + InPackage, + HelpText<"Check for proper uses of various Mac OS X APIs">, + DescFile<"MacOSXAPIChecker.cpp">; + +def OSAtomicChecker : Checker<"AtomicCAS">, + InPackage, + HelpText<"Evaluate calls to OSAtomic functions">, + DescFile<"OSAtomicChecker.cpp">; + +def MacOSKeychainAPIChecker : Checker<"SecKeychainAPI">, + InPackage, + HelpText<"Check for proper uses of Secure Keychain APIs">, + DescFile<"MacOSKeychainAPIChecker.cpp">; + +} // end "macosx" + +let ParentPackage = Cocoa in { + +def ObjCAtSyncChecker : Checker<"AtSync">, + HelpText<"Check for null pointers used as mutexes for @synchronized">, + DescFile<"ObjCAtSyncChecker.cpp">; + +def NilArgChecker : Checker<"NilArg">, + HelpText<"Check for prohibited nil arguments to ObjC method calls">, + DescFile<"BasicObjCFoundationChecks.cpp">; + +def ClassReleaseChecker : Checker<"ClassRelease">, + HelpText<"Check for sending 'retain', 'release', or 'autorelease' directly to a Class">, + DescFile<"BasicObjCFoundationChecks.cpp">; + +def VariadicMethodTypeChecker : Checker<"VariadicMethodTypes">, + HelpText<"Check for passing non-Objective-C types to variadic methods that expect " + "only Objective-C types">, + DescFile<"BasicObjCFoundationChecks.cpp">; + +def NSAutoreleasePoolChecker : Checker<"NSAutoreleasePool">, + HelpText<"Warn for suboptimal uses of NSAutoreleasePool in Objective-C GC mode">, + DescFile<"NSAutoreleasePoolChecker.cpp">; + +def ObjCMethSigsChecker : Checker<"IncompatibleMethodTypes">, + HelpText<"Warn about Objective-C method signatures with type incompatibilities">, + DescFile<"CheckObjCInstMethSignature.cpp">; + +def ObjCUnusedIvarsChecker : Checker<"UnusedIvars">, + HelpText<"Warn about private ivars that are never used">, + DescFile<"ObjCUnusedIVarsChecker.cpp">; + +def ObjCSelfInitChecker : Checker<"SelfInit">, + HelpText<"Check that 'self' is properly initialized inside an initializer method">, + DescFile<"ObjCSelfInitChecker.cpp">; + +def NSErrorChecker : Checker<"NSError">, + HelpText<"Check usage of NSError** parameters">, + DescFile<"NSErrorChecker.cpp">; + +def RetainCountChecker : Checker<"RetainCount">, + HelpText<"Check for leaks and improper reference count management">, + DescFile<"RetainCountChecker.cpp">; + +} // end "cocoa" + +let ParentPackage = CocoaExperimental in { + +def ObjCDeallocChecker : Checker<"Dealloc">, + HelpText<"Warn about Objective-C classes that lack a correct implementation of -dealloc">, + DescFile<"CheckObjCDealloc.cpp">; + +} // end "cocoa.experimental" + +let ParentPackage = CoreFoundation in { + +def CFNumberCreateChecker : Checker<"CFNumber">, + HelpText<"Check for proper uses of CFNumberCreate">, + DescFile<"BasicObjCFoundationChecks.cpp">; + +def CFRetainReleaseChecker : Checker<"CFRetainRelease">, + HelpText<"Check for null arguments to CFRetain/CFRelease">, + DescFile<"BasicObjCFoundationChecks.cpp">; + +def CFErrorChecker : Checker<"CFError">, + HelpText<"Check usage of CFErrorRef* parameters">, + DescFile<"NSErrorChecker.cpp">; +} + +let ParentPackage = Containers in { +def ObjCContainersASTChecker : Checker<"PointerSizedValues">, + HelpText<"Warns if 'CFArray', 'CFDictionary', 'CFSet' are created with non-pointer-size values">, + DescFile<"ObjCContainersASTChecker.cpp">; + +def ObjCContainersChecker : Checker<"OutOfBounds">, + HelpText<"Checks for index out-of-bounds when using 'CFArray' API">, + DescFile<"ObjCContainersChecker.cpp">; + +} +//===----------------------------------------------------------------------===// +// Checkers for LLVM development. +//===----------------------------------------------------------------------===// + +def LLVMConventionsChecker : Checker<"Conventions">, + InPackage, + HelpText<"Check code for LLVM codebase conventions">, + DescFile<"LLVMConventionsChecker.cpp">; + +//===----------------------------------------------------------------------===// +// Debugging checkers (for analyzer development). +//===----------------------------------------------------------------------===// + +let ParentPackage = Debug in { + +def DominatorsTreeDumper : Checker<"DumpDominators">, + HelpText<"Print the dominance tree for a given CFG">, + DescFile<"DebugCheckers.cpp">; + +def LiveVariablesDumper : Checker<"DumpLiveVars">, + HelpText<"Print results of live variable analysis">, + DescFile<"DebugCheckers.cpp">; + +def CFGViewer : Checker<"ViewCFG">, + HelpText<"View Control-Flow Graphs using GraphViz">, + DescFile<"DebugCheckers.cpp">; + +def CFGDumper : Checker<"DumpCFG">, + HelpText<"Display Control-Flow Graphs">, + DescFile<"DebugCheckers.cpp">; + +def CallGraphViewer : Checker<"ViewCallGraph">, + HelpText<"View Call Graph using GraphViz">, + DescFile<"DebugCheckers.cpp">; + +def CallGraphDumper : Checker<"DumpCallGraph">, + HelpText<"Display Call Graph">, + DescFile<"DebugCheckers.cpp">; + +def AnalyzerStatsChecker : Checker<"Stats">, + HelpText<"Emit warnings with analyzer statistics">, + DescFile<"AnalyzerStatsChecker.cpp">; + +def TaintTesterChecker : Checker<"TaintTest">, + HelpText<"Mark tainted symbols as such.">, + DescFile<"TaintTesterChecker.cpp">; + +def IntervalTestChecker : Checker<"IntervalTest">, + HelpText<"Test the IntervalAnalysis stuff">, + DescFile<"IntervalTest.cpp">; + +} // end "debug" + diff --git a/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp new file mode 100644 index 0000000..30d0609 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp @@ -0,0 +1,158 @@ +//===- Chrootchecker.cpp -------- Basic security checks ----------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines chroot checker, which checks improper use of chroot. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "llvm/ADT/ImmutableMap.h" +using namespace clang; +using namespace ento; + +namespace { + +// enum value that represent the jail state +enum Kind { NO_CHROOT, ROOT_CHANGED, JAIL_ENTERED }; + +bool isRootChanged(intptr_t k) { return k == ROOT_CHANGED; } +//bool isJailEntered(intptr_t k) { return k == JAIL_ENTERED; } + +// This checker checks improper use of chroot. +// The state transition: +// NO_CHROOT ---chroot(path)--> ROOT_CHANGED ---chdir(/) --> JAIL_ENTERED +// | | +// ROOT_CHANGED<--chdir(..)-- JAIL_ENTERED<--chdir(..)-- +// | | +// bug<--foo()-- JAIL_ENTERED<--foo()-- +class ChrootChecker : public Checker > { + mutable IdentifierInfo *II_chroot, *II_chdir; + // This bug refers to possibly break out of a chroot() jail. + mutable OwningPtr BT_BreakJail; + +public: + ChrootChecker() : II_chroot(0), II_chdir(0) {} + + static void *getTag() { + static int x; + return &x; + } + + bool evalCall(const CallExpr *CE, CheckerContext &C) const; + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + +private: + void Chroot(CheckerContext &C, const CallExpr *CE) const; + void Chdir(CheckerContext &C, const CallExpr *CE) const; +}; + +} // end anonymous namespace + +bool ChrootChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return false; + + ASTContext &Ctx = C.getASTContext(); + if (!II_chroot) + II_chroot = &Ctx.Idents.get("chroot"); + if (!II_chdir) + II_chdir = &Ctx.Idents.get("chdir"); + + if (FD->getIdentifier() == II_chroot) { + Chroot(C, CE); + return true; + } + if (FD->getIdentifier() == II_chdir) { + Chdir(C, CE); + return true; + } + + return false; +} + +void ChrootChecker::Chroot(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + ProgramStateManager &Mgr = state->getStateManager(); + + // Once encouter a chroot(), set the enum value ROOT_CHANGED directly in + // the GDM. + state = Mgr.addGDM(state, ChrootChecker::getTag(), (void*) ROOT_CHANGED); + C.addTransition(state); +} + +void ChrootChecker::Chdir(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + ProgramStateManager &Mgr = state->getStateManager(); + + // If there are no jail state in the GDM, just return. + const void *k = state->FindGDM(ChrootChecker::getTag()); + if (!k) + return; + + // After chdir("/"), enter the jail, set the enum value JAIL_ENTERED. + const Expr *ArgExpr = CE->getArg(0); + SVal ArgVal = state->getSVal(ArgExpr, C.getLocationContext()); + + if (const MemRegion *R = ArgVal.getAsRegion()) { + R = R->StripCasts(); + if (const StringRegion* StrRegion= dyn_cast(R)) { + const StringLiteral* Str = StrRegion->getStringLiteral(); + if (Str->getString() == "/") + state = Mgr.addGDM(state, ChrootChecker::getTag(), + (void*) JAIL_ENTERED); + } + } + + C.addTransition(state); +} + +// Check the jail state before any function call except chroot and chdir(). +void ChrootChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const { + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return; + + ASTContext &Ctx = C.getASTContext(); + if (!II_chroot) + II_chroot = &Ctx.Idents.get("chroot"); + if (!II_chdir) + II_chdir = &Ctx.Idents.get("chdir"); + + // Ingnore chroot and chdir. + if (FD->getIdentifier() == II_chroot || FD->getIdentifier() == II_chdir) + return; + + // If jail state is ROOT_CHANGED, generate BugReport. + void *const* k = C.getState()->FindGDM(ChrootChecker::getTag()); + if (k) + if (isRootChanged((intptr_t) *k)) + if (ExplodedNode *N = C.addTransition()) { + if (!BT_BreakJail) + BT_BreakJail.reset(new BuiltinBug("Break out of jail", + "No call of chdir(\"/\") immediately " + "after chroot")); + BugReport *R = new BugReport(*BT_BreakJail, + BT_BreakJail->getDescription(), N); + C.EmitReport(R); + } + + return; +} + +void ento::registerChrootChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp b/clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp new file mode 100644 index 0000000..77a5a72 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp @@ -0,0 +1,32 @@ +//===--- ClangCheckers.h - Provides builtin checkers ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/ClangCheckers.h" +#include "clang/StaticAnalyzer/Core/CheckerRegistry.h" + +// FIXME: This is only necessary as long as there are checker registration +// functions that do additional work besides mgr.registerChecker(). +// The only checkers that currently do this are: +// - NSAutoreleasePoolChecker +// - NSErrorChecker +// - ObjCAtSyncChecker +// It's probably worth including this information in Checkers.td to minimize +// boilerplate code. +#include "ClangSACheckers.h" + +using namespace clang; +using namespace ento; + +void ento::registerBuiltinCheckers(CheckerRegistry ®istry) { +#define GET_CHECKERS +#define CHECKER(FULLNAME,CLASS,DESCFILE,HELPTEXT,GROUPINDEX,HIDDEN) \ + registry.addChecker(register##CLASS, FULLNAME, HELPTEXT); +#include "Checkers.inc" +#undef GET_CHECKERS +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h b/clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h new file mode 100644 index 0000000..230baa7 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h @@ -0,0 +1,37 @@ +//===--- ClangSACheckers.h - Registration functions for Checkers *- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Declares the registation functions for the checkers defined in +// libclangStaticAnalyzerCheckers. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/CommonBugCategories.h" + +#ifndef LLVM_CLANG_SA_LIB_CHECKERS_CLANGSACHECKERS_H +#define LLVM_CLANG_SA_LIB_CHECKERS_CLANGSACHECKERS_H + +namespace clang { + +namespace ento { +class CheckerManager; +class CheckerRegistry; + +#define GET_CHECKERS +#define CHECKER(FULLNAME,CLASS,CXXFILE,HELPTEXT,GROUPINDEX,HIDDEN) \ + void register##CLASS(CheckerManager &mgr); +#include "Checkers.inc" +#undef CHECKER +#undef GET_CHECKERS + +} // end ento namespace + +} // end clang namespace + +#endif diff --git a/clang/lib/StaticAnalyzer/Checkers/CommonBugCategories.cpp b/clang/lib/StaticAnalyzer/Checkers/CommonBugCategories.cpp new file mode 100644 index 0000000..e2a8ea6 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CommonBugCategories.cpp @@ -0,0 +1,18 @@ +//=--- CommonBugCategories.cpp - Provides common issue categories -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Common strings used for the "category" of many static analyzer issues. +namespace clang { namespace ento { namespace categories { + +const char *CoreFoundationObjectiveC = "Core Foundation/Objective-C"; +const char *MemoryCoreFoundationObjectiveC = + "Memory (Core Foundation/Objective-C)"; +const char *UnixAPI = "Unix API"; +}}} + diff --git a/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp new file mode 100644 index 0000000..510e8cd --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp @@ -0,0 +1,386 @@ +//==- DeadStoresChecker.cpp - Check for stores to dead variables -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a DeadStores, a flow-sensitive checker that looks for +// stores to variables that are no longer live. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/Analysis/Visitors/CFGRecStmtVisitor.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/Analysis/Visitors/CFGRecStmtDeclVisitor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/ParentMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { + +// FIXME: Eventually migrate into its own file, and have it managed by +// AnalysisManager. +class ReachableCode { + const CFG &cfg; + llvm::BitVector reachable; +public: + ReachableCode(const CFG &cfg) + : cfg(cfg), reachable(cfg.getNumBlockIDs(), false) {} + + void computeReachableBlocks(); + + bool isReachable(const CFGBlock *block) const { + return reachable[block->getBlockID()]; + } +}; +} + +void ReachableCode::computeReachableBlocks() { + if (!cfg.getNumBlockIDs()) + return; + + SmallVector worklist; + worklist.push_back(&cfg.getEntry()); + + while (!worklist.empty()) { + const CFGBlock *block = worklist.back(); + worklist.pop_back(); + llvm::BitVector::reference isReachable = reachable[block->getBlockID()]; + if (isReachable) + continue; + isReachable = true; + for (CFGBlock::const_succ_iterator i = block->succ_begin(), + e = block->succ_end(); i != e; ++i) + if (const CFGBlock *succ = *i) + worklist.push_back(succ); + } +} + +static const Expr *LookThroughTransitiveAssignments(const Expr *Ex) { + while (Ex) { + const BinaryOperator *BO = + dyn_cast(Ex->IgnoreParenCasts()); + if (!BO) + break; + if (BO->getOpcode() == BO_Assign) { + Ex = BO->getRHS(); + continue; + } + break; + } + return Ex; +} + +namespace { +class DeadStoreObs : public LiveVariables::Observer { + const CFG &cfg; + ASTContext &Ctx; + BugReporter& BR; + AnalysisDeclContext* AC; + ParentMap& Parents; + llvm::SmallPtrSet Escaped; + OwningPtr reachableCode; + const CFGBlock *currentBlock; + + enum DeadStoreKind { Standard, Enclosing, DeadIncrement, DeadInit }; + +public: + DeadStoreObs(const CFG &cfg, ASTContext &ctx, + BugReporter& br, AnalysisDeclContext* ac, ParentMap& parents, + llvm::SmallPtrSet &escaped) + : cfg(cfg), Ctx(ctx), BR(br), AC(ac), Parents(parents), + Escaped(escaped), currentBlock(0) {} + + virtual ~DeadStoreObs() {} + + void Report(const VarDecl *V, DeadStoreKind dsk, + PathDiagnosticLocation L, SourceRange R) { + if (Escaped.count(V)) + return; + + // Compute reachable blocks within the CFG for trivial cases + // where a bogus dead store can be reported because itself is unreachable. + if (!reachableCode.get()) { + reachableCode.reset(new ReachableCode(cfg)); + reachableCode->computeReachableBlocks(); + } + + if (!reachableCode->isReachable(currentBlock)) + return; + + SmallString<64> buf; + llvm::raw_svector_ostream os(buf); + const char *BugType = 0; + + switch (dsk) { + case DeadInit: + BugType = "Dead initialization"; + os << "Value stored to '" << *V + << "' during its initialization is never read"; + break; + + case DeadIncrement: + BugType = "Dead increment"; + case Standard: + if (!BugType) BugType = "Dead assignment"; + os << "Value stored to '" << *V << "' is never read"; + break; + + case Enclosing: + // Don't report issues in this case, e.g.: "if (x = foo())", + // where 'x' is unused later. We have yet to see a case where + // this is a real bug. + return; + } + + BR.EmitBasicReport(AC->getDecl(), BugType, "Dead store", os.str(), L, R); + } + + void CheckVarDecl(const VarDecl *VD, const Expr *Ex, const Expr *Val, + DeadStoreKind dsk, + const LiveVariables::LivenessValues &Live) { + + if (!VD->hasLocalStorage()) + return; + // Reference types confuse the dead stores checker. Skip them + // for now. + if (VD->getType()->getAs()) + return; + + if (!Live.isLive(VD) && + !(VD->getAttr() || VD->getAttr())) { + + PathDiagnosticLocation ExLoc = + PathDiagnosticLocation::createBegin(Ex, BR.getSourceManager(), AC); + Report(VD, dsk, ExLoc, Val->getSourceRange()); + } + } + + void CheckDeclRef(const DeclRefExpr *DR, const Expr *Val, DeadStoreKind dsk, + const LiveVariables::LivenessValues& Live) { + if (const VarDecl *VD = dyn_cast(DR->getDecl())) + CheckVarDecl(VD, DR, Val, dsk, Live); + } + + bool isIncrement(VarDecl *VD, const BinaryOperator* B) { + if (B->isCompoundAssignmentOp()) + return true; + + const Expr *RHS = B->getRHS()->IgnoreParenCasts(); + const BinaryOperator* BRHS = dyn_cast(RHS); + + if (!BRHS) + return false; + + const DeclRefExpr *DR; + + if ((DR = dyn_cast(BRHS->getLHS()->IgnoreParenCasts()))) + if (DR->getDecl() == VD) + return true; + + if ((DR = dyn_cast(BRHS->getRHS()->IgnoreParenCasts()))) + if (DR->getDecl() == VD) + return true; + + return false; + } + + virtual void observeStmt(const Stmt *S, const CFGBlock *block, + const LiveVariables::LivenessValues &Live) { + + currentBlock = block; + + // Skip statements in macros. + if (S->getLocStart().isMacroID()) + return; + + // Only cover dead stores from regular assignments. ++/-- dead stores + // have never flagged a real bug. + if (const BinaryOperator* B = dyn_cast(S)) { + if (!B->isAssignmentOp()) return; // Skip non-assignments. + + if (DeclRefExpr *DR = dyn_cast(B->getLHS())) + if (VarDecl *VD = dyn_cast(DR->getDecl())) { + // Special case: check for assigning null to a pointer. + // This is a common form of defensive programming. + const Expr *RHS = LookThroughTransitiveAssignments(B->getRHS()); + + QualType T = VD->getType(); + if (T->isPointerType() || T->isObjCObjectPointerType()) { + if (RHS->isNullPointerConstant(Ctx, Expr::NPC_ValueDependentIsNull)) + return; + } + + RHS = RHS->IgnoreParenCasts(); + // Special case: self-assignments. These are often used to shut up + // "unused variable" compiler warnings. + if (const DeclRefExpr *RhsDR = dyn_cast(RHS)) + if (VD == dyn_cast(RhsDR->getDecl())) + return; + + // Otherwise, issue a warning. + DeadStoreKind dsk = Parents.isConsumedExpr(B) + ? Enclosing + : (isIncrement(VD,B) ? DeadIncrement : Standard); + + CheckVarDecl(VD, DR, B->getRHS(), dsk, Live); + } + } + else if (const UnaryOperator* U = dyn_cast(S)) { + if (!U->isIncrementOp() || U->isPrefix()) + return; + + const Stmt *parent = Parents.getParentIgnoreParenCasts(U); + if (!parent || !isa(parent)) + return; + + const Expr *Ex = U->getSubExpr()->IgnoreParenCasts(); + + if (const DeclRefExpr *DR = dyn_cast(Ex)) + CheckDeclRef(DR, U, DeadIncrement, Live); + } + else if (const DeclStmt *DS = dyn_cast(S)) + // Iterate through the decls. Warn if any initializers are complex + // expressions that are not live (never used). + for (DeclStmt::const_decl_iterator DI=DS->decl_begin(), DE=DS->decl_end(); + DI != DE; ++DI) { + + VarDecl *V = dyn_cast(*DI); + + if (!V) + continue; + + if (V->hasLocalStorage()) { + // Reference types confuse the dead stores checker. Skip them + // for now. + if (V->getType()->getAs()) + return; + + if (const Expr *E = V->getInit()) { + while (const ExprWithCleanups *exprClean = + dyn_cast(E)) + E = exprClean->getSubExpr(); + + // Look through transitive assignments, e.g.: + // int x = y = 0; + E = LookThroughTransitiveAssignments(E); + + // Don't warn on C++ objects (yet) until we can show that their + // constructors/destructors don't have side effects. + if (isa(E)) + return; + + // A dead initialization is a variable that is dead after it + // is initialized. We don't flag warnings for those variables + // marked 'unused'. + if (!Live.isLive(V) && V->getAttr() == 0) { + // Special case: check for initializations with constants. + // + // e.g. : int x = 0; + // + // If x is EVER assigned a new value later, don't issue + // a warning. This is because such initialization can be + // due to defensive programming. + if (E->isEvaluatable(Ctx)) + return; + + if (const DeclRefExpr *DRE = + dyn_cast(E->IgnoreParenCasts())) + if (const VarDecl *VD = dyn_cast(DRE->getDecl())) { + // Special case: check for initialization from constant + // variables. + // + // e.g. extern const int MyConstant; + // int x = MyConstant; + // + if (VD->hasGlobalStorage() && + VD->getType().isConstQualified()) + return; + // Special case: check for initialization from scalar + // parameters. This is often a form of defensive + // programming. Non-scalars are still an error since + // because it more likely represents an actual algorithmic + // bug. + if (isa(VD) && VD->getType()->isScalarType()) + return; + } + + PathDiagnosticLocation Loc = + PathDiagnosticLocation::create(V, BR.getSourceManager()); + Report(V, DeadInit, Loc, E->getSourceRange()); + } + } + } + } + } +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Driver function to invoke the Dead-Stores checker on a CFG. +//===----------------------------------------------------------------------===// + +namespace { +class FindEscaped : public CFGRecStmtDeclVisitor{ + CFG *cfg; +public: + FindEscaped(CFG *c) : cfg(c) {} + + CFG& getCFG() { return *cfg; } + + llvm::SmallPtrSet Escaped; + + void VisitUnaryOperator(UnaryOperator* U) { + // Check for '&'. Any VarDecl whose value has its address-taken we + // treat as escaped. + Expr *E = U->getSubExpr()->IgnoreParenCasts(); + if (U->getOpcode() == UO_AddrOf) + if (DeclRefExpr *DR = dyn_cast(E)) + if (VarDecl *VD = dyn_cast(DR->getDecl())) { + Escaped.insert(VD); + return; + } + Visit(E); + } +}; +} // end anonymous namespace + + +//===----------------------------------------------------------------------===// +// DeadStoresChecker +//===----------------------------------------------------------------------===// + +namespace { +class DeadStoresChecker : public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (LiveVariables *L = mgr.getAnalysis(D)) { + CFG &cfg = *mgr.getCFG(D); + AnalysisDeclContext *AC = mgr.getAnalysisDeclContext(D); + ParentMap &pmap = mgr.getParentMap(D); + FindEscaped FS(&cfg); + FS.getCFG().VisitBlockStmts(FS); + DeadStoreObs A(cfg, BR.getContext(), BR, AC, pmap, FS.Escaped); + L->runOnAllBlocks(A); + } + } +}; +} + +void ento::registerDeadStoresChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp b/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp new file mode 100644 index 0000000..34053cd --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp @@ -0,0 +1,146 @@ +//==- DebugCheckers.cpp - Debugging Checkers ---------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a checkers that display debugging information. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/Analysis/Analyses/Dominators.h" +#include "clang/Analysis/CallGraph.h" +#include "llvm/Support/Process.h" + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// DominatorsTreeDumper +//===----------------------------------------------------------------------===// + +namespace { +class DominatorsTreeDumper : public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (AnalysisDeclContext *AC = mgr.getAnalysisDeclContext(D)) { + DominatorTree dom; + dom.buildDominatorTree(*AC); + dom.dump(); + } + } +}; +} + +void ento::registerDominatorsTreeDumper(CheckerManager &mgr) { + mgr.registerChecker(); +} + +//===----------------------------------------------------------------------===// +// LiveVariablesDumper +//===----------------------------------------------------------------------===// + +namespace { +class LiveVariablesDumper : public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (LiveVariables* L = mgr.getAnalysis(D)) { + L->dumpBlockLiveness(mgr.getSourceManager()); + } + } +}; +} + +void ento::registerLiveVariablesDumper(CheckerManager &mgr) { + mgr.registerChecker(); +} + +//===----------------------------------------------------------------------===// +// CFGViewer +//===----------------------------------------------------------------------===// + +namespace { +class CFGViewer : public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (CFG *cfg = mgr.getCFG(D)) { + cfg->viewCFG(mgr.getLangOpts()); + } + } +}; +} + +void ento::registerCFGViewer(CheckerManager &mgr) { + mgr.registerChecker(); +} + +//===----------------------------------------------------------------------===// +// CFGDumper +//===----------------------------------------------------------------------===// + +namespace { +class CFGDumper : public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (CFG *cfg = mgr.getCFG(D)) { + cfg->dump(mgr.getLangOpts(), + llvm::sys::Process::StandardErrHasColors()); + } + } +}; +} + +void ento::registerCFGDumper(CheckerManager &mgr) { + mgr.registerChecker(); +} + +//===----------------------------------------------------------------------===// +// CallGraphViewer +//===----------------------------------------------------------------------===// + +namespace { +class CallGraphViewer : public Checker< check::ASTDecl > { +public: + void checkASTDecl(const TranslationUnitDecl *TU, AnalysisManager& mgr, + BugReporter &BR) const { + CallGraph CG; + CG.addToCallGraph(const_cast(TU)); + CG.viewGraph(); + } +}; +} + +void ento::registerCallGraphViewer(CheckerManager &mgr) { + mgr.registerChecker(); +} + +//===----------------------------------------------------------------------===// +// CallGraphDumper +//===----------------------------------------------------------------------===// + +namespace { +class CallGraphDumper : public Checker< check::ASTDecl > { +public: + void checkASTDecl(const TranslationUnitDecl *TU, AnalysisManager& mgr, + BugReporter &BR) const { + CallGraph CG; + CG.addToCallGraph(const_cast(TU)); + CG.dump(); + } +}; +} + +void ento::registerCallGraphDumper(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp new file mode 100644 index 0000000..81a2745 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp @@ -0,0 +1,216 @@ +//== NullDerefChecker.cpp - Null dereference checker ------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines NullDerefChecker, a builtin check in ExprEngine that performs +// checks for null pointers at loads and stores. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/AST/ExprObjC.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { +class DereferenceChecker + : public Checker< check::Location, + EventDispatcher > { + mutable OwningPtr BT_null; + mutable OwningPtr BT_undef; + +public: + void checkLocation(SVal location, bool isLoad, const Stmt* S, + CheckerContext &C) const; + + static const MemRegion *AddDerefSource(raw_ostream &os, + SmallVectorImpl &Ranges, + const Expr *Ex, const ProgramState *state, + const LocationContext *LCtx, + bool loadedFrom = false); +}; +} // end anonymous namespace + +const MemRegion * +DereferenceChecker::AddDerefSource(raw_ostream &os, + SmallVectorImpl &Ranges, + const Expr *Ex, + const ProgramState *state, + const LocationContext *LCtx, + bool loadedFrom) { + Ex = Ex->IgnoreParenLValueCasts(); + const MemRegion *sourceR = 0; + switch (Ex->getStmtClass()) { + default: + break; + case Stmt::DeclRefExprClass: { + const DeclRefExpr *DR = cast(Ex); + if (const VarDecl *VD = dyn_cast(DR->getDecl())) { + os << " (" << (loadedFrom ? "loaded from" : "from") + << " variable '" << VD->getName() << "')"; + Ranges.push_back(DR->getSourceRange()); + sourceR = state->getLValue(VD, LCtx).getAsRegion(); + } + break; + } + case Stmt::MemberExprClass: { + const MemberExpr *ME = cast(Ex); + os << " (" << (loadedFrom ? "loaded from" : "via") + << " field '" << ME->getMemberNameInfo() << "')"; + SourceLocation L = ME->getMemberLoc(); + Ranges.push_back(SourceRange(L, L)); + break; + } + } + return sourceR; +} + +void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S, + CheckerContext &C) const { + // Check for dereference of an undefined value. + if (l.isUndef()) { + if (ExplodedNode *N = C.generateSink()) { + if (!BT_undef) + BT_undef.reset(new BuiltinBug("Dereference of undefined pointer value")); + + BugReport *report = + new BugReport(*BT_undef, BT_undef->getDescription(), N); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, + bugreporter::GetDerefExpr(N), report)); + C.EmitReport(report); + } + return; + } + + DefinedOrUnknownSVal location = cast(l); + + // Check for null dereferences. + if (!isa(location)) + return; + + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + ProgramStateRef notNullState, nullState; + llvm::tie(notNullState, nullState) = state->assume(location); + + // The explicit NULL case. + if (nullState) { + if (!notNullState) { + // Generate an error node. + ExplodedNode *N = C.generateSink(nullState); + if (!N) + return; + + // We know that 'location' cannot be non-null. This is what + // we call an "explicit" null dereference. + if (!BT_null) + BT_null.reset(new BuiltinBug("Dereference of null pointer")); + + SmallString<100> buf; + SmallVector Ranges; + + // Walk through lvalue casts to get the original expression + // that syntactically caused the load. + if (const Expr *expr = dyn_cast(S)) + S = expr->IgnoreParenLValueCasts(); + + const MemRegion *sourceR = 0; + + switch (S->getStmtClass()) { + case Stmt::ArraySubscriptExprClass: { + llvm::raw_svector_ostream os(buf); + os << "Array access"; + const ArraySubscriptExpr *AE = cast(S); + sourceR = + AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(), + state.getPtr(), LCtx); + os << " results in a null pointer dereference"; + break; + } + case Stmt::UnaryOperatorClass: { + llvm::raw_svector_ostream os(buf); + os << "Dereference of null pointer"; + const UnaryOperator *U = cast(S); + sourceR = + AddDerefSource(os, Ranges, U->getSubExpr()->IgnoreParens(), + state.getPtr(), LCtx, true); + break; + } + case Stmt::MemberExprClass: { + const MemberExpr *M = cast(S); + if (M->isArrow()) { + llvm::raw_svector_ostream os(buf); + os << "Access to field '" << M->getMemberNameInfo() + << "' results in a dereference of a null pointer"; + sourceR = + AddDerefSource(os, Ranges, M->getBase()->IgnoreParenCasts(), + state.getPtr(), LCtx, true); + } + break; + } + case Stmt::ObjCIvarRefExprClass: { + const ObjCIvarRefExpr *IV = cast(S); + if (const DeclRefExpr *DR = + dyn_cast(IV->getBase()->IgnoreParenCasts())) { + if (const VarDecl *VD = dyn_cast(DR->getDecl())) { + llvm::raw_svector_ostream os(buf); + os << "Instance variable access (via '" << VD->getName() + << "') results in a null pointer dereference"; + } + } + Ranges.push_back(IV->getSourceRange()); + break; + } + default: + break; + } + + BugReport *report = + new BugReport(*BT_null, + buf.empty() ? BT_null->getDescription():buf.str(), + N); + + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, + bugreporter::GetDerefExpr(N), report)); + + for (SmallVectorImpl::iterator + I = Ranges.begin(), E = Ranges.end(); I!=E; ++I) + report->addRange(*I); + + if (sourceR) { + report->markInteresting(sourceR); + report->markInteresting(state->getRawSVal(loc::MemRegionVal(sourceR))); + } + + C.EmitReport(report); + return; + } + else { + // Otherwise, we have the case where the location could either be + // null or not-null. Record the error node as an "implicit" null + // dereference. + if (ExplodedNode *N = C.generateSink(nullState)) { + ImplicitNullDerefEvent event = { l, isLoad, N, &C.getBugReporter() }; + dispatchEvent(event); + } + } + } + + // From this point forward, we know that the location is not null. + C.addTransition(notNullState); +} + +void ento::registerDereferenceChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp new file mode 100644 index 0000000..2627f0c --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp @@ -0,0 +1,96 @@ +//== DivZeroChecker.cpp - Division by zero checker --------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines DivZeroChecker, a builtin check in ExprEngine that performs +// checks for division by zeros. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class DivZeroChecker : public Checker< check::PreStmt > { + mutable OwningPtr BT; + void reportBug(const char *Msg, + ProgramStateRef StateZero, + CheckerContext &C) const ; +public: + void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; +}; +} // end anonymous namespace + +void DivZeroChecker::reportBug(const char *Msg, + ProgramStateRef StateZero, + CheckerContext &C) const { + if (ExplodedNode *N = C.generateSink(StateZero)) { + if (!BT) + BT.reset(new BuiltinBug("Division by zero")); + + BugReport *R = + new BugReport(*BT, Msg, N); + + R->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, + bugreporter::GetDenomExpr(N), R)); + C.EmitReport(R); + } +} + +void DivZeroChecker::checkPreStmt(const BinaryOperator *B, + CheckerContext &C) const { + BinaryOperator::Opcode Op = B->getOpcode(); + if (Op != BO_Div && + Op != BO_Rem && + Op != BO_DivAssign && + Op != BO_RemAssign) + return; + + if (!B->getRHS()->getType()->isIntegerType() || + !B->getRHS()->getType()->isScalarType()) + return; + + SVal Denom = C.getState()->getSVal(B->getRHS(), C.getLocationContext()); + const DefinedSVal *DV = dyn_cast(&Denom); + + // Divide-by-undefined handled in the generic checking for uses of + // undefined values. + if (!DV) + return; + + // Check for divide by zero. + ConstraintManager &CM = C.getConstraintManager(); + ProgramStateRef stateNotZero, stateZero; + llvm::tie(stateNotZero, stateZero) = CM.assumeDual(C.getState(), *DV); + + if (!stateNotZero) { + assert(stateZero); + reportBug("Division by zero", stateZero, C); + return; + } + + bool TaintedD = C.getState()->isTainted(*DV); + if ((stateNotZero && stateZero && TaintedD)) { + reportBug("Division by a tainted value, possibly zero", stateZero, C); + return; + } + + // If we get here, then the denom should not be zero. We abandon the implicit + // zero denom case for now. + C.addTransition(stateNotZero); +} + +void ento::registerDivZeroChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp new file mode 100644 index 0000000..a1f2f3b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp @@ -0,0 +1,67 @@ +//=== FixedAddressChecker.cpp - Fixed address usage checker ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This files defines FixedAddressChecker, a builtin checker that checks for +// assignment of a fixed address to a pointer. +// This check corresponds to CWE-587. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class FixedAddressChecker + : public Checker< check::PreStmt > { + mutable OwningPtr BT; + +public: + void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; +}; +} + +void FixedAddressChecker::checkPreStmt(const BinaryOperator *B, + CheckerContext &C) const { + // Using a fixed address is not portable because that address will probably + // not be valid in all environments or platforms. + + if (B->getOpcode() != BO_Assign) + return; + + QualType T = B->getType(); + if (!T->isPointerType()) + return; + + ProgramStateRef state = C.getState(); + SVal RV = state->getSVal(B->getRHS(), C.getLocationContext()); + + if (!RV.isConstant() || RV.isZeroConstant()) + return; + + if (ExplodedNode *N = C.addTransition()) { + if (!BT) + BT.reset(new BuiltinBug("Use fixed address", + "Using a fixed address is not portable because that " + "address will probably not be valid in all " + "environments or platforms.")); + BugReport *R = new BugReport(*BT, BT->getDescription(), N); + R->addRange(B->getRHS()->getSourceRange()); + C.EmitReport(R); + } +} + +void ento::registerFixedAddressChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp new file mode 100644 index 0000000..135b81d --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -0,0 +1,740 @@ +//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This checker defines the attack surface for generic taint propagation. +// +// The taint information produced by it might be useful to other checkers. For +// example, checkers should report errors which involve tainted data more +// aggressively, even if the involved symbols are under constrained. +// +//===----------------------------------------------------------------------===// +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/Basic/Builtins.h" +#include + +using namespace clang; +using namespace ento; + +namespace { +class GenericTaintChecker : public Checker< check::PostStmt, + check::PreStmt > { +public: + static void *getTag() { static int Tag; return &Tag; } + + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; + + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + +private: + static const unsigned InvalidArgIndex = UINT_MAX; + /// Denotes the return vale. + static const unsigned ReturnValueIndex = UINT_MAX - 1; + + mutable OwningPtr BT; + inline void initBugType() const { + if (!BT) + BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data")); + } + + /// \brief Catch taint related bugs. Check if tainted data is passed to a + /// system call etc. + bool checkPre(const CallExpr *CE, CheckerContext &C) const; + + /// \brief Add taint sources on a pre-visit. + void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; + + /// \brief Propagate taint generated at pre-visit. + bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; + + /// \brief Add taint sources on a post visit. + void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; + + /// Check if the region the expression evaluates to is the standard input, + /// and thus, is tainted. + static bool isStdin(const Expr *E, CheckerContext &C); + + /// \brief Given a pointer argument, get the symbol of the value it contains + /// (points to). + static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); + + /// Functions defining the attack surface. + typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, + CheckerContext &C) const; + ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; + ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; + ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; + + /// Taint the scanned input if the file is tainted. + ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; + + /// Check for CWE-134: Uncontrolled Format String. + static const char MsgUncontrolledFormatString[]; + bool checkUncontrolledFormatString(const CallExpr *CE, + CheckerContext &C) const; + + /// Check for: + /// CERT/STR02-C. "Sanitize data passed to complex subsystems" + /// CWE-78, "Failure to Sanitize Data into an OS Command" + static const char MsgSanitizeSystemArgs[]; + bool checkSystemCall(const CallExpr *CE, StringRef Name, + CheckerContext &C) const; + + /// Check if tainted data is used as a buffer size ins strn.. functions, + /// and allocators. + static const char MsgTaintedBufferSize[]; + bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, + CheckerContext &C) const; + + /// Generate a report if the expression is tainted or points to tainted data. + bool generateReportIfTainted(const Expr *E, const char Msg[], + CheckerContext &C) const; + + + typedef llvm::SmallVector ArgVector; + + /// \brief A struct used to specify taint propagation rules for a function. + /// + /// If any of the possible taint source arguments is tainted, all of the + /// destination arguments should also be tainted. Use InvalidArgIndex in the + /// src list to specify that all of the arguments can introduce taint. Use + /// InvalidArgIndex in the dst arguments to signify that all the non-const + /// pointer and reference arguments might be tainted on return. If + /// ReturnValueIndex is added to the dst list, the return value will be + /// tainted. + struct TaintPropagationRule { + /// List of arguments which can be taint sources and should be checked. + ArgVector SrcArgs; + /// List of arguments which should be tainted on function return. + ArgVector DstArgs; + // TODO: Check if using other data structures would be more optimal. + + TaintPropagationRule() {} + + TaintPropagationRule(unsigned SArg, + unsigned DArg, bool TaintRet = false) { + SrcArgs.push_back(SArg); + DstArgs.push_back(DArg); + if (TaintRet) + DstArgs.push_back(ReturnValueIndex); + } + + TaintPropagationRule(unsigned SArg1, unsigned SArg2, + unsigned DArg, bool TaintRet = false) { + SrcArgs.push_back(SArg1); + SrcArgs.push_back(SArg2); + DstArgs.push_back(DArg); + if (TaintRet) + DstArgs.push_back(ReturnValueIndex); + } + + /// Get the propagation rule for a given function. + static TaintPropagationRule + getTaintPropagationRule(const FunctionDecl *FDecl, + StringRef Name, + CheckerContext &C); + + inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } + inline void addDstArg(unsigned A) { DstArgs.push_back(A); } + + inline bool isNull() const { return SrcArgs.empty(); } + + inline bool isDestinationArgument(unsigned ArgNum) const { + return (std::find(DstArgs.begin(), + DstArgs.end(), ArgNum) != DstArgs.end()); + } + + static inline bool isTaintedOrPointsToTainted(const Expr *E, + ProgramStateRef State, + CheckerContext &C) { + return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || + (E->getType().getTypePtr()->isPointerType() && + State->isTainted(getPointedToSymbol(C, E)))); + } + + /// \brief Pre-process a function which propagates taint according to the + /// taint rule. + ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; + + }; +}; + +const unsigned GenericTaintChecker::ReturnValueIndex; +const unsigned GenericTaintChecker::InvalidArgIndex; + +const char GenericTaintChecker::MsgUncontrolledFormatString[] = + "Untrusted data is used as a format string " + "(CWE-134: Uncontrolled Format String)"; + +const char GenericTaintChecker::MsgSanitizeSystemArgs[] = + "Untrusted data is passed to a system call " + "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + +const char GenericTaintChecker::MsgTaintedBufferSize[] = + "Untrusted data is used to specify the buffer size " + "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " + "character data and the null terminator)"; + +} // end of anonymous namespace + +/// A set which is used to pass information from call pre-visit instruction +/// to the call post-visit. The values are unsigned integers, which are either +/// ReturnValueIndex, or indexes of the pointer/reference argument, which +/// points to data, which should be tainted on return. +namespace { struct TaintArgsOnPostVisit{}; } +namespace clang { namespace ento { +template<> struct ProgramStateTrait + : public ProgramStatePartialTrait > { + static void *GDMIndex() { return GenericTaintChecker::getTag(); } +}; +}} + +GenericTaintChecker::TaintPropagationRule +GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( + const FunctionDecl *FDecl, + StringRef Name, + CheckerContext &C) { + // TODO: Currently, we might loose precision here: we always mark a return + // value as tainted even if it's just a pointer, pointing to tainted data. + + // Check for exact name match for functions without builtin substitutes. + TaintPropagationRule Rule = llvm::StringSwitch(Name) + .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) + .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) + .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) + .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) + .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) + .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) + .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) + .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) + .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) + .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) + .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) + .Case("read", TaintPropagationRule(0, 2, 1, true)) + .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) + .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) + .Case("fgets", TaintPropagationRule(2, 0, true)) + .Case("getline", TaintPropagationRule(2, 0)) + .Case("getdelim", TaintPropagationRule(3, 0)) + .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) + .Default(TaintPropagationRule()); + + if (!Rule.isNull()) + return Rule; + + // Check if it's one of the memory setting/copying functions. + // This check is specialized but faster then calling isCLibraryFunction. + unsigned BId = 0; + if ( (BId = FDecl->getMemoryFunctionKind()) ) + switch(BId) { + case Builtin::BImemcpy: + case Builtin::BImemmove: + case Builtin::BIstrncpy: + case Builtin::BIstrncat: + return TaintPropagationRule(1, 2, 0, true); + case Builtin::BIstrlcpy: + case Builtin::BIstrlcat: + return TaintPropagationRule(1, 2, 0, false); + case Builtin::BIstrndup: + return TaintPropagationRule(0, 1, ReturnValueIndex); + + default: + break; + }; + + // Process all other functions which could be defined as builtins. + if (Rule.isNull()) { + if (C.isCLibraryFunction(FDecl, "snprintf") || + C.isCLibraryFunction(FDecl, "sprintf")) + return TaintPropagationRule(InvalidArgIndex, 0, true); + else if (C.isCLibraryFunction(FDecl, "strcpy") || + C.isCLibraryFunction(FDecl, "stpcpy") || + C.isCLibraryFunction(FDecl, "strcat")) + return TaintPropagationRule(1, 0, true); + else if (C.isCLibraryFunction(FDecl, "bcopy")) + return TaintPropagationRule(0, 2, 1, false); + else if (C.isCLibraryFunction(FDecl, "strdup") || + C.isCLibraryFunction(FDecl, "strdupa")) + return TaintPropagationRule(0, ReturnValueIndex); + else if (C.isCLibraryFunction(FDecl, "wcsdup")) + return TaintPropagationRule(0, ReturnValueIndex); + } + + // Skipping the following functions, since they might be used for cleansing + // or smart memory copy: + // - memccpy - copying untill hitting a special character. + + return TaintPropagationRule(); +} + +void GenericTaintChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + // Check for errors first. + if (checkPre(CE, C)) + return; + + // Add taint second. + addSourcesPre(CE, C); +} + +void GenericTaintChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + if (propagateFromPre(CE, C)) + return; + addSourcesPost(CE, C); +} + +void GenericTaintChecker::addSourcesPre(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = 0; + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + StringRef Name = C.getCalleeName(FDecl); + if (Name.empty()) + return; + + // First, try generating a propagation rule for this function. + TaintPropagationRule Rule = + TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); + if (!Rule.isNull()) { + State = Rule.process(CE, C); + if (!State) + return; + C.addTransition(State); + return; + } + + // Otherwise, check if we have custom pre-processing implemented. + FnCheck evalFunction = llvm::StringSwitch(Name) + .Case("fscanf", &GenericTaintChecker::preFscanf) + .Default(0); + // Check and evaluate the call. + if (evalFunction) + State = (this->*evalFunction)(CE, C); + if (!State) + return; + C.addTransition(State); + +} + +bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Depending on what was tainted at pre-visit, we determined a set of + // arguments which should be tainted after the function returns. These are + // stored in the state as TaintArgsOnPostVisit set. + llvm::ImmutableSet TaintArgs = State->get(); + if (TaintArgs.isEmpty()) + return false; + + for (llvm::ImmutableSet::iterator + I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { + unsigned ArgNum = *I; + + // Special handling for the tainted return value. + if (ArgNum == ReturnValueIndex) { + State = State->addTaint(CE, C.getLocationContext()); + continue; + } + + // The arguments are pointer arguments. The data they are pointing at is + // tainted after the call. + if (CE->getNumArgs() < (ArgNum + 1)) + return false; + const Expr* Arg = CE->getArg(ArgNum); + SymbolRef Sym = getPointedToSymbol(C, Arg); + if (Sym) + State = State->addTaint(Sym); + } + + // Clear up the taint info from the state. + State = State->remove(); + + if (State != C.getState()) { + C.addTransition(State); + return true; + } + return false; +} + +void GenericTaintChecker::addSourcesPost(const CallExpr *CE, + CheckerContext &C) const { + // Define the attack surface. + // Set the evaluation function by switching on the callee name. + StringRef Name = C.getCalleeName(CE); + if (Name.empty()) + return; + FnCheck evalFunction = llvm::StringSwitch(Name) + .Case("scanf", &GenericTaintChecker::postScanf) + // TODO: Add support for vfscanf & family. + .Case("getchar", &GenericTaintChecker::postRetTaint) + .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) + .Case("getenv", &GenericTaintChecker::postRetTaint) + .Case("fopen", &GenericTaintChecker::postRetTaint) + .Case("fdopen", &GenericTaintChecker::postRetTaint) + .Case("freopen", &GenericTaintChecker::postRetTaint) + .Case("getch", &GenericTaintChecker::postRetTaint) + .Case("wgetch", &GenericTaintChecker::postRetTaint) + .Case("socket", &GenericTaintChecker::postSocket) + .Default(0); + + // If the callee isn't defined, it is not of security concern. + // Check and evaluate the call. + ProgramStateRef State = 0; + if (evalFunction) + State = (this->*evalFunction)(CE, C); + if (!State) + return; + + C.addTransition(State); +} + +bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ + + if (checkUncontrolledFormatString(CE, C)) + return true; + + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + StringRef Name = C.getCalleeName(FDecl); + if (Name.empty()) + return false; + + if (checkSystemCall(CE, Name, C)) + return true; + + if (checkTaintedBufferSize(CE, FDecl, C)) + return true; + + return false; +} + +SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, + const Expr* Arg) { + ProgramStateRef State = C.getState(); + SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); + if (AddrVal.isUnknownOrUndef()) + return 0; + + Loc *AddrLoc = dyn_cast(&AddrVal); + if (!AddrLoc) + return 0; + + const PointerType *ArgTy = + dyn_cast(Arg->getType().getCanonicalType().getTypePtr()); + SVal Val = State->getSVal(*AddrLoc, + ArgTy ? ArgTy->getPointeeType(): QualType()); + return Val.getAsSymbol(); +} + +ProgramStateRef +GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Check for taint in arguments. + bool IsTainted = false; + for (ArgVector::const_iterator I = SrcArgs.begin(), + E = SrcArgs.end(); I != E; ++I) { + unsigned ArgNum = *I; + + if (ArgNum == InvalidArgIndex) { + // Check if any of the arguments is tainted, but skip the + // destination arguments. + for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { + if (isDestinationArgument(i)) + continue; + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) + break; + } + break; + } + + if (CE->getNumArgs() < (ArgNum + 1)) + return State; + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) + break; + } + if (!IsTainted) + return State; + + // Mark the arguments which should be tainted after the function returns. + for (ArgVector::const_iterator I = DstArgs.begin(), + E = DstArgs.end(); I != E; ++I) { + unsigned ArgNum = *I; + + // Should we mark all arguments as tainted? + if (ArgNum == InvalidArgIndex) { + // For all pointer and references that were passed in: + // If they are not pointing to const data, mark data as tainted. + // TODO: So far we are just going one level down; ideally we'd need to + // recurse here. + for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { + const Expr *Arg = CE->getArg(i); + // Process pointer argument. + const Type *ArgTy = Arg->getType().getTypePtr(); + QualType PType = ArgTy->getPointeeType(); + if ((!PType.isNull() && !PType.isConstQualified()) + || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) + State = State->add(i); + } + continue; + } + + // Should mark the return value? + if (ArgNum == ReturnValueIndex) { + State = State->add(ReturnValueIndex); + continue; + } + + // Mark the given argument. + assert(ArgNum < CE->getNumArgs()); + State = State->add(ArgNum); + } + + return State; +} + + +// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 +// and arg 1 should get taint. +ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, + CheckerContext &C) const { + assert(CE->getNumArgs() >= 2); + ProgramStateRef State = C.getState(); + + // Check is the file descriptor is tainted. + if (State->isTainted(CE->getArg(0), C.getLocationContext()) || + isStdin(CE->getArg(0), C)) { + // All arguments except for the first two should get taint. + for (unsigned int i = 2; i < CE->getNumArgs(); ++i) + State = State->add(i); + return State; + } + + return 0; +} + + +// If argument 0(protocol domain) is network, the return value should get taint. +ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + if (CE->getNumArgs() < 3) + return State; + + SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); + StringRef DomName = C.getMacroNameOrSpelling(DomLoc); + // White list the internal communication protocols. + if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || + DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) + return State; + State = State->addTaint(CE, C.getLocationContext()); + return State; +} + +ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + if (CE->getNumArgs() < 2) + return State; + + SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); + // All arguments except for the very first one should get taint. + for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { + // The arguments are pointer arguments. The data they are pointing at is + // tainted after the call. + const Expr* Arg = CE->getArg(i); + SymbolRef Sym = getPointedToSymbol(C, Arg); + if (Sym) + State = State->addTaint(Sym); + } + return State; +} + +ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, + CheckerContext &C) const { + return C.getState()->addTaint(CE, C.getLocationContext()); +} + +bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { + ProgramStateRef State = C.getState(); + SVal Val = State->getSVal(E, C.getLocationContext()); + + // stdin is a pointer, so it would be a region. + const MemRegion *MemReg = Val.getAsRegion(); + + // The region should be symbolic, we do not know it's value. + const SymbolicRegion *SymReg = dyn_cast_or_null(MemReg); + if (!SymReg) + return false; + + // Get it's symbol and find the declaration region it's pointing to. + const SymbolRegionValue *Sm =dyn_cast(SymReg->getSymbol()); + if (!Sm) + return false; + const DeclRegion *DeclReg = dyn_cast_or_null(Sm->getRegion()); + if (!DeclReg) + return false; + + // This region corresponds to a declaration, find out if it's a global/extern + // variable named stdin with the proper type. + if (const VarDecl *D = dyn_cast_or_null(DeclReg->getDecl())) { + D = D->getCanonicalDecl(); + if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) + if (const PointerType * PtrTy = + dyn_cast(D->getType().getTypePtr())) + if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) + return true; + } + return false; +} + +static bool getPrintfFormatArgumentNum(const CallExpr *CE, + const CheckerContext &C, + unsigned int &ArgNum) { + // Find if the function contains a format string argument. + // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, + // vsnprintf, syslog, custom annotated functions. + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl) + return false; + for (specific_attr_iterator + i = FDecl->specific_attr_begin(), + e = FDecl->specific_attr_end(); i != e ; ++i) { + + const FormatAttr *Format = *i; + ArgNum = Format->getFormatIdx() - 1; + if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) + return true; + } + + // Or if a function is named setproctitle (this is a heuristic). + if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { + ArgNum = 0; + return true; + } + + return false; +} + +bool GenericTaintChecker::generateReportIfTainted(const Expr *E, + const char Msg[], + CheckerContext &C) const { + assert(E); + + // Check for taint. + ProgramStateRef State = C.getState(); + if (!State->isTainted(getPointedToSymbol(C, E)) && + !State->isTainted(E, C.getLocationContext())) + return false; + + // Generate diagnostic. + if (ExplodedNode *N = C.addTransition()) { + initBugType(); + BugReport *report = new BugReport(*BT, Msg, N); + report->addRange(E->getSourceRange()); + C.EmitReport(report); + return true; + } + return false; +} + +bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, + CheckerContext &C) const{ + // Check if the function contains a format string argument. + unsigned int ArgNum = 0; + if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) + return false; + + // If either the format string content or the pointer itself are tainted, warn. + if (generateReportIfTainted(CE->getArg(ArgNum), + MsgUncontrolledFormatString, C)) + return true; + return false; +} + +bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, + StringRef Name, + CheckerContext &C) const { + // TODO: It might make sense to run this check on demand. In some cases, + // we should check if the environment has been cleansed here. We also might + // need to know if the user was reset before these calls(seteuid). + unsigned ArgNum = llvm::StringSwitch(Name) + .Case("system", 0) + .Case("popen", 0) + .Case("execl", 0) + .Case("execle", 0) + .Case("execlp", 0) + .Case("execv", 0) + .Case("execvp", 0) + .Case("execvP", 0) + .Case("execve", 0) + .Case("dlopen", 0) + .Default(UINT_MAX); + + if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) + return false; + + if (generateReportIfTainted(CE->getArg(ArgNum), + MsgSanitizeSystemArgs, C)) + return true; + + return false; +} + +// TODO: Should this check be a part of the CString checker? +// If yes, should taint be a global setting? +bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, + const FunctionDecl *FDecl, + CheckerContext &C) const { + // If the function has a buffer size argument, set ArgNum. + unsigned ArgNum = InvalidArgIndex; + unsigned BId = 0; + if ( (BId = FDecl->getMemoryFunctionKind()) ) + switch(BId) { + case Builtin::BImemcpy: + case Builtin::BImemmove: + case Builtin::BIstrncpy: + ArgNum = 2; + break; + case Builtin::BIstrndup: + ArgNum = 1; + break; + default: + break; + }; + + if (ArgNum == InvalidArgIndex) { + if (C.isCLibraryFunction(FDecl, "malloc") || + C.isCLibraryFunction(FDecl, "calloc") || + C.isCLibraryFunction(FDecl, "alloca")) + ArgNum = 0; + else if (C.isCLibraryFunction(FDecl, "memccpy")) + ArgNum = 3; + else if (C.isCLibraryFunction(FDecl, "realloc")) + ArgNum = 1; + else if (C.isCLibraryFunction(FDecl, "bcopy")) + ArgNum = 2; + } + + if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && + generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) + return true; + + return false; +} + +void ento::registerGenericTaintChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/IdempotentOperationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IdempotentOperationChecker.cpp new file mode 100644 index 0000000..c08f163 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/IdempotentOperationChecker.cpp @@ -0,0 +1,747 @@ +//==- IdempotentOperationChecker.cpp - Idempotent Operations ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a set of path-sensitive checks for idempotent and/or +// tautological operations. Each potential operation is checked along all paths +// to see if every path results in a pointless operation. +// +-------------------------------------------+ +// |Table of idempotent/tautological operations| +// +-------------------------------------------+ +//+--------------------------------------------------------------------------+ +//|Operator | x op x | x op 1 | 1 op x | x op 0 | 0 op x | x op ~0 | ~0 op x | +//+--------------------------------------------------------------------------+ +// +, += | | | | x | x | | +// -, -= | | | | x | -x | | +// *, *= | | x | x | 0 | 0 | | +// /, /= | 1 | x | | N/A | 0 | | +// &, &= | x | | | 0 | 0 | x | x +// |, |= | x | | | x | x | ~0 | ~0 +// ^, ^= | 0 | | | x | x | | +// <<, <<= | | | | x | 0 | | +// >>, >>= | | | | x | 0 | | +// || | 1 | 1 | 1 | x | x | 1 | 1 +// && | 1 | x | x | 0 | 0 | x | x +// = | x | | | | | | +// == | 1 | | | | | | +// >= | 1 | | | | | | +// <= | 1 | | | | | | +// > | 0 | | | | | | +// < | 0 | | | | | | +// != | 0 | | | | | | +//===----------------------------------------------------------------------===// +// +// Things TODO: +// - Improved error messages +// - Handle mixed assumptions (which assumptions can belong together?) +// - Finer grained false positive control (levels) +// - Handling ~0 values + +#include "ClangSACheckers.h" +#include "clang/Analysis/CFGStmtMap.h" +#include "clang/Analysis/Analyses/PseudoConstantAnalysis.h" +#include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "clang/AST/Stmt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace clang; +using namespace ento; + +namespace { +class IdempotentOperationChecker + : public Checker, + check::PostStmt, + check::EndAnalysis> { +public: + void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; + void checkPostStmt(const BinaryOperator *B, CheckerContext &C) const; + void checkEndAnalysis(ExplodedGraph &G, BugReporter &B,ExprEngine &Eng) const; + +private: + // Our assumption about a particular operation. + enum Assumption { Possible = 0, Impossible, Equal, LHSis1, RHSis1, LHSis0, + RHSis0 }; + + static void UpdateAssumption(Assumption &A, const Assumption &New); + + // False positive reduction methods + static bool isSelfAssign(const Expr *LHS, const Expr *RHS); + static bool isUnused(const Expr *E, AnalysisDeclContext *AC); + static bool isTruncationExtensionAssignment(const Expr *LHS, + const Expr *RHS); + static bool pathWasCompletelyAnalyzed(AnalysisDeclContext *AC, + const CFGBlock *CB, + const CoreEngine &CE); + static bool CanVary(const Expr *Ex, + AnalysisDeclContext *AC); + static bool isConstantOrPseudoConstant(const DeclRefExpr *DR, + AnalysisDeclContext *AC); + static bool containsNonLocalVarDecl(const Stmt *S); + + // Hash table and related data structures + struct BinaryOperatorData { + BinaryOperatorData() : assumption(Possible) {} + + Assumption assumption; + ExplodedNodeSet explodedNodes; // Set of ExplodedNodes that refer to a + // BinaryOperator + }; + typedef llvm::DenseMap + AssumptionMap; + mutable AssumptionMap hash; +}; +} + +void IdempotentOperationChecker::checkPreStmt(const BinaryOperator *B, + CheckerContext &C) const { + // Find or create an entry in the hash for this BinaryOperator instance. + // If we haven't done a lookup before, it will get default initialized to + // 'Possible'. At this stage we do not store the ExplodedNode, as it has not + // been created yet. + BinaryOperatorData &Data = hash[B]; + Assumption &A = Data.assumption; + AnalysisDeclContext *AC = C.getCurrentAnalysisDeclContext(); + + // If we already have visited this node on a path that does not contain an + // idempotent operation, return immediately. + if (A == Impossible) + return; + + // Retrieve both sides of the operator and determine if they can vary (which + // may mean this is a false positive. + const Expr *LHS = B->getLHS(); + const Expr *RHS = B->getRHS(); + + // At this stage we can calculate whether each side contains a false positive + // that applies to all operators. We only need to calculate this the first + // time. + bool LHSContainsFalsePositive = false, RHSContainsFalsePositive = false; + if (A == Possible) { + // An expression contains a false positive if it can't vary, or if it + // contains a known false positive VarDecl. + LHSContainsFalsePositive = !CanVary(LHS, AC) + || containsNonLocalVarDecl(LHS); + RHSContainsFalsePositive = !CanVary(RHS, AC) + || containsNonLocalVarDecl(RHS); + } + + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + SVal LHSVal = state->getSVal(LHS, LCtx); + SVal RHSVal = state->getSVal(RHS, LCtx); + + // If either value is unknown, we can't be 100% sure of all paths. + if (LHSVal.isUnknownOrUndef() || RHSVal.isUnknownOrUndef()) { + A = Impossible; + return; + } + BinaryOperator::Opcode Op = B->getOpcode(); + + // Dereference the LHS SVal if this is an assign operation + switch (Op) { + default: + break; + + // Fall through intentional + case BO_AddAssign: + case BO_SubAssign: + case BO_MulAssign: + case BO_DivAssign: + case BO_AndAssign: + case BO_OrAssign: + case BO_XorAssign: + case BO_ShlAssign: + case BO_ShrAssign: + case BO_Assign: + // Assign statements have one extra level of indirection + if (!isa(LHSVal)) { + A = Impossible; + return; + } + LHSVal = state->getSVal(cast(LHSVal), LHS->getType()); + } + + + // We now check for various cases which result in an idempotent operation. + + // x op x + switch (Op) { + default: + break; // We don't care about any other operators. + + // Fall through intentional + case BO_Assign: + // x Assign x can be used to silence unused variable warnings intentionally. + // If this is a self assignment and the variable is referenced elsewhere, + // and the assignment is not a truncation or extension, then it is a false + // positive. + if (isSelfAssign(LHS, RHS)) { + if (!isUnused(LHS, AC) && !isTruncationExtensionAssignment(LHS, RHS)) { + UpdateAssumption(A, Equal); + return; + } + else { + A = Impossible; + return; + } + } + + case BO_SubAssign: + case BO_DivAssign: + case BO_AndAssign: + case BO_OrAssign: + case BO_XorAssign: + case BO_Sub: + case BO_Div: + case BO_And: + case BO_Or: + case BO_Xor: + case BO_LOr: + case BO_LAnd: + case BO_EQ: + case BO_NE: + if (LHSVal != RHSVal || LHSContainsFalsePositive + || RHSContainsFalsePositive) + break; + UpdateAssumption(A, Equal); + return; + } + + // x op 1 + switch (Op) { + default: + break; // We don't care about any other operators. + + // Fall through intentional + case BO_MulAssign: + case BO_DivAssign: + case BO_Mul: + case BO_Div: + case BO_LOr: + case BO_LAnd: + if (!RHSVal.isConstant(1) || RHSContainsFalsePositive) + break; + UpdateAssumption(A, RHSis1); + return; + } + + // 1 op x + switch (Op) { + default: + break; // We don't care about any other operators. + + // Fall through intentional + case BO_MulAssign: + case BO_Mul: + case BO_LOr: + case BO_LAnd: + if (!LHSVal.isConstant(1) || LHSContainsFalsePositive) + break; + UpdateAssumption(A, LHSis1); + return; + } + + // x op 0 + switch (Op) { + default: + break; // We don't care about any other operators. + + // Fall through intentional + case BO_AddAssign: + case BO_SubAssign: + case BO_MulAssign: + case BO_AndAssign: + case BO_OrAssign: + case BO_XorAssign: + case BO_Add: + case BO_Sub: + case BO_Mul: + case BO_And: + case BO_Or: + case BO_Xor: + case BO_Shl: + case BO_Shr: + case BO_LOr: + case BO_LAnd: + if (!RHSVal.isConstant(0) || RHSContainsFalsePositive) + break; + UpdateAssumption(A, RHSis0); + return; + } + + // 0 op x + switch (Op) { + default: + break; // We don't care about any other operators. + + // Fall through intentional + //case BO_AddAssign: // Common false positive + case BO_SubAssign: // Check only if unsigned + case BO_MulAssign: + case BO_DivAssign: + case BO_AndAssign: + //case BO_OrAssign: // Common false positive + //case BO_XorAssign: // Common false positive + case BO_ShlAssign: + case BO_ShrAssign: + case BO_Add: + case BO_Sub: + case BO_Mul: + case BO_Div: + case BO_And: + case BO_Or: + case BO_Xor: + case BO_Shl: + case BO_Shr: + case BO_LOr: + case BO_LAnd: + if (!LHSVal.isConstant(0) || LHSContainsFalsePositive) + break; + UpdateAssumption(A, LHSis0); + return; + } + + // If we get to this point, there has been a valid use of this operation. + A = Impossible; +} + +// At the post visit stage, the predecessor ExplodedNode will be the +// BinaryOperator that was just created. We use this hook to collect the +// ExplodedNode. +void IdempotentOperationChecker::checkPostStmt(const BinaryOperator *B, + CheckerContext &C) const { + // Add the ExplodedNode we just visited + BinaryOperatorData &Data = hash[B]; + + const Stmt *predStmt + = cast(C.getPredecessor()->getLocation()).getStmt(); + + // Ignore implicit calls to setters. + if (!isa(predStmt)) + return; + + Data.explodedNodes.Add(C.getPredecessor()); +} + +void IdempotentOperationChecker::checkEndAnalysis(ExplodedGraph &G, + BugReporter &BR, + ExprEngine &Eng) const { + BugType *BT = new BugType("Idempotent operation", "Dead code"); + // Iterate over the hash to see if we have any paths with definite + // idempotent operations. + for (AssumptionMap::const_iterator i = hash.begin(); i != hash.end(); ++i) { + // Unpack the hash contents + const BinaryOperatorData &Data = i->second; + const Assumption &A = Data.assumption; + const ExplodedNodeSet &ES = Data.explodedNodes; + + // If there are no nodes accosted with the expression, nothing to report. + // FIXME: This is possible because the checker does part of processing in + // checkPreStmt and part in checkPostStmt. + if (ES.begin() == ES.end()) + continue; + + const BinaryOperator *B = i->first; + + if (A == Impossible) + continue; + + // If the analyzer did not finish, check to see if we can still emit this + // warning + if (Eng.hasWorkRemaining()) { + // If we can trace back + AnalysisDeclContext *AC = (*ES.begin())->getLocationContext() + ->getAnalysisDeclContext(); + if (!pathWasCompletelyAnalyzed(AC, + AC->getCFGStmtMap()->getBlock(B), + Eng.getCoreEngine())) + continue; + } + + // Select the error message and SourceRanges to report. + SmallString<128> buf; + llvm::raw_svector_ostream os(buf); + bool LHSRelevant = false, RHSRelevant = false; + switch (A) { + case Equal: + LHSRelevant = true; + RHSRelevant = true; + if (B->getOpcode() == BO_Assign) + os << "Assigned value is always the same as the existing value"; + else + os << "Both operands to '" << B->getOpcodeStr() + << "' always have the same value"; + break; + case LHSis1: + LHSRelevant = true; + os << "The left operand to '" << B->getOpcodeStr() << "' is always 1"; + break; + case RHSis1: + RHSRelevant = true; + os << "The right operand to '" << B->getOpcodeStr() << "' is always 1"; + break; + case LHSis0: + LHSRelevant = true; + os << "The left operand to '" << B->getOpcodeStr() << "' is always 0"; + break; + case RHSis0: + RHSRelevant = true; + os << "The right operand to '" << B->getOpcodeStr() << "' is always 0"; + break; + case Possible: + llvm_unreachable("Operation was never marked with an assumption"); + case Impossible: + llvm_unreachable(0); + } + + // Add a report for each ExplodedNode + for (ExplodedNodeSet::iterator I = ES.begin(), E = ES.end(); I != E; ++I) { + BugReport *report = new BugReport(*BT, os.str(), *I); + + // Add source ranges and visitor hooks + if (LHSRelevant) { + const Expr *LHS = i->first->getLHS(); + report->addRange(LHS->getSourceRange()); + FindLastStoreBRVisitor::registerStatementVarDecls(*report, LHS); + } + if (RHSRelevant) { + const Expr *RHS = i->first->getRHS(); + report->addRange(i->first->getRHS()->getSourceRange()); + FindLastStoreBRVisitor::registerStatementVarDecls(*report, RHS); + } + + BR.EmitReport(report); + } + } + + hash.clear(); +} + +// Updates the current assumption given the new assumption +inline void IdempotentOperationChecker::UpdateAssumption(Assumption &A, + const Assumption &New) { +// If the assumption is the same, there is nothing to do + if (A == New) + return; + + switch (A) { + // If we don't currently have an assumption, set it + case Possible: + A = New; + return; + + // If we have determined that a valid state happened, ignore the new + // assumption. + case Impossible: + return; + + // Any other case means that we had a different assumption last time. We don't + // currently support mixing assumptions for diagnostic reasons, so we set + // our assumption to be impossible. + default: + A = Impossible; + return; + } +} + +// Check for a statement where a variable is self assigned to possibly avoid an +// unused variable warning. +bool IdempotentOperationChecker::isSelfAssign(const Expr *LHS, const Expr *RHS) { + LHS = LHS->IgnoreParenCasts(); + RHS = RHS->IgnoreParenCasts(); + + const DeclRefExpr *LHS_DR = dyn_cast(LHS); + if (!LHS_DR) + return false; + + const VarDecl *VD = dyn_cast(LHS_DR->getDecl()); + if (!VD) + return false; + + const DeclRefExpr *RHS_DR = dyn_cast(RHS); + if (!RHS_DR) + return false; + + if (VD != RHS_DR->getDecl()) + return false; + + return true; +} + +// Returns true if the Expr points to a VarDecl that is not read anywhere +// outside of self-assignments. +bool IdempotentOperationChecker::isUnused(const Expr *E, + AnalysisDeclContext *AC) { + if (!E) + return false; + + const DeclRefExpr *DR = dyn_cast(E->IgnoreParenCasts()); + if (!DR) + return false; + + const VarDecl *VD = dyn_cast(DR->getDecl()); + if (!VD) + return false; + + if (AC->getPseudoConstantAnalysis()->wasReferenced(VD)) + return false; + + return true; +} + +// Check for self casts truncating/extending a variable +bool IdempotentOperationChecker::isTruncationExtensionAssignment( + const Expr *LHS, + const Expr *RHS) { + + const DeclRefExpr *LHS_DR = dyn_cast(LHS->IgnoreParenCasts()); + if (!LHS_DR) + return false; + + const VarDecl *VD = dyn_cast(LHS_DR->getDecl()); + if (!VD) + return false; + + const DeclRefExpr *RHS_DR = dyn_cast(RHS->IgnoreParenCasts()); + if (!RHS_DR) + return false; + + if (VD != RHS_DR->getDecl()) + return false; + + return dyn_cast(RHS->IgnoreParenLValueCasts()) == NULL; +} + +// Returns false if a path to this block was not completely analyzed, or true +// otherwise. +bool +IdempotentOperationChecker::pathWasCompletelyAnalyzed(AnalysisDeclContext *AC, + const CFGBlock *CB, + const CoreEngine &CE) { + + CFGReverseBlockReachabilityAnalysis *CRA = AC->getCFGReachablityAnalysis(); + + // Test for reachability from any aborted blocks to this block + typedef CoreEngine::BlocksExhausted::const_iterator ExhaustedIterator; + for (ExhaustedIterator I = CE.blocks_exhausted_begin(), + E = CE.blocks_exhausted_end(); I != E; ++I) { + const BlockEdge &BE = I->first; + + // The destination block on the BlockEdge is the first block that was not + // analyzed. If we can reach this block from the aborted block, then this + // block was not completely analyzed. + // + // Also explicitly check if the current block is the destination block. + // While technically reachable, it means we aborted the analysis on + // a path that included that block. + const CFGBlock *destBlock = BE.getDst(); + if (destBlock == CB || CRA->isReachable(destBlock, CB)) + return false; + } + + // Test for reachability from blocks we just gave up on. + typedef CoreEngine::BlocksAborted::const_iterator AbortedIterator; + for (AbortedIterator I = CE.blocks_aborted_begin(), + E = CE.blocks_aborted_end(); I != E; ++I) { + const CFGBlock *destBlock = I->first; + if (destBlock == CB || CRA->isReachable(destBlock, CB)) + return false; + } + + // For the items still on the worklist, see if they are in blocks that + // can eventually reach 'CB'. + class VisitWL : public WorkList::Visitor { + const CFGStmtMap *CBM; + const CFGBlock *TargetBlock; + CFGReverseBlockReachabilityAnalysis &CRA; + public: + VisitWL(const CFGStmtMap *cbm, const CFGBlock *targetBlock, + CFGReverseBlockReachabilityAnalysis &cra) + : CBM(cbm), TargetBlock(targetBlock), CRA(cra) {} + virtual bool visit(const WorkListUnit &U) { + ProgramPoint P = U.getNode()->getLocation(); + const CFGBlock *B = 0; + if (StmtPoint *SP = dyn_cast(&P)) { + B = CBM->getBlock(SP->getStmt()); + } + else if (BlockEdge *BE = dyn_cast(&P)) { + B = BE->getDst(); + } + else if (BlockEntrance *BEnt = dyn_cast(&P)) { + B = BEnt->getBlock(); + } + else if (BlockExit *BExit = dyn_cast(&P)) { + B = BExit->getBlock(); + } + if (!B) + return true; + + return B == TargetBlock || CRA.isReachable(B, TargetBlock); + } + }; + VisitWL visitWL(AC->getCFGStmtMap(), CB, *CRA); + // Were there any items in the worklist that could potentially reach + // this block? + if (CE.getWorkList()->visitItemsInWorkList(visitWL)) + return false; + + // Verify that this block is reachable from the entry block + if (!CRA->isReachable(&AC->getCFG()->getEntry(), CB)) + return false; + + // If we get to this point, there is no connection to the entry block or an + // aborted block. This path is unreachable and we can report the error. + return true; +} + +// Recursive function that determines whether an expression contains any element +// that varies. This could be due to a compile-time constant like sizeof. An +// expression may also involve a variable that behaves like a constant. The +// function returns true if the expression varies, and false otherwise. +bool IdempotentOperationChecker::CanVary(const Expr *Ex, + AnalysisDeclContext *AC) { + // Parentheses and casts are irrelevant here + Ex = Ex->IgnoreParenCasts(); + + if (Ex->getLocStart().isMacroID()) + return false; + + switch (Ex->getStmtClass()) { + // Trivially true cases + case Stmt::ArraySubscriptExprClass: + case Stmt::MemberExprClass: + case Stmt::StmtExprClass: + case Stmt::CallExprClass: + case Stmt::VAArgExprClass: + case Stmt::ShuffleVectorExprClass: + return true; + default: + return true; + + // Trivially false cases + case Stmt::IntegerLiteralClass: + case Stmt::CharacterLiteralClass: + case Stmt::FloatingLiteralClass: + case Stmt::PredefinedExprClass: + case Stmt::ImaginaryLiteralClass: + case Stmt::StringLiteralClass: + case Stmt::OffsetOfExprClass: + case Stmt::CompoundLiteralExprClass: + case Stmt::AddrLabelExprClass: + case Stmt::BinaryTypeTraitExprClass: + case Stmt::GNUNullExprClass: + case Stmt::InitListExprClass: + case Stmt::DesignatedInitExprClass: + case Stmt::BlockExprClass: + return false; + + // Cases requiring custom logic + case Stmt::UnaryExprOrTypeTraitExprClass: { + const UnaryExprOrTypeTraitExpr *SE = + cast(Ex); + if (SE->getKind() != UETT_SizeOf) + return false; + return SE->getTypeOfArgument()->isVariableArrayType(); + } + case Stmt::DeclRefExprClass: + // Check for constants/pseudoconstants + return !isConstantOrPseudoConstant(cast(Ex), AC); + + // The next cases require recursion for subexpressions + case Stmt::BinaryOperatorClass: { + const BinaryOperator *B = cast(Ex); + + // Exclude cases involving pointer arithmetic. These are usually + // false positives. + if (B->getOpcode() == BO_Sub || B->getOpcode() == BO_Add) + if (B->getLHS()->getType()->getAs()) + return false; + + return CanVary(B->getRHS(), AC) + || CanVary(B->getLHS(), AC); + } + case Stmt::UnaryOperatorClass: { + const UnaryOperator *U = cast(Ex); + // Handle trivial case first + switch (U->getOpcode()) { + case UO_Extension: + return false; + default: + return CanVary(U->getSubExpr(), AC); + } + } + case Stmt::ChooseExprClass: + return CanVary(cast(Ex)->getChosenSubExpr( + AC->getASTContext()), AC); + case Stmt::ConditionalOperatorClass: + case Stmt::BinaryConditionalOperatorClass: + return CanVary(cast(Ex)->getCond(), AC); + } +} + +// Returns true if a DeclRefExpr is or behaves like a constant. +bool IdempotentOperationChecker::isConstantOrPseudoConstant( + const DeclRefExpr *DR, + AnalysisDeclContext *AC) { + // Check if the type of the Decl is const-qualified + if (DR->getType().isConstQualified()) + return true; + + // Check for an enum + if (isa(DR->getDecl())) + return true; + + const VarDecl *VD = dyn_cast(DR->getDecl()); + if (!VD) + return true; + + // Check if the Decl behaves like a constant. This check also takes care of + // static variables, which can only change between function calls if they are + // modified in the AST. + PseudoConstantAnalysis *PCA = AC->getPseudoConstantAnalysis(); + if (PCA->isPseudoConstant(VD)) + return true; + + return false; +} + +// Recursively find any substatements containing VarDecl's with storage other +// than local +bool IdempotentOperationChecker::containsNonLocalVarDecl(const Stmt *S) { + const DeclRefExpr *DR = dyn_cast(S); + + if (DR) + if (const VarDecl *VD = dyn_cast(DR->getDecl())) + if (!VD->hasLocalStorage()) + return true; + + for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end(); + ++I) + if (const Stmt *child = *I) + if (containsNonLocalVarDecl(child)) + return true; + + return false; +} + + +void ento::registerIdempotentOperationChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h b/clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h new file mode 100644 index 0000000..e35557f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h @@ -0,0 +1,22 @@ +//==--- InterCheckerAPI.h ---------------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file allows introduction of checker dependencies. It contains APIs for +// inter-checker communications. +//===----------------------------------------------------------------------===// + +#ifndef INTERCHECKERAPI_H_ +#define INTERCHECKERAPI_H_ +namespace clang { +namespace ento { + +/// Register the checker which evaluates CString API calls. +void registerCStringCheckerBasic(CheckerManager &Mgr); + +}} +#endif /* INTERCHECKERAPI_H_ */ diff --git a/clang/lib/StaticAnalyzer/Checkers/IntervalTest.cpp b/clang/lib/StaticAnalyzer/Checkers/IntervalTest.cpp new file mode 100644 index 0000000..2f3e155 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/IntervalTest.cpp @@ -0,0 +1,25 @@ +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/Analysis/Analyses/Interval.h" +#include "clang/Analysis/CallGraph.h" +#include "llvm/Support/Process.h" + +using namespace clang; +using namespace ento; + +namespace { +class IntervalTest: public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (IntervalAnalysis *a = mgr.getAnalysis(D)) { + a->runOnAllBlocks(); + } + } +}; +} + +void ento::registerIntervalTestChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorsChecker.cpp new file mode 100644 index 0000000..b0bac33 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/IteratorsChecker.cpp @@ -0,0 +1,603 @@ +//=== IteratorsChecker.cpp - Check for Invalidated Iterators ------*- C++ -*---- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines IteratorsChecker, a number of small checks for conditions +// leading to invalid iterators being used. +// FIXME: Currently only supports 'vector' and 'deque' +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/DeclTemplate.h" +#include "clang/Basic/SourceManager.h" +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Type.h" +#include "clang/AST/PrettyPrinter.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringSwitch.h" + + +using namespace clang; +using namespace ento; + +// This is the state associated with each iterator which includes both the +// kind of state and the instance used to initialize it. +// FIXME: add location where invalidated for better error reporting. +namespace { +class RefState { + enum Kind { BeginValid, EndValid, Invalid, Undefined, Unknown } K; + const void *VR; + +public: + RefState(Kind k, const void *vr) : K(k), VR(vr) {} + + bool isValid() const { return K == BeginValid || K == EndValid; } + bool isInvalid() const { return K == Invalid; } + bool isUndefined() const { return K == Undefined; } + bool isUnknown() const { return K == Unknown; } + const MemRegion *getMemRegion() const { + if (K == BeginValid || K == EndValid) + return(const MemRegion *)VR; + return 0; + } + const MemberExpr *getMemberExpr() const { + if (K == Invalid) + return(const MemberExpr *)VR; + return 0; + } + + bool operator==(const RefState &X) const { + return K == X.K && VR == X.VR; + } + + static RefState getBeginValid(const MemRegion *vr) { + assert(vr); + return RefState(BeginValid, vr); + } + static RefState getEndValid(const MemRegion *vr) { + assert(vr); + return RefState(EndValid, vr); + } + static RefState getInvalid( const MemberExpr *ME ) { + return RefState(Invalid, ME); + } + static RefState getUndefined( void ) { + return RefState(Undefined, 0); + } + static RefState getUnknown( void ) { + return RefState(Unknown, 0); + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(K); + ID.AddPointer(VR); + } +}; + +enum RefKind { NoKind, VectorKind, VectorIteratorKind }; + +class IteratorsChecker : + public Checker, + check::PreStmt, + check::PreStmt, + check::PreStmt > + { + // Used when parsing iterators and vectors and deques. + BuiltinBug *BT_Invalid, *BT_Undefined, *BT_Incompatible; + +public: + IteratorsChecker() : + BT_Invalid(0), BT_Undefined(0), BT_Incompatible(0) + {} + static void *getTag() { static int tag; return &tag; } + + // Checker entry points. + void checkPreStmt(const CXXOperatorCallExpr *OCE, + CheckerContext &C) const; + + void checkPreStmt(const DeclStmt *DS, + CheckerContext &C) const; + + void checkPreStmt(const CXXMemberCallExpr *MCE, + CheckerContext &C) const; + + void checkPreStmt(const CallExpr *CE, + CheckerContext &C) const; + +private: + ProgramStateRef handleAssign(ProgramStateRef state, + const Expr *lexp, + const Expr *rexp, + const LocationContext *LC) const; + + ProgramStateRef handleAssign(ProgramStateRef state, + const MemRegion *MR, + const Expr *rexp, + const LocationContext *LC) const; + + ProgramStateRef invalidateIterators(ProgramStateRef state, + const MemRegion *MR, + const MemberExpr *ME) const; + + void checkExpr(CheckerContext &C, const Expr *E) const; + + void checkArgs(CheckerContext &C, const CallExpr *CE) const; + + const MemRegion *getRegion(ProgramStateRef state, + const Expr *E, + const LocationContext *LC) const; + + const DeclRefExpr *getDeclRefExpr(const Expr *E) const; +}; + +class IteratorState { +public: + typedef llvm::ImmutableMap EntryMap; +}; +} //end anonymous namespace + +namespace clang { + namespace ento { + template <> + struct ProgramStateTrait + : public ProgramStatePartialTrait { + static void *GDMIndex() { return IteratorsChecker::getTag(); } + }; + } +} + +void ento::registerIteratorsChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} + +// =============================================== +// Utility functions used by visitor functions +// =============================================== + +// check a templated type for std::vector or std::deque +static RefKind getTemplateKind(const NamedDecl *td) { + const DeclContext *dc = td->getDeclContext(); + const NamespaceDecl *nameSpace = dyn_cast(dc); + if (!nameSpace || !isa(nameSpace->getDeclContext()) + || nameSpace->getName() != "std") + return NoKind; + + StringRef name = td->getName(); + return llvm::StringSwitch(name) + .Cases("vector", "deque", VectorKind) + .Default(NoKind); +} + +static RefKind getTemplateKind(const DeclContext *dc) { + if (const ClassTemplateSpecializationDecl *td = + dyn_cast(dc)) + return getTemplateKind(cast(td)); + return NoKind; +} + +static RefKind getTemplateKind(const TypedefType *tdt) { + const TypedefNameDecl *td = tdt->getDecl(); + RefKind parentKind = getTemplateKind(td->getDeclContext()); + if (parentKind == VectorKind) { + return llvm::StringSwitch(td->getName()) + .Cases("iterator", + "const_iterator", + "reverse_iterator", VectorIteratorKind) + .Default(NoKind); + } + return NoKind; +} + +static RefKind getTemplateKind(const TemplateSpecializationType *tsp) { + const TemplateName &tname = tsp->getTemplateName(); + TemplateDecl *td = tname.getAsTemplateDecl(); + if (!td) + return NoKind; + return getTemplateKind(td); +} + +static RefKind getTemplateKind(QualType T) { + if (const TemplateSpecializationType *tsp = + T->getAs()) { + return getTemplateKind(tsp); + } + if (const ElaboratedType *ET = dyn_cast(T)) { + QualType namedType = ET->getNamedType(); + if (const TypedefType *tdt = namedType->getAs()) + return getTemplateKind(tdt); + if (const TemplateSpecializationType *tsp = + namedType->getAs()) { + return getTemplateKind(tsp); + } + } + return NoKind; +} + +// Iterate through our map and invalidate any iterators that were +// initialized fromt the specified instance MemRegion. +ProgramStateRef IteratorsChecker::invalidateIterators(ProgramStateRef state, + const MemRegion *MR, const MemberExpr *ME) const { + IteratorState::EntryMap Map = state->get(); + if (Map.isEmpty()) + return state; + + // Loop over the entries in the current state. + // The key doesn't change, so the map iterators won't change. + for (IteratorState::EntryMap::iterator I = Map.begin(), E = Map.end(); + I != E; ++I) { + RefState RS = I.getData(); + if (RS.getMemRegion() == MR) + state = state->set(I.getKey(), RefState::getInvalid(ME)); + } + + return state; +} + +// Handle assigning to an iterator where we don't have the LValue MemRegion. +ProgramStateRef IteratorsChecker::handleAssign(ProgramStateRef state, + const Expr *lexp, const Expr *rexp, const LocationContext *LC) const { + // Skip the cast if present. + if (const MaterializeTemporaryExpr *M + = dyn_cast(lexp)) + lexp = M->GetTemporaryExpr(); + if (const ImplicitCastExpr *ICE = dyn_cast(lexp)) + lexp = ICE->getSubExpr(); + SVal sv = state->getSVal(lexp, LC); + const MemRegion *MR = sv.getAsRegion(); + if (!MR) + return state; + RefKind kind = getTemplateKind(lexp->getType()); + + // If assigning to a vector, invalidate any iterators currently associated. + if (kind == VectorKind) + return invalidateIterators(state, MR, 0); + + // Make sure that we are assigning to an iterator. + if (getTemplateKind(lexp->getType()) != VectorIteratorKind) + return state; + return handleAssign(state, MR, rexp, LC); +} + +// handle assigning to an iterator +ProgramStateRef IteratorsChecker::handleAssign(ProgramStateRef state, + const MemRegion *MR, const Expr *rexp, const LocationContext *LC) const { + // Assume unknown until we find something definite. + state = state->set(MR, RefState::getUnknown()); + if (const MaterializeTemporaryExpr *M + = dyn_cast(rexp)) + rexp = M->GetTemporaryExpr(); + if (const ImplicitCastExpr *ICE = dyn_cast(rexp)) + rexp = ICE->getSubExpr(); + // Need to handle three cases: MemberCall, copy, copy with addition. + if (const CallExpr *CE = dyn_cast(rexp)) { + // Handle MemberCall. + if (const MemberExpr *ME = dyn_cast(CE->getCallee())) { + const DeclRefExpr *DRE = dyn_cast(ME->getBase()); + if (!DRE) + return state; + // Verify that the type is std::vector. + if (getTemplateKind(DRE->getType()) != VectorKind) + return state; + // Now get the MemRegion associated with the instance. + const VarDecl *VD = dyn_cast(DRE->getDecl()); + if (!VD) + return state; + const MemRegion *IMR = state->getRegion(VD, LC); + if (!IMR) + return state; + // Finally, see if it is one of the calls that will create + // a valid iterator and mark it if so, else mark as Unknown. + StringRef mName = ME->getMemberDecl()->getName(); + + if (llvm::StringSwitch(mName) + .Cases("begin", "insert", "erase", true).Default(false)) { + return state->set(MR, RefState::getBeginValid(IMR)); + } + if (mName == "end") + return state->set(MR, RefState::getEndValid(IMR)); + + return state->set(MR, RefState::getUnknown()); + } + } + // Handle straight copy from another iterator. + if (const DeclRefExpr *DRE = dyn_cast(rexp)) { + if (getTemplateKind(DRE->getType()) != VectorIteratorKind) + return state; + // Now get the MemRegion associated with the instance. + const VarDecl *VD = dyn_cast(DRE->getDecl()); + if (!VD) + return state; + const MemRegion *IMR = state->getRegion(VD, LC); + if (!IMR) + return state; + // Get the RefState of the iterator being copied. + const RefState *RS = state->get(IMR); + if (!RS) + return state; + // Use it to set the state of the LValue. + return state->set(MR, *RS); + } + // If we have operator+ or operator- ... + if (const CXXOperatorCallExpr *OCE = dyn_cast(rexp)) { + OverloadedOperatorKind Kind = OCE->getOperator(); + if (Kind == OO_Plus || Kind == OO_Minus) { + // Check left side of tree for a valid value. + state = handleAssign( state, MR, OCE->getArg(0), LC); + const RefState *RS = state->get(MR); + // If found, return it. + if (!RS->isUnknown()) + return state; + // Otherwise return what we find in the right side. + return handleAssign(state, MR, OCE->getArg(1), LC); + } + } + // Fall through if nothing matched. + return state; +} + +// Iterate through the arguments looking for an Invalid or Undefined iterator. +void IteratorsChecker::checkArgs(CheckerContext &C, const CallExpr *CE) const { + for (CallExpr::const_arg_iterator I = CE->arg_begin(), E = CE->arg_end(); + I != E; ++I) { + checkExpr(C, *I); + } +} + +// Get the DeclRefExpr associated with the expression. +const DeclRefExpr *IteratorsChecker::getDeclRefExpr(const Expr *E) const { + // If it is a CXXConstructExpr, need to get the subexpression. + if (const CXXConstructExpr *CE = dyn_cast(E)) { + if (CE->getNumArgs()== 1) { + CXXConstructorDecl *CD = CE->getConstructor(); + if (CD->isTrivial()) + E = CE->getArg(0); + } + } + if (const MaterializeTemporaryExpr *M = dyn_cast(E)) + E = M->GetTemporaryExpr(); + if (const ImplicitCastExpr *ICE = dyn_cast(E)) + E = ICE->getSubExpr(); + // If it isn't one of our types, don't do anything. + if (getTemplateKind(E->getType()) != VectorIteratorKind) + return NULL; + return dyn_cast(E); +} + +// Get the MemRegion associated with the expresssion. +const MemRegion *IteratorsChecker::getRegion(ProgramStateRef state, + const Expr *E, const LocationContext *LC) const { + const DeclRefExpr *DRE = getDeclRefExpr(E); + if (!DRE) + return NULL; + const VarDecl *VD = dyn_cast(DRE->getDecl()); + if (!VD) + return NULL; + // return the MemRegion associated with the iterator + return state->getRegion(VD, LC); +} + +// Check the expression and if it is an iterator, generate a diagnostic +// if the iterator is not valid. +// FIXME: this method can generate new nodes, and subsequent logic should +// use those nodes. We also cannot create multiple nodes at one ProgramPoint +// with the same tag. +void IteratorsChecker::checkExpr(CheckerContext &C, const Expr *E) const { + ProgramStateRef state = C.getState(); + const MemRegion *MR = getRegion(state, E, C.getLocationContext()); + if (!MR) + return; + + // Get the state associated with the iterator. + const RefState *RS = state->get(MR); + if (!RS) + return; + if (RS->isInvalid()) { + if (ExplodedNode *N = C.addTransition()) { + if (!BT_Invalid) + // FIXME: We are eluding constness here. + const_cast(this)->BT_Invalid = new BuiltinBug(""); + + std::string msg; + const MemberExpr *ME = RS->getMemberExpr(); + if (ME) { + std::string name = ME->getMemberNameInfo().getAsString(); + msg = "Attempt to use an iterator made invalid by call to '" + + name + "'"; + } + else { + msg = "Attempt to use an iterator made invalid by copying another " + "container to its container"; + } + + BugReport *R = new BugReport(*BT_Invalid, msg, N); + R->addRange(getDeclRefExpr(E)->getSourceRange()); + C.EmitReport(R); + } + } + else if (RS->isUndefined()) { + if (ExplodedNode *N = C.addTransition()) { + if (!BT_Undefined) + // FIXME: We are eluding constness here. + const_cast(this)->BT_Undefined = + new BuiltinBug("Use of iterator that is not defined"); + + BugReport *R = new BugReport(*BT_Undefined, + BT_Undefined->getDescription(), N); + R->addRange(getDeclRefExpr(E)->getSourceRange()); + C.EmitReport(R); + } + } +} + +// =============================================== +// Path analysis visitor functions +// =============================================== + +// For a generic Call, just check the args for bad iterators. +void IteratorsChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const{ + + // FIXME: These checks are to currently work around a bug + // in CheckerManager. + if (isa(CE)) + return; + if (isa(CE)) + return; + + checkArgs(C, CE); +} + +// Handle operator calls. First, if it is operator=, check the argument, +// and handle assigning and set target state appropriately. Otherwise, for +// other operators, check the args for bad iterators and handle comparisons. +void IteratorsChecker::checkPreStmt(const CXXOperatorCallExpr *OCE, + CheckerContext &C) const +{ + const LocationContext *LC = C.getLocationContext(); + ProgramStateRef state = C.getState(); + OverloadedOperatorKind Kind = OCE->getOperator(); + if (Kind == OO_Equal) { + checkExpr(C, OCE->getArg(1)); + state = handleAssign(state, OCE->getArg(0), OCE->getArg(1), LC); + C.addTransition(state); + return; + } + else { + checkArgs(C, OCE); + // If it is a compare and both are iterators, ensure that they are for + // the same container. + if (Kind == OO_EqualEqual || Kind == OO_ExclaimEqual || + Kind == OO_Less || Kind == OO_LessEqual || + Kind == OO_Greater || Kind == OO_GreaterEqual) { + const MemRegion *MR0, *MR1; + MR0 = getRegion(state, OCE->getArg(0), LC); + if (!MR0) + return; + MR1 = getRegion(state, OCE->getArg(1), LC); + if (!MR1) + return; + const RefState *RS0, *RS1; + RS0 = state->get(MR0); + if (!RS0) + return; + RS1 = state->get(MR1); + if (!RS1) + return; + if (RS0->getMemRegion() != RS1->getMemRegion()) { + if (ExplodedNode *N = C.addTransition()) { + if (!BT_Incompatible) + const_cast(this)->BT_Incompatible = + new BuiltinBug( + "Cannot compare iterators from different containers"); + + BugReport *R = new BugReport(*BT_Incompatible, + BT_Incompatible->getDescription(), N); + R->addRange(OCE->getSourceRange()); + C.EmitReport(R); + } + } + } + } +} + +// Need to handle DeclStmts to pick up initializing of iterators and to mark +// uninitialized ones as Undefined. +void IteratorsChecker::checkPreStmt(const DeclStmt *DS, + CheckerContext &C) const { + const Decl *D = *DS->decl_begin(); + const VarDecl *VD = dyn_cast(D); + // Only care about iterators. + if (getTemplateKind(VD->getType()) != VectorIteratorKind) + return; + + // Get the MemRegion associated with the iterator and mark it as Undefined. + ProgramStateRef state = C.getState(); + Loc VarLoc = state->getLValue(VD, C.getLocationContext()); + const MemRegion *MR = VarLoc.getAsRegion(); + if (!MR) + return; + state = state->set(MR, RefState::getUndefined()); + + // if there is an initializer, handle marking Valid if a proper initializer + const Expr *InitEx = VD->getInit(); + if (InitEx) { + // FIXME: This is too syntactic. Since 'InitEx' will be analyzed first + // it should resolve to an SVal that we can check for validity + // *semantically* instead of walking through the AST. + if (const CXXConstructExpr *CE = dyn_cast(InitEx)) { + if (CE->getNumArgs() == 1) { + const Expr *E = CE->getArg(0); + if (const MaterializeTemporaryExpr *M + = dyn_cast(E)) + E = M->GetTemporaryExpr(); + if (const ImplicitCastExpr *ICE = dyn_cast(E)) + InitEx = ICE->getSubExpr(); + state = handleAssign(state, MR, InitEx, C.getLocationContext()); + } + } + } + C.addTransition(state); +} + + +namespace { struct CalledReserved {}; } +namespace clang { namespace ento { +template<> struct ProgramStateTrait + : public ProgramStatePartialTrait > { + static void *GDMIndex() { static int index = 0; return &index; } +}; +}} + +// on a member call, first check the args for any bad iterators +// then, check to see if it is a call to a function that will invalidate +// the iterators +void IteratorsChecker::checkPreStmt(const CXXMemberCallExpr *MCE, + CheckerContext &C) const { + // Check the arguments. + checkArgs(C, MCE); + const MemberExpr *ME = dyn_cast(MCE->getCallee()); + if (!ME) + return; + // Make sure we have the right kind of container. + const DeclRefExpr *DRE = dyn_cast(ME->getBase()); + if (!DRE || getTemplateKind(DRE->getType()) != VectorKind) + return; + SVal tsv = C.getState()->getSVal(DRE, C.getLocationContext()); + // Get the MemRegion associated with the container instance. + const MemRegion *MR = tsv.getAsRegion(); + if (!MR) + return; + // If we are calling a function that invalidates iterators, mark them + // appropriately by finding matching instances. + ProgramStateRef state = C.getState(); + StringRef mName = ME->getMemberDecl()->getName(); + if (llvm::StringSwitch(mName) + .Cases("insert", "reserve", "push_back", true) + .Cases("erase", "pop_back", "clear", "resize", true) + .Default(false)) { + // If there was a 'reserve' call, assume iterators are good. + if (!state->contains(MR)) + state = invalidateIterators(state, MR, ME); + } + // Keep track of instances that have called 'reserve' + // note: do this after we invalidate any iterators by calling + // 'reserve' itself. + if (mName == "reserve") + state = state->add(MR); + + if (state != C.getState()) + C.addTransition(state); +} + diff --git a/clang/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp new file mode 100644 index 0000000..757a4ce --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp @@ -0,0 +1,314 @@ +//=== LLVMConventionsChecker.cpp - Check LLVM codebase conventions ---*- C++ -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines LLVMConventionsChecker, a bunch of small little checks +// for checking specific coding conventions in the LLVM/Clang codebase. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// Generic type checking routines. +//===----------------------------------------------------------------------===// + +static bool IsLLVMStringRef(QualType T) { + const RecordType *RT = T->getAs(); + if (!RT) + return false; + + return StringRef(QualType(RT, 0).getAsString()) == + "class StringRef"; +} + +/// Check whether the declaration is semantically inside the top-level +/// namespace named by ns. +static bool InNamespace(const Decl *D, StringRef NS) { + const NamespaceDecl *ND = dyn_cast(D->getDeclContext()); + if (!ND) + return false; + const IdentifierInfo *II = ND->getIdentifier(); + if (!II || !II->getName().equals(NS)) + return false; + return isa(ND->getDeclContext()); +} + +static bool IsStdString(QualType T) { + if (const ElaboratedType *QT = T->getAs()) + T = QT->getNamedType(); + + const TypedefType *TT = T->getAs(); + if (!TT) + return false; + + const TypedefNameDecl *TD = TT->getDecl(); + + if (!InNamespace(TD, "std")) + return false; + + return TD->getName() == "string"; +} + +static bool IsClangType(const RecordDecl *RD) { + return RD->getName() == "Type" && InNamespace(RD, "clang"); +} + +static bool IsClangDecl(const RecordDecl *RD) { + return RD->getName() == "Decl" && InNamespace(RD, "clang"); +} + +static bool IsClangStmt(const RecordDecl *RD) { + return RD->getName() == "Stmt" && InNamespace(RD, "clang"); +} + +static bool IsClangAttr(const RecordDecl *RD) { + return RD->getName() == "Attr" && InNamespace(RD, "clang"); +} + +static bool IsStdVector(QualType T) { + const TemplateSpecializationType *TS = T->getAs(); + if (!TS) + return false; + + TemplateName TM = TS->getTemplateName(); + TemplateDecl *TD = TM.getAsTemplateDecl(); + + if (!TD || !InNamespace(TD, "std")) + return false; + + return TD->getName() == "vector"; +} + +static bool IsSmallVector(QualType T) { + const TemplateSpecializationType *TS = T->getAs(); + if (!TS) + return false; + + TemplateName TM = TS->getTemplateName(); + TemplateDecl *TD = TM.getAsTemplateDecl(); + + if (!TD || !InNamespace(TD, "llvm")) + return false; + + return TD->getName() == "SmallVector"; +} + +//===----------------------------------------------------------------------===// +// CHECK: a StringRef should not be bound to a temporary std::string whose +// lifetime is shorter than the StringRef's. +//===----------------------------------------------------------------------===// + +namespace { +class StringRefCheckerVisitor : public StmtVisitor { + BugReporter &BR; + const Decl *DeclWithIssue; +public: + StringRefCheckerVisitor(const Decl *declWithIssue, BugReporter &br) + : BR(br), DeclWithIssue(declWithIssue) {} + void VisitChildren(Stmt *S) { + for (Stmt::child_iterator I = S->child_begin(), E = S->child_end() ; + I != E; ++I) + if (Stmt *child = *I) + Visit(child); + } + void VisitStmt(Stmt *S) { VisitChildren(S); } + void VisitDeclStmt(DeclStmt *DS); +private: + void VisitVarDecl(VarDecl *VD); +}; +} // end anonymous namespace + +static void CheckStringRefAssignedTemporary(const Decl *D, BugReporter &BR) { + StringRefCheckerVisitor walker(D, BR); + walker.Visit(D->getBody()); +} + +void StringRefCheckerVisitor::VisitDeclStmt(DeclStmt *S) { + VisitChildren(S); + + for (DeclStmt::decl_iterator I = S->decl_begin(), E = S->decl_end();I!=E; ++I) + if (VarDecl *VD = dyn_cast(*I)) + VisitVarDecl(VD); +} + +void StringRefCheckerVisitor::VisitVarDecl(VarDecl *VD) { + Expr *Init = VD->getInit(); + if (!Init) + return; + + // Pattern match for: + // StringRef x = call() (where call returns std::string) + if (!IsLLVMStringRef(VD->getType())) + return; + ExprWithCleanups *Ex1 = dyn_cast(Init); + if (!Ex1) + return; + CXXConstructExpr *Ex2 = dyn_cast(Ex1->getSubExpr()); + if (!Ex2 || Ex2->getNumArgs() != 1) + return; + ImplicitCastExpr *Ex3 = dyn_cast(Ex2->getArg(0)); + if (!Ex3) + return; + CXXConstructExpr *Ex4 = dyn_cast(Ex3->getSubExpr()); + if (!Ex4 || Ex4->getNumArgs() != 1) + return; + ImplicitCastExpr *Ex5 = dyn_cast(Ex4->getArg(0)); + if (!Ex5) + return; + CXXBindTemporaryExpr *Ex6 = dyn_cast(Ex5->getSubExpr()); + if (!Ex6 || !IsStdString(Ex6->getType())) + return; + + // Okay, badness! Report an error. + const char *desc = "StringRef should not be bound to temporary " + "std::string that it outlives"; + PathDiagnosticLocation VDLoc = + PathDiagnosticLocation::createBegin(VD, BR.getSourceManager()); + BR.EmitBasicReport(DeclWithIssue, desc, "LLVM Conventions", desc, + VDLoc, Init->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// CHECK: Clang AST nodes should not have fields that can allocate +// memory. +//===----------------------------------------------------------------------===// + +static bool AllocatesMemory(QualType T) { + return IsStdVector(T) || IsStdString(T) || IsSmallVector(T); +} + +// This type checking could be sped up via dynamic programming. +static bool IsPartOfAST(const CXXRecordDecl *R) { + if (IsClangStmt(R) || IsClangType(R) || IsClangDecl(R) || IsClangAttr(R)) + return true; + + for (CXXRecordDecl::base_class_const_iterator I = R->bases_begin(), + E = R->bases_end(); I!=E; ++I) { + CXXBaseSpecifier BS = *I; + QualType T = BS.getType(); + if (const RecordType *baseT = T->getAs()) { + CXXRecordDecl *baseD = cast(baseT->getDecl()); + if (IsPartOfAST(baseD)) + return true; + } + } + + return false; +} + +namespace { +class ASTFieldVisitor { + SmallVector FieldChain; + const CXXRecordDecl *Root; + BugReporter &BR; +public: + ASTFieldVisitor(const CXXRecordDecl *root, BugReporter &br) + : Root(root), BR(br) {} + + void Visit(FieldDecl *D); + void ReportError(QualType T); +}; +} // end anonymous namespace + +static void CheckASTMemory(const CXXRecordDecl *R, BugReporter &BR) { + if (!IsPartOfAST(R)) + return; + + for (RecordDecl::field_iterator I = R->field_begin(), E = R->field_end(); + I != E; ++I) { + ASTFieldVisitor walker(R, BR); + walker.Visit(*I); + } +} + +void ASTFieldVisitor::Visit(FieldDecl *D) { + FieldChain.push_back(D); + + QualType T = D->getType(); + + if (AllocatesMemory(T)) + ReportError(T); + + if (const RecordType *RT = T->getAs()) { + const RecordDecl *RD = RT->getDecl()->getDefinition(); + for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); + I != E; ++I) + Visit(*I); + } + + FieldChain.pop_back(); +} + +void ASTFieldVisitor::ReportError(QualType T) { + SmallString<1024> buf; + llvm::raw_svector_ostream os(buf); + + os << "AST class '" << Root->getName() << "' has a field '" + << FieldChain.front()->getName() << "' that allocates heap memory"; + if (FieldChain.size() > 1) { + os << " via the following chain: "; + bool isFirst = true; + for (SmallVectorImpl::iterator I=FieldChain.begin(), + E=FieldChain.end(); I!=E; ++I) { + if (!isFirst) + os << '.'; + else + isFirst = false; + os << (*I)->getName(); + } + } + os << " (type " << FieldChain.back()->getType().getAsString() << ")"; + os.flush(); + + // Note that this will fire for every translation unit that uses this + // class. This is suboptimal, but at least scan-build will merge + // duplicate HTML reports. In the future we need a unified way of merging + // duplicate reports across translation units. For C++ classes we cannot + // just report warnings when we see an out-of-line method definition for a + // class, as that heuristic doesn't always work (the complete definition of + // the class may be in the header file, for example). + PathDiagnosticLocation L = PathDiagnosticLocation::createBegin( + FieldChain.front(), BR.getSourceManager()); + BR.EmitBasicReport(Root, "AST node allocates heap memory", "LLVM Conventions", + os.str(), L); +} + +//===----------------------------------------------------------------------===// +// LLVMConventionsChecker +//===----------------------------------------------------------------------===// + +namespace { +class LLVMConventionsChecker : public Checker< + check::ASTDecl, + check::ASTCodeBody > { +public: + void checkASTDecl(const CXXRecordDecl *R, AnalysisManager& mgr, + BugReporter &BR) const { + if (R->isCompleteDefinition()) + CheckASTMemory(R, BR); + } + + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + CheckStringRefAssignedTemporary(D, BR); + } +}; +} + +void ento::registerLLVMConventionsChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp new file mode 100644 index 0000000..cb976e0 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp @@ -0,0 +1,681 @@ +//==--- MacOSKeychainAPIChecker.cpp ------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This checker flags misuses of KeyChainAPI. In particular, the password data +// allocated/returned by SecKeychainItemCopyContent, +// SecKeychainFindGenericPassword, SecKeychainFindInternetPassword functions has +// to be freed using a call to SecKeychainItemFreeContent. +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { +class MacOSKeychainAPIChecker : public Checker, + check::PreStmt, + check::PostStmt, + check::EndPath, + check::DeadSymbols> { + mutable OwningPtr BT; + +public: + /// AllocationState is a part of the checker specific state together with the + /// MemRegion corresponding to the allocated data. + struct AllocationState { + /// The index of the allocator function. + unsigned int AllocatorIdx; + SymbolRef Region; + + AllocationState(const Expr *E, unsigned int Idx, SymbolRef R) : + AllocatorIdx(Idx), + Region(R) {} + + bool operator==(const AllocationState &X) const { + return (AllocatorIdx == X.AllocatorIdx && + Region == X.Region); + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(AllocatorIdx); + ID.AddPointer(Region); + } + }; + + void checkPreStmt(const CallExpr *S, CheckerContext &C) const; + void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; + void checkPostStmt(const CallExpr *S, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; + void checkEndPath(CheckerContext &C) const; + +private: + typedef std::pair AllocationPair; + typedef llvm::SmallVector AllocationPairVec; + + enum APIKind { + /// Denotes functions tracked by this checker. + ValidAPI = 0, + /// The functions commonly/mistakenly used in place of the given API. + ErrorAPI = 1, + /// The functions which may allocate the data. These are tracked to reduce + /// the false alarm rate. + PossibleAPI = 2 + }; + /// Stores the information about the allocator and deallocator functions - + /// these are the functions the checker is tracking. + struct ADFunctionInfo { + const char* Name; + unsigned int Param; + unsigned int DeallocatorIdx; + APIKind Kind; + }; + static const unsigned InvalidIdx = 100000; + static const unsigned FunctionsToTrackSize = 8; + static const ADFunctionInfo FunctionsToTrack[FunctionsToTrackSize]; + /// The value, which represents no error return value for allocator functions. + static const unsigned NoErr = 0; + + /// Given the function name, returns the index of the allocator/deallocator + /// function. + static unsigned getTrackedFunctionIndex(StringRef Name, bool IsAllocator); + + inline void initBugType() const { + if (!BT) + BT.reset(new BugType("Improper use of SecKeychain API", "Mac OS API")); + } + + void generateDeallocatorMismatchReport(const AllocationPair &AP, + const Expr *ArgExpr, + CheckerContext &C) const; + + /// Find the allocation site for Sym on the path leading to the node N. + const Stmt *getAllocationSite(const ExplodedNode *N, SymbolRef Sym, + CheckerContext &C) const; + + BugReport *generateAllocatedDataNotReleasedReport(const AllocationPair &AP, + ExplodedNode *N, + CheckerContext &C) const; + + /// Check if RetSym evaluates to an error value in the current state. + bool definitelyReturnedError(SymbolRef RetSym, + ProgramStateRef State, + SValBuilder &Builder, + bool noError = false) const; + + /// Check if RetSym evaluates to a NoErr value in the current state. + bool definitelyDidnotReturnError(SymbolRef RetSym, + ProgramStateRef State, + SValBuilder &Builder) const { + return definitelyReturnedError(RetSym, State, Builder, true); + } + + /// Mark an AllocationPair interesting for diagnostic reporting. + void markInteresting(BugReport *R, const AllocationPair &AP) const { + R->markInteresting(AP.first); + R->markInteresting(AP.second->Region); + } + + /// The bug visitor which allows us to print extra diagnostics along the + /// BugReport path. For example, showing the allocation site of the leaked + /// region. + class SecKeychainBugVisitor + : public BugReporterVisitorImpl { + protected: + // The allocated region symbol tracked by the main analysis. + SymbolRef Sym; + + public: + SecKeychainBugVisitor(SymbolRef S) : Sym(S) {} + virtual ~SecKeychainBugVisitor() {} + + void Profile(llvm::FoldingSetNodeID &ID) const { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(Sym); + } + + PathDiagnosticPiece *VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR); + }; +}; +} + +/// ProgramState traits to store the currently allocated (and not yet freed) +/// symbols. This is a map from the allocated content symbol to the +/// corresponding AllocationState. +typedef llvm::ImmutableMap AllocatedSetTy; + +namespace { struct AllocatedData {}; } +namespace clang { namespace ento { +template<> struct ProgramStateTrait + : public ProgramStatePartialTrait { + static void *GDMIndex() { static int index = 0; return &index; } +}; +}} + +static bool isEnclosingFunctionParam(const Expr *E) { + E = E->IgnoreParenCasts(); + if (const DeclRefExpr *DRE = dyn_cast(E)) { + const ValueDecl *VD = DRE->getDecl(); + if (isa(VD) || isa(VD)) + return true; + } + return false; +} + +const MacOSKeychainAPIChecker::ADFunctionInfo + MacOSKeychainAPIChecker::FunctionsToTrack[FunctionsToTrackSize] = { + {"SecKeychainItemCopyContent", 4, 3, ValidAPI}, // 0 + {"SecKeychainFindGenericPassword", 6, 3, ValidAPI}, // 1 + {"SecKeychainFindInternetPassword", 13, 3, ValidAPI}, // 2 + {"SecKeychainItemFreeContent", 1, InvalidIdx, ValidAPI}, // 3 + {"SecKeychainItemCopyAttributesAndData", 5, 5, ValidAPI}, // 4 + {"SecKeychainItemFreeAttributesAndData", 1, InvalidIdx, ValidAPI}, // 5 + {"free", 0, InvalidIdx, ErrorAPI}, // 6 + {"CFStringCreateWithBytesNoCopy", 1, InvalidIdx, PossibleAPI}, // 7 +}; + +unsigned MacOSKeychainAPIChecker::getTrackedFunctionIndex(StringRef Name, + bool IsAllocator) { + for (unsigned I = 0; I < FunctionsToTrackSize; ++I) { + ADFunctionInfo FI = FunctionsToTrack[I]; + if (FI.Name != Name) + continue; + // Make sure the function is of the right type (allocator vs deallocator). + if (IsAllocator && (FI.DeallocatorIdx == InvalidIdx)) + return InvalidIdx; + if (!IsAllocator && (FI.DeallocatorIdx != InvalidIdx)) + return InvalidIdx; + + return I; + } + // The function is not tracked. + return InvalidIdx; +} + +static bool isBadDeallocationArgument(const MemRegion *Arg) { + if (!Arg) + return false; + if (isa(Arg) || + isa(Arg) || + isa(Arg)) { + return true; + } + return false; +} + +/// Given the address expression, retrieve the value it's pointing to. Assume +/// that value is itself an address, and return the corresponding symbol. +static SymbolRef getAsPointeeSymbol(const Expr *Expr, + CheckerContext &C) { + ProgramStateRef State = C.getState(); + SVal ArgV = State->getSVal(Expr, C.getLocationContext()); + + if (const loc::MemRegionVal *X = dyn_cast(&ArgV)) { + StoreManager& SM = C.getStoreManager(); + SymbolRef sym = SM.getBinding(State->getStore(), *X).getAsLocSymbol(); + if (sym) + return sym; + } + return 0; +} + +// When checking for error code, we need to consider the following cases: +// 1) noErr / [0] +// 2) someErr / [1, inf] +// 3) unknown +// If noError, returns true iff (1). +// If !noError, returns true iff (2). +bool MacOSKeychainAPIChecker::definitelyReturnedError(SymbolRef RetSym, + ProgramStateRef State, + SValBuilder &Builder, + bool noError) const { + DefinedOrUnknownSVal NoErrVal = Builder.makeIntVal(NoErr, + Builder.getSymbolManager().getType(RetSym)); + DefinedOrUnknownSVal NoErr = Builder.evalEQ(State, NoErrVal, + nonloc::SymbolVal(RetSym)); + ProgramStateRef ErrState = State->assume(NoErr, noError); + if (ErrState == State) { + return true; + } + + return false; +} + +// Report deallocator mismatch. Remove the region from tracking - reporting a +// missing free error after this one is redundant. +void MacOSKeychainAPIChecker:: + generateDeallocatorMismatchReport(const AllocationPair &AP, + const Expr *ArgExpr, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + State = State->remove(AP.first); + ExplodedNode *N = C.addTransition(State); + + if (!N) + return; + initBugType(); + SmallString<80> sbuf; + llvm::raw_svector_ostream os(sbuf); + unsigned int PDeallocIdx = + FunctionsToTrack[AP.second->AllocatorIdx].DeallocatorIdx; + + os << "Deallocator doesn't match the allocator: '" + << FunctionsToTrack[PDeallocIdx].Name << "' should be used."; + BugReport *Report = new BugReport(*BT, os.str(), N); + Report->addVisitor(new SecKeychainBugVisitor(AP.first)); + Report->addRange(ArgExpr->getSourceRange()); + markInteresting(Report, AP); + C.EmitReport(Report); +} + +void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + unsigned idx = InvalidIdx; + ProgramStateRef State = C.getState(); + + StringRef funName = C.getCalleeName(CE); + if (funName.empty()) + return; + + // If it is a call to an allocator function, it could be a double allocation. + idx = getTrackedFunctionIndex(funName, true); + if (idx != InvalidIdx) { + const Expr *ArgExpr = CE->getArg(FunctionsToTrack[idx].Param); + if (SymbolRef V = getAsPointeeSymbol(ArgExpr, C)) + if (const AllocationState *AS = State->get(V)) { + if (!definitelyReturnedError(AS->Region, State, C.getSValBuilder())) { + // Remove the value from the state. The new symbol will be added for + // tracking when the second allocator is processed in checkPostStmt(). + State = State->remove(V); + ExplodedNode *N = C.addTransition(State); + if (!N) + return; + initBugType(); + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + unsigned int DIdx = FunctionsToTrack[AS->AllocatorIdx].DeallocatorIdx; + os << "Allocated data should be released before another call to " + << "the allocator: missing a call to '" + << FunctionsToTrack[DIdx].Name + << "'."; + BugReport *Report = new BugReport(*BT, os.str(), N); + Report->addVisitor(new SecKeychainBugVisitor(V)); + Report->addRange(ArgExpr->getSourceRange()); + Report->markInteresting(AS->Region); + C.EmitReport(Report); + } + } + return; + } + + // Is it a call to one of deallocator functions? + idx = getTrackedFunctionIndex(funName, false); + if (idx == InvalidIdx) + return; + + // Check the argument to the deallocator. + const Expr *ArgExpr = CE->getArg(FunctionsToTrack[idx].Param); + SVal ArgSVal = State->getSVal(ArgExpr, C.getLocationContext()); + + // Undef is reported by another checker. + if (ArgSVal.isUndef()) + return; + + SymbolRef ArgSM = ArgSVal.getAsLocSymbol(); + + // If the argument is coming from the heap, globals, or unknown, do not + // report it. + bool RegionArgIsBad = false; + if (!ArgSM) { + if (!isBadDeallocationArgument(ArgSVal.getAsRegion())) + return; + RegionArgIsBad = true; + } + + // Is the argument to the call being tracked? + const AllocationState *AS = State->get(ArgSM); + if (!AS && FunctionsToTrack[idx].Kind != ValidAPI) { + return; + } + // If trying to free data which has not been allocated yet, report as a bug. + // TODO: We might want a more precise diagnostic for double free + // (that would involve tracking all the freed symbols in the checker state). + if (!AS || RegionArgIsBad) { + // It is possible that this is a false positive - the argument might + // have entered as an enclosing function parameter. + if (isEnclosingFunctionParam(ArgExpr)) + return; + + ExplodedNode *N = C.addTransition(State); + if (!N) + return; + initBugType(); + BugReport *Report = new BugReport(*BT, + "Trying to free data which has not been allocated.", N); + Report->addRange(ArgExpr->getSourceRange()); + if (AS) + Report->markInteresting(AS->Region); + C.EmitReport(Report); + return; + } + + // Process functions which might deallocate. + if (FunctionsToTrack[idx].Kind == PossibleAPI) { + + if (funName == "CFStringCreateWithBytesNoCopy") { + const Expr *DeallocatorExpr = CE->getArg(5)->IgnoreParenCasts(); + // NULL ~ default deallocator, so warn. + if (DeallocatorExpr->isNullPointerConstant(C.getASTContext(), + Expr::NPC_ValueDependentIsNotNull)) { + const AllocationPair AP = std::make_pair(ArgSM, AS); + generateDeallocatorMismatchReport(AP, ArgExpr, C); + return; + } + // One of the default allocators, so warn. + if (const DeclRefExpr *DE = dyn_cast(DeallocatorExpr)) { + StringRef DeallocatorName = DE->getFoundDecl()->getName(); + if (DeallocatorName == "kCFAllocatorDefault" || + DeallocatorName == "kCFAllocatorSystemDefault" || + DeallocatorName == "kCFAllocatorMalloc") { + const AllocationPair AP = std::make_pair(ArgSM, AS); + generateDeallocatorMismatchReport(AP, ArgExpr, C); + return; + } + // If kCFAllocatorNull, which does not deallocate, we still have to + // find the deallocator. Otherwise, assume that the user had written a + // custom deallocator which does the right thing. + if (DE->getFoundDecl()->getName() != "kCFAllocatorNull") { + State = State->remove(ArgSM); + C.addTransition(State); + return; + } + } + } + return; + } + + // The call is deallocating a value we previously allocated, so remove it + // from the next state. + State = State->remove(ArgSM); + + // Check if the proper deallocator is used. + unsigned int PDeallocIdx = FunctionsToTrack[AS->AllocatorIdx].DeallocatorIdx; + if (PDeallocIdx != idx || (FunctionsToTrack[idx].Kind == ErrorAPI)) { + const AllocationPair AP = std::make_pair(ArgSM, AS); + generateDeallocatorMismatchReport(AP, ArgExpr, C); + return; + } + + // If the buffer can be null and the return status can be an error, + // report a bad call to free. + if (State->assume(cast(ArgSVal), false) && + !definitelyDidnotReturnError(AS->Region, State, C.getSValBuilder())) { + ExplodedNode *N = C.addTransition(State); + if (!N) + return; + initBugType(); + BugReport *Report = new BugReport(*BT, + "Only call free if a valid (non-NULL) buffer was returned.", N); + Report->addVisitor(new SecKeychainBugVisitor(ArgSM)); + Report->addRange(ArgExpr->getSourceRange()); + Report->markInteresting(AS->Region); + C.EmitReport(Report); + return; + } + + C.addTransition(State); +} + +void MacOSKeychainAPIChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + StringRef funName = C.getCalleeName(CE); + + // If a value has been allocated, add it to the set for tracking. + unsigned idx = getTrackedFunctionIndex(funName, true); + if (idx == InvalidIdx) + return; + + const Expr *ArgExpr = CE->getArg(FunctionsToTrack[idx].Param); + // If the argument entered as an enclosing function parameter, skip it to + // avoid false positives. + if (isEnclosingFunctionParam(ArgExpr) && + C.getLocationContext()->getParent() == 0) + return; + + if (SymbolRef V = getAsPointeeSymbol(ArgExpr, C)) { + // If the argument points to something that's not a symbolic region, it + // can be: + // - unknown (cannot reason about it) + // - undefined (already reported by other checker) + // - constant (null - should not be tracked, + // other constant will generate a compiler warning) + // - goto (should be reported by other checker) + + // The call return value symbol should stay alive for as long as the + // allocated value symbol, since our diagnostics depend on the value + // returned by the call. Ex: Data should only be freed if noErr was + // returned during allocation.) + SymbolRef RetStatusSymbol = + State->getSVal(CE, C.getLocationContext()).getAsSymbol(); + C.getSymbolManager().addSymbolDependency(V, RetStatusSymbol); + + // Track the allocated value in the checker state. + State = State->set(V, AllocationState(ArgExpr, idx, + RetStatusSymbol)); + assert(State); + C.addTransition(State); + } +} + +void MacOSKeychainAPIChecker::checkPreStmt(const ReturnStmt *S, + CheckerContext &C) const { + const Expr *retExpr = S->getRetValue(); + if (!retExpr) + return; + + // If inside inlined call, skip it. + const LocationContext *LC = C.getLocationContext(); + if (LC->getParent() != 0) + return; + + // Check if the value is escaping through the return. + ProgramStateRef state = C.getState(); + SymbolRef sym = state->getSVal(retExpr, LC).getAsLocSymbol(); + if (!sym) + return; + state = state->remove(sym); + + // Proceed from the new state. + C.addTransition(state); +} + +// TODO: This logic is the same as in Malloc checker. +const Stmt * +MacOSKeychainAPIChecker::getAllocationSite(const ExplodedNode *N, + SymbolRef Sym, + CheckerContext &C) const { + const LocationContext *LeakContext = N->getLocationContext(); + // Walk the ExplodedGraph backwards and find the first node that referred to + // the tracked symbol. + const ExplodedNode *AllocNode = N; + + while (N) { + if (!N->getState()->get(Sym)) + break; + // Allocation node, is the last node in the current context in which the + // symbol was tracked. + if (N->getLocationContext() == LeakContext) + AllocNode = N; + N = N->pred_empty() ? NULL : *(N->pred_begin()); + } + + ProgramPoint P = AllocNode->getLocation(); + if (!isa(P)) + return 0; + return cast(P).getStmt(); +} + +BugReport *MacOSKeychainAPIChecker:: + generateAllocatedDataNotReleasedReport(const AllocationPair &AP, + ExplodedNode *N, + CheckerContext &C) const { + const ADFunctionInfo &FI = FunctionsToTrack[AP.second->AllocatorIdx]; + initBugType(); + SmallString<70> sbuf; + llvm::raw_svector_ostream os(sbuf); + os << "Allocated data is not released: missing a call to '" + << FunctionsToTrack[FI.DeallocatorIdx].Name << "'."; + + // Most bug reports are cached at the location where they occurred. + // With leaks, we want to unique them by the location where they were + // allocated, and only report a single path. + PathDiagnosticLocation LocUsedForUniqueing; + if (const Stmt *AllocStmt = getAllocationSite(N, AP.first, C)) + LocUsedForUniqueing = PathDiagnosticLocation::createBegin(AllocStmt, + C.getSourceManager(), N->getLocationContext()); + + BugReport *Report = new BugReport(*BT, os.str(), N, LocUsedForUniqueing); + Report->addVisitor(new SecKeychainBugVisitor(AP.first)); + markInteresting(Report, AP); + return Report; +} + +void MacOSKeychainAPIChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + AllocatedSetTy ASet = State->get(); + if (ASet.isEmpty()) + return; + + bool Changed = false; + AllocationPairVec Errors; + for (AllocatedSetTy::iterator I = ASet.begin(), E = ASet.end(); I != E; ++I) { + if (SR.isLive(I->first)) + continue; + + Changed = true; + State = State->remove(I->first); + // If the allocated symbol is null or if the allocation call might have + // returned an error, do not report. + if (State->getSymVal(I->first) || + definitelyReturnedError(I->second.Region, State, C.getSValBuilder())) + continue; + Errors.push_back(std::make_pair(I->first, &I->second)); + } + if (!Changed) { + // Generate the new, cleaned up state. + C.addTransition(State); + return; + } + + static SimpleProgramPointTag Tag("MacOSKeychainAPIChecker : DeadSymbolsLeak"); + ExplodedNode *N = C.addTransition(C.getState(), C.getPredecessor(), &Tag); + + // Generate the error reports. + for (AllocationPairVec::iterator I = Errors.begin(), E = Errors.end(); + I != E; ++I) { + C.EmitReport(generateAllocatedDataNotReleasedReport(*I, N, C)); + } + + // Generate the new, cleaned up state. + C.addTransition(State, N); +} + +// TODO: Remove this after we ensure that checkDeadSymbols are always called. +void MacOSKeychainAPIChecker::checkEndPath(CheckerContext &C) const { + ProgramStateRef state = C.getState(); + + // If inside inlined call, skip it. + if (C.getLocationContext()->getParent() != 0) + return; + + AllocatedSetTy AS = state->get(); + if (AS.isEmpty()) + return; + + // Anything which has been allocated but not freed (nor escaped) will be + // found here, so report it. + bool Changed = false; + AllocationPairVec Errors; + for (AllocatedSetTy::iterator I = AS.begin(), E = AS.end(); I != E; ++I ) { + Changed = true; + state = state->remove(I->first); + // If the allocated symbol is null or if error code was returned at + // allocation, do not report. + if (state->getSymVal(I.getKey()) || + definitelyReturnedError(I->second.Region, state, + C.getSValBuilder())) { + continue; + } + Errors.push_back(std::make_pair(I->first, &I->second)); + } + + // If no change, do not generate a new state. + if (!Changed) { + C.addTransition(state); + return; + } + + static SimpleProgramPointTag Tag("MacOSKeychainAPIChecker : EndPathLeak"); + ExplodedNode *N = C.addTransition(C.getState(), C.getPredecessor(), &Tag); + + // Generate the error reports. + for (AllocationPairVec::iterator I = Errors.begin(), E = Errors.end(); + I != E; ++I) { + C.EmitReport(generateAllocatedDataNotReleasedReport(*I, N, C)); + } + + C.addTransition(state, N); +} + + +PathDiagnosticPiece *MacOSKeychainAPIChecker::SecKeychainBugVisitor::VisitNode( + const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) { + const AllocationState *AS = N->getState()->get(Sym); + if (!AS) + return 0; + const AllocationState *ASPrev = PrevN->getState()->get(Sym); + if (ASPrev) + return 0; + + // (!ASPrev && AS) ~ We started tracking symbol in node N, it must be the + // allocation site. + const CallExpr *CE = cast(cast(N->getLocation()) + .getStmt()); + const FunctionDecl *funDecl = CE->getDirectCallee(); + assert(funDecl && "We do not support indirect function calls as of now."); + StringRef funName = funDecl->getName(); + + // Get the expression of the corresponding argument. + unsigned Idx = getTrackedFunctionIndex(funName, true); + assert(Idx != InvalidIdx && "This should be a call to an allocator."); + const Expr *ArgExpr = CE->getArg(FunctionsToTrack[Idx].Param); + PathDiagnosticLocation Pos(ArgExpr, BRC.getSourceManager(), + N->getLocationContext()); + return new PathDiagnosticEventPiece(Pos, "Data is allocated here."); +} + +void ento::registerMacOSKeychainAPIChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp new file mode 100644 index 0000000..cfdb55d --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp @@ -0,0 +1,116 @@ +// MacOSXAPIChecker.h - Checks proper use of various MacOS X APIs --*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines MacOSXAPIChecker, which is an assortment of checks on calls +// to various, widely used Mac OS X functions. +// +// FIXME: What's currently in BasicObjCFoundationChecks.cpp should be migrated +// to here, using the new Checker interface. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class MacOSXAPIChecker : public Checker< check::PreStmt > { + mutable OwningPtr BT_dispatchOnce; + +public: + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + + void CheckDispatchOnce(CheckerContext &C, const CallExpr *CE, + StringRef FName) const; + + typedef void (MacOSXAPIChecker::*SubChecker)(CheckerContext &, + const CallExpr *, + StringRef FName) const; +}; +} //end anonymous namespace + +//===----------------------------------------------------------------------===// +// dispatch_once and dispatch_once_f +//===----------------------------------------------------------------------===// + +void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE, + StringRef FName) const { + if (CE->getNumArgs() < 1) + return; + + // Check if the first argument is stack allocated. If so, issue a warning + // because that's likely to be bad news. + ProgramStateRef state = C.getState(); + const MemRegion *R = + state->getSVal(CE->getArg(0), C.getLocationContext()).getAsRegion(); + if (!R || !isa(R->getMemorySpace())) + return; + + ExplodedNode *N = C.generateSink(state); + if (!N) + return; + + if (!BT_dispatchOnce) + BT_dispatchOnce.reset(new BugType("Improper use of 'dispatch_once'", + "Mac OS X API")); + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + os << "Call to '" << FName << "' uses"; + if (const VarRegion *VR = dyn_cast(R)) + os << " the local variable '" << VR->getDecl()->getName() << '\''; + else + os << " stack allocated memory"; + os << " for the predicate value. Using such transient memory for " + "the predicate is potentially dangerous."; + if (isa(R) && isa(R->getMemorySpace())) + os << " Perhaps you intended to declare the variable as 'static'?"; + + BugReport *report = new BugReport(*BT_dispatchOnce, os.str(), N); + report->addRange(CE->getArg(0)->getSourceRange()); + C.EmitReport(report); +} + +//===----------------------------------------------------------------------===// +// Central dispatch function. +//===----------------------------------------------------------------------===// + +void MacOSXAPIChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + StringRef Name = C.getCalleeName(CE); + if (Name.empty()) + return; + + SubChecker SC = + llvm::StringSwitch(Name) + .Cases("dispatch_once", "dispatch_once_f", + &MacOSXAPIChecker::CheckDispatchOnce) + .Default(NULL); + + if (SC) + (this->*SC)(C, CE, Name); +} + +//===----------------------------------------------------------------------===// +// Registration. +//===----------------------------------------------------------------------===// + +void ento::registerMacOSXAPIChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/Makefile b/clang/lib/StaticAnalyzer/Checkers/Makefile new file mode 100644 index 0000000..2582908 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/Makefile @@ -0,0 +1,24 @@ +##===- clang/lib/Checker/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements analyses built on top of source-level CFGs. +# +##===----------------------------------------------------------------------===## + +CLANG_LEVEL := ../../.. +LIBRARYNAME := clangStaticAnalyzerCheckers + +BUILT_SOURCES = Checkers.inc +TABLEGEN_INC_FILES_COMMON = 1 + +include $(CLANG_LEVEL)/Makefile + +$(ObjDir)/Checkers.inc.tmp : Checkers.td $(PROJ_SRC_DIR)/$(CLANG_LEVEL)/include/clang/StaticAnalyzer/Checkers/CheckerBase.td $(CLANG_TBLGEN) $(ObjDir)/.dir + $(Echo) "Building Clang SA Checkers tables with tblgen" + $(Verb) $(ClangTableGen) -gen-clang-sa-checkers -I $(PROJ_SRC_DIR)/$(CLANG_LEVEL)/include -o $(call SYSPATH, $@) $< diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp new file mode 100644 index 0000000..8bce88a --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -0,0 +1,1463 @@ +//=== MallocChecker.cpp - A malloc/free checker -------------------*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines malloc/free checker, which checks for potential memory +// leaks, double free, and use-after-free problems. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "InterCheckerAPI.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include + +using namespace clang; +using namespace ento; + +namespace { + +class RefState { + enum Kind { AllocateUnchecked, AllocateFailed, Released, Escaped, + Relinquished } K; + const Stmt *S; + +public: + RefState(Kind k, const Stmt *s) : K(k), S(s) {} + + bool isAllocated() const { return K == AllocateUnchecked; } + bool isReleased() const { return K == Released; } + + const Stmt *getStmt() const { return S; } + + bool operator==(const RefState &X) const { + return K == X.K && S == X.S; + } + + static RefState getAllocateUnchecked(const Stmt *s) { + return RefState(AllocateUnchecked, s); + } + static RefState getAllocateFailed() { + return RefState(AllocateFailed, 0); + } + static RefState getReleased(const Stmt *s) { return RefState(Released, s); } + static RefState getEscaped(const Stmt *s) { return RefState(Escaped, s); } + static RefState getRelinquished(const Stmt *s) { + return RefState(Relinquished, s); + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(K); + ID.AddPointer(S); + } +}; + +struct ReallocPair { + SymbolRef ReallocatedSym; + bool IsFreeOnFailure; + ReallocPair(SymbolRef S, bool F) : ReallocatedSym(S), IsFreeOnFailure(F) {} + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(IsFreeOnFailure); + ID.AddPointer(ReallocatedSym); + } + bool operator==(const ReallocPair &X) const { + return ReallocatedSym == X.ReallocatedSym && + IsFreeOnFailure == X.IsFreeOnFailure; + } +}; + +typedef std::pair LeakInfo; + +class MallocChecker : public Checker, + check::PreStmt, + check::PostStmt, + check::PostStmt, + check::Location, + check::Bind, + eval::Assume, + check::RegionChanges> +{ + mutable OwningPtr BT_DoubleFree; + mutable OwningPtr BT_Leak; + mutable OwningPtr BT_UseFree; + mutable OwningPtr BT_BadFree; + mutable IdentifierInfo *II_malloc, *II_free, *II_realloc, *II_calloc, + *II_valloc, *II_reallocf, *II_strndup, *II_strdup; + +public: + MallocChecker() : II_malloc(0), II_free(0), II_realloc(0), II_calloc(0), + II_valloc(0), II_reallocf(0), II_strndup(0), II_strdup(0) {} + + /// In pessimistic mode, the checker assumes that it does not know which + /// functions might free the memory. + struct ChecksFilter { + DefaultBool CMallocPessimistic; + DefaultBool CMallocOptimistic; + }; + + ChecksFilter Filter; + + void checkPreStmt(const CallExpr *S, CheckerContext &C) const; + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; + void checkEndPath(CheckerContext &C) const; + void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; + ProgramStateRef evalAssume(ProgramStateRef state, SVal Cond, + bool Assumption) const; + void checkLocation(SVal l, bool isLoad, const Stmt *S, + CheckerContext &C) const; + void checkBind(SVal location, SVal val, const Stmt*S, + CheckerContext &C) const; + ProgramStateRef + checkRegionChanges(ProgramStateRef state, + const StoreManager::InvalidatedSymbols *invalidated, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const CallOrObjCMessage *Call) const; + bool wantsRegionChangeUpdate(ProgramStateRef state) const { + return true; + } + +private: + void initIdentifierInfo(ASTContext &C) const; + + /// Check if this is one of the functions which can allocate/reallocate memory + /// pointed to by one of its arguments. + bool isMemFunction(const FunctionDecl *FD, ASTContext &C) const; + + static ProgramStateRef MallocMemReturnsAttr(CheckerContext &C, + const CallExpr *CE, + const OwnershipAttr* Att); + static ProgramStateRef MallocMemAux(CheckerContext &C, const CallExpr *CE, + const Expr *SizeEx, SVal Init, + ProgramStateRef state) { + return MallocMemAux(C, CE, + state->getSVal(SizeEx, C.getLocationContext()), + Init, state); + } + + static ProgramStateRef MallocMemAux(CheckerContext &C, const CallExpr *CE, + SVal SizeEx, SVal Init, + ProgramStateRef state); + + /// Update the RefState to reflect the new memory allocation. + static ProgramStateRef MallocUpdateRefState(CheckerContext &C, + const CallExpr *CE, + ProgramStateRef state); + + ProgramStateRef FreeMemAttr(CheckerContext &C, const CallExpr *CE, + const OwnershipAttr* Att) const; + ProgramStateRef FreeMemAux(CheckerContext &C, const CallExpr *CE, + ProgramStateRef state, unsigned Num, + bool Hold) const; + + ProgramStateRef ReallocMem(CheckerContext &C, const CallExpr *CE, + bool FreesMemOnFailure) const; + static ProgramStateRef CallocMem(CheckerContext &C, const CallExpr *CE); + + bool checkEscape(SymbolRef Sym, const Stmt *S, CheckerContext &C) const; + bool checkUseAfterFree(SymbolRef Sym, CheckerContext &C, + const Stmt *S = 0) const; + + /// Check if the function is not known to us. So, for example, we could + /// conservatively assume it can free/reallocate it's pointer arguments. + bool doesNotFreeMemory(const CallOrObjCMessage *Call, + ProgramStateRef State) const; + + static bool SummarizeValue(raw_ostream &os, SVal V); + static bool SummarizeRegion(raw_ostream &os, const MemRegion *MR); + void ReportBadFree(CheckerContext &C, SVal ArgVal, SourceRange range) const; + + /// Find the location of the allocation for Sym on the path leading to the + /// exploded node N. + LeakInfo getAllocationSite(const ExplodedNode *N, SymbolRef Sym, + CheckerContext &C) const; + + void reportLeak(SymbolRef Sym, ExplodedNode *N, CheckerContext &C) const; + + /// The bug visitor which allows us to print extra diagnostics along the + /// BugReport path. For example, showing the allocation site of the leaked + /// region. + class MallocBugVisitor : public BugReporterVisitorImpl { + protected: + enum NotificationMode { + Normal, + ReallocationFailed + }; + + // The allocated region symbol tracked by the main analysis. + SymbolRef Sym; + + // The mode we are in, i.e. what kind of diagnostics will be emitted. + NotificationMode Mode; + + // A symbol from when the primary region should have been reallocated. + SymbolRef FailedReallocSymbol; + + public: + MallocBugVisitor(SymbolRef S) + : Sym(S), Mode(Normal), FailedReallocSymbol(0) {} + + virtual ~MallocBugVisitor() {} + + void Profile(llvm::FoldingSetNodeID &ID) const { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(Sym); + } + + inline bool isAllocated(const RefState *S, const RefState *SPrev, + const Stmt *Stmt) { + // Did not track -> allocated. Other state (released) -> allocated. + return (Stmt && isa(Stmt) && + (S && S->isAllocated()) && (!SPrev || !SPrev->isAllocated())); + } + + inline bool isReleased(const RefState *S, const RefState *SPrev, + const Stmt *Stmt) { + // Did not track -> released. Other state (allocated) -> released. + return (Stmt && isa(Stmt) && + (S && S->isReleased()) && (!SPrev || !SPrev->isReleased())); + } + + inline bool isReallocFailedCheck(const RefState *S, const RefState *SPrev, + const Stmt *Stmt) { + // If the expression is not a call, and the state change is + // released -> allocated, it must be the realloc return value + // check. If we have to handle more cases here, it might be cleaner just + // to track this extra bit in the state itself. + return ((!Stmt || !isa(Stmt)) && + (S && S->isAllocated()) && (SPrev && !SPrev->isAllocated())); + } + + PathDiagnosticPiece *VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR); + private: + class StackHintGeneratorForReallocationFailed + : public StackHintGeneratorForSymbol { + public: + StackHintGeneratorForReallocationFailed(SymbolRef S, StringRef M) + : StackHintGeneratorForSymbol(S, M) {} + + virtual std::string getMessageForArg(const Expr *ArgE, unsigned ArgIndex) { + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + + os << "Reallocation of "; + // Printed parameters start at 1, not 0. + printOrdinal(++ArgIndex, os); + os << " parameter failed"; + + return os.str(); + } + + virtual std::string getMessageForReturn(const CallExpr *CallExpr) { + return "Reallocation of returned value failed"; + } + }; + }; +}; +} // end anonymous namespace + +typedef llvm::ImmutableMap RegionStateTy; +typedef llvm::ImmutableMap ReallocMap; +class RegionState {}; +class ReallocPairs {}; +namespace clang { +namespace ento { + template <> + struct ProgramStateTrait + : public ProgramStatePartialTrait { + static void *GDMIndex() { static int x; return &x; } + }; + + template <> + struct ProgramStateTrait + : public ProgramStatePartialTrait { + static void *GDMIndex() { static int x; return &x; } + }; +} +} + +namespace { +class StopTrackingCallback : public SymbolVisitor { + ProgramStateRef state; +public: + StopTrackingCallback(ProgramStateRef st) : state(st) {} + ProgramStateRef getState() const { return state; } + + bool VisitSymbol(SymbolRef sym) { + state = state->remove(sym); + return true; + } +}; +} // end anonymous namespace + +void MallocChecker::initIdentifierInfo(ASTContext &Ctx) const { + if (!II_malloc) + II_malloc = &Ctx.Idents.get("malloc"); + if (!II_free) + II_free = &Ctx.Idents.get("free"); + if (!II_realloc) + II_realloc = &Ctx.Idents.get("realloc"); + if (!II_reallocf) + II_reallocf = &Ctx.Idents.get("reallocf"); + if (!II_calloc) + II_calloc = &Ctx.Idents.get("calloc"); + if (!II_valloc) + II_valloc = &Ctx.Idents.get("valloc"); + if (!II_strdup) + II_strdup = &Ctx.Idents.get("strdup"); + if (!II_strndup) + II_strndup = &Ctx.Idents.get("strndup"); +} + +bool MallocChecker::isMemFunction(const FunctionDecl *FD, ASTContext &C) const { + if (!FD) + return false; + IdentifierInfo *FunI = FD->getIdentifier(); + if (!FunI) + return false; + + initIdentifierInfo(C); + + if (FunI == II_malloc || FunI == II_free || FunI == II_realloc || + FunI == II_reallocf || FunI == II_calloc || FunI == II_valloc || + FunI == II_strdup || FunI == II_strndup) + return true; + + if (Filter.CMallocOptimistic && FD->hasAttrs() && + FD->specific_attr_begin() != + FD->specific_attr_end()) + return true; + + + return false; +} + +void MallocChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const { + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return; + + initIdentifierInfo(C.getASTContext()); + IdentifierInfo *FunI = FD->getIdentifier(); + if (!FunI) + return; + + ProgramStateRef State = C.getState(); + if (FunI == II_malloc || FunI == II_valloc) { + if (CE->getNumArgs() < 1) + return; + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State); + } else if (FunI == II_realloc) { + State = ReallocMem(C, CE, false); + } else if (FunI == II_reallocf) { + State = ReallocMem(C, CE, true); + } else if (FunI == II_calloc) { + State = CallocMem(C, CE); + } else if (FunI == II_free) { + State = FreeMemAux(C, CE, C.getState(), 0, false); + } else if (FunI == II_strdup) { + State = MallocUpdateRefState(C, CE, State); + } else if (FunI == II_strndup) { + State = MallocUpdateRefState(C, CE, State); + } else if (Filter.CMallocOptimistic) { + // Check all the attributes, if there are any. + // There can be multiple of these attributes. + if (FD->hasAttrs()) + for (specific_attr_iterator + i = FD->specific_attr_begin(), + e = FD->specific_attr_end(); + i != e; ++i) { + switch ((*i)->getOwnKind()) { + case OwnershipAttr::Returns: + State = MallocMemReturnsAttr(C, CE, *i); + break; + case OwnershipAttr::Takes: + case OwnershipAttr::Holds: + State = FreeMemAttr(C, CE, *i); + break; + } + } + } + C.addTransition(State); +} + +ProgramStateRef MallocChecker::MallocMemReturnsAttr(CheckerContext &C, + const CallExpr *CE, + const OwnershipAttr* Att) { + if (Att->getModule() != "malloc") + return 0; + + OwnershipAttr::args_iterator I = Att->args_begin(), E = Att->args_end(); + if (I != E) { + return MallocMemAux(C, CE, CE->getArg(*I), UndefinedVal(), C.getState()); + } + return MallocMemAux(C, CE, UnknownVal(), UndefinedVal(), C.getState()); +} + +ProgramStateRef MallocChecker::MallocMemAux(CheckerContext &C, + const CallExpr *CE, + SVal Size, SVal Init, + ProgramStateRef state) { + // Get the return value. + SVal retVal = state->getSVal(CE, C.getLocationContext()); + + // We expect the malloc functions to return a pointer. + if (!isa(retVal)) + return 0; + + // Fill the region with the initialization value. + state = state->bindDefault(retVal, Init); + + // Set the region's extent equal to the Size parameter. + const SymbolicRegion *R = + dyn_cast_or_null(retVal.getAsRegion()); + if (!R) + return 0; + if (isa(Size)) { + SValBuilder &svalBuilder = C.getSValBuilder(); + DefinedOrUnknownSVal Extent = R->getExtent(svalBuilder); + DefinedOrUnknownSVal DefinedSize = cast(Size); + DefinedOrUnknownSVal extentMatchesSize = + svalBuilder.evalEQ(state, Extent, DefinedSize); + + state = state->assume(extentMatchesSize, true); + assert(state); + } + + return MallocUpdateRefState(C, CE, state); +} + +ProgramStateRef MallocChecker::MallocUpdateRefState(CheckerContext &C, + const CallExpr *CE, + ProgramStateRef state) { + // Get the return value. + SVal retVal = state->getSVal(CE, C.getLocationContext()); + + // We expect the malloc functions to return a pointer. + if (!isa(retVal)) + return 0; + + SymbolRef Sym = retVal.getAsLocSymbol(); + assert(Sym); + + // Set the symbol's state to Allocated. + return state->set(Sym, RefState::getAllocateUnchecked(CE)); + +} + +ProgramStateRef MallocChecker::FreeMemAttr(CheckerContext &C, + const CallExpr *CE, + const OwnershipAttr* Att) const { + if (Att->getModule() != "malloc") + return 0; + + ProgramStateRef State = C.getState(); + + for (OwnershipAttr::args_iterator I = Att->args_begin(), E = Att->args_end(); + I != E; ++I) { + ProgramStateRef StateI = FreeMemAux(C, CE, State, *I, + Att->getOwnKind() == OwnershipAttr::Holds); + if (StateI) + State = StateI; + } + return State; +} + +ProgramStateRef MallocChecker::FreeMemAux(CheckerContext &C, + const CallExpr *CE, + ProgramStateRef state, + unsigned Num, + bool Hold) const { + if (CE->getNumArgs() < (Num + 1)) + return 0; + + const Expr *ArgExpr = CE->getArg(Num); + SVal ArgVal = state->getSVal(ArgExpr, C.getLocationContext()); + if (!isa(ArgVal)) + return 0; + DefinedOrUnknownSVal location = cast(ArgVal); + + // Check for null dereferences. + if (!isa(location)) + return 0; + + // The explicit NULL case, no operation is performed. + ProgramStateRef notNullState, nullState; + llvm::tie(notNullState, nullState) = state->assume(location); + if (nullState && !notNullState) + return 0; + + // Unknown values could easily be okay + // Undefined values are handled elsewhere + if (ArgVal.isUnknownOrUndef()) + return 0; + + const MemRegion *R = ArgVal.getAsRegion(); + + // Nonlocs can't be freed, of course. + // Non-region locations (labels and fixed addresses) also shouldn't be freed. + if (!R) { + ReportBadFree(C, ArgVal, ArgExpr->getSourceRange()); + return 0; + } + + R = R->StripCasts(); + + // Blocks might show up as heap data, but should not be free()d + if (isa(R)) { + ReportBadFree(C, ArgVal, ArgExpr->getSourceRange()); + return 0; + } + + const MemSpaceRegion *MS = R->getMemorySpace(); + + // Parameters, locals, statics, and globals shouldn't be freed. + if (!(isa(MS) || isa(MS))) { + // FIXME: at the time this code was written, malloc() regions were + // represented by conjured symbols, which are all in UnknownSpaceRegion. + // This means that there isn't actually anything from HeapSpaceRegion + // that should be freed, even though we allow it here. + // Of course, free() can work on memory allocated outside the current + // function, so UnknownSpaceRegion is always a possibility. + // False negatives are better than false positives. + + ReportBadFree(C, ArgVal, ArgExpr->getSourceRange()); + return 0; + } + + const SymbolicRegion *SR = dyn_cast(R); + // Various cases could lead to non-symbol values here. + // For now, ignore them. + if (!SR) + return 0; + + SymbolRef Sym = SR->getSymbol(); + const RefState *RS = state->get(Sym); + + // If the symbol has not been tracked, return. This is possible when free() is + // called on a pointer that does not get its pointee directly from malloc(). + // Full support of this requires inter-procedural analysis. + if (!RS) + return 0; + + // Check double free. + if (RS->isReleased()) { + if (ExplodedNode *N = C.generateSink()) { + if (!BT_DoubleFree) + BT_DoubleFree.reset( + new BugType("Double free", "Memory Error")); + BugReport *R = new BugReport(*BT_DoubleFree, + "Attempt to free released memory", N); + R->addRange(ArgExpr->getSourceRange()); + R->markInteresting(Sym); + R->addVisitor(new MallocBugVisitor(Sym)); + C.EmitReport(R); + } + return 0; + } + + // Normal free. + if (Hold) + return state->set(Sym, RefState::getRelinquished(CE)); + return state->set(Sym, RefState::getReleased(CE)); +} + +bool MallocChecker::SummarizeValue(raw_ostream &os, SVal V) { + if (nonloc::ConcreteInt *IntVal = dyn_cast(&V)) + os << "an integer (" << IntVal->getValue() << ")"; + else if (loc::ConcreteInt *ConstAddr = dyn_cast(&V)) + os << "a constant address (" << ConstAddr->getValue() << ")"; + else if (loc::GotoLabel *Label = dyn_cast(&V)) + os << "the address of the label '" << Label->getLabel()->getName() << "'"; + else + return false; + + return true; +} + +bool MallocChecker::SummarizeRegion(raw_ostream &os, + const MemRegion *MR) { + switch (MR->getKind()) { + case MemRegion::FunctionTextRegionKind: { + const FunctionDecl *FD = cast(MR)->getDecl(); + if (FD) + os << "the address of the function '" << *FD << '\''; + else + os << "the address of a function"; + return true; + } + case MemRegion::BlockTextRegionKind: + os << "block text"; + return true; + case MemRegion::BlockDataRegionKind: + // FIXME: where the block came from? + os << "a block"; + return true; + default: { + const MemSpaceRegion *MS = MR->getMemorySpace(); + + if (isa(MS)) { + const VarRegion *VR = dyn_cast(MR); + const VarDecl *VD; + if (VR) + VD = VR->getDecl(); + else + VD = NULL; + + if (VD) + os << "the address of the local variable '" << VD->getName() << "'"; + else + os << "the address of a local stack variable"; + return true; + } + + if (isa(MS)) { + const VarRegion *VR = dyn_cast(MR); + const VarDecl *VD; + if (VR) + VD = VR->getDecl(); + else + VD = NULL; + + if (VD) + os << "the address of the parameter '" << VD->getName() << "'"; + else + os << "the address of a parameter"; + return true; + } + + if (isa(MS)) { + const VarRegion *VR = dyn_cast(MR); + const VarDecl *VD; + if (VR) + VD = VR->getDecl(); + else + VD = NULL; + + if (VD) { + if (VD->isStaticLocal()) + os << "the address of the static variable '" << VD->getName() << "'"; + else + os << "the address of the global variable '" << VD->getName() << "'"; + } else + os << "the address of a global variable"; + return true; + } + + return false; + } + } +} + +void MallocChecker::ReportBadFree(CheckerContext &C, SVal ArgVal, + SourceRange range) const { + if (ExplodedNode *N = C.generateSink()) { + if (!BT_BadFree) + BT_BadFree.reset(new BugType("Bad free", "Memory Error")); + + SmallString<100> buf; + llvm::raw_svector_ostream os(buf); + + const MemRegion *MR = ArgVal.getAsRegion(); + if (MR) { + while (const ElementRegion *ER = dyn_cast(MR)) + MR = ER->getSuperRegion(); + + // Special case for alloca() + if (isa(MR)) + os << "Argument to free() was allocated by alloca(), not malloc()"; + else { + os << "Argument to free() is "; + if (SummarizeRegion(os, MR)) + os << ", which is not memory allocated by malloc()"; + else + os << "not memory allocated by malloc()"; + } + } else { + os << "Argument to free() is "; + if (SummarizeValue(os, ArgVal)) + os << ", which is not memory allocated by malloc()"; + else + os << "not memory allocated by malloc()"; + } + + BugReport *R = new BugReport(*BT_BadFree, os.str(), N); + R->markInteresting(MR); + R->addRange(range); + C.EmitReport(R); + } +} + +ProgramStateRef MallocChecker::ReallocMem(CheckerContext &C, + const CallExpr *CE, + bool FreesOnFail) const { + if (CE->getNumArgs() < 2) + return 0; + + ProgramStateRef state = C.getState(); + const Expr *arg0Expr = CE->getArg(0); + const LocationContext *LCtx = C.getLocationContext(); + SVal Arg0Val = state->getSVal(arg0Expr, LCtx); + if (!isa(Arg0Val)) + return 0; + DefinedOrUnknownSVal arg0Val = cast(Arg0Val); + + SValBuilder &svalBuilder = C.getSValBuilder(); + + DefinedOrUnknownSVal PtrEQ = + svalBuilder.evalEQ(state, arg0Val, svalBuilder.makeNull()); + + // Get the size argument. If there is no size arg then give up. + const Expr *Arg1 = CE->getArg(1); + if (!Arg1) + return 0; + + // Get the value of the size argument. + SVal Arg1ValG = state->getSVal(Arg1, LCtx); + if (!isa(Arg1ValG)) + return 0; + DefinedOrUnknownSVal Arg1Val = cast(Arg1ValG); + + // Compare the size argument to 0. + DefinedOrUnknownSVal SizeZero = + svalBuilder.evalEQ(state, Arg1Val, + svalBuilder.makeIntValWithPtrWidth(0, false)); + + ProgramStateRef StatePtrIsNull, StatePtrNotNull; + llvm::tie(StatePtrIsNull, StatePtrNotNull) = state->assume(PtrEQ); + ProgramStateRef StateSizeIsZero, StateSizeNotZero; + llvm::tie(StateSizeIsZero, StateSizeNotZero) = state->assume(SizeZero); + // We only assume exceptional states if they are definitely true; if the + // state is under-constrained, assume regular realloc behavior. + bool PrtIsNull = StatePtrIsNull && !StatePtrNotNull; + bool SizeIsZero = StateSizeIsZero && !StateSizeNotZero; + + // If the ptr is NULL and the size is not 0, the call is equivalent to + // malloc(size). + if ( PrtIsNull && !SizeIsZero) { + ProgramStateRef stateMalloc = MallocMemAux(C, CE, CE->getArg(1), + UndefinedVal(), StatePtrIsNull); + return stateMalloc; + } + + if (PrtIsNull && SizeIsZero) + return 0; + + // Get the from and to pointer symbols as in toPtr = realloc(fromPtr, size). + assert(!PrtIsNull); + SymbolRef FromPtr = arg0Val.getAsSymbol(); + SVal RetVal = state->getSVal(CE, LCtx); + SymbolRef ToPtr = RetVal.getAsSymbol(); + if (!FromPtr || !ToPtr) + return 0; + + // If the size is 0, free the memory. + if (SizeIsZero) + if (ProgramStateRef stateFree = FreeMemAux(C, CE, StateSizeIsZero,0,false)){ + // The semantics of the return value are: + // If size was equal to 0, either NULL or a pointer suitable to be passed + // to free() is returned. + stateFree = stateFree->set(ToPtr, + ReallocPair(FromPtr, FreesOnFail)); + C.getSymbolManager().addSymbolDependency(ToPtr, FromPtr); + return stateFree; + } + + // Default behavior. + if (ProgramStateRef stateFree = FreeMemAux(C, CE, state, 0, false)) { + // FIXME: We should copy the content of the original buffer. + ProgramStateRef stateRealloc = MallocMemAux(C, CE, CE->getArg(1), + UnknownVal(), stateFree); + if (!stateRealloc) + return 0; + stateRealloc = stateRealloc->set(ToPtr, + ReallocPair(FromPtr, FreesOnFail)); + C.getSymbolManager().addSymbolDependency(ToPtr, FromPtr); + return stateRealloc; + } + return 0; +} + +ProgramStateRef MallocChecker::CallocMem(CheckerContext &C, const CallExpr *CE){ + if (CE->getNumArgs() < 2) + return 0; + + ProgramStateRef state = C.getState(); + SValBuilder &svalBuilder = C.getSValBuilder(); + const LocationContext *LCtx = C.getLocationContext(); + SVal count = state->getSVal(CE->getArg(0), LCtx); + SVal elementSize = state->getSVal(CE->getArg(1), LCtx); + SVal TotalSize = svalBuilder.evalBinOp(state, BO_Mul, count, elementSize, + svalBuilder.getContext().getSizeType()); + SVal zeroVal = svalBuilder.makeZeroVal(svalBuilder.getContext().CharTy); + + return MallocMemAux(C, CE, TotalSize, zeroVal, state); +} + +LeakInfo +MallocChecker::getAllocationSite(const ExplodedNode *N, SymbolRef Sym, + CheckerContext &C) const { + const LocationContext *LeakContext = N->getLocationContext(); + // Walk the ExplodedGraph backwards and find the first node that referred to + // the tracked symbol. + const ExplodedNode *AllocNode = N; + const MemRegion *ReferenceRegion = 0; + + while (N) { + ProgramStateRef State = N->getState(); + if (!State->get(Sym)) + break; + + // Find the most recent expression bound to the symbol in the current + // context. + if (!ReferenceRegion) { + if (const MemRegion *MR = C.getLocationRegionIfPostStore(N)) { + SVal Val = State->getSVal(MR); + if (Val.getAsLocSymbol() == Sym) + ReferenceRegion = MR; + } + } + + // Allocation node, is the last node in the current context in which the + // symbol was tracked. + if (N->getLocationContext() == LeakContext) + AllocNode = N; + N = N->pred_empty() ? NULL : *(N->pred_begin()); + } + + ProgramPoint P = AllocNode->getLocation(); + const Stmt *AllocationStmt = 0; + if (isa(P)) + AllocationStmt = cast(P).getStmt(); + + return LeakInfo(AllocationStmt, ReferenceRegion); +} + +void MallocChecker::reportLeak(SymbolRef Sym, ExplodedNode *N, + CheckerContext &C) const { + assert(N); + if (!BT_Leak) { + BT_Leak.reset(new BugType("Memory leak", "Memory Error")); + // Leaks should not be reported if they are post-dominated by a sink: + // (1) Sinks are higher importance bugs. + // (2) NoReturnFunctionChecker uses sink nodes to represent paths ending + // with __noreturn functions such as assert() or exit(). We choose not + // to report leaks on such paths. + BT_Leak->setSuppressOnSink(true); + } + + // Most bug reports are cached at the location where they occurred. + // With leaks, we want to unique them by the location where they were + // allocated, and only report a single path. + PathDiagnosticLocation LocUsedForUniqueing; + const Stmt *AllocStmt = 0; + const MemRegion *Region = 0; + llvm::tie(AllocStmt, Region) = getAllocationSite(N, Sym, C); + if (AllocStmt) + LocUsedForUniqueing = PathDiagnosticLocation::createBegin(AllocStmt, + C.getSourceManager(), N->getLocationContext()); + + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + os << "Memory is never released; potential leak"; + if (Region) { + os << " of memory pointed to by '"; + Region->dumpPretty(os); + os <<'\''; + } + + BugReport *R = new BugReport(*BT_Leak, os.str(), N, LocUsedForUniqueing); + R->markInteresting(Sym); + R->addVisitor(new MallocBugVisitor(Sym)); + C.EmitReport(R); +} + +void MallocChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const +{ + if (!SymReaper.hasDeadSymbols()) + return; + + ProgramStateRef state = C.getState(); + RegionStateTy RS = state->get(); + RegionStateTy::Factory &F = state->get_context(); + + bool generateReport = false; + llvm::SmallVector Errors; + for (RegionStateTy::iterator I = RS.begin(), E = RS.end(); I != E; ++I) { + if (SymReaper.isDead(I->first)) { + if (I->second.isAllocated()) { + generateReport = true; + Errors.push_back(I->first); + } + // Remove the dead symbol from the map. + RS = F.remove(RS, I->first); + + } + } + + // Cleanup the Realloc Pairs Map. + ReallocMap RP = state->get(); + for (ReallocMap::iterator I = RP.begin(), E = RP.end(); I != E; ++I) { + if (SymReaper.isDead(I->first) || + SymReaper.isDead(I->second.ReallocatedSym)) { + state = state->remove(I->first); + } + } + + // Generate leak node. + static SimpleProgramPointTag Tag("MallocChecker : DeadSymbolsLeak"); + ExplodedNode *N = C.addTransition(C.getState(), C.getPredecessor(), &Tag); + + if (generateReport) { + for (llvm::SmallVector::iterator + I = Errors.begin(), E = Errors.end(); I != E; ++I) { + reportLeak(*I, N, C); + } + } + C.addTransition(state->set(RS), N); +} + +void MallocChecker::checkEndPath(CheckerContext &C) const { + ProgramStateRef state = C.getState(); + RegionStateTy M = state->get(); + + // If inside inlined call, skip it. + if (C.getLocationContext()->getParent() != 0) + return; + + for (RegionStateTy::iterator I = M.begin(), E = M.end(); I != E; ++I) { + RefState RS = I->second; + if (RS.isAllocated()) { + ExplodedNode *N = C.addTransition(state); + if (N) + reportLeak(I->first, N, C); + } + } +} + +bool MallocChecker::checkEscape(SymbolRef Sym, const Stmt *S, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + const RefState *RS = state->get(Sym); + if (!RS) + return false; + + if (RS->isAllocated()) { + state = state->set(Sym, RefState::getEscaped(S)); + C.addTransition(state); + return true; + } + return false; +} + +void MallocChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const { + if (isMemFunction(C.getCalleeDecl(CE), C.getASTContext())) + return; + + // Check use after free, when a freed pointer is passed to a call. + ProgramStateRef State = C.getState(); + for (CallExpr::const_arg_iterator I = CE->arg_begin(), + E = CE->arg_end(); I != E; ++I) { + const Expr *A = *I; + if (A->getType().getTypePtr()->isAnyPointerType()) { + SymbolRef Sym = State->getSVal(A, C.getLocationContext()).getAsSymbol(); + if (!Sym) + continue; + if (checkUseAfterFree(Sym, C, A)) + return; + } + } +} + +void MallocChecker::checkPreStmt(const ReturnStmt *S, CheckerContext &C) const { + const Expr *E = S->getRetValue(); + if (!E) + return; + + // Check if we are returning a symbol. + SVal RetVal = C.getState()->getSVal(E, C.getLocationContext()); + SymbolRef Sym = RetVal.getAsSymbol(); + if (!Sym) + // If we are returning a field of the allocated struct or an array element, + // the callee could still free the memory. + // TODO: This logic should be a part of generic symbol escape callback. + if (const MemRegion *MR = RetVal.getAsRegion()) + if (isa(MR) || isa(MR)) + if (const SymbolicRegion *BMR = + dyn_cast(MR->getBaseRegion())) + Sym = BMR->getSymbol(); + if (!Sym) + return; + + // Check if we are returning freed memory. + if (checkUseAfterFree(Sym, C, E)) + return; + + // If this function body is not inlined, check if the symbol is escaping. + if (C.getLocationContext()->getParent() == 0) + checkEscape(Sym, E, C); +} + +// TODO: Blocks should be either inlined or should call invalidate regions +// upon invocation. After that's in place, special casing here will not be +// needed. +void MallocChecker::checkPostStmt(const BlockExpr *BE, + CheckerContext &C) const { + + // Scan the BlockDecRefExprs for any object the retain count checker + // may be tracking. + if (!BE->getBlockDecl()->hasCaptures()) + return; + + ProgramStateRef state = C.getState(); + const BlockDataRegion *R = + cast(state->getSVal(BE, + C.getLocationContext()).getAsRegion()); + + BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(), + E = R->referenced_vars_end(); + + if (I == E) + return; + + SmallVector Regions; + const LocationContext *LC = C.getLocationContext(); + MemRegionManager &MemMgr = C.getSValBuilder().getRegionManager(); + + for ( ; I != E; ++I) { + const VarRegion *VR = *I; + if (VR->getSuperRegion() == R) { + VR = MemMgr.getVarRegion(VR->getDecl(), LC); + } + Regions.push_back(VR); + } + + state = + state->scanReachableSymbols(Regions.data(), + Regions.data() + Regions.size()).getState(); + C.addTransition(state); +} + +bool MallocChecker::checkUseAfterFree(SymbolRef Sym, CheckerContext &C, + const Stmt *S) const { + assert(Sym); + const RefState *RS = C.getState()->get(Sym); + if (RS && RS->isReleased()) { + if (ExplodedNode *N = C.generateSink()) { + if (!BT_UseFree) + BT_UseFree.reset(new BugType("Use-after-free", "Memory Error")); + + BugReport *R = new BugReport(*BT_UseFree, + "Use of memory after it is freed",N); + if (S) + R->addRange(S->getSourceRange()); + R->markInteresting(Sym); + R->addVisitor(new MallocBugVisitor(Sym)); + C.EmitReport(R); + return true; + } + } + return false; +} + +// Check if the location is a freed symbolic region. +void MallocChecker::checkLocation(SVal l, bool isLoad, const Stmt *S, + CheckerContext &C) const { + SymbolRef Sym = l.getLocSymbolInBase(); + if (Sym) + checkUseAfterFree(Sym, C); +} + +//===----------------------------------------------------------------------===// +// Check various ways a symbol can be invalidated. +// TODO: This logic (the next 3 functions) is copied/similar to the +// RetainRelease checker. We might want to factor this out. +//===----------------------------------------------------------------------===// + +// Stop tracking symbols when a value escapes as a result of checkBind. +// A value escapes in three possible cases: +// (1) we are binding to something that is not a memory region. +// (2) we are binding to a memregion that does not have stack storage +// (3) we are binding to a memregion with stack storage that the store +// does not understand. +void MallocChecker::checkBind(SVal loc, SVal val, const Stmt *S, + CheckerContext &C) const { + // Are we storing to something that causes the value to "escape"? + bool escapes = true; + ProgramStateRef state = C.getState(); + + if (loc::MemRegionVal *regionLoc = dyn_cast(&loc)) { + escapes = !regionLoc->getRegion()->hasStackStorage(); + + if (!escapes) { + // To test (3), generate a new state with the binding added. If it is + // the same state, then it escapes (since the store cannot represent + // the binding). + escapes = (state == (state->bindLoc(*regionLoc, val))); + } + if (!escapes) { + // Case 4: We do not currently model what happens when a symbol is + // assigned to a struct field, so be conservative here and let the symbol + // go. TODO: This could definitely be improved upon. + escapes = !isa(regionLoc->getRegion()); + } + } + + // If our store can represent the binding and we aren't storing to something + // that doesn't have local storage then just return and have the simulation + // state continue as is. + if (!escapes) + return; + + // Otherwise, find all symbols referenced by 'val' that we are tracking + // and stop tracking them. + state = state->scanReachableSymbols(val).getState(); + C.addTransition(state); +} + +// If a symbolic region is assumed to NULL (or another constant), stop tracking +// it - assuming that allocation failed on this path. +ProgramStateRef MallocChecker::evalAssume(ProgramStateRef state, + SVal Cond, + bool Assumption) const { + RegionStateTy RS = state->get(); + for (RegionStateTy::iterator I = RS.begin(), E = RS.end(); I != E; ++I) { + // If the symbol is assumed to NULL or another constant, this will + // return an APSInt*. + if (state->getSymVal(I.getKey())) + state = state->remove(I.getKey()); + } + + // Realloc returns 0 when reallocation fails, which means that we should + // restore the state of the pointer being reallocated. + ReallocMap RP = state->get(); + for (ReallocMap::iterator I = RP.begin(), E = RP.end(); I != E; ++I) { + // If the symbol is assumed to NULL or another constant, this will + // return an APSInt*. + if (state->getSymVal(I.getKey())) { + SymbolRef ReallocSym = I.getData().ReallocatedSym; + const RefState *RS = state->get(ReallocSym); + if (RS) { + if (RS->isReleased() && ! I.getData().IsFreeOnFailure) + state = state->set(ReallocSym, + RefState::getAllocateUnchecked(RS->getStmt())); + } + state = state->remove(I.getKey()); + } + } + + return state; +} + +// Check if the function is known to us. So, for example, we could +// conservatively assume it can free/reallocate it's pointer arguments. +// (We assume that the pointers cannot escape through calls to system +// functions not handled by this checker.) +bool MallocChecker::doesNotFreeMemory(const CallOrObjCMessage *Call, + ProgramStateRef State) const { + if (!Call) + return false; + + // For now, assume that any C++ call can free memory. + // TODO: If we want to be more optimistic here, we'll need to make sure that + // regions escape to C++ containers. They seem to do that even now, but for + // mysterious reasons. + if (Call->isCXXCall()) + return false; + + const Decl *D = Call->getDecl(); + if (!D) + return false; + + ASTContext &ASTC = State->getStateManager().getContext(); + + // If it's one of the allocation functions we can reason about, we model + // its behavior explicitly. + if (isa(D) && isMemFunction(cast(D), ASTC)) { + return true; + } + + // If it's not a system call, assume it frees memory. + SourceManager &SM = ASTC.getSourceManager(); + if (!SM.isInSystemHeader(D->getLocation())) + return false; + + // Process C/ObjC functions. + if (const FunctionDecl *FD = dyn_cast(D)) { + // White list the system functions whose arguments escape. + const IdentifierInfo *II = FD->getIdentifier(); + if (!II) + return true; + StringRef FName = II->getName(); + + // White list thread local storage. + if (FName.equals("pthread_setspecific")) + return false; + + // White list the 'XXXNoCopy' ObjC functions. + if (FName.endswith("NoCopy")) { + // Look for the deallocator argument. We know that the memory ownership + // is not transfered only if the deallocator argument is + // 'kCFAllocatorNull'. + for (unsigned i = 1; i < Call->getNumArgs(); ++i) { + const Expr *ArgE = Call->getArg(i)->IgnoreParenCasts(); + if (const DeclRefExpr *DE = dyn_cast(ArgE)) { + StringRef DeallocatorName = DE->getFoundDecl()->getName(); + if (DeallocatorName == "kCFAllocatorNull") + return true; + } + } + return false; + } + + // PR12101 + // Many CoreFoundation and CoreGraphics might allow a tracked object + // to escape. + if (Call->isCFCGAllowingEscape(FName)) + return false; + + // Associating streams with malloced buffers. The pointer can escape if + // 'closefn' is specified (and if that function does free memory). + // Currently, we do not inspect the 'closefn' function (PR12101). + if (FName == "funopen") + if (Call->getNumArgs() >= 4 && !Call->getArgSVal(4).isConstant(0)) + return false; + + // Do not warn on pointers passed to 'setbuf' when used with std streams, + // these leaks might be intentional when setting the buffer for stdio. + // http://stackoverflow.com/questions/2671151/who-frees-setvbuf-buffer + if (FName == "setbuf" || FName =="setbuffer" || + FName == "setlinebuf" || FName == "setvbuf") { + if (Call->getNumArgs() >= 1) + if (const DeclRefExpr *Arg = + dyn_cast(Call->getArg(0)->IgnoreParenCasts())) + if (const VarDecl *D = dyn_cast(Arg->getDecl())) + if (D->getCanonicalDecl()->getName().find("std") + != StringRef::npos) + return false; + } + + // A bunch of other functions, which take ownership of a pointer (See retain + // release checker). Not all the parameters here are invalidated, but the + // Malloc checker cannot differentiate between them. The right way of doing + // this would be to implement a pointer escapes callback. + if (FName == "CVPixelBufferCreateWithBytes" || + FName == "CGBitmapContextCreateWithData" || + FName == "CVPixelBufferCreateWithPlanarBytes" || + FName == "OSAtomicEnqueue") { + return false; + } + + // Whitelist NSXXInsertXX, for example NSMapInsertIfAbsent, since they can + // be deallocated by NSMapRemove. + if (FName.startswith("NS") && (FName.find("Insert") != StringRef::npos)) + return false; + + // Otherwise, assume that the function does not free memory. + // Most system calls, do not free the memory. + return true; + + // Process ObjC functions. + } else if (const ObjCMethodDecl * ObjCD = dyn_cast(D)) { + Selector S = ObjCD->getSelector(); + + // White list the ObjC functions which do free memory. + // - Anything containing 'freeWhenDone' param set to 1. + // Ex: dataWithBytesNoCopy:length:freeWhenDone. + for (unsigned i = 1; i < S.getNumArgs(); ++i) { + if (S.getNameForSlot(i).equals("freeWhenDone")) { + if (Call->getArgSVal(i).isConstant(1)) + return false; + else + return true; + } + } + + // If the first selector ends with NoCopy, assume that the ownership is + // transfered as well. + // Ex: [NSData dataWithBytesNoCopy:bytes length:10]; + if (S.getNameForSlot(0).endswith("NoCopy")) { + return false; + } + + // Otherwise, assume that the function does not free memory. + // Most system calls, do not free the memory. + return true; + } + + // Otherwise, assume that the function can free memory. + return false; + +} + +// If the symbol we are tracking is invalidated, but not explicitly (ex: the &p +// escapes, when we are tracking p), do not track the symbol as we cannot reason +// about it anymore. +ProgramStateRef +MallocChecker::checkRegionChanges(ProgramStateRef State, + const StoreManager::InvalidatedSymbols *invalidated, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const CallOrObjCMessage *Call) const { + if (!invalidated || invalidated->empty()) + return State; + llvm::SmallPtrSet WhitelistedSymbols; + + // If it's a call which might free or reallocate memory, we assume that all + // regions (explicit and implicit) escaped. + + // Otherwise, whitelist explicit pointers; we still can track them. + if (!Call || doesNotFreeMemory(Call, State)) { + for (ArrayRef::iterator I = ExplicitRegions.begin(), + E = ExplicitRegions.end(); I != E; ++I) { + if (const SymbolicRegion *R = (*I)->StripCasts()->getAs()) + WhitelistedSymbols.insert(R->getSymbol()); + } + } + + for (StoreManager::InvalidatedSymbols::const_iterator I=invalidated->begin(), + E = invalidated->end(); I!=E; ++I) { + SymbolRef sym = *I; + if (WhitelistedSymbols.count(sym)) + continue; + // The symbol escaped. + if (const RefState *RS = State->get(sym)) + State = State->set(sym, RefState::getEscaped(RS->getStmt())); + } + return State; +} + +static SymbolRef findFailedReallocSymbol(ProgramStateRef currState, + ProgramStateRef prevState) { + ReallocMap currMap = currState->get(); + ReallocMap prevMap = prevState->get(); + + for (ReallocMap::iterator I = prevMap.begin(), E = prevMap.end(); + I != E; ++I) { + SymbolRef sym = I.getKey(); + if (!currMap.lookup(sym)) + return sym; + } + + return NULL; +} + +PathDiagnosticPiece * +MallocChecker::MallocBugVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) { + ProgramStateRef state = N->getState(); + ProgramStateRef statePrev = PrevN->getState(); + + const RefState *RS = state->get(Sym); + const RefState *RSPrev = statePrev->get(Sym); + if (!RS && !RSPrev) + return 0; + + const Stmt *S = 0; + const char *Msg = 0; + StackHintGeneratorForSymbol *StackHint = 0; + + // Retrieve the associated statement. + ProgramPoint ProgLoc = N->getLocation(); + if (isa(ProgLoc)) + S = cast(ProgLoc).getStmt(); + // If an assumption was made on a branch, it should be caught + // here by looking at the state transition. + if (isa(ProgLoc)) { + const CFGBlock *srcBlk = cast(ProgLoc).getSrc(); + S = srcBlk->getTerminator(); + } + if (!S) + return 0; + + // Find out if this is an interesting point and what is the kind. + if (Mode == Normal) { + if (isAllocated(RS, RSPrev, S)) { + Msg = "Memory is allocated"; + StackHint = new StackHintGeneratorForSymbol(Sym, + "Returned allocated memory"); + } else if (isReleased(RS, RSPrev, S)) { + Msg = "Memory is released"; + StackHint = new StackHintGeneratorForSymbol(Sym, + "Returned released memory"); + } else if (isReallocFailedCheck(RS, RSPrev, S)) { + Mode = ReallocationFailed; + Msg = "Reallocation failed"; + StackHint = new StackHintGeneratorForReallocationFailed(Sym, + "Reallocation failed"); + + if (SymbolRef sym = findFailedReallocSymbol(state, statePrev)) { + // Is it possible to fail two reallocs WITHOUT testing in between? + assert((!FailedReallocSymbol || FailedReallocSymbol == sym) && + "We only support one failed realloc at a time."); + BR.markInteresting(sym); + FailedReallocSymbol = sym; + } + } + + // We are in a special mode if a reallocation failed later in the path. + } else if (Mode == ReallocationFailed) { + assert(FailedReallocSymbol && "No symbol to look for."); + + // Is this is the first appearance of the reallocated symbol? + if (!statePrev->get(FailedReallocSymbol)) { + // If we ever hit this assert, that means BugReporter has decided to skip + // node pairs or visit them out of order. + assert(state->get(FailedReallocSymbol) && + "Missed the reallocation point"); + + // We're at the reallocation point. + Msg = "Attempt to reallocate memory"; + StackHint = new StackHintGeneratorForSymbol(Sym, + "Returned reallocated memory"); + FailedReallocSymbol = NULL; + Mode = Normal; + } + } + + if (!Msg) + return 0; + assert(StackHint); + + // Generate the extra diagnostic. + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return new PathDiagnosticEventPiece(Pos, Msg, true, StackHint); +} + + +#define REGISTER_CHECKER(name) \ +void ento::register##name(CheckerManager &mgr) {\ + registerCStringCheckerBasic(mgr); \ + mgr.registerChecker()->Filter.C##name = true;\ +} + +REGISTER_CHECKER(MallocPessimistic) +REGISTER_CHECKER(MallocOptimistic) diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp new file mode 100644 index 0000000..daec418 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp @@ -0,0 +1,267 @@ +// MallocOverflowSecurityChecker.cpp - Check for malloc overflows -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This checker detects a common memory allocation security flaw. +// Suppose 'unsigned int n' comes from an untrusted source. If the +// code looks like 'malloc (n * 4)', and an attacker can make 'n' be +// say MAX_UINT/4+2, then instead of allocating the correct 'n' 4-byte +// elements, this will actually allocate only two because of overflow. +// Then when the rest of the program attempts to store values past the +// second element, these values will actually overwrite other items in +// the heap, probably allowing the attacker to execute arbitrary code. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/AST/EvaluatedExprVisitor.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "llvm/ADT/SmallVector.h" + +using namespace clang; +using namespace ento; + +namespace { +struct MallocOverflowCheck { + const BinaryOperator *mulop; + const Expr *variable; + + MallocOverflowCheck (const BinaryOperator *m, const Expr *v) + : mulop(m), variable (v) + {} +}; + +class MallocOverflowSecurityChecker : public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager &mgr, + BugReporter &BR) const; + + void CheckMallocArgument( + llvm::SmallVectorImpl &PossibleMallocOverflows, + const Expr *TheArgument, ASTContext &Context) const; + + void OutputPossibleOverflows( + llvm::SmallVectorImpl &PossibleMallocOverflows, + const Decl *D, BugReporter &BR, AnalysisManager &mgr) const; + +}; +} // end anonymous namespace + +void MallocOverflowSecurityChecker::CheckMallocArgument( + llvm::SmallVectorImpl &PossibleMallocOverflows, + const Expr *TheArgument, + ASTContext &Context) const { + + /* Look for a linear combination with a single variable, and at least + one multiplication. + Reject anything that applies to the variable: an explicit cast, + conditional expression, an operation that could reduce the range + of the result, or anything too complicated :-). */ + const Expr * e = TheArgument; + const BinaryOperator * mulop = NULL; + + for (;;) { + e = e->IgnoreParenImpCasts(); + if (isa(e)) { + const BinaryOperator * binop = dyn_cast(e); + BinaryOperatorKind opc = binop->getOpcode(); + // TODO: ignore multiplications by 1, reject if multiplied by 0. + if (mulop == NULL && opc == BO_Mul) + mulop = binop; + if (opc != BO_Mul && opc != BO_Add && opc != BO_Sub && opc != BO_Shl) + return; + + const Expr *lhs = binop->getLHS(); + const Expr *rhs = binop->getRHS(); + if (rhs->isEvaluatable(Context)) + e = lhs; + else if ((opc == BO_Add || opc == BO_Mul) + && lhs->isEvaluatable(Context)) + e = rhs; + else + return; + } + else if (isa(e) || isa(e)) + break; + else + return; + } + + if (mulop == NULL) + return; + + // We've found the right structure of malloc argument, now save + // the data so when the body of the function is completely available + // we can check for comparisons. + + // TODO: Could push this into the innermost scope where 'e' is + // defined, rather than the whole function. + PossibleMallocOverflows.push_back(MallocOverflowCheck(mulop, e)); +} + +namespace { +// A worker class for OutputPossibleOverflows. +class CheckOverflowOps : + public EvaluatedExprVisitor { +public: + typedef llvm::SmallVectorImpl theVecType; + +private: + theVecType &toScanFor; + ASTContext &Context; + + bool isIntZeroExpr(const Expr *E) const { + if (!E->getType()->isIntegralOrEnumerationType()) + return false; + llvm::APSInt Result; + if (E->EvaluateAsInt(Result, Context)) + return Result == 0; + return false; + } + + void CheckExpr(const Expr *E_p) { + const Expr *E = E_p->IgnoreParenImpCasts(); + + theVecType::iterator i = toScanFor.end(); + theVecType::iterator e = toScanFor.begin(); + + if (const DeclRefExpr *DR = dyn_cast(E)) { + const Decl * EdreD = DR->getDecl(); + while (i != e) { + --i; + if (const DeclRefExpr *DR_i = dyn_cast(i->variable)) { + if (DR_i->getDecl() == EdreD) + i = toScanFor.erase(i); + } + } + } + else if (isa(E)) { + // No points-to analysis, just look at the member + const Decl * EmeMD = dyn_cast(E)->getMemberDecl(); + while (i != e) { + --i; + if (isa(i->variable)) { + if (dyn_cast(i->variable)->getMemberDecl() == EmeMD) + i = toScanFor.erase (i); + } + } + } + } + + public: + void VisitBinaryOperator(BinaryOperator *E) { + if (E->isComparisonOp()) { + const Expr * lhs = E->getLHS(); + const Expr * rhs = E->getRHS(); + // Ignore comparisons against zero, since they generally don't + // protect against an overflow. + if (!isIntZeroExpr(lhs) && ! isIntZeroExpr(rhs)) { + CheckExpr(lhs); + CheckExpr(rhs); + } + } + EvaluatedExprVisitor::VisitBinaryOperator(E); + } + + /* We specifically ignore loop conditions, because they're typically + not error checks. */ + void VisitWhileStmt(WhileStmt *S) { + return this->Visit(S->getBody()); + } + void VisitForStmt(ForStmt *S) { + return this->Visit(S->getBody()); + } + void VisitDoStmt(DoStmt *S) { + return this->Visit(S->getBody()); + } + + CheckOverflowOps(theVecType &v, ASTContext &ctx) + : EvaluatedExprVisitor(ctx), + toScanFor(v), Context(ctx) + { } + }; +} + +// OutputPossibleOverflows - We've found a possible overflow earlier, +// now check whether Body might contain a comparison which might be +// preventing the overflow. +// This doesn't do flow analysis, range analysis, or points-to analysis; it's +// just a dumb "is there a comparison" scan. The aim here is to +// detect the most blatent cases of overflow and educate the +// programmer. +void MallocOverflowSecurityChecker::OutputPossibleOverflows( + llvm::SmallVectorImpl &PossibleMallocOverflows, + const Decl *D, BugReporter &BR, AnalysisManager &mgr) const { + // By far the most common case: nothing to check. + if (PossibleMallocOverflows.empty()) + return; + + // Delete any possible overflows which have a comparison. + CheckOverflowOps c(PossibleMallocOverflows, BR.getContext()); + c.Visit(mgr.getAnalysisDeclContext(D)->getBody()); + + // Output warnings for all overflows that are left. + for (CheckOverflowOps::theVecType::iterator + i = PossibleMallocOverflows.begin(), + e = PossibleMallocOverflows.end(); + i != e; + ++i) { + SourceRange R = i->mulop->getSourceRange(); + BR.EmitBasicReport(D, "malloc() size overflow", categories::UnixAPI, + "the computation of the size of the memory allocation may overflow", + PathDiagnosticLocation::createOperatorLoc(i->mulop, + BR.getSourceManager()), &R, 1); + } +} + +void MallocOverflowSecurityChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &mgr, + BugReporter &BR) const { + + CFG *cfg = mgr.getCFG(D); + if (!cfg) + return; + + // A list of variables referenced in possibly overflowing malloc operands. + llvm::SmallVector PossibleMallocOverflows; + + for (CFG::iterator it = cfg->begin(), ei = cfg->end(); it != ei; ++it) { + CFGBlock *block = *it; + for (CFGBlock::iterator bi = block->begin(), be = block->end(); + bi != be; ++bi) { + if (const CFGStmt *CS = bi->getAs()) { + if (const CallExpr *TheCall = dyn_cast(CS->getStmt())) { + // Get the callee. + const FunctionDecl *FD = TheCall->getDirectCallee(); + + if (!FD) + return; + + // Get the name of the callee. If it's a builtin, strip off the prefix. + IdentifierInfo *FnInfo = FD->getIdentifier(); + if (!FnInfo) + return; + + if (FnInfo->isStr ("malloc") || FnInfo->isStr ("_MALLOC")) { + if (TheCall->getNumArgs() == 1) + CheckMallocArgument(PossibleMallocOverflows, TheCall->getArg(0), + mgr.getASTContext()); + } + } + } + } + } + + OutputPossibleOverflows(PossibleMallocOverflows, D, BR, mgr); +} + +void ento::registerMallocOverflowSecurityChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp new file mode 100644 index 0000000..08a9da1 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp @@ -0,0 +1,211 @@ +// MallocSizeofChecker.cpp - Check for dubious malloc arguments ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Reports inconsistencies between the casted type of the return value of a +// malloc/calloc/realloc call and the operand of any sizeof expressions +// contained within its argument(s). +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/TypeLoc.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { + +typedef std::pair TypeCallPair; +typedef llvm::PointerUnion ExprParent; + +class CastedAllocFinder + : public ConstStmtVisitor { + IdentifierInfo *II_malloc, *II_calloc, *II_realloc; + +public: + struct CallRecord { + ExprParent CastedExprParent; + const Expr *CastedExpr; + const TypeSourceInfo *ExplicitCastType; + const CallExpr *AllocCall; + + CallRecord(ExprParent CastedExprParent, const Expr *CastedExpr, + const TypeSourceInfo *ExplicitCastType, + const CallExpr *AllocCall) + : CastedExprParent(CastedExprParent), CastedExpr(CastedExpr), + ExplicitCastType(ExplicitCastType), AllocCall(AllocCall) {} + }; + + typedef std::vector CallVec; + CallVec Calls; + + CastedAllocFinder(ASTContext *Ctx) : + II_malloc(&Ctx->Idents.get("malloc")), + II_calloc(&Ctx->Idents.get("calloc")), + II_realloc(&Ctx->Idents.get("realloc")) {} + + void VisitChild(ExprParent Parent, const Stmt *S) { + TypeCallPair AllocCall = Visit(S); + if (AllocCall.second && AllocCall.second != S) + Calls.push_back(CallRecord(Parent, cast(S), AllocCall.first, + AllocCall.second)); + } + + void VisitChildren(const Stmt *S) { + for (Stmt::const_child_iterator I = S->child_begin(), E = S->child_end(); + I!=E; ++I) + if (const Stmt *child = *I) + VisitChild(S, child); + } + + TypeCallPair VisitCastExpr(const CastExpr *E) { + return Visit(E->getSubExpr()); + } + + TypeCallPair VisitExplicitCastExpr(const ExplicitCastExpr *E) { + return TypeCallPair(E->getTypeInfoAsWritten(), + Visit(E->getSubExpr()).second); + } + + TypeCallPair VisitParenExpr(const ParenExpr *E) { + return Visit(E->getSubExpr()); + } + + TypeCallPair VisitStmt(const Stmt *S) { + VisitChildren(S); + return TypeCallPair(); + } + + TypeCallPair VisitCallExpr(const CallExpr *E) { + VisitChildren(E); + const FunctionDecl *FD = E->getDirectCallee(); + if (FD) { + IdentifierInfo *II = FD->getIdentifier(); + if (II == II_malloc || II == II_calloc || II == II_realloc) + return TypeCallPair((const TypeSourceInfo *)0, E); + } + return TypeCallPair(); + } + + TypeCallPair VisitDeclStmt(const DeclStmt *S) { + for (DeclStmt::const_decl_iterator I = S->decl_begin(), E = S->decl_end(); + I!=E; ++I) + if (const VarDecl *VD = dyn_cast(*I)) + if (const Expr *Init = VD->getInit()) + VisitChild(VD, Init); + return TypeCallPair(); + } +}; + +class SizeofFinder : public ConstStmtVisitor { +public: + std::vector Sizeofs; + + void VisitBinMul(const BinaryOperator *E) { + Visit(E->getLHS()); + Visit(E->getRHS()); + } + + void VisitBinAdd(const BinaryOperator *E) { + Visit(E->getLHS()); + Visit(E->getRHS()); + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *E) { + return Visit(E->getSubExpr()); + } + + void VisitParenExpr(const ParenExpr *E) { + return Visit(E->getSubExpr()); + } + + void VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *E) { + if (E->getKind() != UETT_SizeOf) + return; + + Sizeofs.push_back(E); + } +}; + +class MallocSizeofChecker : public Checker { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + AnalysisDeclContext *ADC = mgr.getAnalysisDeclContext(D); + CastedAllocFinder Finder(&BR.getContext()); + Finder.Visit(D->getBody()); + for (CastedAllocFinder::CallVec::iterator i = Finder.Calls.begin(), + e = Finder.Calls.end(); i != e; ++i) { + QualType CastedType = i->CastedExpr->getType(); + if (!CastedType->isPointerType()) + continue; + QualType PointeeType = CastedType->getAs()->getPointeeType(); + if (PointeeType->isVoidType()) + continue; + + for (CallExpr::const_arg_iterator ai = i->AllocCall->arg_begin(), + ae = i->AllocCall->arg_end(); ai != ae; ++ai) { + if (!(*ai)->getType()->isIntegerType()) + continue; + + SizeofFinder SFinder; + SFinder.Visit(*ai); + if (SFinder.Sizeofs.size() != 1) + continue; + + QualType SizeofType = SFinder.Sizeofs[0]->getTypeOfArgument(); + if (!BR.getContext().hasSameUnqualifiedType(PointeeType, SizeofType)) { + const TypeSourceInfo *TSI = 0; + if (i->CastedExprParent.is()) { + TSI = + i->CastedExprParent.get()->getTypeSourceInfo(); + } else { + TSI = i->ExplicitCastType; + } + + SmallString<64> buf; + llvm::raw_svector_ostream OS(buf); + + OS << "Result of '" + << i->AllocCall->getDirectCallee()->getIdentifier()->getName() + << "' is converted to type '" + << CastedType.getAsString() << "', whose pointee type '" + << PointeeType.getAsString() << "' is incompatible with " + << "sizeof operand type '" << SizeofType.getAsString() << "'"; + llvm::SmallVector Ranges; + Ranges.push_back(i->AllocCall->getCallee()->getSourceRange()); + Ranges.push_back(SFinder.Sizeofs[0]->getSourceRange()); + if (TSI) + Ranges.push_back(TSI->getTypeLoc().getSourceRange()); + + PathDiagnosticLocation L = + PathDiagnosticLocation::createBegin(i->AllocCall->getCallee(), + BR.getSourceManager(), ADC); + + BR.EmitBasicReport(D, "allocator sizeof operand mismatch", + categories::UnixAPI, + OS.str(), + L, Ranges.data(), Ranges.size()); + } + } + } + } +}; + +} + +void ento::registerMallocSizeofChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp new file mode 100644 index 0000000..4989ba8 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp @@ -0,0 +1,89 @@ +//=- NSAutoreleasePoolChecker.cpp --------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a NSAutoreleasePoolChecker, a small checker that warns +// about subpar uses of NSAutoreleasePool. Note that while the check itself +// (in its current form) could be written as a flow-insensitive check, in +// can be potentially enhanced in the future with flow-sensitive information. +// It is also a good example of the CheckerVisitor interface. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Decl.h" + +using namespace clang; +using namespace ento; + +namespace { +class NSAutoreleasePoolChecker + : public Checker { + mutable OwningPtr BT; + mutable Selector releaseS; + +public: + void checkPreObjCMessage(ObjCMessage msg, CheckerContext &C) const; +}; + +} // end anonymous namespace + +void NSAutoreleasePoolChecker::checkPreObjCMessage(ObjCMessage msg, + CheckerContext &C) const { + + const Expr *receiver = msg.getInstanceReceiver(); + if (!receiver) + return; + + // FIXME: Enhance with value-tracking information instead of consulting + // the type of the expression. + const ObjCObjectPointerType* PT = + receiver->getType()->getAs(); + + if (!PT) + return; + const ObjCInterfaceDecl *OD = PT->getInterfaceDecl(); + if (!OD) + return; + if (!OD->getIdentifier()->getName().equals("NSAutoreleasePool")) + return; + + if (releaseS.isNull()) + releaseS = GetNullarySelector("release", C.getASTContext()); + // Sending 'release' message? + if (msg.getSelector() != releaseS) + return; + + if (!BT) + BT.reset(new BugType("Use -drain instead of -release", + "API Upgrade (Apple)")); + + ExplodedNode *N = C.addTransition(); + if (!N) { + assert(0); + return; + } + + BugReport *Report = new BugReport(*BT, "Use -drain instead of -release when " + "using NSAutoreleasePool and garbage collection", N); + Report->addRange(msg.getSourceRange()); + C.EmitReport(Report); +} + +void ento::registerNSAutoreleasePoolChecker(CheckerManager &mgr) { + if (mgr.getLangOpts().getGC() != LangOptions::NonGC) + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp new file mode 100644 index 0000000..f826573 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp @@ -0,0 +1,334 @@ +//=- NSErrorChecker.cpp - Coding conventions for uses of NSError -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a CheckNSError, a flow-insenstive check +// that determines if an Objective-C class interface correctly returns +// a non-void return type. +// +// File under feature request PR 2600. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Decl.h" +#include "llvm/ADT/SmallVector.h" + +using namespace clang; +using namespace ento; + +static bool IsNSError(QualType T, IdentifierInfo *II); +static bool IsCFError(QualType T, IdentifierInfo *II); + +//===----------------------------------------------------------------------===// +// NSErrorMethodChecker +//===----------------------------------------------------------------------===// + +namespace { +class NSErrorMethodChecker + : public Checker< check::ASTDecl > { + mutable IdentifierInfo *II; + +public: + NSErrorMethodChecker() : II(0) { } + + void checkASTDecl(const ObjCMethodDecl *D, + AnalysisManager &mgr, BugReporter &BR) const; +}; +} + +void NSErrorMethodChecker::checkASTDecl(const ObjCMethodDecl *D, + AnalysisManager &mgr, + BugReporter &BR) const { + if (!D->isThisDeclarationADefinition()) + return; + if (!D->getResultType()->isVoidType()) + return; + + if (!II) + II = &D->getASTContext().Idents.get("NSError"); + + bool hasNSError = false; + for (ObjCMethodDecl::param_const_iterator + I = D->param_begin(), E = D->param_end(); I != E; ++I) { + if (IsNSError((*I)->getType(), II)) { + hasNSError = true; + break; + } + } + + if (hasNSError) { + const char *err = "Method accepting NSError** " + "should have a non-void return value to indicate whether or not an " + "error occurred"; + PathDiagnosticLocation L = + PathDiagnosticLocation::create(D, BR.getSourceManager()); + BR.EmitBasicReport(D, "Bad return type when passing NSError**", + "Coding conventions (Apple)", err, L); + } +} + +//===----------------------------------------------------------------------===// +// CFErrorFunctionChecker +//===----------------------------------------------------------------------===// + +namespace { +class CFErrorFunctionChecker + : public Checker< check::ASTDecl > { + mutable IdentifierInfo *II; + +public: + CFErrorFunctionChecker() : II(0) { } + + void checkASTDecl(const FunctionDecl *D, + AnalysisManager &mgr, BugReporter &BR) const; +}; +} + +void CFErrorFunctionChecker::checkASTDecl(const FunctionDecl *D, + AnalysisManager &mgr, + BugReporter &BR) const { + if (!D->doesThisDeclarationHaveABody()) + return; + if (!D->getResultType()->isVoidType()) + return; + + if (!II) + II = &D->getASTContext().Idents.get("CFErrorRef"); + + bool hasCFError = false; + for (FunctionDecl::param_const_iterator + I = D->param_begin(), E = D->param_end(); I != E; ++I) { + if (IsCFError((*I)->getType(), II)) { + hasCFError = true; + break; + } + } + + if (hasCFError) { + const char *err = "Function accepting CFErrorRef* " + "should have a non-void return value to indicate whether or not an " + "error occurred"; + PathDiagnosticLocation L = + PathDiagnosticLocation::create(D, BR.getSourceManager()); + BR.EmitBasicReport(D, "Bad return type when passing CFErrorRef*", + "Coding conventions (Apple)", err, L); + } +} + +//===----------------------------------------------------------------------===// +// NSOrCFErrorDerefChecker +//===----------------------------------------------------------------------===// + +namespace { + +class NSErrorDerefBug : public BugType { +public: + NSErrorDerefBug() : BugType("NSError** null dereference", + "Coding conventions (Apple)") {} +}; + +class CFErrorDerefBug : public BugType { +public: + CFErrorDerefBug() : BugType("CFErrorRef* null dereference", + "Coding conventions (Apple)") {} +}; + +} + +namespace { +class NSOrCFErrorDerefChecker + : public Checker< check::Location, + check::Event > { + mutable IdentifierInfo *NSErrorII, *CFErrorII; +public: + bool ShouldCheckNSError, ShouldCheckCFError; + NSOrCFErrorDerefChecker() : NSErrorII(0), CFErrorII(0), + ShouldCheckNSError(0), ShouldCheckCFError(0) { } + + void checkLocation(SVal loc, bool isLoad, const Stmt *S, + CheckerContext &C) const; + void checkEvent(ImplicitNullDerefEvent event) const; +}; +} + +namespace { struct NSErrorOut {}; } +namespace { struct CFErrorOut {}; } + +typedef llvm::ImmutableMap ErrorOutFlag; + +namespace clang { +namespace ento { + template <> + struct ProgramStateTrait : public ProgramStatePartialTrait { + static void *GDMIndex() { static int index = 0; return &index; } + }; + template <> + struct ProgramStateTrait : public ProgramStatePartialTrait { + static void *GDMIndex() { static int index = 0; return &index; } + }; +} +} + +template +static bool hasFlag(SVal val, ProgramStateRef state) { + if (SymbolRef sym = val.getAsSymbol()) + if (const unsigned *attachedFlags = state->get(sym)) + return *attachedFlags; + return false; +} + +template +static void setFlag(ProgramStateRef state, SVal val, CheckerContext &C) { + // We tag the symbol that the SVal wraps. + if (SymbolRef sym = val.getAsSymbol()) + C.addTransition(state->set(sym, true)); +} + +static QualType parameterTypeFromSVal(SVal val, CheckerContext &C) { + const StackFrameContext * + SFC = C.getLocationContext()->getCurrentStackFrame(); + if (const loc::MemRegionVal* X = dyn_cast(&val)) { + const MemRegion* R = X->getRegion(); + if (const VarRegion *VR = R->getAs()) + if (const StackArgumentsSpaceRegion * + stackReg = dyn_cast(VR->getMemorySpace())) + if (stackReg->getStackFrame() == SFC) + return VR->getValueType(); + } + + return QualType(); +} + +void NSOrCFErrorDerefChecker::checkLocation(SVal loc, bool isLoad, + const Stmt *S, + CheckerContext &C) const { + if (!isLoad) + return; + if (loc.isUndef() || !isa(loc)) + return; + + ASTContext &Ctx = C.getASTContext(); + ProgramStateRef state = C.getState(); + + // If we are loading from NSError**/CFErrorRef* parameter, mark the resulting + // SVal so that we can later check it when handling the + // ImplicitNullDerefEvent event. + // FIXME: Cumbersome! Maybe add hook at construction of SVals at start of + // function ? + + QualType parmT = parameterTypeFromSVal(loc, C); + if (parmT.isNull()) + return; + + if (!NSErrorII) + NSErrorII = &Ctx.Idents.get("NSError"); + if (!CFErrorII) + CFErrorII = &Ctx.Idents.get("CFErrorRef"); + + if (ShouldCheckNSError && IsNSError(parmT, NSErrorII)) { + setFlag(state, state->getSVal(cast(loc)), C); + return; + } + + if (ShouldCheckCFError && IsCFError(parmT, CFErrorII)) { + setFlag(state, state->getSVal(cast(loc)), C); + return; + } +} + +void NSOrCFErrorDerefChecker::checkEvent(ImplicitNullDerefEvent event) const { + if (event.IsLoad) + return; + + SVal loc = event.Location; + ProgramStateRef state = event.SinkNode->getState(); + BugReporter &BR = *event.BR; + + bool isNSError = hasFlag(loc, state); + bool isCFError = false; + if (!isNSError) + isCFError = hasFlag(loc, state); + + if (!(isNSError || isCFError)) + return; + + // Storing to possible null NSError/CFErrorRef out parameter. + + // Emit an error. + std::string err; + llvm::raw_string_ostream os(err); + os << "Potential null dereference. According to coding standards "; + + if (isNSError) + os << "in 'Creating and Returning NSError Objects' the parameter '"; + else + os << "documented in CoreFoundation/CFError.h the parameter '"; + + os << "' may be null."; + + BugType *bug = 0; + if (isNSError) + bug = new NSErrorDerefBug(); + else + bug = new CFErrorDerefBug(); + BugReport *report = new BugReport(*bug, os.str(), + event.SinkNode); + BR.EmitReport(report); +} + +static bool IsNSError(QualType T, IdentifierInfo *II) { + + const PointerType* PPT = T->getAs(); + if (!PPT) + return false; + + const ObjCObjectPointerType* PT = + PPT->getPointeeType()->getAs(); + + if (!PT) + return false; + + const ObjCInterfaceDecl *ID = PT->getInterfaceDecl(); + + // FIXME: Can ID ever be NULL? + if (ID) + return II == ID->getIdentifier(); + + return false; +} + +static bool IsCFError(QualType T, IdentifierInfo *II) { + const PointerType* PPT = T->getAs(); + if (!PPT) return false; + + const TypedefType* TT = PPT->getPointeeType()->getAs(); + if (!TT) return false; + + return TT->getDecl()->getIdentifier() == II; +} + +void ento::registerNSErrorChecker(CheckerManager &mgr) { + mgr.registerChecker(); + NSOrCFErrorDerefChecker * + checker = mgr.registerChecker(); + checker->ShouldCheckNSError = true; +} + +void ento::registerCFErrorChecker(CheckerManager &mgr) { + mgr.registerChecker(); + NSOrCFErrorDerefChecker * + checker = mgr.registerChecker(); + checker->ShouldCheckCFError = true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp new file mode 100644 index 0000000..c2d7c09 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp @@ -0,0 +1,146 @@ +//=== NoReturnFunctionChecker.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines NoReturnFunctionChecker, which evaluates functions that do not +// return to the caller. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "llvm/ADT/StringSwitch.h" +#include + +using namespace clang; +using namespace ento; + +namespace { + +class NoReturnFunctionChecker : public Checker< check::PostStmt, + check::PostObjCMessage > { +public: + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkPostObjCMessage(const ObjCMessage &msg, CheckerContext &C) const; +}; + +} + +void NoReturnFunctionChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + const Expr *Callee = CE->getCallee(); + + bool BuildSinks = getFunctionExtInfo(Callee->getType()).getNoReturn(); + + if (!BuildSinks) { + SVal L = state->getSVal(Callee, C.getLocationContext()); + const FunctionDecl *FD = L.getAsFunctionDecl(); + if (!FD) + return; + + if (FD->getAttr()) + BuildSinks = true; + else if (const IdentifierInfo *II = FD->getIdentifier()) { + // HACK: Some functions are not marked noreturn, and don't return. + // Here are a few hardwired ones. If this takes too long, we can + // potentially cache these results. + BuildSinks + = llvm::StringSwitch(StringRef(II->getName())) + .Case("exit", true) + .Case("panic", true) + .Case("error", true) + .Case("Assert", true) + // FIXME: This is just a wrapper around throwing an exception. + // Eventually inter-procedural analysis should handle this easily. + .Case("ziperr", true) + .Case("assfail", true) + .Case("db_error", true) + .Case("__assert", true) + .Case("__assert_rtn", true) + .Case("__assert_fail", true) + .Case("dtrace_assfail", true) + .Case("yy_fatal_error", true) + .Case("_XCAssertionFailureHandler", true) + .Case("_DTAssertionFailureHandler", true) + .Case("_TSAssertionFailureHandler", true) + .Default(false); + } + } + + if (BuildSinks) + C.generateSink(); +} + +static bool END_WITH_NULL isMultiArgSelector(const Selector *Sel, ...) { + va_list argp; + va_start(argp, Sel); + + unsigned Slot = 0; + const char *Arg; + while ((Arg = va_arg(argp, const char *))) { + if (!Sel->getNameForSlot(Slot).equals(Arg)) + break; // still need to va_end! + ++Slot; + } + + va_end(argp); + + // We only succeeded if we made it to the end of the argument list. + return (Arg == NULL); +} + +void NoReturnFunctionChecker::checkPostObjCMessage(const ObjCMessage &Msg, + CheckerContext &C) const { + // HACK: This entire check is to handle two messages in the Cocoa frameworks: + // -[NSAssertionHandler + // handleFailureInMethod:object:file:lineNumber:description:] + // -[NSAssertionHandler + // handleFailureInFunction:file:lineNumber:description:] + // Eventually these should be annotated with __attribute__((noreturn)). + // Because ObjC messages use dynamic dispatch, it is not generally safe to + // assume certain methods can't return. In cases where it is definitely valid, + // see if you can mark the methods noreturn or analyzer_noreturn instead of + // adding more explicit checks to this method. + + if (!Msg.isInstanceMessage()) + return; + + const ObjCInterfaceDecl *Receiver = Msg.getReceiverInterface(); + if (!Receiver) + return; + if (!Receiver->getIdentifier()->isStr("NSAssertionHandler")) + return; + + Selector Sel = Msg.getSelector(); + switch (Sel.getNumArgs()) { + default: + return; + case 4: + if (!isMultiArgSelector(&Sel, "handleFailureInFunction", "file", + "lineNumber", "description", NULL)) + return; + break; + case 5: + if (!isMultiArgSelector(&Sel, "handleFailureInMethod", "object", "file", + "lineNumber", "description", NULL)) + return; + break; + } + + // If we got here, it's one of the messages we care about. + C.generateSink(); +} + + +void ento::registerNoReturnFunctionChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/OSAtomicChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/OSAtomicChecker.cpp new file mode 100644 index 0000000..7b724d2 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/OSAtomicChecker.cpp @@ -0,0 +1,218 @@ +//=== OSAtomicChecker.cpp - OSAtomic functions evaluator --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This checker evaluates OSAtomic functions. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/Basic/Builtins.h" + +using namespace clang; +using namespace ento; + +namespace { + +class OSAtomicChecker : public Checker { +public: + bool inlineCall(const CallExpr *CE, ExprEngine &Eng, + ExplodedNode *Pred, ExplodedNodeSet &Dst) const; + +private: + bool evalOSAtomicCompareAndSwap(const CallExpr *CE, + ExprEngine &Eng, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) const; +}; +} + +static StringRef getCalleeName(ProgramStateRef State, + const CallExpr *CE, + const LocationContext *LCtx) { + const Expr *Callee = CE->getCallee(); + SVal L = State->getSVal(Callee, LCtx); + const FunctionDecl *funDecl = L.getAsFunctionDecl(); + if (!funDecl) + return StringRef(); + IdentifierInfo *funI = funDecl->getIdentifier(); + if (!funI) + return StringRef(); + return funI->getName(); +} + +bool OSAtomicChecker::inlineCall(const CallExpr *CE, + ExprEngine &Eng, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) const { + StringRef FName = getCalleeName(Pred->getState(), + CE, Pred->getLocationContext()); + if (FName.empty()) + return false; + + // Check for compare and swap. + if (FName.startswith("OSAtomicCompareAndSwap") || + FName.startswith("objc_atomicCompareAndSwap")) + return evalOSAtomicCompareAndSwap(CE, Eng, Pred, Dst); + + // FIXME: Other atomics. + return false; +} + +bool OSAtomicChecker::evalOSAtomicCompareAndSwap(const CallExpr *CE, + ExprEngine &Eng, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) const { + // Not enough arguments to match OSAtomicCompareAndSwap? + if (CE->getNumArgs() != 3) + return false; + + ASTContext &Ctx = Eng.getContext(); + const Expr *oldValueExpr = CE->getArg(0); + QualType oldValueType = Ctx.getCanonicalType(oldValueExpr->getType()); + + const Expr *newValueExpr = CE->getArg(1); + QualType newValueType = Ctx.getCanonicalType(newValueExpr->getType()); + + // Do the types of 'oldValue' and 'newValue' match? + if (oldValueType != newValueType) + return false; + + const Expr *theValueExpr = CE->getArg(2); + const PointerType *theValueType=theValueExpr->getType()->getAs(); + + // theValueType not a pointer? + if (!theValueType) + return false; + + QualType theValueTypePointee = + Ctx.getCanonicalType(theValueType->getPointeeType()).getUnqualifiedType(); + + // The pointee must match newValueType and oldValueType. + if (theValueTypePointee != newValueType) + return false; + + static SimpleProgramPointTag OSAtomicLoadTag("OSAtomicChecker : Load"); + static SimpleProgramPointTag OSAtomicStoreTag("OSAtomicChecker : Store"); + + // Load 'theValue'. + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + ExplodedNodeSet Tmp; + SVal location = state->getSVal(theValueExpr, LCtx); + // Here we should use the value type of the region as the load type, because + // we are simulating the semantics of the function, not the semantics of + // passing argument. So the type of theValue expr is not we are loading. + // But usually the type of the varregion is not the type we want either, + // we still need to do a CastRetrievedVal in store manager. So actually this + // LoadTy specifying can be omitted. But we put it here to emphasize the + // semantics. + QualType LoadTy; + if (const TypedValueRegion *TR = + dyn_cast_or_null(location.getAsRegion())) { + LoadTy = TR->getValueType(); + } + Eng.evalLoad(Tmp, CE, theValueExpr, Pred, + state, location, &OSAtomicLoadTag, LoadTy); + + if (Tmp.empty()) { + // If no nodes were generated, other checkers must have generated sinks. + // We return an empty Dst. + return true; + } + + for (ExplodedNodeSet::iterator I = Tmp.begin(), E = Tmp.end(); + I != E; ++I) { + + ExplodedNode *N = *I; + ProgramStateRef stateLoad = N->getState(); + + // Use direct bindings from the environment since we are forcing a load + // from a location that the Environment would typically not be used + // to bind a value. + SVal theValueVal_untested = stateLoad->getSVal(theValueExpr, LCtx, true); + + SVal oldValueVal_untested = stateLoad->getSVal(oldValueExpr, LCtx); + + // FIXME: Issue an error. + if (theValueVal_untested.isUndef() || oldValueVal_untested.isUndef()) { + return false; + } + + DefinedOrUnknownSVal theValueVal = + cast(theValueVal_untested); + DefinedOrUnknownSVal oldValueVal = + cast(oldValueVal_untested); + + SValBuilder &svalBuilder = Eng.getSValBuilder(); + + // Perform the comparison. + DefinedOrUnknownSVal Cmp = + svalBuilder.evalEQ(stateLoad,theValueVal,oldValueVal); + + ProgramStateRef stateEqual = stateLoad->assume(Cmp, true); + + // Were they equal? + if (stateEqual) { + // Perform the store. + ExplodedNodeSet TmpStore; + SVal val = stateEqual->getSVal(newValueExpr, LCtx); + + // Handle implicit value casts. + if (const TypedValueRegion *R = + dyn_cast_or_null(location.getAsRegion())) { + val = svalBuilder.evalCast(val,R->getValueType(), newValueExpr->getType()); + } + + Eng.evalStore(TmpStore, CE, theValueExpr, N, + stateEqual, location, val, &OSAtomicStoreTag); + + if (TmpStore.empty()) { + // If no nodes were generated, other checkers must have generated sinks. + // We return an empty Dst. + return true; + } + + StmtNodeBuilder B(TmpStore, Dst, Eng.getBuilderContext()); + // Now bind the result of the comparison. + for (ExplodedNodeSet::iterator I2 = TmpStore.begin(), + E2 = TmpStore.end(); I2 != E2; ++I2) { + ExplodedNode *predNew = *I2; + ProgramStateRef stateNew = predNew->getState(); + // Check for 'void' return type if we have a bogus function prototype. + SVal Res = UnknownVal(); + QualType T = CE->getType(); + if (!T->isVoidType()) + Res = Eng.getSValBuilder().makeTruthVal(true, T); + B.generateNode(CE, predNew, stateNew->BindExpr(CE, LCtx, Res), + false, this); + } + } + + // Were they not equal? + if (ProgramStateRef stateNotEqual = stateLoad->assume(Cmp, false)) { + // Check for 'void' return type if we have a bogus function prototype. + SVal Res = UnknownVal(); + QualType T = CE->getType(); + if (!T->isVoidType()) + Res = Eng.getSValBuilder().makeTruthVal(false, CE->getType()); + StmtNodeBuilder B(N, Dst, Eng.getBuilderContext()); + B.generateNode(CE, N, stateNotEqual->BindExpr(CE, LCtx, Res), + false, this); + } + } + + return true; +} + +void ento::registerOSAtomicChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp new file mode 100644 index 0000000..777e9ea --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp @@ -0,0 +1,96 @@ +//== ObjCAtSyncChecker.cpp - nil mutex checker for @synchronized -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines ObjCAtSyncChecker, a builtin check that checks for null pointers +// used as mutexes for @synchronized. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/AST/StmtObjC.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Checkers/DereferenceChecker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; + +namespace { +class ObjCAtSyncChecker + : public Checker< check::PreStmt > { + mutable OwningPtr BT_null; + mutable OwningPtr BT_undef; + +public: + void checkPreStmt(const ObjCAtSynchronizedStmt *S, CheckerContext &C) const; +}; +} // end anonymous namespace + +void ObjCAtSyncChecker::checkPreStmt(const ObjCAtSynchronizedStmt *S, + CheckerContext &C) const { + + const Expr *Ex = S->getSynchExpr(); + ProgramStateRef state = C.getState(); + SVal V = state->getSVal(Ex, C.getLocationContext()); + + // Uninitialized value used for the mutex? + if (isa(V)) { + if (ExplodedNode *N = C.generateSink()) { + if (!BT_undef) + BT_undef.reset(new BuiltinBug("Uninitialized value used as mutex " + "for @synchronized")); + BugReport *report = + new BugReport(*BT_undef, BT_undef->getDescription(), N); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, Ex, + report)); + C.EmitReport(report); + } + return; + } + + if (V.isUnknown()) + return; + + // Check for null mutexes. + ProgramStateRef notNullState, nullState; + llvm::tie(notNullState, nullState) = state->assume(cast(V)); + + if (nullState) { + if (!notNullState) { + // Generate an error node. This isn't a sink since + // a null mutex just means no synchronization occurs. + if (ExplodedNode *N = C.addTransition(nullState)) { + if (!BT_null) + BT_null.reset(new BuiltinBug("Nil value used as mutex for @synchronized() " + "(no synchronization will occur)")); + BugReport *report = + new BugReport(*BT_null, BT_null->getDescription(), N); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, Ex, + report)); + + C.EmitReport(report); + return; + } + } + // Don't add a transition for 'nullState'. If the value is + // under-constrained to be null or non-null, assume it is non-null + // afterwards. + } + + if (notNullState) + C.addTransition(notNullState); +} + +void ento::registerObjCAtSyncChecker(CheckerManager &mgr) { + if (mgr.getLangOpts().ObjC2) + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp new file mode 100644 index 0000000..f2929c0 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp @@ -0,0 +1,174 @@ +//== ObjCContainersASTChecker.cpp - CoreFoundation containers API *- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// An AST checker that looks for common pitfalls when using 'CFArray', +// 'CFDictionary', 'CFSet' APIs. +// +//===----------------------------------------------------------------------===// +#include "ClangSACheckers.h" +#include "clang/Analysis/AnalysisContext.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class WalkAST : public StmtVisitor { + BugReporter &BR; + AnalysisDeclContext* AC; + ASTContext &ASTC; + uint64_t PtrWidth; + + static const unsigned InvalidArgIndex = UINT_MAX; + + /// Check if the type has pointer size (very conservative). + inline bool isPointerSize(const Type *T) { + if (!T) + return true; + if (T->isIncompleteType()) + return true; + return (ASTC.getTypeSize(T) == PtrWidth); + } + + /// Check if the type is a pointer/array to pointer sized values. + inline bool hasPointerToPointerSizedType(const Expr *E) { + QualType T = E->getType(); + + // The type could be either a pointer or array. + const Type *TP = T.getTypePtr(); + QualType PointeeT = TP->getPointeeType(); + if (!PointeeT.isNull()) { + // If the type is a pointer to an array, check the size of the array + // elements. To avoid false positives coming from assumption that the + // values x and &x are equal when x is an array. + if (const Type *TElem = PointeeT->getArrayElementTypeNoTypeQual()) + if (isPointerSize(TElem)) + return true; + + // Else, check the pointee size. + return isPointerSize(PointeeT.getTypePtr()); + } + + if (const Type *TElem = TP->getArrayElementTypeNoTypeQual()) + return isPointerSize(TElem); + + // The type must be an array/pointer type. + + // This could be a null constant, which is allowed. + if (E->isNullPointerConstant(ASTC, Expr::NPC_ValueDependentIsNull)) + return true; + return false; + } + +public: + WalkAST(BugReporter &br, AnalysisDeclContext* ac) + : BR(br), AC(ac), ASTC(AC->getASTContext()), + PtrWidth(ASTC.getTargetInfo().getPointerWidth(0)) {} + + // Statement visitor methods. + void VisitChildren(Stmt *S); + void VisitStmt(Stmt *S) { VisitChildren(S); } + void VisitCallExpr(CallExpr *CE); +}; +} // end anonymous namespace + +static StringRef getCalleeName(CallExpr *CE) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return StringRef(); + + IdentifierInfo *II = FD->getIdentifier(); + if (!II) // if no identifier, not a simple C function + return StringRef(); + + return II->getName(); +} + +void WalkAST::VisitCallExpr(CallExpr *CE) { + StringRef Name = getCalleeName(CE); + if (Name.empty()) + return; + + const Expr *Arg = 0; + unsigned ArgNum = InvalidArgIndex; + + if (Name.equals("CFArrayCreate") || Name.equals("CFSetCreate")) { + ArgNum = 1; + Arg = CE->getArg(ArgNum)->IgnoreParenCasts(); + if (hasPointerToPointerSizedType(Arg)) + return; + } + + if (Arg == 0 && Name.equals("CFDictionaryCreate")) { + // Check first argument. + ArgNum = 1; + Arg = CE->getArg(ArgNum)->IgnoreParenCasts(); + if (hasPointerToPointerSizedType(Arg)) { + // Check second argument. + ArgNum = 2; + Arg = CE->getArg(ArgNum)->IgnoreParenCasts(); + if (hasPointerToPointerSizedType(Arg)) + // Both are good, return. + return; + } + } + + if (ArgNum != InvalidArgIndex) { + assert(ArgNum == 1 || ArgNum == 2); + + SmallString<256> BufName; + llvm::raw_svector_ostream OsName(BufName); + assert(ArgNum == 1 || ArgNum == 2); + OsName << " Invalid use of '" << Name << "'" ; + + SmallString<256> Buf; + llvm::raw_svector_ostream Os(Buf); + Os << " The "<< ((ArgNum == 1) ? "first" : "second") << " argument to '" + << Name << "' must be a C array of pointer-sized values, not '" + << Arg->getType().getAsString() << "'"; + + SourceRange R = Arg->getSourceRange(); + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + OsName.str(), categories::CoreFoundationObjectiveC, + Os.str(), CELoc, &R, 1); + } + + // Recurse and check children. + VisitChildren(CE); +} + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I) + if (Stmt *child = *I) + Visit(child); +} + +namespace { +class ObjCContainersASTChecker : public Checker { +public: + + void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr, + BugReporter &BR) const { + WalkAST walker(BR, Mgr.getAnalysisDeclContext(D)); + walker.Visit(D->getBody()); + } +}; +} + +void ento::registerObjCContainersASTChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp new file mode 100644 index 0000000..f4655b6 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp @@ -0,0 +1,159 @@ +//== ObjCContainersChecker.cpp - Path sensitive checker for CFArray *- C++ -*=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Performs path sensitive checks of Core Foundation static containers like +// CFArray. +// 1) Check for buffer overflows: +// In CFArrayGetArrayAtIndex( myArray, index), if the index is outside the +// index space of theArray (0 to N-1 inclusive (where N is the count of +// theArray), the behavior is undefined. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/AST/ParentMap.h" + +using namespace clang; +using namespace ento; + +namespace { +class ObjCContainersChecker : public Checker< check::PreStmt, + check::PostStmt > { + mutable OwningPtr BT; + inline void initBugType() const { + if (!BT) + BT.reset(new BugType("CFArray API", + categories::CoreFoundationObjectiveC)); + } + + inline SymbolRef getArraySym(const Expr *E, CheckerContext &C) const { + SVal ArrayRef = C.getState()->getSVal(E, C.getLocationContext()); + SymbolRef ArraySym = ArrayRef.getAsSymbol(); + return ArraySym; + } + + void addSizeInfo(const Expr *Array, const Expr *Size, + CheckerContext &C) const; + +public: + /// A tag to id this checker. + static void *getTag() { static int Tag; return &Tag; } + + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; +}; +} // end anonymous namespace + +// ProgramState trait - a map from array symbol to it's state. +typedef llvm::ImmutableMap ArraySizeM; + +namespace { struct ArraySizeMap {}; } +namespace clang { namespace ento { +template<> struct ProgramStateTrait + : public ProgramStatePartialTrait { + static void *GDMIndex() { return ObjCContainersChecker::getTag(); } +}; +}} + +void ObjCContainersChecker::addSizeInfo(const Expr *Array, const Expr *Size, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + SVal SizeV = State->getSVal(Size, C.getLocationContext()); + // Undefined is reported by another checker. + if (SizeV.isUnknownOrUndef()) + return; + + // Get the ArrayRef symbol. + SVal ArrayRef = State->getSVal(Array, C.getLocationContext()); + SymbolRef ArraySym = ArrayRef.getAsSymbol(); + if (!ArraySym) + return; + + C.addTransition(State->set(ArraySym, cast(SizeV))); + return; +} + +void ObjCContainersChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + StringRef Name = C.getCalleeName(CE); + if (Name.empty() || CE->getNumArgs() < 1) + return; + + // Add array size information to the state. + if (Name.equals("CFArrayCreate")) { + if (CE->getNumArgs() < 3) + return; + // Note, we can visit the Create method in the post-visit because + // the CFIndex parameter is passed in by value and will not be invalidated + // by the call. + addSizeInfo(CE, CE->getArg(2), C); + return; + } + + if (Name.equals("CFArrayGetCount")) { + addSizeInfo(CE->getArg(0), CE, C); + return; + } +} + +void ObjCContainersChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + StringRef Name = C.getCalleeName(CE); + if (Name.empty() || CE->getNumArgs() < 2) + return; + + // Check the array access. + if (Name.equals("CFArrayGetValueAtIndex")) { + ProgramStateRef State = C.getState(); + // Retrieve the size. + // Find out if we saw this array symbol before and have information about it. + const Expr *ArrayExpr = CE->getArg(0); + SymbolRef ArraySym = getArraySym(ArrayExpr, C); + if (!ArraySym) + return; + + const DefinedSVal *Size = State->get(ArraySym); + + if (!Size) + return; + + // Get the index. + const Expr *IdxExpr = CE->getArg(1); + SVal IdxVal = State->getSVal(IdxExpr, C.getLocationContext()); + if (IdxVal.isUnknownOrUndef()) + return; + DefinedSVal Idx = cast(IdxVal); + + // Now, check if 'Idx in [0, Size-1]'. + const QualType T = IdxExpr->getType(); + ProgramStateRef StInBound = State->assumeInBound(Idx, *Size, true, T); + ProgramStateRef StOutBound = State->assumeInBound(Idx, *Size, false, T); + if (StOutBound && !StInBound) { + ExplodedNode *N = C.generateSink(StOutBound); + if (!N) + return; + initBugType(); + BugReport *R = new BugReport(*BT, "Index is out of bounds", N); + R->addRange(IdxExpr->getSourceRange()); + C.EmitReport(R); + return; + } + } +} + +/// Register checker. +void ento::registerObjCContainersChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp new file mode 100644 index 0000000..97b58cf --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp @@ -0,0 +1,396 @@ +//== ObjCSelfInitChecker.cpp - Checker for 'self' initialization -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines ObjCSelfInitChecker, a builtin check that checks for uses of +// 'self' before proper initialization. +// +//===----------------------------------------------------------------------===// + +// This checks initialization methods to verify that they assign 'self' to the +// result of an initialization call (e.g. [super init], or [self initWith..]) +// before using 'self' or any instance variable. +// +// To perform the required checking, values are tagged with flags that indicate +// 1) if the object is the one pointed to by 'self', and 2) if the object +// is the result of an initializer (e.g. [super init]). +// +// Uses of an object that is true for 1) but not 2) trigger a diagnostic. +// The uses that are currently checked are: +// - Using instance variables. +// - Returning the object. +// +// Note that we don't check for an invalid 'self' that is the receiver of an +// obj-c message expression to cut down false positives where logging functions +// get information from self (like its class) or doing "invalidation" on self +// when the initialization fails. +// +// Because the object that 'self' points to gets invalidated when a call +// receives a reference to 'self', the checker keeps track and passes the flags +// for 1) and 2) to the new object that 'self' points to after the call. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/AST/ParentMap.h" + +using namespace clang; +using namespace ento; + +static bool shouldRunOnFunctionOrMethod(const NamedDecl *ND); +static bool isInitializationMethod(const ObjCMethodDecl *MD); +static bool isInitMessage(const ObjCMessage &msg); +static bool isSelfVar(SVal location, CheckerContext &C); + +namespace { +class ObjCSelfInitChecker : public Checker< check::PreObjCMessage, + check::PostObjCMessage, + check::PostStmt, + check::PreStmt, + check::PreStmt, + check::PostStmt, + check::Location > { +public: + void checkPreObjCMessage(ObjCMessage msg, CheckerContext &C) const; + void checkPostObjCMessage(ObjCMessage msg, CheckerContext &C) const; + void checkPostStmt(const ObjCIvarRefExpr *E, CheckerContext &C) const; + void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkLocation(SVal location, bool isLoad, const Stmt *S, + CheckerContext &C) const; + + void checkPreStmt(const CallOrObjCMessage &CE, CheckerContext &C) const; + void checkPostStmt(const CallOrObjCMessage &CE, CheckerContext &C) const; + +}; +} // end anonymous namespace + +namespace { + +class InitSelfBug : public BugType { + const std::string desc; +public: + InitSelfBug() : BugType("Missing \"self = [(super or self) init...]\"", + categories::CoreFoundationObjectiveC) {} +}; + +} // end anonymous namespace + +namespace { +enum SelfFlagEnum { + /// \brief No flag set. + SelfFlag_None = 0x0, + /// \brief Value came from 'self'. + SelfFlag_Self = 0x1, + /// \brief Value came from the result of an initializer (e.g. [super init]). + SelfFlag_InitRes = 0x2 +}; +} + +typedef llvm::ImmutableMap SelfFlag; +namespace { struct CalledInit {}; } +namespace { struct PreCallSelfFlags {}; } + +namespace clang { +namespace ento { + template<> + struct ProgramStateTrait : public ProgramStatePartialTrait { + static void *GDMIndex() { static int index = 0; return &index; } + }; + template <> + struct ProgramStateTrait : public ProgramStatePartialTrait { + static void *GDMIndex() { static int index = 0; return &index; } + }; + + /// \brief A call receiving a reference to 'self' invalidates the object that + /// 'self' contains. This keeps the "self flags" assigned to the 'self' + /// object before the call so we can assign them to the new object that 'self' + /// points to after the call. + template <> + struct ProgramStateTrait : public ProgramStatePartialTrait { + static void *GDMIndex() { static int index = 0; return &index; } + }; +} +} + +static SelfFlagEnum getSelfFlags(SVal val, ProgramStateRef state) { + if (SymbolRef sym = val.getAsSymbol()) + if (const unsigned *attachedFlags = state->get(sym)) + return (SelfFlagEnum)*attachedFlags; + return SelfFlag_None; +} + +static SelfFlagEnum getSelfFlags(SVal val, CheckerContext &C) { + return getSelfFlags(val, C.getState()); +} + +static void addSelfFlag(ProgramStateRef state, SVal val, + SelfFlagEnum flag, CheckerContext &C) { + // We tag the symbol that the SVal wraps. + if (SymbolRef sym = val.getAsSymbol()) + C.addTransition(state->set(sym, getSelfFlags(val, C) | flag)); +} + +static bool hasSelfFlag(SVal val, SelfFlagEnum flag, CheckerContext &C) { + return getSelfFlags(val, C) & flag; +} + +/// \brief Returns true of the value of the expression is the object that 'self' +/// points to and is an object that did not come from the result of calling +/// an initializer. +static bool isInvalidSelf(const Expr *E, CheckerContext &C) { + SVal exprVal = C.getState()->getSVal(E, C.getLocationContext()); + if (!hasSelfFlag(exprVal, SelfFlag_Self, C)) + return false; // value did not come from 'self'. + if (hasSelfFlag(exprVal, SelfFlag_InitRes, C)) + return false; // 'self' is properly initialized. + + return true; +} + +static void checkForInvalidSelf(const Expr *E, CheckerContext &C, + const char *errorStr) { + if (!E) + return; + + if (!C.getState()->get()) + return; + + if (!isInvalidSelf(E, C)) + return; + + // Generate an error node. + ExplodedNode *N = C.generateSink(); + if (!N) + return; + + BugReport *report = + new BugReport(*new InitSelfBug(), errorStr, N); + C.EmitReport(report); +} + +void ObjCSelfInitChecker::checkPostObjCMessage(ObjCMessage msg, + CheckerContext &C) const { + // When encountering a message that does initialization (init rule), + // tag the return value so that we know later on that if self has this value + // then it is properly initialized. + + // FIXME: A callback should disable checkers at the start of functions. + if (!shouldRunOnFunctionOrMethod(dyn_cast( + C.getCurrentAnalysisDeclContext()->getDecl()))) + return; + + if (isInitMessage(msg)) { + // Tag the return value as the result of an initializer. + ProgramStateRef state = C.getState(); + + // FIXME this really should be context sensitive, where we record + // the current stack frame (for IPA). Also, we need to clean this + // value out when we return from this method. + state = state->set(true); + + SVal V = state->getSVal(msg.getMessageExpr(), C.getLocationContext()); + addSelfFlag(state, V, SelfFlag_InitRes, C); + return; + } + + CallOrObjCMessage MsgWrapper(msg, C.getState(), C.getLocationContext()); + checkPostStmt(MsgWrapper, C); + + // We don't check for an invalid 'self' in an obj-c message expression to cut + // down false positives where logging functions get information from self + // (like its class) or doing "invalidation" on self when the initialization + // fails. +} + +void ObjCSelfInitChecker::checkPostStmt(const ObjCIvarRefExpr *E, + CheckerContext &C) const { + // FIXME: A callback should disable checkers at the start of functions. + if (!shouldRunOnFunctionOrMethod(dyn_cast( + C.getCurrentAnalysisDeclContext()->getDecl()))) + return; + + checkForInvalidSelf(E->getBase(), C, + "Instance variable used while 'self' is not set to the result of " + "'[(super or self) init...]'"); +} + +void ObjCSelfInitChecker::checkPreStmt(const ReturnStmt *S, + CheckerContext &C) const { + // FIXME: A callback should disable checkers at the start of functions. + if (!shouldRunOnFunctionOrMethod(dyn_cast( + C.getCurrentAnalysisDeclContext()->getDecl()))) + return; + + checkForInvalidSelf(S->getRetValue(), C, + "Returning 'self' while it is not set to the result of " + "'[(super or self) init...]'"); +} + +// When a call receives a reference to 'self', [Pre/Post]VisitGenericCall pass +// the SelfFlags from the object 'self' point to before the call, to the new +// object after the call. This is to avoid invalidation of 'self' by logging +// functions. +// Another common pattern in classes with multiple initializers is to put the +// subclass's common initialization bits into a static function that receives +// the value of 'self', e.g: +// @code +// if (!(self = [super init])) +// return nil; +// if (!(self = _commonInit(self))) +// return nil; +// @endcode +// Until we can use inter-procedural analysis, in such a call, transfer the +// SelfFlags to the result of the call. + +void ObjCSelfInitChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + CallOrObjCMessage CEWrapper(CE, C.getState(), C.getLocationContext()); + checkPreStmt(CEWrapper, C); +} + +void ObjCSelfInitChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + CallOrObjCMessage CEWrapper(CE, C.getState(), C.getLocationContext()); + checkPostStmt(CEWrapper, C); +} + +void ObjCSelfInitChecker::checkPreObjCMessage(ObjCMessage Msg, + CheckerContext &C) const { + CallOrObjCMessage MsgWrapper(Msg, C.getState(), C.getLocationContext()); + checkPreStmt(MsgWrapper, C); +} + +void ObjCSelfInitChecker::checkPreStmt(const CallOrObjCMessage &CE, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + unsigned NumArgs = CE.getNumArgs(); + // If we passed 'self' as and argument to the call, record it in the state + // to be propagated after the call. + // Note, we could have just given up, but try to be more optimistic here and + // assume that the functions are going to continue initialization or will not + // modify self. + for (unsigned i = 0; i < NumArgs; ++i) { + SVal argV = CE.getArgSVal(i); + if (isSelfVar(argV, C)) { + unsigned selfFlags = getSelfFlags(state->getSVal(cast(argV)), C); + C.addTransition(state->set(selfFlags)); + return; + } else if (hasSelfFlag(argV, SelfFlag_Self, C)) { + unsigned selfFlags = getSelfFlags(argV, C); + C.addTransition(state->set(selfFlags)); + return; + } + } +} + +void ObjCSelfInitChecker::checkPostStmt(const CallOrObjCMessage &CE, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + unsigned NumArgs = CE.getNumArgs(); + for (unsigned i = 0; i < NumArgs; ++i) { + SVal argV = CE.getArgSVal(i); + if (isSelfVar(argV, C)) { + // If the address of 'self' is being passed to the call, assume that the + // 'self' after the call will have the same flags. + // EX: log(&self) + SelfFlagEnum prevFlags = (SelfFlagEnum)state->get(); + state = state->remove(); + addSelfFlag(state, state->getSVal(cast(argV)), prevFlags, C); + return; + } else if (hasSelfFlag(argV, SelfFlag_Self, C)) { + // If 'self' is passed to the call by value, assume that the function + // returns 'self'. So assign the flags, which were set on 'self' to the + // return value. + // EX: self = performMoreInitialization(self) + SelfFlagEnum prevFlags = (SelfFlagEnum)state->get(); + state = state->remove(); + const Expr *CallExpr = CE.getOriginExpr(); + if (CallExpr) + addSelfFlag(state, state->getSVal(CallExpr, C.getLocationContext()), + prevFlags, C); + return; + } + } +} + +void ObjCSelfInitChecker::checkLocation(SVal location, bool isLoad, + const Stmt *S, + CheckerContext &C) const { + // Tag the result of a load from 'self' so that we can easily know that the + // value is the object that 'self' points to. + ProgramStateRef state = C.getState(); + if (isSelfVar(location, C)) + addSelfFlag(state, state->getSVal(cast(location)), SelfFlag_Self, C); +} + +// FIXME: A callback should disable checkers at the start of functions. +static bool shouldRunOnFunctionOrMethod(const NamedDecl *ND) { + if (!ND) + return false; + + const ObjCMethodDecl *MD = dyn_cast(ND); + if (!MD) + return false; + if (!isInitializationMethod(MD)) + return false; + + // self = [super init] applies only to NSObject subclasses. + // For instance, NSProxy doesn't implement -init. + ASTContext &Ctx = MD->getASTContext(); + IdentifierInfo* NSObjectII = &Ctx.Idents.get("NSObject"); + ObjCInterfaceDecl *ID = MD->getClassInterface()->getSuperClass(); + for ( ; ID ; ID = ID->getSuperClass()) { + IdentifierInfo *II = ID->getIdentifier(); + + if (II == NSObjectII) + break; + } + if (!ID) + return false; + + return true; +} + +/// \brief Returns true if the location is 'self'. +static bool isSelfVar(SVal location, CheckerContext &C) { + AnalysisDeclContext *analCtx = C.getCurrentAnalysisDeclContext(); + if (!analCtx->getSelfDecl()) + return false; + if (!isa(location)) + return false; + + loc::MemRegionVal MRV = cast(location); + if (const DeclRegion *DR = dyn_cast(MRV.stripCasts())) + return (DR->getDecl() == analCtx->getSelfDecl()); + + return false; +} + +static bool isInitializationMethod(const ObjCMethodDecl *MD) { + return MD->getMethodFamily() == OMF_init; +} + +static bool isInitMessage(const ObjCMessage &msg) { + return msg.getMethodFamily() == OMF_init; +} + +//===----------------------------------------------------------------------===// +// Registration. +//===----------------------------------------------------------------------===// + +void ento::registerObjCSelfInitChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp new file mode 100644 index 0000000..4718dc7 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp @@ -0,0 +1,186 @@ +//==- ObjCUnusedIVarsChecker.cpp - Check for unused ivars --------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a CheckObjCUnusedIvars, a checker that +// analyzes an Objective-C class's interface/implementation to determine if it +// has any ivars that are never accessed. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/DeclObjC.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceManager.h" + +using namespace clang; +using namespace ento; + +enum IVarState { Unused, Used }; +typedef llvm::DenseMap IvarUsageMap; + +static void Scan(IvarUsageMap& M, const Stmt *S) { + if (!S) + return; + + if (const ObjCIvarRefExpr *Ex = dyn_cast(S)) { + const ObjCIvarDecl *D = Ex->getDecl(); + IvarUsageMap::iterator I = M.find(D); + if (I != M.end()) + I->second = Used; + return; + } + + // Blocks can reference an instance variable of a class. + if (const BlockExpr *BE = dyn_cast(S)) { + Scan(M, BE->getBody()); + return; + } + + for (Stmt::const_child_iterator I=S->child_begin(),E=S->child_end(); I!=E;++I) + Scan(M, *I); +} + +static void Scan(IvarUsageMap& M, const ObjCPropertyImplDecl *D) { + if (!D) + return; + + const ObjCIvarDecl *ID = D->getPropertyIvarDecl(); + + if (!ID) + return; + + IvarUsageMap::iterator I = M.find(ID); + if (I != M.end()) + I->second = Used; +} + +static void Scan(IvarUsageMap& M, const ObjCContainerDecl *D) { + // Scan the methods for accesses. + for (ObjCContainerDecl::instmeth_iterator I = D->instmeth_begin(), + E = D->instmeth_end(); I!=E; ++I) + Scan(M, (*I)->getBody()); + + if (const ObjCImplementationDecl *ID = dyn_cast(D)) { + // Scan for @synthesized property methods that act as setters/getters + // to an ivar. + for (ObjCImplementationDecl::propimpl_iterator I = ID->propimpl_begin(), + E = ID->propimpl_end(); I!=E; ++I) + Scan(M, *I); + + // Scan the associated categories as well. + for (const ObjCCategoryDecl *CD = + ID->getClassInterface()->getCategoryList(); CD ; + CD = CD->getNextClassCategory()) { + if (const ObjCCategoryImplDecl *CID = CD->getImplementation()) + Scan(M, CID); + } + } +} + +static void Scan(IvarUsageMap &M, const DeclContext *C, const FileID FID, + SourceManager &SM) { + for (DeclContext::decl_iterator I=C->decls_begin(), E=C->decls_end(); + I!=E; ++I) + if (const FunctionDecl *FD = dyn_cast(*I)) { + SourceLocation L = FD->getLocStart(); + if (SM.getFileID(L) == FID) + Scan(M, FD->getBody()); + } +} + +static void checkObjCUnusedIvar(const ObjCImplementationDecl *D, + BugReporter &BR) { + + const ObjCInterfaceDecl *ID = D->getClassInterface(); + IvarUsageMap M; + + // Iterate over the ivars. + for (ObjCInterfaceDecl::ivar_iterator I=ID->ivar_begin(), + E=ID->ivar_end(); I!=E; ++I) { + + const ObjCIvarDecl *ID = *I; + + // Ignore ivars that... + // (a) aren't private + // (b) explicitly marked unused + // (c) are iboutlets + // (d) are unnamed bitfields + if (ID->getAccessControl() != ObjCIvarDecl::Private || + ID->getAttr() || ID->getAttr() || + ID->getAttr() || + ID->isUnnamedBitfield()) + continue; + + M[ID] = Unused; + } + + if (M.empty()) + return; + + // Now scan the implementation declaration. + Scan(M, D); + + // Any potentially unused ivars? + bool hasUnused = false; + for (IvarUsageMap::iterator I = M.begin(), E = M.end(); I!=E; ++I) + if (I->second == Unused) { + hasUnused = true; + break; + } + + if (!hasUnused) + return; + + // We found some potentially unused ivars. Scan the entire translation unit + // for functions inside the @implementation that reference these ivars. + // FIXME: In the future hopefully we can just use the lexical DeclContext + // to go from the ObjCImplementationDecl to the lexically "nested" + // C functions. + SourceManager &SM = BR.getSourceManager(); + Scan(M, D->getDeclContext(), SM.getFileID(D->getLocation()), SM); + + // Find ivars that are unused. + for (IvarUsageMap::iterator I = M.begin(), E = M.end(); I!=E; ++I) + if (I->second == Unused) { + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + os << "Instance variable '" << *I->first << "' in class '" << *ID + << "' is never used by the methods in its @implementation " + "(although it may be used by category methods)."; + + PathDiagnosticLocation L = + PathDiagnosticLocation::create(I->first, BR.getSourceManager()); + BR.EmitBasicReport(D, "Unused instance variable", "Optimization", + os.str(), L); + } +} + +//===----------------------------------------------------------------------===// +// ObjCUnusedIvarsChecker +//===----------------------------------------------------------------------===// + +namespace { +class ObjCUnusedIvarsChecker : public Checker< + check::ASTDecl > { +public: + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& mgr, + BugReporter &BR) const { + checkObjCUnusedIvar(D, BR); + } +}; +} + +void ento::registerObjCUnusedIvarsChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp new file mode 100644 index 0000000..fe4845b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp @@ -0,0 +1,69 @@ +//=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This files defines PointerArithChecker, a builtin checker that checks for +// pointer arithmetic on locations other than array elements. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class PointerArithChecker + : public Checker< check::PreStmt > { + mutable OwningPtr BT; + +public: + void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; +}; +} + +void PointerArithChecker::checkPreStmt(const BinaryOperator *B, + CheckerContext &C) const { + if (B->getOpcode() != BO_Sub && B->getOpcode() != BO_Add) + return; + + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + SVal LV = state->getSVal(B->getLHS(), LCtx); + SVal RV = state->getSVal(B->getRHS(), LCtx); + + const MemRegion *LR = LV.getAsRegion(); + + if (!LR || !RV.isConstant()) + return; + + // If pointer arithmetic is done on variables of non-array type, this often + // means behavior rely on memory organization, which is dangerous. + if (isa(LR) || isa(LR) || + isa(LR)) { + + if (ExplodedNode *N = C.addTransition()) { + if (!BT) + BT.reset(new BuiltinBug("Dangerous pointer arithmetic", + "Pointer arithmetic done on non-array variables " + "means reliance on memory layout, which is " + "dangerous.")); + BugReport *R = new BugReport(*BT, BT->getDescription(), N); + R->addRange(B->getSourceRange()); + C.EmitReport(R); + } + } +} + +void ento::registerPointerArithChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp new file mode 100644 index 0000000..fa5c6a3 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp @@ -0,0 +1,76 @@ +//=== PointerSubChecker.cpp - Pointer subtraction checker ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This files defines PointerSubChecker, a builtin checker that checks for +// pointer subtractions on two pointers pointing to different memory chunks. +// This check corresponds to CWE-469. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class PointerSubChecker + : public Checker< check::PreStmt > { + mutable OwningPtr BT; + +public: + void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; +}; +} + +void PointerSubChecker::checkPreStmt(const BinaryOperator *B, + CheckerContext &C) const { + // When doing pointer subtraction, if the two pointers do not point to the + // same memory chunk, emit a warning. + if (B->getOpcode() != BO_Sub) + return; + + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + SVal LV = state->getSVal(B->getLHS(), LCtx); + SVal RV = state->getSVal(B->getRHS(), LCtx); + + const MemRegion *LR = LV.getAsRegion(); + const MemRegion *RR = RV.getAsRegion(); + + if (!(LR && RR)) + return; + + const MemRegion *BaseLR = LR->getBaseRegion(); + const MemRegion *BaseRR = RR->getBaseRegion(); + + if (BaseLR == BaseRR) + return; + + // Allow arithmetic on different symbolic regions. + if (isa(BaseLR) || isa(BaseRR)) + return; + + if (ExplodedNode *N = C.addTransition()) { + if (!BT) + BT.reset(new BuiltinBug("Pointer subtraction", + "Subtraction of two pointers that do not point to " + "the same memory chunk may cause incorrect result.")); + BugReport *R = new BugReport(*BT, BT->getDescription(), N); + R->addRange(B->getSourceRange()); + C.EmitReport(R); + } +} + +void ento::registerPointerSubChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp new file mode 100644 index 0000000..2d018ef --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp @@ -0,0 +1,198 @@ +//===--- PthreadLockChecker.cpp - Check for locking problems ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines PthreadLockChecker, a simple lock -> unlock checker. +// Also handles XNU locks, which behave similarly enough to share code. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/ImmutableList.h" + +using namespace clang; +using namespace ento; + +namespace { +class PthreadLockChecker : public Checker< check::PostStmt > { + mutable OwningPtr BT_doublelock; + mutable OwningPtr BT_lor; + enum LockingSemantics { + NotApplicable = 0, + PthreadSemantics, + XNUSemantics + }; +public: + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + + void AcquireLock(CheckerContext &C, const CallExpr *CE, SVal lock, + bool isTryLock, enum LockingSemantics semantics) const; + + void ReleaseLock(CheckerContext &C, const CallExpr *CE, SVal lock) const; +}; +} // end anonymous namespace + +// GDM Entry for tracking lock state. +namespace { class LockSet {}; } +namespace clang { +namespace ento { +template <> struct ProgramStateTrait : + public ProgramStatePartialTrait > { + static void *GDMIndex() { static int x = 0; return &x; } +}; +} // end of ento (ProgramState) namespace +} // end clang namespace + + +void PthreadLockChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + StringRef FName = C.getCalleeName(CE); + if (FName.empty()) + return; + + if (CE->getNumArgs() != 1) + return; + + if (FName == "pthread_mutex_lock" || + FName == "pthread_rwlock_rdlock" || + FName == "pthread_rwlock_wrlock") + AcquireLock(C, CE, state->getSVal(CE->getArg(0), LCtx), + false, PthreadSemantics); + else if (FName == "lck_mtx_lock" || + FName == "lck_rw_lock_exclusive" || + FName == "lck_rw_lock_shared") + AcquireLock(C, CE, state->getSVal(CE->getArg(0), LCtx), + false, XNUSemantics); + else if (FName == "pthread_mutex_trylock" || + FName == "pthread_rwlock_tryrdlock" || + FName == "pthread_rwlock_tryrwlock") + AcquireLock(C, CE, state->getSVal(CE->getArg(0), LCtx), + true, PthreadSemantics); + else if (FName == "lck_mtx_try_lock" || + FName == "lck_rw_try_lock_exclusive" || + FName == "lck_rw_try_lock_shared") + AcquireLock(C, CE, state->getSVal(CE->getArg(0), LCtx), + true, XNUSemantics); + else if (FName == "pthread_mutex_unlock" || + FName == "pthread_rwlock_unlock" || + FName == "lck_mtx_unlock" || + FName == "lck_rw_done") + ReleaseLock(C, CE, state->getSVal(CE->getArg(0), LCtx)); +} + +void PthreadLockChecker::AcquireLock(CheckerContext &C, const CallExpr *CE, + SVal lock, bool isTryLock, + enum LockingSemantics semantics) const { + + const MemRegion *lockR = lock.getAsRegion(); + if (!lockR) + return; + + ProgramStateRef state = C.getState(); + + SVal X = state->getSVal(CE, C.getLocationContext()); + if (X.isUnknownOrUndef()) + return; + + DefinedSVal retVal = cast(X); + + if (state->contains(lockR)) { + if (!BT_doublelock) + BT_doublelock.reset(new BugType("Double locking", "Lock checker")); + ExplodedNode *N = C.generateSink(); + if (!N) + return; + BugReport *report = new BugReport(*BT_doublelock, + "This lock has already " + "been acquired", N); + report->addRange(CE->getArg(0)->getSourceRange()); + C.EmitReport(report); + return; + } + + ProgramStateRef lockSucc = state; + if (isTryLock) { + // Bifurcate the state, and allow a mode where the lock acquisition fails. + ProgramStateRef lockFail; + switch (semantics) { + case PthreadSemantics: + llvm::tie(lockFail, lockSucc) = state->assume(retVal); + break; + case XNUSemantics: + llvm::tie(lockSucc, lockFail) = state->assume(retVal); + break; + default: + llvm_unreachable("Unknown tryLock locking semantics"); + } + assert(lockFail && lockSucc); + C.addTransition(lockFail); + + } else if (semantics == PthreadSemantics) { + // Assume that the return value was 0. + lockSucc = state->assume(retVal, false); + assert(lockSucc); + + } else { + // XNU locking semantics return void on non-try locks + assert((semantics == XNUSemantics) && "Unknown locking semantics"); + lockSucc = state; + } + + // Record that the lock was acquired. + lockSucc = lockSucc->add(lockR); + C.addTransition(lockSucc); +} + +void PthreadLockChecker::ReleaseLock(CheckerContext &C, const CallExpr *CE, + SVal lock) const { + + const MemRegion *lockR = lock.getAsRegion(); + if (!lockR) + return; + + ProgramStateRef state = C.getState(); + llvm::ImmutableList LS = state->get(); + + // FIXME: Better analysis requires IPA for wrappers. + // FIXME: check for double unlocks + if (LS.isEmpty()) + return; + + const MemRegion *firstLockR = LS.getHead(); + if (firstLockR != lockR) { + if (!BT_lor) + BT_lor.reset(new BugType("Lock order reversal", "Lock checker")); + ExplodedNode *N = C.generateSink(); + if (!N) + return; + BugReport *report = new BugReport(*BT_lor, + "This was not the most " + "recently acquired lock. " + "Possible lock order " + "reversal", N); + report->addRange(CE->getArg(0)->getSourceRange()); + C.EmitReport(report); + return; + } + + // Record that the lock was released. + state = state->set(LS.getTail()); + C.addTransition(state); +} + + +void ento::registerPthreadLockChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp new file mode 100644 index 0000000..b569e41 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp @@ -0,0 +1,3702 @@ +//==-- RetainCountChecker.cpp - Checks for leaks and other issues -*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the methods for RetainCountChecker, which implements +// a reference count checker for Core Foundation and Cocoa on (Mac OS X). +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclCXX.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Analysis/DomainSpecific/CocoaConventions.h" +#include "clang/AST/ParentMap.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableList.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include + +using namespace clang; +using namespace ento; +using llvm::StrInStrNoCase; + +namespace { +/// Wrapper around different kinds of node builder, so that helper functions +/// can have a common interface. +class GenericNodeBuilderRefCount { + CheckerContext *C; + const ProgramPointTag *tag; +public: + GenericNodeBuilderRefCount(CheckerContext &c, + const ProgramPointTag *t = 0) + : C(&c), tag(t){} + + ExplodedNode *MakeNode(ProgramStateRef state, ExplodedNode *Pred, + bool MarkAsSink = false) { + return C->addTransition(state, Pred, tag, MarkAsSink); + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Primitives used for constructing summaries for function/method calls. +//===----------------------------------------------------------------------===// + +/// ArgEffect is used to summarize a function/method call's effect on a +/// particular argument. +enum ArgEffect { DoNothing, Autorelease, Dealloc, DecRef, DecRefMsg, + DecRefBridgedTransfered, + IncRefMsg, IncRef, MakeCollectable, MayEscape, + NewAutoreleasePool, SelfOwn, StopTracking }; + +namespace llvm { +template <> struct FoldingSetTrait { +static inline void Profile(const ArgEffect X, FoldingSetNodeID& ID) { + ID.AddInteger((unsigned) X); +} +}; +} // end llvm namespace + +/// ArgEffects summarizes the effects of a function/method call on all of +/// its arguments. +typedef llvm::ImmutableMap ArgEffects; + +namespace { + +/// RetEffect is used to summarize a function/method call's behavior with +/// respect to its return value. +class RetEffect { +public: + enum Kind { NoRet, OwnedSymbol, OwnedAllocatedSymbol, + NotOwnedSymbol, GCNotOwnedSymbol, ARCNotOwnedSymbol, + OwnedWhenTrackedReceiver }; + + enum ObjKind { CF, ObjC, AnyObj }; + +private: + Kind K; + ObjKind O; + + RetEffect(Kind k, ObjKind o = AnyObj) : K(k), O(o) {} + +public: + Kind getKind() const { return K; } + + ObjKind getObjKind() const { return O; } + + bool isOwned() const { + return K == OwnedSymbol || K == OwnedAllocatedSymbol || + K == OwnedWhenTrackedReceiver; + } + + bool operator==(const RetEffect &Other) const { + return K == Other.K && O == Other.O; + } + + static RetEffect MakeOwnedWhenTrackedReceiver() { + return RetEffect(OwnedWhenTrackedReceiver, ObjC); + } + + static RetEffect MakeOwned(ObjKind o, bool isAllocated = false) { + return RetEffect(isAllocated ? OwnedAllocatedSymbol : OwnedSymbol, o); + } + static RetEffect MakeNotOwned(ObjKind o) { + return RetEffect(NotOwnedSymbol, o); + } + static RetEffect MakeGCNotOwned() { + return RetEffect(GCNotOwnedSymbol, ObjC); + } + static RetEffect MakeARCNotOwned() { + return RetEffect(ARCNotOwnedSymbol, ObjC); + } + static RetEffect MakeNoRet() { + return RetEffect(NoRet); + } + + void Profile(llvm::FoldingSetNodeID& ID) const { + ID.AddInteger((unsigned) K); + ID.AddInteger((unsigned) O); + } +}; + +//===----------------------------------------------------------------------===// +// Reference-counting logic (typestate + counts). +//===----------------------------------------------------------------------===// + +class RefVal { +public: + enum Kind { + Owned = 0, // Owning reference. + NotOwned, // Reference is not owned by still valid (not freed). + Released, // Object has been released. + ReturnedOwned, // Returned object passes ownership to caller. + ReturnedNotOwned, // Return object does not pass ownership to caller. + ERROR_START, + ErrorDeallocNotOwned, // -dealloc called on non-owned object. + ErrorDeallocGC, // Calling -dealloc with GC enabled. + ErrorUseAfterRelease, // Object used after released. + ErrorReleaseNotOwned, // Release of an object that was not owned. + ERROR_LEAK_START, + ErrorLeak, // A memory leak due to excessive reference counts. + ErrorLeakReturned, // A memory leak due to the returning method not having + // the correct naming conventions. + ErrorGCLeakReturned, + ErrorOverAutorelease, + ErrorReturnedNotOwned + }; + +private: + Kind kind; + RetEffect::ObjKind okind; + unsigned Cnt; + unsigned ACnt; + QualType T; + + RefVal(Kind k, RetEffect::ObjKind o, unsigned cnt, unsigned acnt, QualType t) + : kind(k), okind(o), Cnt(cnt), ACnt(acnt), T(t) {} + +public: + Kind getKind() const { return kind; } + + RetEffect::ObjKind getObjKind() const { return okind; } + + unsigned getCount() const { return Cnt; } + unsigned getAutoreleaseCount() const { return ACnt; } + unsigned getCombinedCounts() const { return Cnt + ACnt; } + void clearCounts() { Cnt = 0; ACnt = 0; } + void setCount(unsigned i) { Cnt = i; } + void setAutoreleaseCount(unsigned i) { ACnt = i; } + + QualType getType() const { return T; } + + bool isOwned() const { + return getKind() == Owned; + } + + bool isNotOwned() const { + return getKind() == NotOwned; + } + + bool isReturnedOwned() const { + return getKind() == ReturnedOwned; + } + + bool isReturnedNotOwned() const { + return getKind() == ReturnedNotOwned; + } + + static RefVal makeOwned(RetEffect::ObjKind o, QualType t, + unsigned Count = 1) { + return RefVal(Owned, o, Count, 0, t); + } + + static RefVal makeNotOwned(RetEffect::ObjKind o, QualType t, + unsigned Count = 0) { + return RefVal(NotOwned, o, Count, 0, t); + } + + // Comparison, profiling, and pretty-printing. + + bool operator==(const RefVal& X) const { + return kind == X.kind && Cnt == X.Cnt && T == X.T && ACnt == X.ACnt; + } + + RefVal operator-(size_t i) const { + return RefVal(getKind(), getObjKind(), getCount() - i, + getAutoreleaseCount(), getType()); + } + + RefVal operator+(size_t i) const { + return RefVal(getKind(), getObjKind(), getCount() + i, + getAutoreleaseCount(), getType()); + } + + RefVal operator^(Kind k) const { + return RefVal(k, getObjKind(), getCount(), getAutoreleaseCount(), + getType()); + } + + RefVal autorelease() const { + return RefVal(getKind(), getObjKind(), getCount(), getAutoreleaseCount()+1, + getType()); + } + + void Profile(llvm::FoldingSetNodeID& ID) const { + ID.AddInteger((unsigned) kind); + ID.AddInteger(Cnt); + ID.AddInteger(ACnt); + ID.Add(T); + } + + void print(raw_ostream &Out) const; +}; + +void RefVal::print(raw_ostream &Out) const { + if (!T.isNull()) + Out << "Tracked " << T.getAsString() << '/'; + + switch (getKind()) { + default: llvm_unreachable("Invalid RefVal kind"); + case Owned: { + Out << "Owned"; + unsigned cnt = getCount(); + if (cnt) Out << " (+ " << cnt << ")"; + break; + } + + case NotOwned: { + Out << "NotOwned"; + unsigned cnt = getCount(); + if (cnt) Out << " (+ " << cnt << ")"; + break; + } + + case ReturnedOwned: { + Out << "ReturnedOwned"; + unsigned cnt = getCount(); + if (cnt) Out << " (+ " << cnt << ")"; + break; + } + + case ReturnedNotOwned: { + Out << "ReturnedNotOwned"; + unsigned cnt = getCount(); + if (cnt) Out << " (+ " << cnt << ")"; + break; + } + + case Released: + Out << "Released"; + break; + + case ErrorDeallocGC: + Out << "-dealloc (GC)"; + break; + + case ErrorDeallocNotOwned: + Out << "-dealloc (not-owned)"; + break; + + case ErrorLeak: + Out << "Leaked"; + break; + + case ErrorLeakReturned: + Out << "Leaked (Bad naming)"; + break; + + case ErrorGCLeakReturned: + Out << "Leaked (GC-ed at return)"; + break; + + case ErrorUseAfterRelease: + Out << "Use-After-Release [ERROR]"; + break; + + case ErrorReleaseNotOwned: + Out << "Release of Not-Owned [ERROR]"; + break; + + case RefVal::ErrorOverAutorelease: + Out << "Over autoreleased"; + break; + + case RefVal::ErrorReturnedNotOwned: + Out << "Non-owned object returned instead of owned"; + break; + } + + if (ACnt) { + Out << " [ARC +" << ACnt << ']'; + } +} +} //end anonymous namespace + +//===----------------------------------------------------------------------===// +// RefBindings - State used to track object reference counts. +//===----------------------------------------------------------------------===// + +typedef llvm::ImmutableMap RefBindings; + +namespace clang { +namespace ento { +template<> +struct ProgramStateTrait + : public ProgramStatePartialTrait { + static void *GDMIndex() { + static int RefBIndex = 0; + return &RefBIndex; + } +}; +} +} + +//===----------------------------------------------------------------------===// +// Function/Method behavior summaries. +//===----------------------------------------------------------------------===// + +namespace { +class RetainSummary { + /// Args - a map of (index, ArgEffect) pairs, where index + /// specifies the argument (starting from 0). This can be sparsely + /// populated; arguments with no entry in Args use 'DefaultArgEffect'. + ArgEffects Args; + + /// DefaultArgEffect - The default ArgEffect to apply to arguments that + /// do not have an entry in Args. + ArgEffect DefaultArgEffect; + + /// Receiver - If this summary applies to an Objective-C message expression, + /// this is the effect applied to the state of the receiver. + ArgEffect Receiver; + + /// Ret - The effect on the return value. Used to indicate if the + /// function/method call returns a new tracked symbol. + RetEffect Ret; + +public: + RetainSummary(ArgEffects A, RetEffect R, ArgEffect defaultEff, + ArgEffect ReceiverEff) + : Args(A), DefaultArgEffect(defaultEff), Receiver(ReceiverEff), Ret(R) {} + + /// getArg - Return the argument effect on the argument specified by + /// idx (starting from 0). + ArgEffect getArg(unsigned idx) const { + if (const ArgEffect *AE = Args.lookup(idx)) + return *AE; + + return DefaultArgEffect; + } + + void addArg(ArgEffects::Factory &af, unsigned idx, ArgEffect e) { + Args = af.add(Args, idx, e); + } + + /// setDefaultArgEffect - Set the default argument effect. + void setDefaultArgEffect(ArgEffect E) { + DefaultArgEffect = E; + } + + /// getRetEffect - Returns the effect on the return value of the call. + RetEffect getRetEffect() const { return Ret; } + + /// setRetEffect - Set the effect of the return value of the call. + void setRetEffect(RetEffect E) { Ret = E; } + + + /// Sets the effect on the receiver of the message. + void setReceiverEffect(ArgEffect e) { Receiver = e; } + + /// getReceiverEffect - Returns the effect on the receiver of the call. + /// This is only meaningful if the summary applies to an ObjCMessageExpr*. + ArgEffect getReceiverEffect() const { return Receiver; } + + /// Test if two retain summaries are identical. Note that merely equivalent + /// summaries are not necessarily identical (for example, if an explicit + /// argument effect matches the default effect). + bool operator==(const RetainSummary &Other) const { + return Args == Other.Args && DefaultArgEffect == Other.DefaultArgEffect && + Receiver == Other.Receiver && Ret == Other.Ret; + } + + /// Profile this summary for inclusion in a FoldingSet. + void Profile(llvm::FoldingSetNodeID& ID) const { + ID.Add(Args); + ID.Add(DefaultArgEffect); + ID.Add(Receiver); + ID.Add(Ret); + } + + /// A retain summary is simple if it has no ArgEffects other than the default. + bool isSimple() const { + return Args.isEmpty(); + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Data structures for constructing summaries. +//===----------------------------------------------------------------------===// + +namespace { +class ObjCSummaryKey { + IdentifierInfo* II; + Selector S; +public: + ObjCSummaryKey(IdentifierInfo* ii, Selector s) + : II(ii), S(s) {} + + ObjCSummaryKey(const ObjCInterfaceDecl *d, Selector s) + : II(d ? d->getIdentifier() : 0), S(s) {} + + ObjCSummaryKey(const ObjCInterfaceDecl *d, IdentifierInfo *ii, Selector s) + : II(d ? d->getIdentifier() : ii), S(s) {} + + ObjCSummaryKey(Selector s) + : II(0), S(s) {} + + IdentifierInfo *getIdentifier() const { return II; } + Selector getSelector() const { return S; } +}; +} + +namespace llvm { +template <> struct DenseMapInfo { + static inline ObjCSummaryKey getEmptyKey() { + return ObjCSummaryKey(DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()); + } + + static inline ObjCSummaryKey getTombstoneKey() { + return ObjCSummaryKey(DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()); + } + + static unsigned getHashValue(const ObjCSummaryKey &V) { + return (DenseMapInfo::getHashValue(V.getIdentifier()) + & 0x88888888) + | (DenseMapInfo::getHashValue(V.getSelector()) + & 0x55555555); + } + + static bool isEqual(const ObjCSummaryKey& LHS, const ObjCSummaryKey& RHS) { + return DenseMapInfo::isEqual(LHS.getIdentifier(), + RHS.getIdentifier()) && + DenseMapInfo::isEqual(LHS.getSelector(), + RHS.getSelector()); + } + +}; +template <> +struct isPodLike { static const bool value = true; }; +} // end llvm namespace + +namespace { +class ObjCSummaryCache { + typedef llvm::DenseMap MapTy; + MapTy M; +public: + ObjCSummaryCache() {} + + const RetainSummary * find(const ObjCInterfaceDecl *D, IdentifierInfo *ClsName, + Selector S) { + // Lookup the method using the decl for the class @interface. If we + // have no decl, lookup using the class name. + return D ? find(D, S) : find(ClsName, S); + } + + const RetainSummary * find(const ObjCInterfaceDecl *D, Selector S) { + // Do a lookup with the (D,S) pair. If we find a match return + // the iterator. + ObjCSummaryKey K(D, S); + MapTy::iterator I = M.find(K); + + if (I != M.end() || !D) + return I->second; + + // Walk the super chain. If we find a hit with a parent, we'll end + // up returning that summary. We actually allow that key (null,S), as + // we cache summaries for the null ObjCInterfaceDecl* to allow us to + // generate initial summaries without having to worry about NSObject + // being declared. + // FIXME: We may change this at some point. + for (ObjCInterfaceDecl *C=D->getSuperClass() ;; C=C->getSuperClass()) { + if ((I = M.find(ObjCSummaryKey(C, S))) != M.end()) + break; + + if (!C) + return NULL; + } + + // Cache the summary with original key to make the next lookup faster + // and return the iterator. + const RetainSummary *Summ = I->second; + M[K] = Summ; + return Summ; + } + + const RetainSummary *find(IdentifierInfo* II, Selector S) { + // FIXME: Class method lookup. Right now we dont' have a good way + // of going between IdentifierInfo* and the class hierarchy. + MapTy::iterator I = M.find(ObjCSummaryKey(II, S)); + + if (I == M.end()) + I = M.find(ObjCSummaryKey(S)); + + return I == M.end() ? NULL : I->second; + } + + const RetainSummary *& operator[](ObjCSummaryKey K) { + return M[K]; + } + + const RetainSummary *& operator[](Selector S) { + return M[ ObjCSummaryKey(S) ]; + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Data structures for managing collections of summaries. +//===----------------------------------------------------------------------===// + +namespace { +class RetainSummaryManager { + + //==-----------------------------------------------------------------==// + // Typedefs. + //==-----------------------------------------------------------------==// + + typedef llvm::DenseMap + FuncSummariesTy; + + typedef ObjCSummaryCache ObjCMethodSummariesTy; + + typedef llvm::FoldingSetNodeWrapper CachedSummaryNode; + + //==-----------------------------------------------------------------==// + // Data. + //==-----------------------------------------------------------------==// + + /// Ctx - The ASTContext object for the analyzed ASTs. + ASTContext &Ctx; + + /// GCEnabled - Records whether or not the analyzed code runs in GC mode. + const bool GCEnabled; + + /// Records whether or not the analyzed code runs in ARC mode. + const bool ARCEnabled; + + /// FuncSummaries - A map from FunctionDecls to summaries. + FuncSummariesTy FuncSummaries; + + /// ObjCClassMethodSummaries - A map from selectors (for instance methods) + /// to summaries. + ObjCMethodSummariesTy ObjCClassMethodSummaries; + + /// ObjCMethodSummaries - A map from selectors to summaries. + ObjCMethodSummariesTy ObjCMethodSummaries; + + /// BPAlloc - A BumpPtrAllocator used for allocating summaries, ArgEffects, + /// and all other data used by the checker. + llvm::BumpPtrAllocator BPAlloc; + + /// AF - A factory for ArgEffects objects. + ArgEffects::Factory AF; + + /// ScratchArgs - A holding buffer for construct ArgEffects. + ArgEffects ScratchArgs; + + /// ObjCAllocRetE - Default return effect for methods returning Objective-C + /// objects. + RetEffect ObjCAllocRetE; + + /// ObjCInitRetE - Default return effect for init methods returning + /// Objective-C objects. + RetEffect ObjCInitRetE; + + /// SimpleSummaries - Used for uniquing summaries that don't have special + /// effects. + llvm::FoldingSet SimpleSummaries; + + //==-----------------------------------------------------------------==// + // Methods. + //==-----------------------------------------------------------------==// + + /// getArgEffects - Returns a persistent ArgEffects object based on the + /// data in ScratchArgs. + ArgEffects getArgEffects(); + + enum UnaryFuncKind { cfretain, cfrelease, cfmakecollectable }; + +public: + RetEffect getObjAllocRetEffect() const { return ObjCAllocRetE; } + + const RetainSummary *getUnarySummary(const FunctionType* FT, + UnaryFuncKind func); + + const RetainSummary *getCFSummaryCreateRule(const FunctionDecl *FD); + const RetainSummary *getCFSummaryGetRule(const FunctionDecl *FD); + const RetainSummary *getCFCreateGetRuleSummary(const FunctionDecl *FD); + + const RetainSummary *getPersistentSummary(const RetainSummary &OldSumm); + + const RetainSummary *getPersistentSummary(RetEffect RetEff, + ArgEffect ReceiverEff = DoNothing, + ArgEffect DefaultEff = MayEscape) { + RetainSummary Summ(getArgEffects(), RetEff, DefaultEff, ReceiverEff); + return getPersistentSummary(Summ); + } + + const RetainSummary *getDefaultSummary() { + return getPersistentSummary(RetEffect::MakeNoRet(), + DoNothing, MayEscape); + } + + const RetainSummary *getPersistentStopSummary() { + return getPersistentSummary(RetEffect::MakeNoRet(), + StopTracking, StopTracking); + } + + void InitializeClassMethodSummaries(); + void InitializeMethodSummaries(); +private: + void addNSObjectClsMethSummary(Selector S, const RetainSummary *Summ) { + ObjCClassMethodSummaries[S] = Summ; + } + + void addNSObjectMethSummary(Selector S, const RetainSummary *Summ) { + ObjCMethodSummaries[S] = Summ; + } + + void addClassMethSummary(const char* Cls, const char* name, + const RetainSummary *Summ, bool isNullary = true) { + IdentifierInfo* ClsII = &Ctx.Idents.get(Cls); + Selector S = isNullary ? GetNullarySelector(name, Ctx) + : GetUnarySelector(name, Ctx); + ObjCClassMethodSummaries[ObjCSummaryKey(ClsII, S)] = Summ; + } + + void addInstMethSummary(const char* Cls, const char* nullaryName, + const RetainSummary *Summ) { + IdentifierInfo* ClsII = &Ctx.Idents.get(Cls); + Selector S = GetNullarySelector(nullaryName, Ctx); + ObjCMethodSummaries[ObjCSummaryKey(ClsII, S)] = Summ; + } + + Selector generateSelector(va_list argp) { + SmallVector II; + + while (const char* s = va_arg(argp, const char*)) + II.push_back(&Ctx.Idents.get(s)); + + return Ctx.Selectors.getSelector(II.size(), &II[0]); + } + + void addMethodSummary(IdentifierInfo *ClsII, ObjCMethodSummariesTy& Summaries, + const RetainSummary * Summ, va_list argp) { + Selector S = generateSelector(argp); + Summaries[ObjCSummaryKey(ClsII, S)] = Summ; + } + + void addInstMethSummary(const char* Cls, const RetainSummary * Summ, ...) { + va_list argp; + va_start(argp, Summ); + addMethodSummary(&Ctx.Idents.get(Cls), ObjCMethodSummaries, Summ, argp); + va_end(argp); + } + + void addClsMethSummary(const char* Cls, const RetainSummary * Summ, ...) { + va_list argp; + va_start(argp, Summ); + addMethodSummary(&Ctx.Idents.get(Cls),ObjCClassMethodSummaries, Summ, argp); + va_end(argp); + } + + void addClsMethSummary(IdentifierInfo *II, const RetainSummary * Summ, ...) { + va_list argp; + va_start(argp, Summ); + addMethodSummary(II, ObjCClassMethodSummaries, Summ, argp); + va_end(argp); + } + +public: + + RetainSummaryManager(ASTContext &ctx, bool gcenabled, bool usesARC) + : Ctx(ctx), + GCEnabled(gcenabled), + ARCEnabled(usesARC), + AF(BPAlloc), ScratchArgs(AF.getEmptyMap()), + ObjCAllocRetE(gcenabled + ? RetEffect::MakeGCNotOwned() + : (usesARC ? RetEffect::MakeARCNotOwned() + : RetEffect::MakeOwned(RetEffect::ObjC, true))), + ObjCInitRetE(gcenabled + ? RetEffect::MakeGCNotOwned() + : (usesARC ? RetEffect::MakeARCNotOwned() + : RetEffect::MakeOwnedWhenTrackedReceiver())) { + InitializeClassMethodSummaries(); + InitializeMethodSummaries(); + } + + const RetainSummary *getSummary(const FunctionDecl *FD); + + const RetainSummary *getMethodSummary(Selector S, IdentifierInfo *ClsName, + const ObjCInterfaceDecl *ID, + const ObjCMethodDecl *MD, + QualType RetTy, + ObjCMethodSummariesTy &CachedSummaries); + + const RetainSummary *getInstanceMethodSummary(const ObjCMessage &msg, + ProgramStateRef state, + const LocationContext *LC); + + const RetainSummary *getInstanceMethodSummary(const ObjCMessage &msg, + const ObjCInterfaceDecl *ID) { + return getMethodSummary(msg.getSelector(), 0, ID, msg.getMethodDecl(), + msg.getType(Ctx), ObjCMethodSummaries); + } + + const RetainSummary *getClassMethodSummary(const ObjCMessage &msg) { + const ObjCInterfaceDecl *Class = 0; + if (!msg.isInstanceMessage()) + Class = msg.getReceiverInterface(); + + return getMethodSummary(msg.getSelector(), Class->getIdentifier(), + Class, msg.getMethodDecl(), msg.getType(Ctx), + ObjCClassMethodSummaries); + } + + /// getMethodSummary - This version of getMethodSummary is used to query + /// the summary for the current method being analyzed. + const RetainSummary *getMethodSummary(const ObjCMethodDecl *MD) { + // FIXME: Eventually this should be unneeded. + const ObjCInterfaceDecl *ID = MD->getClassInterface(); + Selector S = MD->getSelector(); + IdentifierInfo *ClsName = ID->getIdentifier(); + QualType ResultTy = MD->getResultType(); + + ObjCMethodSummariesTy *CachedSummaries; + if (MD->isInstanceMethod()) + CachedSummaries = &ObjCMethodSummaries; + else + CachedSummaries = &ObjCClassMethodSummaries; + + return getMethodSummary(S, ClsName, ID, MD, ResultTy, *CachedSummaries); + } + + const RetainSummary *getStandardMethodSummary(const ObjCMethodDecl *MD, + Selector S, QualType RetTy); + + void updateSummaryFromAnnotations(const RetainSummary *&Summ, + const ObjCMethodDecl *MD); + + void updateSummaryFromAnnotations(const RetainSummary *&Summ, + const FunctionDecl *FD); + + bool isGCEnabled() const { return GCEnabled; } + + bool isARCEnabled() const { return ARCEnabled; } + + bool isARCorGCEnabled() const { return GCEnabled || ARCEnabled; } +}; + +// Used to avoid allocating long-term (BPAlloc'd) memory for default retain +// summaries. If a function or method looks like it has a default summary, but +// it has annotations, the annotations are added to the stack-based template +// and then copied into managed memory. +class RetainSummaryTemplate { + RetainSummaryManager &Manager; + const RetainSummary *&RealSummary; + RetainSummary ScratchSummary; + bool Accessed; +public: + RetainSummaryTemplate(const RetainSummary *&real, const RetainSummary &base, + RetainSummaryManager &mgr) + : Manager(mgr), RealSummary(real), ScratchSummary(real ? *real : base), + Accessed(false) {} + + ~RetainSummaryTemplate() { + if (Accessed) + RealSummary = Manager.getPersistentSummary(ScratchSummary); + } + + RetainSummary &operator*() { + Accessed = true; + return ScratchSummary; + } + + RetainSummary *operator->() { + Accessed = true; + return &ScratchSummary; + } +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Implementation of checker data structures. +//===----------------------------------------------------------------------===// + +ArgEffects RetainSummaryManager::getArgEffects() { + ArgEffects AE = ScratchArgs; + ScratchArgs = AF.getEmptyMap(); + return AE; +} + +const RetainSummary * +RetainSummaryManager::getPersistentSummary(const RetainSummary &OldSumm) { + // Unique "simple" summaries -- those without ArgEffects. + if (OldSumm.isSimple()) { + llvm::FoldingSetNodeID ID; + OldSumm.Profile(ID); + + void *Pos; + CachedSummaryNode *N = SimpleSummaries.FindNodeOrInsertPos(ID, Pos); + + if (!N) { + N = (CachedSummaryNode *) BPAlloc.Allocate(); + new (N) CachedSummaryNode(OldSumm); + SimpleSummaries.InsertNode(N, Pos); + } + + return &N->getValue(); + } + + RetainSummary *Summ = (RetainSummary *) BPAlloc.Allocate(); + new (Summ) RetainSummary(OldSumm); + return Summ; +} + +//===----------------------------------------------------------------------===// +// Summary creation for functions (largely uses of Core Foundation). +//===----------------------------------------------------------------------===// + +static bool isRetain(const FunctionDecl *FD, StringRef FName) { + return FName.endswith("Retain"); +} + +static bool isRelease(const FunctionDecl *FD, StringRef FName) { + return FName.endswith("Release"); +} + +static bool isMakeCollectable(const FunctionDecl *FD, StringRef FName) { + // FIXME: Remove FunctionDecl parameter. + // FIXME: Is it really okay if MakeCollectable isn't a suffix? + return FName.find("MakeCollectable") != StringRef::npos; +} + +const RetainSummary * RetainSummaryManager::getSummary(const FunctionDecl *FD) { + // Look up a summary in our cache of FunctionDecls -> Summaries. + FuncSummariesTy::iterator I = FuncSummaries.find(FD); + if (I != FuncSummaries.end()) + return I->second; + + // No summary? Generate one. + const RetainSummary *S = 0; + + do { + // We generate "stop" summaries for implicitly defined functions. + if (FD->isImplicit()) { + S = getPersistentStopSummary(); + break; + } + // For C++ methods, generate an implicit "stop" summary as well. We + // can relax this once we have a clear policy for C++ methods and + // ownership attributes. + if (isa(FD)) { + S = getPersistentStopSummary(); + break; + } + + // [PR 3337] Use 'getAs' to strip away any typedefs on the + // function's type. + const FunctionType* FT = FD->getType()->getAs(); + const IdentifierInfo *II = FD->getIdentifier(); + if (!II) + break; + + StringRef FName = II->getName(); + + // Strip away preceding '_'. Doing this here will effect all the checks + // down below. + FName = FName.substr(FName.find_first_not_of('_')); + + // Inspect the result type. + QualType RetTy = FT->getResultType(); + + // FIXME: This should all be refactored into a chain of "summary lookup" + // filters. + assert(ScratchArgs.isEmpty()); + + if (FName == "pthread_create") { + // Part of: . This will be addressed + // better with IPA. + S = getPersistentStopSummary(); + } else if (FName == "NSMakeCollectable") { + // Handle: id NSMakeCollectable(CFTypeRef) + S = (RetTy->isObjCIdType()) + ? getUnarySummary(FT, cfmakecollectable) + : getPersistentStopSummary(); + } else if (FName == "IOBSDNameMatching" || + FName == "IOServiceMatching" || + FName == "IOServiceNameMatching" || + FName == "IORegistryEntryIDMatching" || + FName == "IOOpenFirmwarePathMatching") { + // Part of . (IOKit) + // This should be addressed using a API table. + S = getPersistentSummary(RetEffect::MakeOwned(RetEffect::CF, true), + DoNothing, DoNothing); + } else if (FName == "IOServiceGetMatchingService" || + FName == "IOServiceGetMatchingServices") { + // FIXES: + // This should be addressed using a API table. This strcmp is also + // a little gross, but there is no need to super optimize here. + ScratchArgs = AF.add(ScratchArgs, 1, DecRef); + S = getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, DoNothing); + } else if (FName == "IOServiceAddNotification" || + FName == "IOServiceAddMatchingNotification") { + // Part of . (IOKit) + // This should be addressed using a API table. + ScratchArgs = AF.add(ScratchArgs, 2, DecRef); + S = getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, DoNothing); + } else if (FName == "CVPixelBufferCreateWithBytes") { + // FIXES: + // Eventually this can be improved by recognizing that the pixel + // buffer passed to CVPixelBufferCreateWithBytes is released via + // a callback and doing full IPA to make sure this is done correctly. + // FIXME: This function has an out parameter that returns an + // allocated object. + ScratchArgs = AF.add(ScratchArgs, 7, StopTracking); + S = getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, DoNothing); + } else if (FName == "CGBitmapContextCreateWithData") { + // FIXES: + // Eventually this can be improved by recognizing that 'releaseInfo' + // passed to CGBitmapContextCreateWithData is released via + // a callback and doing full IPA to make sure this is done correctly. + ScratchArgs = AF.add(ScratchArgs, 8, StopTracking); + S = getPersistentSummary(RetEffect::MakeOwned(RetEffect::CF, true), + DoNothing, DoNothing); + } else if (FName == "CVPixelBufferCreateWithPlanarBytes") { + // FIXES: + // Eventually this can be improved by recognizing that the pixel + // buffer passed to CVPixelBufferCreateWithPlanarBytes is released + // via a callback and doing full IPA to make sure this is done + // correctly. + ScratchArgs = AF.add(ScratchArgs, 12, StopTracking); + S = getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, DoNothing); + } else if (FName == "dispatch_set_context") { + // - The analyzer currently doesn't have + // a good way to reason about the finalizer function for libdispatch. + // If we pass a context object that is memory managed, stop tracking it. + // FIXME: this hack should possibly go away once we can handle + // libdispatch finalizers. + ScratchArgs = AF.add(ScratchArgs, 1, StopTracking); + S = getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, DoNothing); + } else if (FName.startswith("NS") && + (FName.find("Insert") != StringRef::npos)) { + // Whitelist NSXXInsertXX, for example NSMapInsertIfAbsent, since they can + // be deallocated by NSMapRemove. (radar://11152419) + ScratchArgs = AF.add(ScratchArgs, 1, StopTracking); + ScratchArgs = AF.add(ScratchArgs, 2, StopTracking); + S = getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, DoNothing); + } + + // Did we get a summary? + if (S) + break; + + // Enable this code once the semantics of NSDeallocateObject are resolved + // for GC. +#if 0 + // Handle: NSDeallocateObject(id anObject); + // This method does allow 'nil' (although we don't check it now). + if (strcmp(FName, "NSDeallocateObject") == 0) { + return RetTy == Ctx.VoidTy + ? getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, Dealloc) + : getPersistentStopSummary(); + } +#endif + + if (RetTy->isPointerType()) { + // For CoreFoundation ('CF') types. + if (cocoa::isRefType(RetTy, "CF", FName)) { + if (isRetain(FD, FName)) + S = getUnarySummary(FT, cfretain); + else if (isMakeCollectable(FD, FName)) + S = getUnarySummary(FT, cfmakecollectable); + else + S = getCFCreateGetRuleSummary(FD); + + break; + } + + // For CoreGraphics ('CG') types. + if (cocoa::isRefType(RetTy, "CG", FName)) { + if (isRetain(FD, FName)) + S = getUnarySummary(FT, cfretain); + else + S = getCFCreateGetRuleSummary(FD); + + break; + } + + // For the Disk Arbitration API (DiskArbitration/DADisk.h) + if (cocoa::isRefType(RetTy, "DADisk") || + cocoa::isRefType(RetTy, "DADissenter") || + cocoa::isRefType(RetTy, "DASessionRef")) { + S = getCFCreateGetRuleSummary(FD); + break; + } + + break; + } + + // Check for release functions, the only kind of functions that we care + // about that don't return a pointer type. + if (FName[0] == 'C' && (FName[1] == 'F' || FName[1] == 'G')) { + // Test for 'CGCF'. + FName = FName.substr(FName.startswith("CGCF") ? 4 : 2); + + if (isRelease(FD, FName)) + S = getUnarySummary(FT, cfrelease); + else { + assert (ScratchArgs.isEmpty()); + // Remaining CoreFoundation and CoreGraphics functions. + // We use to assume that they all strictly followed the ownership idiom + // and that ownership cannot be transferred. While this is technically + // correct, many methods allow a tracked object to escape. For example: + // + // CFMutableDictionaryRef x = CFDictionaryCreateMutable(...); + // CFDictionaryAddValue(y, key, x); + // CFRelease(x); + // ... it is okay to use 'x' since 'y' has a reference to it + // + // We handle this and similar cases with the follow heuristic. If the + // function name contains "InsertValue", "SetValue", "AddValue", + // "AppendValue", or "SetAttribute", then we assume that arguments may + // "escape." This means that something else holds on to the object, + // allowing it be used even after its local retain count drops to 0. + ArgEffect E = (StrInStrNoCase(FName, "InsertValue") != StringRef::npos|| + StrInStrNoCase(FName, "AddValue") != StringRef::npos || + StrInStrNoCase(FName, "SetValue") != StringRef::npos || + StrInStrNoCase(FName, "AppendValue") != StringRef::npos|| + StrInStrNoCase(FName, "SetAttribute") != StringRef::npos) + ? MayEscape : DoNothing; + + S = getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, E); + } + } + } + while (0); + + // Annotations override defaults. + updateSummaryFromAnnotations(S, FD); + + FuncSummaries[FD] = S; + return S; +} + +const RetainSummary * +RetainSummaryManager::getCFCreateGetRuleSummary(const FunctionDecl *FD) { + if (coreFoundation::followsCreateRule(FD)) + return getCFSummaryCreateRule(FD); + + return getCFSummaryGetRule(FD); +} + +const RetainSummary * +RetainSummaryManager::getUnarySummary(const FunctionType* FT, + UnaryFuncKind func) { + + // Sanity check that this is *really* a unary function. This can + // happen if people do weird things. + const FunctionProtoType* FTP = dyn_cast(FT); + if (!FTP || FTP->getNumArgs() != 1) + return getPersistentStopSummary(); + + assert (ScratchArgs.isEmpty()); + + ArgEffect Effect; + switch (func) { + case cfretain: Effect = IncRef; break; + case cfrelease: Effect = DecRef; break; + case cfmakecollectable: Effect = MakeCollectable; break; + } + + ScratchArgs = AF.add(ScratchArgs, 0, Effect); + return getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, DoNothing); +} + +const RetainSummary * +RetainSummaryManager::getCFSummaryCreateRule(const FunctionDecl *FD) { + assert (ScratchArgs.isEmpty()); + + return getPersistentSummary(RetEffect::MakeOwned(RetEffect::CF, true)); +} + +const RetainSummary * +RetainSummaryManager::getCFSummaryGetRule(const FunctionDecl *FD) { + assert (ScratchArgs.isEmpty()); + return getPersistentSummary(RetEffect::MakeNotOwned(RetEffect::CF), + DoNothing, DoNothing); +} + +//===----------------------------------------------------------------------===// +// Summary creation for Selectors. +//===----------------------------------------------------------------------===// + +void +RetainSummaryManager::updateSummaryFromAnnotations(const RetainSummary *&Summ, + const FunctionDecl *FD) { + if (!FD) + return; + + RetainSummaryTemplate Template(Summ, *getDefaultSummary(), *this); + + // Effects on the parameters. + unsigned parm_idx = 0; + for (FunctionDecl::param_const_iterator pi = FD->param_begin(), + pe = FD->param_end(); pi != pe; ++pi, ++parm_idx) { + const ParmVarDecl *pd = *pi; + if (pd->getAttr()) { + if (!GCEnabled) { + Template->addArg(AF, parm_idx, DecRef); + } + } else if (pd->getAttr()) { + Template->addArg(AF, parm_idx, DecRef); + } + } + + QualType RetTy = FD->getResultType(); + + // Determine if there is a special return effect for this method. + if (cocoa::isCocoaObjectRef(RetTy)) { + if (FD->getAttr()) { + Template->setRetEffect(ObjCAllocRetE); + } + else if (FD->getAttr()) { + Template->setRetEffect(RetEffect::MakeOwned(RetEffect::CF, true)); + } + else if (FD->getAttr()) { + Template->setRetEffect(RetEffect::MakeNotOwned(RetEffect::ObjC)); + } + else if (FD->getAttr()) { + Template->setRetEffect(RetEffect::MakeNotOwned(RetEffect::CF)); + } + } else if (RetTy->getAs()) { + if (FD->getAttr()) { + Template->setRetEffect(RetEffect::MakeOwned(RetEffect::CF, true)); + } + else if (FD->getAttr()) { + Template->setRetEffect(RetEffect::MakeNotOwned(RetEffect::CF)); + } + } +} + +void +RetainSummaryManager::updateSummaryFromAnnotations(const RetainSummary *&Summ, + const ObjCMethodDecl *MD) { + if (!MD) + return; + + RetainSummaryTemplate Template(Summ, *getDefaultSummary(), *this); + bool isTrackedLoc = false; + + // Effects on the receiver. + if (MD->getAttr()) { + if (!GCEnabled) + Template->setReceiverEffect(DecRefMsg); + } + + // Effects on the parameters. + unsigned parm_idx = 0; + for (ObjCMethodDecl::param_const_iterator + pi=MD->param_begin(), pe=MD->param_end(); + pi != pe; ++pi, ++parm_idx) { + const ParmVarDecl *pd = *pi; + if (pd->getAttr()) { + if (!GCEnabled) + Template->addArg(AF, parm_idx, DecRef); + } + else if(pd->getAttr()) { + Template->addArg(AF, parm_idx, DecRef); + } + } + + // Determine if there is a special return effect for this method. + if (cocoa::isCocoaObjectRef(MD->getResultType())) { + if (MD->getAttr()) { + Template->setRetEffect(ObjCAllocRetE); + return; + } + if (MD->getAttr()) { + Template->setRetEffect(RetEffect::MakeNotOwned(RetEffect::ObjC)); + return; + } + + isTrackedLoc = true; + } else { + isTrackedLoc = MD->getResultType()->getAs() != NULL; + } + + if (isTrackedLoc) { + if (MD->getAttr()) + Template->setRetEffect(RetEffect::MakeOwned(RetEffect::CF, true)); + else if (MD->getAttr()) + Template->setRetEffect(RetEffect::MakeNotOwned(RetEffect::CF)); + } +} + +const RetainSummary * +RetainSummaryManager::getStandardMethodSummary(const ObjCMethodDecl *MD, + Selector S, QualType RetTy) { + + if (MD) { + // Scan the method decl for 'void*' arguments. These should be treated + // as 'StopTracking' because they are often used with delegates. + // Delegates are a frequent form of false positives with the retain + // count checker. + unsigned i = 0; + for (ObjCMethodDecl::param_const_iterator I = MD->param_begin(), + E = MD->param_end(); I != E; ++I, ++i) + if (const ParmVarDecl *PD = *I) { + QualType Ty = Ctx.getCanonicalType(PD->getType()); + if (Ty.getLocalUnqualifiedType() == Ctx.VoidPtrTy) + ScratchArgs = AF.add(ScratchArgs, i, StopTracking); + } + } + + // Any special effects? + ArgEffect ReceiverEff = DoNothing; + RetEffect ResultEff = RetEffect::MakeNoRet(); + + // Check the method family, and apply any default annotations. + switch (MD ? MD->getMethodFamily() : S.getMethodFamily()) { + case OMF_None: + case OMF_performSelector: + // Assume all Objective-C methods follow Cocoa Memory Management rules. + // FIXME: Does the non-threaded performSelector family really belong here? + // The selector could be, say, @selector(copy). + if (cocoa::isCocoaObjectRef(RetTy)) + ResultEff = RetEffect::MakeNotOwned(RetEffect::ObjC); + else if (coreFoundation::isCFObjectRef(RetTy)) { + // ObjCMethodDecl currently doesn't consider CF objects as valid return + // values for alloc, new, copy, or mutableCopy, so we have to + // double-check with the selector. This is ugly, but there aren't that + // many Objective-C methods that return CF objects, right? + if (MD) { + switch (S.getMethodFamily()) { + case OMF_alloc: + case OMF_new: + case OMF_copy: + case OMF_mutableCopy: + ResultEff = RetEffect::MakeOwned(RetEffect::CF, true); + break; + default: + ResultEff = RetEffect::MakeNotOwned(RetEffect::CF); + break; + } + } else { + ResultEff = RetEffect::MakeNotOwned(RetEffect::CF); + } + } + break; + case OMF_init: + ResultEff = ObjCInitRetE; + ReceiverEff = DecRefMsg; + break; + case OMF_alloc: + case OMF_new: + case OMF_copy: + case OMF_mutableCopy: + if (cocoa::isCocoaObjectRef(RetTy)) + ResultEff = ObjCAllocRetE; + else if (coreFoundation::isCFObjectRef(RetTy)) + ResultEff = RetEffect::MakeOwned(RetEffect::CF, true); + break; + case OMF_autorelease: + ReceiverEff = Autorelease; + break; + case OMF_retain: + ReceiverEff = IncRefMsg; + break; + case OMF_release: + ReceiverEff = DecRefMsg; + break; + case OMF_dealloc: + ReceiverEff = Dealloc; + break; + case OMF_self: + // -self is handled specially by the ExprEngine to propagate the receiver. + break; + case OMF_retainCount: + case OMF_finalize: + // These methods don't return objects. + break; + } + + // If one of the arguments in the selector has the keyword 'delegate' we + // should stop tracking the reference count for the receiver. This is + // because the reference count is quite possibly handled by a delegate + // method. + if (S.isKeywordSelector()) { + const std::string &str = S.getAsString(); + assert(!str.empty()); + if (StrInStrNoCase(str, "delegate:") != StringRef::npos) + ReceiverEff = StopTracking; + } + + if (ScratchArgs.isEmpty() && ReceiverEff == DoNothing && + ResultEff.getKind() == RetEffect::NoRet) + return getDefaultSummary(); + + return getPersistentSummary(ResultEff, ReceiverEff, MayEscape); +} + +const RetainSummary * +RetainSummaryManager::getInstanceMethodSummary(const ObjCMessage &msg, + ProgramStateRef state, + const LocationContext *LC) { + + // We need the type-information of the tracked receiver object + // Retrieve it from the state. + const Expr *Receiver = msg.getInstanceReceiver(); + const ObjCInterfaceDecl *ID = 0; + + // FIXME: Is this really working as expected? There are cases where + // we just use the 'ID' from the message expression. + SVal receiverV; + + if (Receiver) { + receiverV = state->getSValAsScalarOrLoc(Receiver, LC); + + // FIXME: Eventually replace the use of state->get with + // a generic API for reasoning about the Objective-C types of symbolic + // objects. + if (SymbolRef Sym = receiverV.getAsLocSymbol()) + if (const RefVal *T = state->get(Sym)) + if (const ObjCObjectPointerType* PT = + T->getType()->getAs()) + ID = PT->getInterfaceDecl(); + + // FIXME: this is a hack. This may or may not be the actual method + // that is called. + if (!ID) { + if (const ObjCObjectPointerType *PT = + Receiver->getType()->getAs()) + ID = PT->getInterfaceDecl(); + } + } else { + // FIXME: Hack for 'super'. + ID = msg.getReceiverInterface(); + } + + // FIXME: The receiver could be a reference to a class, meaning that + // we should use the class method. + return getInstanceMethodSummary(msg, ID); +} + +const RetainSummary * +RetainSummaryManager::getMethodSummary(Selector S, IdentifierInfo *ClsName, + const ObjCInterfaceDecl *ID, + const ObjCMethodDecl *MD, QualType RetTy, + ObjCMethodSummariesTy &CachedSummaries) { + + // Look up a summary in our summary cache. + const RetainSummary *Summ = CachedSummaries.find(ID, ClsName, S); + + if (!Summ) { + Summ = getStandardMethodSummary(MD, S, RetTy); + + // Annotations override defaults. + updateSummaryFromAnnotations(Summ, MD); + + // Memoize the summary. + CachedSummaries[ObjCSummaryKey(ID, ClsName, S)] = Summ; + } + + return Summ; +} + +void RetainSummaryManager::InitializeClassMethodSummaries() { + assert(ScratchArgs.isEmpty()); + // Create the [NSAssertionHandler currentHander] summary. + addClassMethSummary("NSAssertionHandler", "currentHandler", + getPersistentSummary(RetEffect::MakeNotOwned(RetEffect::ObjC))); + + // Create the [NSAutoreleasePool addObject:] summary. + ScratchArgs = AF.add(ScratchArgs, 0, Autorelease); + addClassMethSummary("NSAutoreleasePool", "addObject", + getPersistentSummary(RetEffect::MakeNoRet(), + DoNothing, Autorelease)); + + // Create the summaries for [NSObject performSelector...]. We treat + // these as 'stop tracking' for the arguments because they are often + // used for delegates that can release the object. When we have better + // inter-procedural analysis we can potentially do something better. This + // workaround is to remove false positives. + const RetainSummary *Summ = + getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, StopTracking); + IdentifierInfo *NSObjectII = &Ctx.Idents.get("NSObject"); + addClsMethSummary(NSObjectII, Summ, "performSelector", "withObject", + "afterDelay", NULL); + addClsMethSummary(NSObjectII, Summ, "performSelector", "withObject", + "afterDelay", "inModes", NULL); + addClsMethSummary(NSObjectII, Summ, "performSelectorOnMainThread", + "withObject", "waitUntilDone", NULL); + addClsMethSummary(NSObjectII, Summ, "performSelectorOnMainThread", + "withObject", "waitUntilDone", "modes", NULL); + addClsMethSummary(NSObjectII, Summ, "performSelector", "onThread", + "withObject", "waitUntilDone", NULL); + addClsMethSummary(NSObjectII, Summ, "performSelector", "onThread", + "withObject", "waitUntilDone", "modes", NULL); + addClsMethSummary(NSObjectII, Summ, "performSelectorInBackground", + "withObject", NULL); +} + +void RetainSummaryManager::InitializeMethodSummaries() { + + assert (ScratchArgs.isEmpty()); + + // Create the "init" selector. It just acts as a pass-through for the + // receiver. + const RetainSummary *InitSumm = getPersistentSummary(ObjCInitRetE, DecRefMsg); + addNSObjectMethSummary(GetNullarySelector("init", Ctx), InitSumm); + + // awakeAfterUsingCoder: behaves basically like an 'init' method. It + // claims the receiver and returns a retained object. + addNSObjectMethSummary(GetUnarySelector("awakeAfterUsingCoder", Ctx), + InitSumm); + + // The next methods are allocators. + const RetainSummary *AllocSumm = getPersistentSummary(ObjCAllocRetE); + const RetainSummary *CFAllocSumm = + getPersistentSummary(RetEffect::MakeOwned(RetEffect::CF, true)); + + // Create the "retain" selector. + RetEffect NoRet = RetEffect::MakeNoRet(); + const RetainSummary *Summ = getPersistentSummary(NoRet, IncRefMsg); + addNSObjectMethSummary(GetNullarySelector("retain", Ctx), Summ); + + // Create the "release" selector. + Summ = getPersistentSummary(NoRet, DecRefMsg); + addNSObjectMethSummary(GetNullarySelector("release", Ctx), Summ); + + // Create the "drain" selector. + Summ = getPersistentSummary(NoRet, isGCEnabled() ? DoNothing : DecRef); + addNSObjectMethSummary(GetNullarySelector("drain", Ctx), Summ); + + // Create the -dealloc summary. + Summ = getPersistentSummary(NoRet, Dealloc); + addNSObjectMethSummary(GetNullarySelector("dealloc", Ctx), Summ); + + // Create the "autorelease" selector. + Summ = getPersistentSummary(NoRet, Autorelease); + addNSObjectMethSummary(GetNullarySelector("autorelease", Ctx), Summ); + + // Specially handle NSAutoreleasePool. + addInstMethSummary("NSAutoreleasePool", "init", + getPersistentSummary(NoRet, NewAutoreleasePool)); + + // For NSWindow, allocated objects are (initially) self-owned. + // FIXME: For now we opt for false negatives with NSWindow, as these objects + // self-own themselves. However, they only do this once they are displayed. + // Thus, we need to track an NSWindow's display status. + // This is tracked in . + // See also http://llvm.org/bugs/show_bug.cgi?id=3714. + const RetainSummary *NoTrackYet = getPersistentSummary(RetEffect::MakeNoRet(), + StopTracking, + StopTracking); + + addClassMethSummary("NSWindow", "alloc", NoTrackYet); + +#if 0 + addInstMethSummary("NSWindow", NoTrackYet, "initWithContentRect", + "styleMask", "backing", "defer", NULL); + + addInstMethSummary("NSWindow", NoTrackYet, "initWithContentRect", + "styleMask", "backing", "defer", "screen", NULL); +#endif + + // For NSPanel (which subclasses NSWindow), allocated objects are not + // self-owned. + // FIXME: For now we don't track NSPanels. object for the same reason + // as for NSWindow objects. + addClassMethSummary("NSPanel", "alloc", NoTrackYet); + +#if 0 + addInstMethSummary("NSPanel", NoTrackYet, "initWithContentRect", + "styleMask", "backing", "defer", NULL); + + addInstMethSummary("NSPanel", NoTrackYet, "initWithContentRect", + "styleMask", "backing", "defer", "screen", NULL); +#endif + + // Don't track allocated autorelease pools yet, as it is okay to prematurely + // exit a method. + addClassMethSummary("NSAutoreleasePool", "alloc", NoTrackYet); + addClassMethSummary("NSAutoreleasePool", "allocWithZone", NoTrackYet, false); + + // Create summaries QCRenderer/QCView -createSnapShotImageOfType: + addInstMethSummary("QCRenderer", AllocSumm, + "createSnapshotImageOfType", NULL); + addInstMethSummary("QCView", AllocSumm, + "createSnapshotImageOfType", NULL); + + // Create summaries for CIContext, 'createCGImage' and + // 'createCGLayerWithSize'. These objects are CF objects, and are not + // automatically garbage collected. + addInstMethSummary("CIContext", CFAllocSumm, + "createCGImage", "fromRect", NULL); + addInstMethSummary("CIContext", CFAllocSumm, + "createCGImage", "fromRect", "format", "colorSpace", NULL); + addInstMethSummary("CIContext", CFAllocSumm, "createCGLayerWithSize", + "info", NULL); +} + +//===----------------------------------------------------------------------===// +// AutoreleaseBindings - State used to track objects in autorelease pools. +//===----------------------------------------------------------------------===// + +typedef llvm::ImmutableMap ARCounts; +typedef llvm::ImmutableMap ARPoolContents; +typedef llvm::ImmutableList ARStack; + +static int AutoRCIndex = 0; +static int AutoRBIndex = 0; + +namespace { class AutoreleasePoolContents {}; } +namespace { class AutoreleaseStack {}; } + +namespace clang { +namespace ento { +template<> struct ProgramStateTrait + : public ProgramStatePartialTrait { + static inline void *GDMIndex() { return &AutoRBIndex; } +}; + +template<> struct ProgramStateTrait + : public ProgramStatePartialTrait { + static inline void *GDMIndex() { return &AutoRCIndex; } +}; +} // end GR namespace +} // end clang namespace + +static SymbolRef GetCurrentAutoreleasePool(ProgramStateRef state) { + ARStack stack = state->get(); + return stack.isEmpty() ? SymbolRef() : stack.getHead(); +} + +static ProgramStateRef +SendAutorelease(ProgramStateRef state, + ARCounts::Factory &F, + SymbolRef sym) { + SymbolRef pool = GetCurrentAutoreleasePool(state); + const ARCounts *cnts = state->get(pool); + ARCounts newCnts(0); + + if (cnts) { + const unsigned *cnt = (*cnts).lookup(sym); + newCnts = F.add(*cnts, sym, cnt ? *cnt + 1 : 1); + } + else + newCnts = F.add(F.getEmptyMap(), sym, 1); + + return state->set(pool, newCnts); +} + +//===----------------------------------------------------------------------===// +// Error reporting. +//===----------------------------------------------------------------------===// +namespace { + typedef llvm::DenseMap + SummaryLogTy; + + //===-------------===// + // Bug Descriptions. // + //===-------------===// + + class CFRefBug : public BugType { + protected: + CFRefBug(StringRef name) + : BugType(name, categories::MemoryCoreFoundationObjectiveC) {} + public: + + // FIXME: Eventually remove. + virtual const char *getDescription() const = 0; + + virtual bool isLeak() const { return false; } + }; + + class UseAfterRelease : public CFRefBug { + public: + UseAfterRelease() : CFRefBug("Use-after-release") {} + + const char *getDescription() const { + return "Reference-counted object is used after it is released"; + } + }; + + class BadRelease : public CFRefBug { + public: + BadRelease() : CFRefBug("Bad release") {} + + const char *getDescription() const { + return "Incorrect decrement of the reference count of an object that is " + "not owned at this point by the caller"; + } + }; + + class DeallocGC : public CFRefBug { + public: + DeallocGC() + : CFRefBug("-dealloc called while using garbage collection") {} + + const char *getDescription() const { + return "-dealloc called while using garbage collection"; + } + }; + + class DeallocNotOwned : public CFRefBug { + public: + DeallocNotOwned() + : CFRefBug("-dealloc sent to non-exclusively owned object") {} + + const char *getDescription() const { + return "-dealloc sent to object that may be referenced elsewhere"; + } + }; + + class OverAutorelease : public CFRefBug { + public: + OverAutorelease() + : CFRefBug("Object sent -autorelease too many times") {} + + const char *getDescription() const { + return "Object sent -autorelease too many times"; + } + }; + + class ReturnedNotOwnedForOwned : public CFRefBug { + public: + ReturnedNotOwnedForOwned() + : CFRefBug("Method should return an owned object") {} + + const char *getDescription() const { + return "Object with a +0 retain count returned to caller where a +1 " + "(owning) retain count is expected"; + } + }; + + class Leak : public CFRefBug { + const bool isReturn; + protected: + Leak(StringRef name, bool isRet) + : CFRefBug(name), isReturn(isRet) { + // Leaks should not be reported if they are post-dominated by a sink. + setSuppressOnSink(true); + } + public: + + const char *getDescription() const { return ""; } + + bool isLeak() const { return true; } + }; + + class LeakAtReturn : public Leak { + public: + LeakAtReturn(StringRef name) + : Leak(name, true) {} + }; + + class LeakWithinFunction : public Leak { + public: + LeakWithinFunction(StringRef name) + : Leak(name, false) {} + }; + + //===---------===// + // Bug Reports. // + //===---------===// + + class CFRefReportVisitor : public BugReporterVisitorImpl { + protected: + SymbolRef Sym; + const SummaryLogTy &SummaryLog; + bool GCEnabled; + + public: + CFRefReportVisitor(SymbolRef sym, bool gcEnabled, const SummaryLogTy &log) + : Sym(sym), SummaryLog(log), GCEnabled(gcEnabled) {} + + virtual void Profile(llvm::FoldingSetNodeID &ID) const { + static int x = 0; + ID.AddPointer(&x); + ID.AddPointer(Sym); + } + + virtual PathDiagnosticPiece *VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR); + + virtual PathDiagnosticPiece *getEndPath(BugReporterContext &BRC, + const ExplodedNode *N, + BugReport &BR); + }; + + class CFRefLeakReportVisitor : public CFRefReportVisitor { + public: + CFRefLeakReportVisitor(SymbolRef sym, bool GCEnabled, + const SummaryLogTy &log) + : CFRefReportVisitor(sym, GCEnabled, log) {} + + PathDiagnosticPiece *getEndPath(BugReporterContext &BRC, + const ExplodedNode *N, + BugReport &BR); + + virtual BugReporterVisitor *clone() const { + // The curiously-recurring template pattern only works for one level of + // subclassing. Rather than make a new template base for + // CFRefReportVisitor, we simply override clone() to do the right thing. + // This could be trouble someday if BugReporterVisitorImpl is ever + // used for something else besides a convenient implementation of clone(). + return new CFRefLeakReportVisitor(*this); + } + }; + + class CFRefReport : public BugReport { + void addGCModeDescription(const LangOptions &LOpts, bool GCEnabled); + + public: + CFRefReport(CFRefBug &D, const LangOptions &LOpts, bool GCEnabled, + const SummaryLogTy &Log, ExplodedNode *n, SymbolRef sym, + bool registerVisitor = true) + : BugReport(D, D.getDescription(), n) { + if (registerVisitor) + addVisitor(new CFRefReportVisitor(sym, GCEnabled, Log)); + addGCModeDescription(LOpts, GCEnabled); + } + + CFRefReport(CFRefBug &D, const LangOptions &LOpts, bool GCEnabled, + const SummaryLogTy &Log, ExplodedNode *n, SymbolRef sym, + StringRef endText) + : BugReport(D, D.getDescription(), endText, n) { + addVisitor(new CFRefReportVisitor(sym, GCEnabled, Log)); + addGCModeDescription(LOpts, GCEnabled); + } + + virtual std::pair getRanges() { + const CFRefBug& BugTy = static_cast(getBugType()); + if (!BugTy.isLeak()) + return BugReport::getRanges(); + else + return std::make_pair(ranges_iterator(), ranges_iterator()); + } + }; + + class CFRefLeakReport : public CFRefReport { + const MemRegion* AllocBinding; + + public: + CFRefLeakReport(CFRefBug &D, const LangOptions &LOpts, bool GCEnabled, + const SummaryLogTy &Log, ExplodedNode *n, SymbolRef sym, + CheckerContext &Ctx); + + PathDiagnosticLocation getLocation(const SourceManager &SM) const { + assert(Location.isValid()); + return Location; + } + }; +} // end anonymous namespace + +void CFRefReport::addGCModeDescription(const LangOptions &LOpts, + bool GCEnabled) { + const char *GCModeDescription = 0; + + switch (LOpts.getGC()) { + case LangOptions::GCOnly: + assert(GCEnabled); + GCModeDescription = "Code is compiled to only use garbage collection"; + break; + + case LangOptions::NonGC: + assert(!GCEnabled); + GCModeDescription = "Code is compiled to use reference counts"; + break; + + case LangOptions::HybridGC: + if (GCEnabled) { + GCModeDescription = "Code is compiled to use either garbage collection " + "(GC) or reference counts (non-GC). The bug occurs " + "with GC enabled"; + break; + } else { + GCModeDescription = "Code is compiled to use either garbage collection " + "(GC) or reference counts (non-GC). The bug occurs " + "in non-GC mode"; + break; + } + } + + assert(GCModeDescription && "invalid/unknown GC mode"); + addExtraText(GCModeDescription); +} + +// FIXME: This should be a method on SmallVector. +static inline bool contains(const SmallVectorImpl& V, + ArgEffect X) { + for (SmallVectorImpl::const_iterator I=V.begin(), E=V.end(); + I!=E; ++I) + if (*I == X) return true; + + return false; +} + +static bool isPropertyAccess(const Stmt *S, ParentMap &PM) { + unsigned maxDepth = 4; + while (S && maxDepth) { + if (const PseudoObjectExpr *PO = dyn_cast(S)) { + if (!isa(PO->getSyntacticForm())) + return true; + return false; + } + S = PM.getParent(S); + --maxDepth; + } + return false; +} + +PathDiagnosticPiece *CFRefReportVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) { + + if (!isa(N->getLocation())) + return NULL; + + // Check if the type state has changed. + ProgramStateRef PrevSt = PrevN->getState(); + ProgramStateRef CurrSt = N->getState(); + const LocationContext *LCtx = N->getLocationContext(); + + const RefVal* CurrT = CurrSt->get(Sym); + if (!CurrT) return NULL; + + const RefVal &CurrV = *CurrT; + const RefVal *PrevT = PrevSt->get(Sym); + + // Create a string buffer to constain all the useful things we want + // to tell the user. + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + // This is the allocation site since the previous node had no bindings + // for this symbol. + if (!PrevT) { + const Stmt *S = cast(N->getLocation()).getStmt(); + + if (isa(S)) { + os << "NSArray literal is an object with a +0 retain count"; + } + else if (isa(S)) { + os << "NSDictionary literal is an object with a +0 retain count"; + } + else { + if (const CallExpr *CE = dyn_cast(S)) { + // Get the name of the callee (if it is available). + SVal X = CurrSt->getSValAsScalarOrLoc(CE->getCallee(), LCtx); + if (const FunctionDecl *FD = X.getAsFunctionDecl()) + os << "Call to function '" << *FD << '\''; + else + os << "function call"; + } + else { + assert(isa(S)); + // The message expression may have between written directly or as + // a property access. Lazily determine which case we are looking at. + os << (isPropertyAccess(S, N->getParentMap()) ? "Property" : "Method"); + } + + if (CurrV.getObjKind() == RetEffect::CF) { + os << " returns a Core Foundation object with a "; + } + else { + assert (CurrV.getObjKind() == RetEffect::ObjC); + os << " returns an Objective-C object with a "; + } + + if (CurrV.isOwned()) { + os << "+1 retain count"; + + if (GCEnabled) { + assert(CurrV.getObjKind() == RetEffect::CF); + os << ". " + "Core Foundation objects are not automatically garbage collected."; + } + } + else { + assert (CurrV.isNotOwned()); + os << "+0 retain count"; + } + } + + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return new PathDiagnosticEventPiece(Pos, os.str()); + } + + // Gather up the effects that were performed on the object at this + // program point + SmallVector AEffects; + + const ExplodedNode *OrigNode = BRC.getNodeResolver().getOriginalNode(N); + if (const RetainSummary *Summ = SummaryLog.lookup(OrigNode)) { + // We only have summaries attached to nodes after evaluating CallExpr and + // ObjCMessageExprs. + const Stmt *S = cast(N->getLocation()).getStmt(); + + if (const CallExpr *CE = dyn_cast(S)) { + // Iterate through the parameter expressions and see if the symbol + // was ever passed as an argument. + unsigned i = 0; + + for (CallExpr::const_arg_iterator AI=CE->arg_begin(), AE=CE->arg_end(); + AI!=AE; ++AI, ++i) { + + // Retrieve the value of the argument. Is it the symbol + // we are interested in? + if (CurrSt->getSValAsScalarOrLoc(*AI, LCtx).getAsLocSymbol() != Sym) + continue; + + // We have an argument. Get the effect! + AEffects.push_back(Summ->getArg(i)); + } + } + else if (const ObjCMessageExpr *ME = dyn_cast(S)) { + if (const Expr *receiver = ME->getInstanceReceiver()) + if (CurrSt->getSValAsScalarOrLoc(receiver, LCtx) + .getAsLocSymbol() == Sym) { + // The symbol we are tracking is the receiver. + AEffects.push_back(Summ->getReceiverEffect()); + } + } + } + + do { + // Get the previous type state. + RefVal PrevV = *PrevT; + + // Specially handle -dealloc. + if (!GCEnabled && contains(AEffects, Dealloc)) { + // Determine if the object's reference count was pushed to zero. + assert(!(PrevV == CurrV) && "The typestate *must* have changed."); + // We may not have transitioned to 'release' if we hit an error. + // This case is handled elsewhere. + if (CurrV.getKind() == RefVal::Released) { + assert(CurrV.getCombinedCounts() == 0); + os << "Object released by directly sending the '-dealloc' message"; + break; + } + } + + // Specially handle CFMakeCollectable and friends. + if (contains(AEffects, MakeCollectable)) { + // Get the name of the function. + const Stmt *S = cast(N->getLocation()).getStmt(); + SVal X = + CurrSt->getSValAsScalarOrLoc(cast(S)->getCallee(), LCtx); + const FunctionDecl *FD = X.getAsFunctionDecl(); + + if (GCEnabled) { + // Determine if the object's reference count was pushed to zero. + assert(!(PrevV == CurrV) && "The typestate *must* have changed."); + + os << "In GC mode a call to '" << *FD + << "' decrements an object's retain count and registers the " + "object with the garbage collector. "; + + if (CurrV.getKind() == RefVal::Released) { + assert(CurrV.getCount() == 0); + os << "Since it now has a 0 retain count the object can be " + "automatically collected by the garbage collector."; + } + else + os << "An object must have a 0 retain count to be garbage collected. " + "After this call its retain count is +" << CurrV.getCount() + << '.'; + } + else + os << "When GC is not enabled a call to '" << *FD + << "' has no effect on its argument."; + + // Nothing more to say. + break; + } + + // Determine if the typestate has changed. + if (!(PrevV == CurrV)) + switch (CurrV.getKind()) { + case RefVal::Owned: + case RefVal::NotOwned: + + if (PrevV.getCount() == CurrV.getCount()) { + // Did an autorelease message get sent? + if (PrevV.getAutoreleaseCount() == CurrV.getAutoreleaseCount()) + return 0; + + assert(PrevV.getAutoreleaseCount() < CurrV.getAutoreleaseCount()); + os << "Object sent -autorelease message"; + break; + } + + if (PrevV.getCount() > CurrV.getCount()) + os << "Reference count decremented."; + else + os << "Reference count incremented."; + + if (unsigned Count = CurrV.getCount()) + os << " The object now has a +" << Count << " retain count."; + + if (PrevV.getKind() == RefVal::Released) { + assert(GCEnabled && CurrV.getCount() > 0); + os << " The object is not eligible for garbage collection until " + "the retain count reaches 0 again."; + } + + break; + + case RefVal::Released: + os << "Object released."; + break; + + case RefVal::ReturnedOwned: + // Autoreleases can be applied after marking a node ReturnedOwned. + if (CurrV.getAutoreleaseCount()) + return NULL; + + os << "Object returned to caller as an owning reference (single " + "retain count transferred to caller)"; + break; + + case RefVal::ReturnedNotOwned: + os << "Object returned to caller with a +0 retain count"; + break; + + default: + return NULL; + } + + // Emit any remaining diagnostics for the argument effects (if any). + for (SmallVectorImpl::iterator I=AEffects.begin(), + E=AEffects.end(); I != E; ++I) { + + // A bunch of things have alternate behavior under GC. + if (GCEnabled) + switch (*I) { + default: break; + case Autorelease: + os << "In GC mode an 'autorelease' has no effect."; + continue; + case IncRefMsg: + os << "In GC mode the 'retain' message has no effect."; + continue; + case DecRefMsg: + os << "In GC mode the 'release' message has no effect."; + continue; + } + } + } while (0); + + if (os.str().empty()) + return 0; // We have nothing to say! + + const Stmt *S = cast(N->getLocation()).getStmt(); + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + PathDiagnosticPiece *P = new PathDiagnosticEventPiece(Pos, os.str()); + + // Add the range by scanning the children of the statement for any bindings + // to Sym. + for (Stmt::const_child_iterator I = S->child_begin(), E = S->child_end(); + I!=E; ++I) + if (const Expr *Exp = dyn_cast_or_null(*I)) + if (CurrSt->getSValAsScalarOrLoc(Exp, LCtx).getAsLocSymbol() == Sym) { + P->addRange(Exp->getSourceRange()); + break; + } + + return P; +} + +// Find the first node in the current function context that referred to the +// tracked symbol and the memory location that value was stored to. Note, the +// value is only reported if the allocation occurred in the same function as +// the leak. +static std::pair +GetAllocationSite(ProgramStateManager& StateMgr, const ExplodedNode *N, + SymbolRef Sym) { + const ExplodedNode *Last = N; + const MemRegion* FirstBinding = 0; + const LocationContext *LeakContext = N->getLocationContext(); + + while (N) { + ProgramStateRef St = N->getState(); + RefBindings B = St->get(); + + if (!B.lookup(Sym)) + break; + + StoreManager::FindUniqueBinding FB(Sym); + StateMgr.iterBindings(St, FB); + if (FB) FirstBinding = FB.getRegion(); + + // Allocation node, is the last node in the current context in which the + // symbol was tracked. + if (N->getLocationContext() == LeakContext) + Last = N; + + N = N->pred_empty() ? NULL : *(N->pred_begin()); + } + + // If allocation happened in a function different from the leak node context, + // do not report the binding. + if (N->getLocationContext() != LeakContext) { + FirstBinding = 0; + } + + return std::make_pair(Last, FirstBinding); +} + +PathDiagnosticPiece* +CFRefReportVisitor::getEndPath(BugReporterContext &BRC, + const ExplodedNode *EndN, + BugReport &BR) { + BR.markInteresting(Sym); + return BugReporterVisitor::getDefaultEndPath(BRC, EndN, BR); +} + +PathDiagnosticPiece* +CFRefLeakReportVisitor::getEndPath(BugReporterContext &BRC, + const ExplodedNode *EndN, + BugReport &BR) { + + // Tell the BugReporterContext to report cases when the tracked symbol is + // assigned to different variables, etc. + BR.markInteresting(Sym); + + // We are reporting a leak. Walk up the graph to get to the first node where + // the symbol appeared, and also get the first VarDecl that tracked object + // is stored to. + const ExplodedNode *AllocNode = 0; + const MemRegion* FirstBinding = 0; + + llvm::tie(AllocNode, FirstBinding) = + GetAllocationSite(BRC.getStateManager(), EndN, Sym); + + SourceManager& SM = BRC.getSourceManager(); + + // Compute an actual location for the leak. Sometimes a leak doesn't + // occur at an actual statement (e.g., transition between blocks; end + // of function) so we need to walk the graph and compute a real location. + const ExplodedNode *LeakN = EndN; + PathDiagnosticLocation L = PathDiagnosticLocation::createEndOfPath(LeakN, SM); + + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + os << "Object leaked: "; + + if (FirstBinding) { + os << "object allocated and stored into '" + << FirstBinding->getString() << '\''; + } + else + os << "allocated object"; + + // Get the retain count. + const RefVal* RV = EndN->getState()->get(Sym); + + if (RV->getKind() == RefVal::ErrorLeakReturned) { + // FIXME: Per comments in rdar://6320065, "create" only applies to CF + // objects. Only "copy", "alloc", "retain" and "new" transfer ownership + // to the caller for NS objects. + const Decl *D = &EndN->getCodeDecl(); + if (const ObjCMethodDecl *MD = dyn_cast(D)) { + os << " is returned from a method whose name ('" + << MD->getSelector().getAsString() + << "') does not start with 'copy', 'mutableCopy', 'alloc' or 'new'." + " This violates the naming convention rules" + " given in the Memory Management Guide for Cocoa"; + } + else { + const FunctionDecl *FD = cast(D); + os << " is returned from a function whose name ('" + << *FD + << "') does not contain 'Copy' or 'Create'. This violates the naming" + " convention rules given in the Memory Management Guide for Core" + " Foundation"; + } + } + else if (RV->getKind() == RefVal::ErrorGCLeakReturned) { + ObjCMethodDecl &MD = cast(EndN->getCodeDecl()); + os << " and returned from method '" << MD.getSelector().getAsString() + << "' is potentially leaked when using garbage collection. Callers " + "of this method do not expect a returned object with a +1 retain " + "count since they expect the object to be managed by the garbage " + "collector"; + } + else + os << " is not referenced later in this execution path and has a retain " + "count of +" << RV->getCount(); + + return new PathDiagnosticEventPiece(L, os.str()); +} + +CFRefLeakReport::CFRefLeakReport(CFRefBug &D, const LangOptions &LOpts, + bool GCEnabled, const SummaryLogTy &Log, + ExplodedNode *n, SymbolRef sym, + CheckerContext &Ctx) +: CFRefReport(D, LOpts, GCEnabled, Log, n, sym, false) { + + // Most bug reports are cached at the location where they occurred. + // With leaks, we want to unique them by the location where they were + // allocated, and only report a single path. To do this, we need to find + // the allocation site of a piece of tracked memory, which we do via a + // call to GetAllocationSite. This will walk the ExplodedGraph backwards. + // Note that this is *not* the trimmed graph; we are guaranteed, however, + // that all ancestor nodes that represent the allocation site have the + // same SourceLocation. + const ExplodedNode *AllocNode = 0; + + const SourceManager& SMgr = Ctx.getSourceManager(); + + llvm::tie(AllocNode, AllocBinding) = // Set AllocBinding. + GetAllocationSite(Ctx.getStateManager(), getErrorNode(), sym); + + // Get the SourceLocation for the allocation site. + ProgramPoint P = AllocNode->getLocation(); + const Stmt *AllocStmt = cast(P).getStmt(); + Location = PathDiagnosticLocation::createBegin(AllocStmt, SMgr, + n->getLocationContext()); + // Fill in the description of the bug. + Description.clear(); + llvm::raw_string_ostream os(Description); + os << "Potential leak "; + if (GCEnabled) + os << "(when using garbage collection) "; + os << "of an object"; + + // FIXME: AllocBinding doesn't get populated for RegionStore yet. + if (AllocBinding) + os << " stored into '" << AllocBinding->getString() << '\''; + + addVisitor(new CFRefLeakReportVisitor(sym, GCEnabled, Log)); +} + +//===----------------------------------------------------------------------===// +// Main checker logic. +//===----------------------------------------------------------------------===// + +namespace { +class RetainCountChecker + : public Checker< check::Bind, + check::DeadSymbols, + check::EndAnalysis, + check::EndPath, + check::PostStmt, + check::PostStmt, + check::PostStmt, + check::PostStmt, + check::PostStmt, + check::PostStmt, + check::PostObjCMessage, + check::PreStmt, + check::RegionChanges, + eval::Assume, + eval::Call > { + mutable OwningPtr useAfterRelease, releaseNotOwned; + mutable OwningPtr deallocGC, deallocNotOwned; + mutable OwningPtr overAutorelease, returnNotOwnedForOwned; + mutable OwningPtr leakWithinFunction, leakAtReturn; + mutable OwningPtr leakWithinFunctionGC, leakAtReturnGC; + + typedef llvm::DenseMap SymbolTagMap; + + // This map is only used to ensure proper deletion of any allocated tags. + mutable SymbolTagMap DeadSymbolTags; + + mutable OwningPtr Summaries; + mutable OwningPtr SummariesGC; + + mutable ARCounts::Factory ARCountFactory; + + mutable SummaryLogTy SummaryLog; + mutable bool ShouldResetSummaryLog; + +public: + RetainCountChecker() : ShouldResetSummaryLog(false) {} + + virtual ~RetainCountChecker() { + DeleteContainerSeconds(DeadSymbolTags); + } + + void checkEndAnalysis(ExplodedGraph &G, BugReporter &BR, + ExprEngine &Eng) const { + // FIXME: This is a hack to make sure the summary log gets cleared between + // analyses of different code bodies. + // + // Why is this necessary? Because a checker's lifetime is tied to a + // translation unit, but an ExplodedGraph's lifetime is just a code body. + // Once in a blue moon, a new ExplodedNode will have the same address as an + // old one with an associated summary, and the bug report visitor gets very + // confused. (To make things worse, the summary lifetime is currently also + // tied to a code body, so we get a crash instead of incorrect results.) + // + // Why is this a bad solution? Because if the lifetime of the ExplodedGraph + // changes, things will start going wrong again. Really the lifetime of this + // log needs to be tied to either the specific nodes in it or the entire + // ExplodedGraph, not to a specific part of the code being analyzed. + // + // (Also, having stateful local data means that the same checker can't be + // used from multiple threads, but a lot of checkers have incorrect + // assumptions about that anyway. So that wasn't a priority at the time of + // this fix.) + // + // This happens at the end of analysis, but bug reports are emitted /after/ + // this point. So we can't just clear the summary log now. Instead, we mark + // that the next time we access the summary log, it should be cleared. + + // If we never reset the summary log during /this/ code body analysis, + // there were no new summaries. There might still have been summaries from + // the /last/ analysis, so clear them out to make sure the bug report + // visitors don't get confused. + if (ShouldResetSummaryLog) + SummaryLog.clear(); + + ShouldResetSummaryLog = !SummaryLog.empty(); + } + + CFRefBug *getLeakWithinFunctionBug(const LangOptions &LOpts, + bool GCEnabled) const { + if (GCEnabled) { + if (!leakWithinFunctionGC) + leakWithinFunctionGC.reset(new LeakWithinFunction("Leak of object when " + "using garbage " + "collection")); + return leakWithinFunctionGC.get(); + } else { + if (!leakWithinFunction) { + if (LOpts.getGC() == LangOptions::HybridGC) { + leakWithinFunction.reset(new LeakWithinFunction("Leak of object when " + "not using garbage " + "collection (GC) in " + "dual GC/non-GC " + "code")); + } else { + leakWithinFunction.reset(new LeakWithinFunction("Leak")); + } + } + return leakWithinFunction.get(); + } + } + + CFRefBug *getLeakAtReturnBug(const LangOptions &LOpts, bool GCEnabled) const { + if (GCEnabled) { + if (!leakAtReturnGC) + leakAtReturnGC.reset(new LeakAtReturn("Leak of returned object when " + "using garbage collection")); + return leakAtReturnGC.get(); + } else { + if (!leakAtReturn) { + if (LOpts.getGC() == LangOptions::HybridGC) { + leakAtReturn.reset(new LeakAtReturn("Leak of returned object when " + "not using garbage collection " + "(GC) in dual GC/non-GC code")); + } else { + leakAtReturn.reset(new LeakAtReturn("Leak of returned object")); + } + } + return leakAtReturn.get(); + } + } + + RetainSummaryManager &getSummaryManager(ASTContext &Ctx, + bool GCEnabled) const { + // FIXME: We don't support ARC being turned on and off during one analysis. + // (nor, for that matter, do we support changing ASTContexts) + bool ARCEnabled = (bool)Ctx.getLangOpts().ObjCAutoRefCount; + if (GCEnabled) { + if (!SummariesGC) + SummariesGC.reset(new RetainSummaryManager(Ctx, true, ARCEnabled)); + else + assert(SummariesGC->isARCEnabled() == ARCEnabled); + return *SummariesGC; + } else { + if (!Summaries) + Summaries.reset(new RetainSummaryManager(Ctx, false, ARCEnabled)); + else + assert(Summaries->isARCEnabled() == ARCEnabled); + return *Summaries; + } + } + + RetainSummaryManager &getSummaryManager(CheckerContext &C) const { + return getSummaryManager(C.getASTContext(), C.isObjCGCEnabled()); + } + + void printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const; + + void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const; + void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const; + void checkPostStmt(const CastExpr *CE, CheckerContext &C) const; + + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkPostStmt(const CXXConstructExpr *CE, CheckerContext &C) const; + void checkPostStmt(const ObjCArrayLiteral *AL, CheckerContext &C) const; + void checkPostStmt(const ObjCDictionaryLiteral *DL, CheckerContext &C) const; + void checkPostObjCMessage(const ObjCMessage &Msg, CheckerContext &C) const; + + void checkSummary(const RetainSummary &Summ, const CallOrObjCMessage &Call, + CheckerContext &C) const; + + bool evalCall(const CallExpr *CE, CheckerContext &C) const; + + ProgramStateRef evalAssume(ProgramStateRef state, SVal Cond, + bool Assumption) const; + + ProgramStateRef + checkRegionChanges(ProgramStateRef state, + const StoreManager::InvalidatedSymbols *invalidated, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const CallOrObjCMessage *Call) const; + + bool wantsRegionChangeUpdate(ProgramStateRef state) const { + return true; + } + + void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; + void checkReturnWithRetEffect(const ReturnStmt *S, CheckerContext &C, + ExplodedNode *Pred, RetEffect RE, RefVal X, + SymbolRef Sym, ProgramStateRef state) const; + + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; + void checkEndPath(CheckerContext &C) const; + + ProgramStateRef updateSymbol(ProgramStateRef state, SymbolRef sym, + RefVal V, ArgEffect E, RefVal::Kind &hasErr, + CheckerContext &C) const; + + void processNonLeakError(ProgramStateRef St, SourceRange ErrorRange, + RefVal::Kind ErrorKind, SymbolRef Sym, + CheckerContext &C) const; + + void processObjCLiterals(CheckerContext &C, const Expr *Ex) const; + + const ProgramPointTag *getDeadSymbolTag(SymbolRef sym) const; + + ProgramStateRef handleSymbolDeath(ProgramStateRef state, + SymbolRef sid, RefVal V, + SmallVectorImpl &Leaked) const; + + std::pair + handleAutoreleaseCounts(ProgramStateRef state, + GenericNodeBuilderRefCount Bd, ExplodedNode *Pred, + CheckerContext &Ctx, SymbolRef Sym, RefVal V) const; + + ExplodedNode *processLeaks(ProgramStateRef state, + SmallVectorImpl &Leaked, + GenericNodeBuilderRefCount &Builder, + CheckerContext &Ctx, + ExplodedNode *Pred = 0) const; +}; +} // end anonymous namespace + +namespace { +class StopTrackingCallback : public SymbolVisitor { + ProgramStateRef state; +public: + StopTrackingCallback(ProgramStateRef st) : state(st) {} + ProgramStateRef getState() const { return state; } + + bool VisitSymbol(SymbolRef sym) { + state = state->remove(sym); + return true; + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Handle statements that may have an effect on refcounts. +//===----------------------------------------------------------------------===// + +void RetainCountChecker::checkPostStmt(const BlockExpr *BE, + CheckerContext &C) const { + + // Scan the BlockDecRefExprs for any object the retain count checker + // may be tracking. + if (!BE->getBlockDecl()->hasCaptures()) + return; + + ProgramStateRef state = C.getState(); + const BlockDataRegion *R = + cast(state->getSVal(BE, + C.getLocationContext()).getAsRegion()); + + BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(), + E = R->referenced_vars_end(); + + if (I == E) + return; + + // FIXME: For now we invalidate the tracking of all symbols passed to blocks + // via captured variables, even though captured variables result in a copy + // and in implicit increment/decrement of a retain count. + SmallVector Regions; + const LocationContext *LC = C.getLocationContext(); + MemRegionManager &MemMgr = C.getSValBuilder().getRegionManager(); + + for ( ; I != E; ++I) { + const VarRegion *VR = *I; + if (VR->getSuperRegion() == R) { + VR = MemMgr.getVarRegion(VR->getDecl(), LC); + } + Regions.push_back(VR); + } + + state = + state->scanReachableSymbols(Regions.data(), + Regions.data() + Regions.size()).getState(); + C.addTransition(state); +} + +void RetainCountChecker::checkPostStmt(const CastExpr *CE, + CheckerContext &C) const { + const ObjCBridgedCastExpr *BE = dyn_cast(CE); + if (!BE) + return; + + ArgEffect AE = IncRef; + + switch (BE->getBridgeKind()) { + case clang::OBC_Bridge: + // Do nothing. + return; + case clang::OBC_BridgeRetained: + AE = IncRef; + break; + case clang::OBC_BridgeTransfer: + AE = DecRefBridgedTransfered; + break; + } + + ProgramStateRef state = C.getState(); + SymbolRef Sym = state->getSVal(CE, C.getLocationContext()).getAsLocSymbol(); + if (!Sym) + return; + const RefVal* T = state->get(Sym); + if (!T) + return; + + RefVal::Kind hasErr = (RefVal::Kind) 0; + state = updateSymbol(state, Sym, *T, AE, hasErr, C); + + if (hasErr) { + // FIXME: If we get an error during a bridge cast, should we report it? + // Should we assert that there is no error? + return; + } + + C.addTransition(state); +} + +void RetainCountChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + if (C.wasInlined) + return; + + // Get the callee. + ProgramStateRef state = C.getState(); + const Expr *Callee = CE->getCallee(); + SVal L = state->getSVal(Callee, C.getLocationContext()); + + RetainSummaryManager &Summaries = getSummaryManager(C); + const RetainSummary *Summ = 0; + + // FIXME: Better support for blocks. For now we stop tracking anything + // that is passed to blocks. + // FIXME: Need to handle variables that are "captured" by the block. + if (dyn_cast_or_null(L.getAsRegion())) { + Summ = Summaries.getPersistentStopSummary(); + } else if (const FunctionDecl *FD = L.getAsFunctionDecl()) { + Summ = Summaries.getSummary(FD); + } else if (const CXXMemberCallExpr *me = dyn_cast(CE)) { + if (const CXXMethodDecl *MD = me->getMethodDecl()) + Summ = Summaries.getSummary(MD); + } + + if (!Summ) + Summ = Summaries.getDefaultSummary(); + + checkSummary(*Summ, CallOrObjCMessage(CE, state, C.getLocationContext()), C); +} + +void RetainCountChecker::checkPostStmt(const CXXConstructExpr *CE, + CheckerContext &C) const { + const CXXConstructorDecl *Ctor = CE->getConstructor(); + if (!Ctor) + return; + + RetainSummaryManager &Summaries = getSummaryManager(C); + const RetainSummary *Summ = Summaries.getSummary(Ctor); + + // If we didn't get a summary, this constructor doesn't affect retain counts. + if (!Summ) + return; + + ProgramStateRef state = C.getState(); + checkSummary(*Summ, CallOrObjCMessage(CE, state, C.getLocationContext()), C); +} + +void RetainCountChecker::processObjCLiterals(CheckerContext &C, + const Expr *Ex) const { + ProgramStateRef state = C.getState(); + const ExplodedNode *pred = C.getPredecessor(); + for (Stmt::const_child_iterator it = Ex->child_begin(), et = Ex->child_end() ; + it != et ; ++it) { + const Stmt *child = *it; + SVal V = state->getSVal(child, pred->getLocationContext()); + if (SymbolRef sym = V.getAsSymbol()) + if (const RefVal* T = state->get(sym)) { + RefVal::Kind hasErr = (RefVal::Kind) 0; + state = updateSymbol(state, sym, *T, MayEscape, hasErr, C); + if (hasErr) { + processNonLeakError(state, child->getSourceRange(), hasErr, sym, C); + return; + } + } + } + + // Return the object as autoreleased. + // RetEffect RE = RetEffect::MakeNotOwned(RetEffect::ObjC); + if (SymbolRef sym = + state->getSVal(Ex, pred->getLocationContext()).getAsSymbol()) { + QualType ResultTy = Ex->getType(); + state = state->set(sym, RefVal::makeNotOwned(RetEffect::ObjC, + ResultTy)); + } + + C.addTransition(state); +} + +void RetainCountChecker::checkPostStmt(const ObjCArrayLiteral *AL, + CheckerContext &C) const { + // Apply the 'MayEscape' to all values. + processObjCLiterals(C, AL); +} + +void RetainCountChecker::checkPostStmt(const ObjCDictionaryLiteral *DL, + CheckerContext &C) const { + // Apply the 'MayEscape' to all keys and values. + processObjCLiterals(C, DL); +} + +void RetainCountChecker::checkPostObjCMessage(const ObjCMessage &Msg, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + + RetainSummaryManager &Summaries = getSummaryManager(C); + + const RetainSummary *Summ; + if (Msg.isInstanceMessage()) { + const LocationContext *LC = C.getLocationContext(); + Summ = Summaries.getInstanceMethodSummary(Msg, state, LC); + } else { + Summ = Summaries.getClassMethodSummary(Msg); + } + + // If we didn't get a summary, this message doesn't affect retain counts. + if (!Summ) + return; + + checkSummary(*Summ, CallOrObjCMessage(Msg, state, C.getLocationContext()), C); +} + +/// GetReturnType - Used to get the return type of a message expression or +/// function call with the intention of affixing that type to a tracked symbol. +/// While the the return type can be queried directly from RetEx, when +/// invoking class methods we augment to the return type to be that of +/// a pointer to the class (as opposed it just being id). +// FIXME: We may be able to do this with related result types instead. +// This function is probably overestimating. +static QualType GetReturnType(const Expr *RetE, ASTContext &Ctx) { + QualType RetTy = RetE->getType(); + // If RetE is not a message expression just return its type. + // If RetE is a message expression, return its types if it is something + /// more specific than id. + if (const ObjCMessageExpr *ME = dyn_cast(RetE)) + if (const ObjCObjectPointerType *PT = RetTy->getAs()) + if (PT->isObjCQualifiedIdType() || PT->isObjCIdType() || + PT->isObjCClassType()) { + // At this point we know the return type of the message expression is + // id, id<...>, or Class. If we have an ObjCInterfaceDecl, we know this + // is a call to a class method whose type we can resolve. In such + // cases, promote the return type to XXX* (where XXX is the class). + const ObjCInterfaceDecl *D = ME->getReceiverInterface(); + return !D ? RetTy : + Ctx.getObjCObjectPointerType(Ctx.getObjCInterfaceType(D)); + } + + return RetTy; +} + +void RetainCountChecker::checkSummary(const RetainSummary &Summ, + const CallOrObjCMessage &CallOrMsg, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + + // Evaluate the effect of the arguments. + RefVal::Kind hasErr = (RefVal::Kind) 0; + SourceRange ErrorRange; + SymbolRef ErrorSym = 0; + + for (unsigned idx = 0, e = CallOrMsg.getNumArgs(); idx != e; ++idx) { + SVal V = CallOrMsg.getArgSVal(idx); + + if (SymbolRef Sym = V.getAsLocSymbol()) { + if (RefBindings::data_type *T = state->get(Sym)) { + state = updateSymbol(state, Sym, *T, Summ.getArg(idx), hasErr, C); + if (hasErr) { + ErrorRange = CallOrMsg.getArgSourceRange(idx); + ErrorSym = Sym; + break; + } + } + } + } + + // Evaluate the effect on the message receiver. + bool ReceiverIsTracked = false; + if (!hasErr && CallOrMsg.isObjCMessage()) { + const LocationContext *LC = C.getLocationContext(); + SVal Receiver = CallOrMsg.getInstanceMessageReceiver(LC); + if (SymbolRef Sym = Receiver.getAsLocSymbol()) { + if (const RefVal *T = state->get(Sym)) { + ReceiverIsTracked = true; + state = updateSymbol(state, Sym, *T, Summ.getReceiverEffect(), + hasErr, C); + if (hasErr) { + ErrorRange = CallOrMsg.getReceiverSourceRange(); + ErrorSym = Sym; + } + } + } + } + + // Process any errors. + if (hasErr) { + processNonLeakError(state, ErrorRange, hasErr, ErrorSym, C); + return; + } + + // Consult the summary for the return value. + RetEffect RE = Summ.getRetEffect(); + + if (RE.getKind() == RetEffect::OwnedWhenTrackedReceiver) { + if (ReceiverIsTracked) + RE = getSummaryManager(C).getObjAllocRetEffect(); + else + RE = RetEffect::MakeNoRet(); + } + + switch (RE.getKind()) { + default: + llvm_unreachable("Unhandled RetEffect."); + + case RetEffect::NoRet: + // No work necessary. + break; + + case RetEffect::OwnedAllocatedSymbol: + case RetEffect::OwnedSymbol: { + SymbolRef Sym = state->getSVal(CallOrMsg.getOriginExpr(), + C.getLocationContext()).getAsSymbol(); + if (!Sym) + break; + + // Use the result type from callOrMsg as it automatically adjusts + // for methods/functions that return references. + QualType ResultTy = CallOrMsg.getResultType(C.getASTContext()); + state = state->set(Sym, RefVal::makeOwned(RE.getObjKind(), + ResultTy)); + + // FIXME: Add a flag to the checker where allocations are assumed to + // *not* fail. (The code below is out-of-date, though.) +#if 0 + if (RE.getKind() == RetEffect::OwnedAllocatedSymbol) { + bool isFeasible; + state = state.assume(loc::SymbolVal(Sym), true, isFeasible); + assert(isFeasible && "Cannot assume fresh symbol is non-null."); + } +#endif + + break; + } + + case RetEffect::GCNotOwnedSymbol: + case RetEffect::ARCNotOwnedSymbol: + case RetEffect::NotOwnedSymbol: { + const Expr *Ex = CallOrMsg.getOriginExpr(); + SymbolRef Sym = state->getSVal(Ex, C.getLocationContext()).getAsSymbol(); + if (!Sym) + break; + + // Use GetReturnType in order to give [NSFoo alloc] the type NSFoo *. + QualType ResultTy = GetReturnType(Ex, C.getASTContext()); + state = state->set(Sym, RefVal::makeNotOwned(RE.getObjKind(), + ResultTy)); + break; + } + } + + // This check is actually necessary; otherwise the statement builder thinks + // we've hit a previously-found path. + // Normally addTransition takes care of this, but we want the node pointer. + ExplodedNode *NewNode; + if (state == C.getState()) { + NewNode = C.getPredecessor(); + } else { + NewNode = C.addTransition(state); + } + + // Annotate the node with summary we used. + if (NewNode) { + // FIXME: This is ugly. See checkEndAnalysis for why it's necessary. + if (ShouldResetSummaryLog) { + SummaryLog.clear(); + ShouldResetSummaryLog = false; + } + SummaryLog[NewNode] = &Summ; + } +} + + +ProgramStateRef +RetainCountChecker::updateSymbol(ProgramStateRef state, SymbolRef sym, + RefVal V, ArgEffect E, RefVal::Kind &hasErr, + CheckerContext &C) const { + // In GC mode [... release] and [... retain] do nothing. + // In ARC mode they shouldn't exist at all, but we just ignore them. + bool IgnoreRetainMsg = C.isObjCGCEnabled(); + if (!IgnoreRetainMsg) + IgnoreRetainMsg = (bool)C.getASTContext().getLangOpts().ObjCAutoRefCount; + + switch (E) { + default: break; + case IncRefMsg: E = IgnoreRetainMsg ? DoNothing : IncRef; break; + case DecRefMsg: E = IgnoreRetainMsg ? DoNothing : DecRef; break; + case MakeCollectable: E = C.isObjCGCEnabled() ? DecRef : DoNothing; break; + case NewAutoreleasePool: E = C.isObjCGCEnabled() ? DoNothing : + NewAutoreleasePool; break; + } + + // Handle all use-after-releases. + if (!C.isObjCGCEnabled() && V.getKind() == RefVal::Released) { + V = V ^ RefVal::ErrorUseAfterRelease; + hasErr = V.getKind(); + return state->set(sym, V); + } + + switch (E) { + case DecRefMsg: + case IncRefMsg: + case MakeCollectable: + llvm_unreachable("DecRefMsg/IncRefMsg/MakeCollectable already converted"); + + case Dealloc: + // Any use of -dealloc in GC is *bad*. + if (C.isObjCGCEnabled()) { + V = V ^ RefVal::ErrorDeallocGC; + hasErr = V.getKind(); + break; + } + + switch (V.getKind()) { + default: + llvm_unreachable("Invalid RefVal state for an explicit dealloc."); + case RefVal::Owned: + // The object immediately transitions to the released state. + V = V ^ RefVal::Released; + V.clearCounts(); + return state->set(sym, V); + case RefVal::NotOwned: + V = V ^ RefVal::ErrorDeallocNotOwned; + hasErr = V.getKind(); + break; + } + break; + + case NewAutoreleasePool: + assert(!C.isObjCGCEnabled()); + return state->add(sym); + + case MayEscape: + if (V.getKind() == RefVal::Owned) { + V = V ^ RefVal::NotOwned; + break; + } + + // Fall-through. + + case DoNothing: + return state; + + case Autorelease: + if (C.isObjCGCEnabled()) + return state; + + // Update the autorelease counts. + state = SendAutorelease(state, ARCountFactory, sym); + V = V.autorelease(); + break; + + case StopTracking: + return state->remove(sym); + + case IncRef: + switch (V.getKind()) { + default: + llvm_unreachable("Invalid RefVal state for a retain."); + case RefVal::Owned: + case RefVal::NotOwned: + V = V + 1; + break; + case RefVal::Released: + // Non-GC cases are handled above. + assert(C.isObjCGCEnabled()); + V = (V ^ RefVal::Owned) + 1; + break; + } + break; + + case SelfOwn: + V = V ^ RefVal::NotOwned; + // Fall-through. + case DecRef: + case DecRefBridgedTransfered: + switch (V.getKind()) { + default: + // case 'RefVal::Released' handled above. + llvm_unreachable("Invalid RefVal state for a release."); + + case RefVal::Owned: + assert(V.getCount() > 0); + if (V.getCount() == 1) + V = V ^ (E == DecRefBridgedTransfered ? + RefVal::NotOwned : RefVal::Released); + V = V - 1; + break; + + case RefVal::NotOwned: + if (V.getCount() > 0) + V = V - 1; + else { + V = V ^ RefVal::ErrorReleaseNotOwned; + hasErr = V.getKind(); + } + break; + + case RefVal::Released: + // Non-GC cases are handled above. + assert(C.isObjCGCEnabled()); + V = V ^ RefVal::ErrorUseAfterRelease; + hasErr = V.getKind(); + break; + } + break; + } + return state->set(sym, V); +} + +void RetainCountChecker::processNonLeakError(ProgramStateRef St, + SourceRange ErrorRange, + RefVal::Kind ErrorKind, + SymbolRef Sym, + CheckerContext &C) const { + ExplodedNode *N = C.generateSink(St); + if (!N) + return; + + CFRefBug *BT; + switch (ErrorKind) { + default: + llvm_unreachable("Unhandled error."); + case RefVal::ErrorUseAfterRelease: + if (!useAfterRelease) + useAfterRelease.reset(new UseAfterRelease()); + BT = &*useAfterRelease; + break; + case RefVal::ErrorReleaseNotOwned: + if (!releaseNotOwned) + releaseNotOwned.reset(new BadRelease()); + BT = &*releaseNotOwned; + break; + case RefVal::ErrorDeallocGC: + if (!deallocGC) + deallocGC.reset(new DeallocGC()); + BT = &*deallocGC; + break; + case RefVal::ErrorDeallocNotOwned: + if (!deallocNotOwned) + deallocNotOwned.reset(new DeallocNotOwned()); + BT = &*deallocNotOwned; + break; + } + + assert(BT); + CFRefReport *report = new CFRefReport(*BT, C.getASTContext().getLangOpts(), + C.isObjCGCEnabled(), SummaryLog, + N, Sym); + report->addRange(ErrorRange); + C.EmitReport(report); +} + +//===----------------------------------------------------------------------===// +// Handle the return values of retain-count-related functions. +//===----------------------------------------------------------------------===// + +bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { + // Get the callee. We're only interested in simple C functions. + ProgramStateRef state = C.getState(); + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return false; + + IdentifierInfo *II = FD->getIdentifier(); + if (!II) + return false; + + // For now, we're only handling the functions that return aliases of their + // arguments: CFRetain and CFMakeCollectable (and their families). + // Eventually we should add other functions we can model entirely, + // such as CFRelease, which don't invalidate their arguments or globals. + if (CE->getNumArgs() != 1) + return false; + + // Get the name of the function. + StringRef FName = II->getName(); + FName = FName.substr(FName.find_first_not_of('_')); + + // See if it's one of the specific functions we know how to eval. + bool canEval = false; + + QualType ResultTy = CE->getCallReturnType(); + if (ResultTy->isObjCIdType()) { + // Handle: id NSMakeCollectable(CFTypeRef) + canEval = II->isStr("NSMakeCollectable"); + } else if (ResultTy->isPointerType()) { + // Handle: (CF|CG)Retain + // CFMakeCollectable + // It's okay to be a little sloppy here (CGMakeCollectable doesn't exist). + if (cocoa::isRefType(ResultTy, "CF", FName) || + cocoa::isRefType(ResultTy, "CG", FName)) { + canEval = isRetain(FD, FName) || isMakeCollectable(FD, FName); + } + } + + if (!canEval) + return false; + + // Bind the return value. + const LocationContext *LCtx = C.getLocationContext(); + SVal RetVal = state->getSVal(CE->getArg(0), LCtx); + if (RetVal.isUnknown()) { + // If the receiver is unknown, conjure a return value. + SValBuilder &SVB = C.getSValBuilder(); + unsigned Count = C.getCurrentBlockCount(); + SVal RetVal = SVB.getConjuredSymbolVal(0, CE, LCtx, ResultTy, Count); + } + state = state->BindExpr(CE, LCtx, RetVal, false); + + // FIXME: This should not be necessary, but otherwise the argument seems to be + // considered alive during the next statement. + if (const MemRegion *ArgRegion = RetVal.getAsRegion()) { + // Save the refcount status of the argument. + SymbolRef Sym = RetVal.getAsLocSymbol(); + RefBindings::data_type *Binding = 0; + if (Sym) + Binding = state->get(Sym); + + // Invalidate the argument region. + unsigned Count = C.getCurrentBlockCount(); + state = state->invalidateRegions(ArgRegion, CE, Count, LCtx); + + // Restore the refcount status of the argument. + if (Binding) + state = state->set(Sym, *Binding); + } + + C.addTransition(state); + return true; +} + +//===----------------------------------------------------------------------===// +// Handle return statements. +//===----------------------------------------------------------------------===// + +// Return true if the current LocationContext has no caller context. +static bool inTopFrame(CheckerContext &C) { + const LocationContext *LC = C.getLocationContext(); + return LC->getParent() == 0; +} + +void RetainCountChecker::checkPreStmt(const ReturnStmt *S, + CheckerContext &C) const { + + // Only adjust the reference count if this is the top-level call frame, + // and not the result of inlining. In the future, we should do + // better checking even for inlined calls, and see if they match + // with their expected semantics (e.g., the method should return a retained + // object, etc.). + if (!inTopFrame(C)) + return; + + const Expr *RetE = S->getRetValue(); + if (!RetE) + return; + + ProgramStateRef state = C.getState(); + SymbolRef Sym = + state->getSValAsScalarOrLoc(RetE, C.getLocationContext()).getAsLocSymbol(); + if (!Sym) + return; + + // Get the reference count binding (if any). + const RefVal *T = state->get(Sym); + if (!T) + return; + + // Change the reference count. + RefVal X = *T; + + switch (X.getKind()) { + case RefVal::Owned: { + unsigned cnt = X.getCount(); + assert(cnt > 0); + X.setCount(cnt - 1); + X = X ^ RefVal::ReturnedOwned; + break; + } + + case RefVal::NotOwned: { + unsigned cnt = X.getCount(); + if (cnt) { + X.setCount(cnt - 1); + X = X ^ RefVal::ReturnedOwned; + } + else { + X = X ^ RefVal::ReturnedNotOwned; + } + break; + } + + default: + return; + } + + // Update the binding. + state = state->set(Sym, X); + ExplodedNode *Pred = C.addTransition(state); + + // At this point we have updated the state properly. + // Everything after this is merely checking to see if the return value has + // been over- or under-retained. + + // Did we cache out? + if (!Pred) + return; + + // Update the autorelease counts. + static SimpleProgramPointTag + AutoreleaseTag("RetainCountChecker : Autorelease"); + GenericNodeBuilderRefCount Bd(C, &AutoreleaseTag); + llvm::tie(Pred, state) = handleAutoreleaseCounts(state, Bd, Pred, C, Sym, X); + + // Did we cache out? + if (!Pred) + return; + + // Get the updated binding. + T = state->get(Sym); + assert(T); + X = *T; + + // Consult the summary of the enclosing method. + RetainSummaryManager &Summaries = getSummaryManager(C); + const Decl *CD = &Pred->getCodeDecl(); + + if (const ObjCMethodDecl *MD = dyn_cast(CD)) { + // Unlike regular functions, /all/ ObjC methods are assumed to always + // follow Cocoa retain-count conventions, not just those with special + // names or attributes. + const RetainSummary *Summ = Summaries.getMethodSummary(MD); + RetEffect RE = Summ ? Summ->getRetEffect() : RetEffect::MakeNoRet(); + checkReturnWithRetEffect(S, C, Pred, RE, X, Sym, state); + } + + if (const FunctionDecl *FD = dyn_cast(CD)) { + if (!isa(FD)) + if (const RetainSummary *Summ = Summaries.getSummary(FD)) + checkReturnWithRetEffect(S, C, Pred, Summ->getRetEffect(), X, + Sym, state); + } +} + +void RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S, + CheckerContext &C, + ExplodedNode *Pred, + RetEffect RE, RefVal X, + SymbolRef Sym, + ProgramStateRef state) const { + // Any leaks or other errors? + if (X.isReturnedOwned() && X.getCount() == 0) { + if (RE.getKind() != RetEffect::NoRet) { + bool hasError = false; + if (C.isObjCGCEnabled() && RE.getObjKind() == RetEffect::ObjC) { + // Things are more complicated with garbage collection. If the + // returned object is suppose to be an Objective-C object, we have + // a leak (as the caller expects a GC'ed object) because no + // method should return ownership unless it returns a CF object. + hasError = true; + X = X ^ RefVal::ErrorGCLeakReturned; + } + else if (!RE.isOwned()) { + // Either we are using GC and the returned object is a CF type + // or we aren't using GC. In either case, we expect that the + // enclosing method is expected to return ownership. + hasError = true; + X = X ^ RefVal::ErrorLeakReturned; + } + + if (hasError) { + // Generate an error node. + state = state->set(Sym, X); + + static SimpleProgramPointTag + ReturnOwnLeakTag("RetainCountChecker : ReturnsOwnLeak"); + ExplodedNode *N = C.addTransition(state, Pred, &ReturnOwnLeakTag); + if (N) { + const LangOptions &LOpts = C.getASTContext().getLangOpts(); + bool GCEnabled = C.isObjCGCEnabled(); + CFRefReport *report = + new CFRefLeakReport(*getLeakAtReturnBug(LOpts, GCEnabled), + LOpts, GCEnabled, SummaryLog, + N, Sym, C); + C.EmitReport(report); + } + } + } + } else if (X.isReturnedNotOwned()) { + if (RE.isOwned()) { + // Trying to return a not owned object to a caller expecting an + // owned object. + state = state->set(Sym, X ^ RefVal::ErrorReturnedNotOwned); + + static SimpleProgramPointTag + ReturnNotOwnedTag("RetainCountChecker : ReturnNotOwnedForOwned"); + ExplodedNode *N = C.addTransition(state, Pred, &ReturnNotOwnedTag); + if (N) { + if (!returnNotOwnedForOwned) + returnNotOwnedForOwned.reset(new ReturnedNotOwnedForOwned()); + + CFRefReport *report = + new CFRefReport(*returnNotOwnedForOwned, + C.getASTContext().getLangOpts(), + C.isObjCGCEnabled(), SummaryLog, N, Sym); + C.EmitReport(report); + } + } + } +} + +//===----------------------------------------------------------------------===// +// Check various ways a symbol can be invalidated. +//===----------------------------------------------------------------------===// + +void RetainCountChecker::checkBind(SVal loc, SVal val, const Stmt *S, + CheckerContext &C) const { + // Are we storing to something that causes the value to "escape"? + bool escapes = true; + + // A value escapes in three possible cases (this may change): + // + // (1) we are binding to something that is not a memory region. + // (2) we are binding to a memregion that does not have stack storage + // (3) we are binding to a memregion with stack storage that the store + // does not understand. + ProgramStateRef state = C.getState(); + + if (loc::MemRegionVal *regionLoc = dyn_cast(&loc)) { + escapes = !regionLoc->getRegion()->hasStackStorage(); + + if (!escapes) { + // To test (3), generate a new state with the binding added. If it is + // the same state, then it escapes (since the store cannot represent + // the binding). + escapes = (state == (state->bindLoc(*regionLoc, val))); + } + if (!escapes) { + // Case 4: We do not currently model what happens when a symbol is + // assigned to a struct field, so be conservative here and let the symbol + // go. TODO: This could definitely be improved upon. + escapes = !isa(regionLoc->getRegion()); + } + } + + // If our store can represent the binding and we aren't storing to something + // that doesn't have local storage then just return and have the simulation + // state continue as is. + if (!escapes) + return; + + // Otherwise, find all symbols referenced by 'val' that we are tracking + // and stop tracking them. + state = state->scanReachableSymbols(val).getState(); + C.addTransition(state); +} + +ProgramStateRef RetainCountChecker::evalAssume(ProgramStateRef state, + SVal Cond, + bool Assumption) const { + + // FIXME: We may add to the interface of evalAssume the list of symbols + // whose assumptions have changed. For now we just iterate through the + // bindings and check if any of the tracked symbols are NULL. This isn't + // too bad since the number of symbols we will track in practice are + // probably small and evalAssume is only called at branches and a few + // other places. + RefBindings B = state->get(); + + if (B.isEmpty()) + return state; + + bool changed = false; + RefBindings::Factory &RefBFactory = state->get_context(); + + for (RefBindings::iterator I = B.begin(), E = B.end(); I != E; ++I) { + // Check if the symbol is null (or equal to any constant). + // If this is the case, stop tracking the symbol. + if (state->getSymVal(I.getKey())) { + changed = true; + B = RefBFactory.remove(B, I.getKey()); + } + } + + if (changed) + state = state->set(B); + + return state; +} + +ProgramStateRef +RetainCountChecker::checkRegionChanges(ProgramStateRef state, + const StoreManager::InvalidatedSymbols *invalidated, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const CallOrObjCMessage *Call) const { + if (!invalidated) + return state; + + llvm::SmallPtrSet WhitelistedSymbols; + for (ArrayRef::iterator I = ExplicitRegions.begin(), + E = ExplicitRegions.end(); I != E; ++I) { + if (const SymbolicRegion *SR = (*I)->StripCasts()->getAs()) + WhitelistedSymbols.insert(SR->getSymbol()); + } + + for (StoreManager::InvalidatedSymbols::const_iterator I=invalidated->begin(), + E = invalidated->end(); I!=E; ++I) { + SymbolRef sym = *I; + if (WhitelistedSymbols.count(sym)) + continue; + // Remove any existing reference-count binding. + state = state->remove(sym); + } + return state; +} + +//===----------------------------------------------------------------------===// +// Handle dead symbols and end-of-path. +//===----------------------------------------------------------------------===// + +std::pair +RetainCountChecker::handleAutoreleaseCounts(ProgramStateRef state, + GenericNodeBuilderRefCount Bd, + ExplodedNode *Pred, + CheckerContext &Ctx, + SymbolRef Sym, RefVal V) const { + unsigned ACnt = V.getAutoreleaseCount(); + + // No autorelease counts? Nothing to be done. + if (!ACnt) + return std::make_pair(Pred, state); + + assert(!Ctx.isObjCGCEnabled() && "Autorelease counts in GC mode?"); + unsigned Cnt = V.getCount(); + + // FIXME: Handle sending 'autorelease' to already released object. + + if (V.getKind() == RefVal::ReturnedOwned) + ++Cnt; + + if (ACnt <= Cnt) { + if (ACnt == Cnt) { + V.clearCounts(); + if (V.getKind() == RefVal::ReturnedOwned) + V = V ^ RefVal::ReturnedNotOwned; + else + V = V ^ RefVal::NotOwned; + } else { + V.setCount(Cnt - ACnt); + V.setAutoreleaseCount(0); + } + state = state->set(Sym, V); + ExplodedNode *N = Bd.MakeNode(state, Pred); + if (N == 0) + state = 0; + return std::make_pair(N, state); + } + + // Woah! More autorelease counts then retain counts left. + // Emit hard error. + V = V ^ RefVal::ErrorOverAutorelease; + state = state->set(Sym, V); + + if (ExplodedNode *N = Bd.MakeNode(state, Pred, true)) { + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + os << "Object over-autoreleased: object was sent -autorelease "; + if (V.getAutoreleaseCount() > 1) + os << V.getAutoreleaseCount() << " times "; + os << "but the object has a +" << V.getCount() << " retain count"; + + if (!overAutorelease) + overAutorelease.reset(new OverAutorelease()); + + const LangOptions &LOpts = Ctx.getASTContext().getLangOpts(); + CFRefReport *report = + new CFRefReport(*overAutorelease, LOpts, /* GCEnabled = */ false, + SummaryLog, N, Sym, os.str()); + Ctx.EmitReport(report); + } + + return std::make_pair((ExplodedNode *)0, (ProgramStateRef )0); +} + +ProgramStateRef +RetainCountChecker::handleSymbolDeath(ProgramStateRef state, + SymbolRef sid, RefVal V, + SmallVectorImpl &Leaked) const { + bool hasLeak = false; + if (V.isOwned()) + hasLeak = true; + else if (V.isNotOwned() || V.isReturnedOwned()) + hasLeak = (V.getCount() > 0); + + if (!hasLeak) + return state->remove(sid); + + Leaked.push_back(sid); + return state->set(sid, V ^ RefVal::ErrorLeak); +} + +ExplodedNode * +RetainCountChecker::processLeaks(ProgramStateRef state, + SmallVectorImpl &Leaked, + GenericNodeBuilderRefCount &Builder, + CheckerContext &Ctx, + ExplodedNode *Pred) const { + if (Leaked.empty()) + return Pred; + + // Generate an intermediate node representing the leak point. + ExplodedNode *N = Builder.MakeNode(state, Pred); + + if (N) { + for (SmallVectorImpl::iterator + I = Leaked.begin(), E = Leaked.end(); I != E; ++I) { + + const LangOptions &LOpts = Ctx.getASTContext().getLangOpts(); + bool GCEnabled = Ctx.isObjCGCEnabled(); + CFRefBug *BT = Pred ? getLeakWithinFunctionBug(LOpts, GCEnabled) + : getLeakAtReturnBug(LOpts, GCEnabled); + assert(BT && "BugType not initialized."); + + CFRefLeakReport *report = new CFRefLeakReport(*BT, LOpts, GCEnabled, + SummaryLog, N, *I, Ctx); + Ctx.EmitReport(report); + } + } + + return N; +} + +void RetainCountChecker::checkEndPath(CheckerContext &Ctx) const { + ProgramStateRef state = Ctx.getState(); + GenericNodeBuilderRefCount Bd(Ctx); + RefBindings B = state->get(); + ExplodedNode *Pred = Ctx.getPredecessor(); + + for (RefBindings::iterator I = B.begin(), E = B.end(); I != E; ++I) { + llvm::tie(Pred, state) = handleAutoreleaseCounts(state, Bd, Pred, Ctx, + I->first, I->second); + if (!state) + return; + } + + // If the current LocationContext has a parent, don't check for leaks. + // We will do that later. + // FIXME: we should instead check for imblances of the retain/releases, + // and suggest annotations. + if (Ctx.getLocationContext()->getParent()) + return; + + B = state->get(); + SmallVector Leaked; + + for (RefBindings::iterator I = B.begin(), E = B.end(); I != E; ++I) + state = handleSymbolDeath(state, I->first, I->second, Leaked); + + processLeaks(state, Leaked, Bd, Ctx, Pred); +} + +const ProgramPointTag * +RetainCountChecker::getDeadSymbolTag(SymbolRef sym) const { + const SimpleProgramPointTag *&tag = DeadSymbolTags[sym]; + if (!tag) { + SmallString<64> buf; + llvm::raw_svector_ostream out(buf); + out << "RetainCountChecker : Dead Symbol : "; + sym->dumpToStream(out); + tag = new SimpleProgramPointTag(out.str()); + } + return tag; +} + +void RetainCountChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const { + ExplodedNode *Pred = C.getPredecessor(); + + ProgramStateRef state = C.getState(); + RefBindings B = state->get(); + + // Update counts from autorelease pools + for (SymbolReaper::dead_iterator I = SymReaper.dead_begin(), + E = SymReaper.dead_end(); I != E; ++I) { + SymbolRef Sym = *I; + if (const RefVal *T = B.lookup(Sym)){ + // Use the symbol as the tag. + // FIXME: This might not be as unique as we would like. + GenericNodeBuilderRefCount Bd(C, getDeadSymbolTag(Sym)); + llvm::tie(Pred, state) = handleAutoreleaseCounts(state, Bd, Pred, C, + Sym, *T); + if (!state) + return; + } + } + + B = state->get(); + SmallVector Leaked; + + for (SymbolReaper::dead_iterator I = SymReaper.dead_begin(), + E = SymReaper.dead_end(); I != E; ++I) { + if (const RefVal *T = B.lookup(*I)) + state = handleSymbolDeath(state, *I, *T, Leaked); + } + + { + GenericNodeBuilderRefCount Bd(C, this); + Pred = processLeaks(state, Leaked, Bd, C, Pred); + } + + // Did we cache out? + if (!Pred) + return; + + // Now generate a new node that nukes the old bindings. + RefBindings::Factory &F = state->get_context(); + + for (SymbolReaper::dead_iterator I = SymReaper.dead_begin(), + E = SymReaper.dead_end(); I != E; ++I) + B = F.remove(B, *I); + + state = state->set(B); + C.addTransition(state, Pred); +} + +//===----------------------------------------------------------------------===// +// Debug printing of refcount bindings and autorelease pools. +//===----------------------------------------------------------------------===// + +static void PrintPool(raw_ostream &Out, SymbolRef Sym, + ProgramStateRef State) { + Out << ' '; + if (Sym) + Sym->dumpToStream(Out); + else + Out << ""; + Out << ":{"; + + // Get the contents of the pool. + if (const ARCounts *Cnts = State->get(Sym)) + for (ARCounts::iterator I = Cnts->begin(), E = Cnts->end(); I != E; ++I) + Out << '(' << I.getKey() << ',' << I.getData() << ')'; + + Out << '}'; +} + +static bool UsesAutorelease(ProgramStateRef state) { + // A state uses autorelease if it allocated an autorelease pool or if it has + // objects in the caller's autorelease pool. + return !state->get().isEmpty() || + state->get(SymbolRef()); +} + +void RetainCountChecker::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const { + + RefBindings B = State->get(); + + if (!B.isEmpty()) + Out << Sep << NL; + + for (RefBindings::iterator I = B.begin(), E = B.end(); I != E; ++I) { + Out << I->first << " : "; + I->second.print(Out); + Out << NL; + } + + // Print the autorelease stack. + if (UsesAutorelease(State)) { + Out << Sep << NL << "AR pool stack:"; + ARStack Stack = State->get(); + + PrintPool(Out, SymbolRef(), State); // Print the caller's pool. + for (ARStack::iterator I = Stack.begin(), E = Stack.end(); I != E; ++I) + PrintPool(Out, *I, State); + + Out << NL; + } +} + +//===----------------------------------------------------------------------===// +// Checker registration. +//===----------------------------------------------------------------------===// + +void ento::registerRetainCountChecker(CheckerManager &Mgr) { + Mgr.registerChecker(); +} + diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp new file mode 100644 index 0000000..6e56593 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp @@ -0,0 +1,91 @@ +//== ReturnPointerRangeChecker.cpp ------------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ReturnPointerRangeChecker, which is a path-sensitive check +// which looks for an out-of-bound pointer being returned to callers. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; + +namespace { +class ReturnPointerRangeChecker : + public Checker< check::PreStmt > { + mutable OwningPtr BT; +public: + void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; +}; +} + +void ReturnPointerRangeChecker::checkPreStmt(const ReturnStmt *RS, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + + const Expr *RetE = RS->getRetValue(); + if (!RetE) + return; + + SVal V = state->getSVal(RetE, C.getLocationContext()); + const MemRegion *R = V.getAsRegion(); + + const ElementRegion *ER = dyn_cast_or_null(R); + if (!ER) + return; + + DefinedOrUnknownSVal Idx = cast(ER->getIndex()); + // Zero index is always in bound, this also passes ElementRegions created for + // pointer casts. + if (Idx.isZeroConstant()) + return; + // FIXME: All of this out-of-bounds checking should eventually be refactored + // into a common place. + + DefinedOrUnknownSVal NumElements + = C.getStoreManager().getSizeInElements(state, ER->getSuperRegion(), + ER->getValueType()); + + ProgramStateRef StInBound = state->assumeInBound(Idx, NumElements, true); + ProgramStateRef StOutBound = state->assumeInBound(Idx, NumElements, false); + if (StOutBound && !StInBound) { + ExplodedNode *N = C.generateSink(StOutBound); + + if (!N) + return; + + // FIXME: This bug correspond to CWE-466. Eventually we should have bug + // types explicitly reference such exploit categories (when applicable). + if (!BT) + BT.reset(new BuiltinBug("Return of pointer value outside of expected range", + "Returned pointer value points outside the original object " + "(potential buffer overflow)")); + + // FIXME: It would be nice to eventually make this diagnostic more clear, + // e.g., by referencing the original declaration or by saying *why* this + // reference is outside the range. + + // Generate a report for this bug. + BugReport *report = + new BugReport(*BT, BT->getDescription(), N); + + report->addRange(RetE->getSourceRange()); + C.EmitReport(report); + } +} + +void ento::registerReturnPointerRangeChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp new file mode 100644 index 0000000..7b1f0b1 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp @@ -0,0 +1,65 @@ +//== ReturnUndefChecker.cpp -------------------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ReturnUndefChecker, which is a path-sensitive +// check which looks for undefined or garbage values being returned to the +// caller. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class ReturnUndefChecker : + public Checker< check::PreStmt > { + mutable OwningPtr BT; +public: + void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; +}; +} + +void ReturnUndefChecker::checkPreStmt(const ReturnStmt *RS, + CheckerContext &C) const { + + const Expr *RetE = RS->getRetValue(); + if (!RetE) + return; + + if (!C.getState()->getSVal(RetE, C.getLocationContext()).isUndef()) + return; + + ExplodedNode *N = C.generateSink(); + + if (!N) + return; + + if (!BT) + BT.reset(new BuiltinBug("Garbage return value", + "Undefined or garbage value returned to caller")); + + BugReport *report = + new BugReport(*BT, BT->getDescription(), N); + + report->addRange(RetE->getSourceRange()); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, RetE, + report)); + + C.EmitReport(report); +} + +void ento::registerReturnUndefChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp new file mode 100644 index 0000000..54cf569 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp @@ -0,0 +1,230 @@ +//=== StackAddrEscapeChecker.cpp ----------------------------------*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines stack address leak checker, which checks if an invalid +// stack address is stored into a global or heap location. See CERT DCL30-C. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; +using namespace ento; + +namespace { +class StackAddrEscapeChecker : public Checker< check::PreStmt, + check::EndPath > { + mutable OwningPtr BT_stackleak; + mutable OwningPtr BT_returnstack; + +public: + void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; + void checkEndPath(CheckerContext &Ctx) const; +private: + void EmitStackError(CheckerContext &C, const MemRegion *R, + const Expr *RetE) const; + static SourceRange GenName(raw_ostream &os, const MemRegion *R, + SourceManager &SM); +}; +} + +SourceRange StackAddrEscapeChecker::GenName(raw_ostream &os, + const MemRegion *R, + SourceManager &SM) { + // Get the base region, stripping away fields and elements. + R = R->getBaseRegion(); + SourceRange range; + os << "Address of "; + + // Check if the region is a compound literal. + if (const CompoundLiteralRegion* CR = dyn_cast(R)) { + const CompoundLiteralExpr *CL = CR->getLiteralExpr(); + os << "stack memory associated with a compound literal " + "declared on line " + << SM.getExpansionLineNumber(CL->getLocStart()) + << " returned to caller"; + range = CL->getSourceRange(); + } + else if (const AllocaRegion* AR = dyn_cast(R)) { + const Expr *ARE = AR->getExpr(); + SourceLocation L = ARE->getLocStart(); + range = ARE->getSourceRange(); + os << "stack memory allocated by call to alloca() on line " + << SM.getExpansionLineNumber(L); + } + else if (const BlockDataRegion *BR = dyn_cast(R)) { + const BlockDecl *BD = BR->getCodeRegion()->getDecl(); + SourceLocation L = BD->getLocStart(); + range = BD->getSourceRange(); + os << "stack-allocated block declared on line " + << SM.getExpansionLineNumber(L); + } + else if (const VarRegion *VR = dyn_cast(R)) { + os << "stack memory associated with local variable '" + << VR->getString() << '\''; + range = VR->getDecl()->getSourceRange(); + } + else if (const CXXTempObjectRegion *TOR = dyn_cast(R)) { + os << "stack memory associated with temporary object of type '" + << TOR->getValueType().getAsString() << '\''; + range = TOR->getExpr()->getSourceRange(); + } + else { + llvm_unreachable("Invalid region in ReturnStackAddressChecker."); + } + + return range; +} + +void StackAddrEscapeChecker::EmitStackError(CheckerContext &C, const MemRegion *R, + const Expr *RetE) const { + ExplodedNode *N = C.generateSink(); + + if (!N) + return; + + if (!BT_returnstack) + BT_returnstack.reset( + new BuiltinBug("Return of address to stack-allocated memory")); + + // Generate a report for this bug. + SmallString<512> buf; + llvm::raw_svector_ostream os(buf); + SourceRange range = GenName(os, R, C.getSourceManager()); + os << " returned to caller"; + BugReport *report = new BugReport(*BT_returnstack, os.str(), N); + report->addRange(RetE->getSourceRange()); + if (range.isValid()) + report->addRange(range); + + C.EmitReport(report); +} + +void StackAddrEscapeChecker::checkPreStmt(const ReturnStmt *RS, + CheckerContext &C) const { + + const Expr *RetE = RS->getRetValue(); + if (!RetE) + return; + + SVal V = C.getState()->getSVal(RetE, C.getLocationContext()); + const MemRegion *R = V.getAsRegion(); + + if (!R) + return; + + const StackSpaceRegion *SS = + dyn_cast_or_null(R->getMemorySpace()); + + if (!SS) + return; + + // Return stack memory in an ancestor stack frame is fine. + const StackFrameContext *SFC = SS->getStackFrame(); + if (SFC != C.getLocationContext()->getCurrentStackFrame()) + return; + + // Automatic reference counting automatically copies blocks. + if (C.getASTContext().getLangOpts().ObjCAutoRefCount && + isa(R)) + return; + + EmitStackError(C, R, RetE); +} + +void StackAddrEscapeChecker::checkEndPath(CheckerContext &Ctx) const { + ProgramStateRef state = Ctx.getState(); + + // Iterate over all bindings to global variables and see if it contains + // a memory region in the stack space. + class CallBack : public StoreManager::BindingsHandler { + private: + CheckerContext &Ctx; + const StackFrameContext *CurSFC; + public: + SmallVector, 10> V; + + CallBack(CheckerContext &CC) : + Ctx(CC), + CurSFC(CC.getLocationContext()->getCurrentStackFrame()) + {} + + bool HandleBinding(StoreManager &SMgr, Store store, + const MemRegion *region, SVal val) { + + if (!isa(region->getMemorySpace())) + return true; + + const MemRegion *vR = val.getAsRegion(); + if (!vR) + return true; + + // Under automated retain release, it is okay to assign a block + // directly to a global variable. + if (Ctx.getASTContext().getLangOpts().ObjCAutoRefCount && + isa(vR)) + return true; + + if (const StackSpaceRegion *SSR = + dyn_cast(vR->getMemorySpace())) { + // If the global variable holds a location in the current stack frame, + // record the binding to emit a warning. + if (SSR->getStackFrame() == CurSFC) + V.push_back(std::make_pair(region, vR)); + } + + return true; + } + }; + + CallBack cb(Ctx); + state->getStateManager().getStoreManager().iterBindings(state->getStore(),cb); + + if (cb.V.empty()) + return; + + // Generate an error node. + ExplodedNode *N = Ctx.addTransition(state); + if (!N) + return; + + if (!BT_stackleak) + BT_stackleak.reset( + new BuiltinBug("Stack address stored into global variable", + "Stack address was saved into a global variable. " + "This is dangerous because the address will become " + "invalid after returning from the function")); + + for (unsigned i = 0, e = cb.V.size(); i != e; ++i) { + // Generate a report for this bug. + SmallString<512> buf; + llvm::raw_svector_ostream os(buf); + SourceRange range = GenName(os, cb.V[i].second, + Ctx.getSourceManager()); + os << " is still referred to by the global variable '"; + const VarRegion *VR = cast(cb.V[i].first->getBaseRegion()); + os << *VR->getDecl() + << "' upon returning to the caller. This will be a dangling reference"; + BugReport *report = new BugReport(*BT_stackleak, os.str(), N); + if (range.isValid()) + report->addRange(range); + + Ctx.EmitReport(report); + } +} + +void ento::registerStackAddrEscapeChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp new file mode 100644 index 0000000..3745d4a --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -0,0 +1,475 @@ +//===-- StreamChecker.cpp -----------------------------------------*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines checkers that model and check stream handling functions. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "llvm/ADT/ImmutableMap.h" + +using namespace clang; +using namespace ento; + +namespace { + +struct StreamState { + enum Kind { Opened, Closed, OpenFailed, Escaped } K; + const Stmt *S; + + StreamState(Kind k, const Stmt *s) : K(k), S(s) {} + + bool isOpened() const { return K == Opened; } + bool isClosed() const { return K == Closed; } + //bool isOpenFailed() const { return K == OpenFailed; } + //bool isEscaped() const { return K == Escaped; } + + bool operator==(const StreamState &X) const { + return K == X.K && S == X.S; + } + + static StreamState getOpened(const Stmt *s) { return StreamState(Opened, s); } + static StreamState getClosed(const Stmt *s) { return StreamState(Closed, s); } + static StreamState getOpenFailed(const Stmt *s) { + return StreamState(OpenFailed, s); + } + static StreamState getEscaped(const Stmt *s) { + return StreamState(Escaped, s); + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(K); + ID.AddPointer(S); + } +}; + +class StreamChecker : public Checker > { + mutable IdentifierInfo *II_fopen, *II_tmpfile, *II_fclose, *II_fread, + *II_fwrite, + *II_fseek, *II_ftell, *II_rewind, *II_fgetpos, *II_fsetpos, + *II_clearerr, *II_feof, *II_ferror, *II_fileno; + mutable OwningPtr BT_nullfp, BT_illegalwhence, + BT_doubleclose, BT_ResourceLeak; + +public: + StreamChecker() + : II_fopen(0), II_tmpfile(0) ,II_fclose(0), II_fread(0), II_fwrite(0), + II_fseek(0), II_ftell(0), II_rewind(0), II_fgetpos(0), II_fsetpos(0), + II_clearerr(0), II_feof(0), II_ferror(0), II_fileno(0) {} + + bool evalCall(const CallExpr *CE, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; + void checkEndPath(CheckerContext &Ctx) const; + void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; + +private: + void Fopen(CheckerContext &C, const CallExpr *CE) const; + void Tmpfile(CheckerContext &C, const CallExpr *CE) const; + void Fclose(CheckerContext &C, const CallExpr *CE) const; + void Fread(CheckerContext &C, const CallExpr *CE) const; + void Fwrite(CheckerContext &C, const CallExpr *CE) const; + void Fseek(CheckerContext &C, const CallExpr *CE) const; + void Ftell(CheckerContext &C, const CallExpr *CE) const; + void Rewind(CheckerContext &C, const CallExpr *CE) const; + void Fgetpos(CheckerContext &C, const CallExpr *CE) const; + void Fsetpos(CheckerContext &C, const CallExpr *CE) const; + void Clearerr(CheckerContext &C, const CallExpr *CE) const; + void Feof(CheckerContext &C, const CallExpr *CE) const; + void Ferror(CheckerContext &C, const CallExpr *CE) const; + void Fileno(CheckerContext &C, const CallExpr *CE) const; + + void OpenFileAux(CheckerContext &C, const CallExpr *CE) const; + + ProgramStateRef CheckNullStream(SVal SV, ProgramStateRef state, + CheckerContext &C) const; + ProgramStateRef CheckDoubleClose(const CallExpr *CE, ProgramStateRef state, + CheckerContext &C) const; +}; + +} // end anonymous namespace + +namespace clang { +namespace ento { + template <> + struct ProgramStateTrait + : public ProgramStatePartialTrait > { + static void *GDMIndex() { static int x; return &x; } + }; +} +} + +bool StreamChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return false; + + ASTContext &Ctx = C.getASTContext(); + if (!II_fopen) + II_fopen = &Ctx.Idents.get("fopen"); + if (!II_tmpfile) + II_tmpfile = &Ctx.Idents.get("tmpfile"); + if (!II_fclose) + II_fclose = &Ctx.Idents.get("fclose"); + if (!II_fread) + II_fread = &Ctx.Idents.get("fread"); + if (!II_fwrite) + II_fwrite = &Ctx.Idents.get("fwrite"); + if (!II_fseek) + II_fseek = &Ctx.Idents.get("fseek"); + if (!II_ftell) + II_ftell = &Ctx.Idents.get("ftell"); + if (!II_rewind) + II_rewind = &Ctx.Idents.get("rewind"); + if (!II_fgetpos) + II_fgetpos = &Ctx.Idents.get("fgetpos"); + if (!II_fsetpos) + II_fsetpos = &Ctx.Idents.get("fsetpos"); + if (!II_clearerr) + II_clearerr = &Ctx.Idents.get("clearerr"); + if (!II_feof) + II_feof = &Ctx.Idents.get("feof"); + if (!II_ferror) + II_ferror = &Ctx.Idents.get("ferror"); + if (!II_fileno) + II_fileno = &Ctx.Idents.get("fileno"); + + if (FD->getIdentifier() == II_fopen) { + Fopen(C, CE); + return true; + } + if (FD->getIdentifier() == II_tmpfile) { + Tmpfile(C, CE); + return true; + } + if (FD->getIdentifier() == II_fclose) { + Fclose(C, CE); + return true; + } + if (FD->getIdentifier() == II_fread) { + Fread(C, CE); + return true; + } + if (FD->getIdentifier() == II_fwrite) { + Fwrite(C, CE); + return true; + } + if (FD->getIdentifier() == II_fseek) { + Fseek(C, CE); + return true; + } + if (FD->getIdentifier() == II_ftell) { + Ftell(C, CE); + return true; + } + if (FD->getIdentifier() == II_rewind) { + Rewind(C, CE); + return true; + } + if (FD->getIdentifier() == II_fgetpos) { + Fgetpos(C, CE); + return true; + } + if (FD->getIdentifier() == II_fsetpos) { + Fsetpos(C, CE); + return true; + } + if (FD->getIdentifier() == II_clearerr) { + Clearerr(C, CE); + return true; + } + if (FD->getIdentifier() == II_feof) { + Feof(C, CE); + return true; + } + if (FD->getIdentifier() == II_ferror) { + Ferror(C, CE); + return true; + } + if (FD->getIdentifier() == II_fileno) { + Fileno(C, CE); + return true; + } + + return false; +} + +void StreamChecker::Fopen(CheckerContext &C, const CallExpr *CE) const { + OpenFileAux(C, CE); +} + +void StreamChecker::Tmpfile(CheckerContext &C, const CallExpr *CE) const { + OpenFileAux(C, CE); +} + +void StreamChecker::OpenFileAux(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + unsigned Count = C.getCurrentBlockCount(); + SValBuilder &svalBuilder = C.getSValBuilder(); + const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); + DefinedSVal RetVal = + cast(svalBuilder.getConjuredSymbolVal(0, CE, LCtx, Count)); + state = state->BindExpr(CE, C.getLocationContext(), RetVal); + + ConstraintManager &CM = C.getConstraintManager(); + // Bifurcate the state into two: one with a valid FILE* pointer, the other + // with a NULL. + ProgramStateRef stateNotNull, stateNull; + llvm::tie(stateNotNull, stateNull) = CM.assumeDual(state, RetVal); + + if (SymbolRef Sym = RetVal.getAsSymbol()) { + // if RetVal is not NULL, set the symbol's state to Opened. + stateNotNull = + stateNotNull->set(Sym,StreamState::getOpened(CE)); + stateNull = + stateNull->set(Sym, StreamState::getOpenFailed(CE)); + + C.addTransition(stateNotNull); + C.addTransition(stateNull); + } +} + +void StreamChecker::Fclose(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = CheckDoubleClose(CE, C.getState(), C); + if (state) + C.addTransition(state); +} + +void StreamChecker::Fread(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(3), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Fwrite(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(3), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Fseek(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!(state = CheckNullStream(state->getSVal(CE->getArg(0), + C.getLocationContext()), state, C))) + return; + // Check the legality of the 'whence' argument of 'fseek'. + SVal Whence = state->getSVal(CE->getArg(2), C.getLocationContext()); + const nonloc::ConcreteInt *CI = dyn_cast(&Whence); + + if (!CI) + return; + + int64_t x = CI->getValue().getSExtValue(); + if (x >= 0 && x <= 2) + return; + + if (ExplodedNode *N = C.addTransition(state)) { + if (!BT_illegalwhence) + BT_illegalwhence.reset(new BuiltinBug("Illegal whence argument", + "The whence argument to fseek() should be " + "SEEK_SET, SEEK_END, or SEEK_CUR.")); + BugReport *R = new BugReport(*BT_illegalwhence, + BT_illegalwhence->getDescription(), N); + C.EmitReport(R); + } +} + +void StreamChecker::Ftell(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Rewind(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Fgetpos(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Fsetpos(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Clearerr(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Feof(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Ferror(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()), + state, C)) + return; +} + +void StreamChecker::Fileno(CheckerContext &C, const CallExpr *CE) const { + ProgramStateRef state = C.getState(); + if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()), + state, C)) + return; +} + +ProgramStateRef StreamChecker::CheckNullStream(SVal SV, ProgramStateRef state, + CheckerContext &C) const { + const DefinedSVal *DV = dyn_cast(&SV); + if (!DV) + return 0; + + ConstraintManager &CM = C.getConstraintManager(); + ProgramStateRef stateNotNull, stateNull; + llvm::tie(stateNotNull, stateNull) = CM.assumeDual(state, *DV); + + if (!stateNotNull && stateNull) { + if (ExplodedNode *N = C.generateSink(stateNull)) { + if (!BT_nullfp) + BT_nullfp.reset(new BuiltinBug("NULL stream pointer", + "Stream pointer might be NULL.")); + BugReport *R =new BugReport(*BT_nullfp, BT_nullfp->getDescription(), N); + C.EmitReport(R); + } + return 0; + } + return stateNotNull; +} + +ProgramStateRef StreamChecker::CheckDoubleClose(const CallExpr *CE, + ProgramStateRef state, + CheckerContext &C) const { + SymbolRef Sym = + state->getSVal(CE->getArg(0), C.getLocationContext()).getAsSymbol(); + if (!Sym) + return state; + + const StreamState *SS = state->get(Sym); + + // If the file stream is not tracked, return. + if (!SS) + return state; + + // Check: Double close a File Descriptor could cause undefined behaviour. + // Conforming to man-pages + if (SS->isClosed()) { + ExplodedNode *N = C.generateSink(); + if (N) { + if (!BT_doubleclose) + BT_doubleclose.reset(new BuiltinBug("Double fclose", + "Try to close a file Descriptor already" + " closed. Cause undefined behaviour.")); + BugReport *R = new BugReport(*BT_doubleclose, + BT_doubleclose->getDescription(), N); + C.EmitReport(R); + } + return NULL; + } + + // Close the File Descriptor. + return state->set(Sym, StreamState::getClosed(CE)); +} + +void StreamChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const { + for (SymbolReaper::dead_iterator I = SymReaper.dead_begin(), + E = SymReaper.dead_end(); I != E; ++I) { + SymbolRef Sym = *I; + ProgramStateRef state = C.getState(); + const StreamState *SS = state->get(Sym); + if (!SS) + return; + + if (SS->isOpened()) { + ExplodedNode *N = C.generateSink(); + if (N) { + if (!BT_ResourceLeak) + BT_ResourceLeak.reset(new BuiltinBug("Resource Leak", + "Opened File never closed. Potential Resource leak.")); + BugReport *R = new BugReport(*BT_ResourceLeak, + BT_ResourceLeak->getDescription(), N); + C.EmitReport(R); + } + } + } +} + +void StreamChecker::checkEndPath(CheckerContext &Ctx) const { + ProgramStateRef state = Ctx.getState(); + typedef llvm::ImmutableMap SymMap; + SymMap M = state->get(); + + for (SymMap::iterator I = M.begin(), E = M.end(); I != E; ++I) { + StreamState SS = I->second; + if (SS.isOpened()) { + ExplodedNode *N = Ctx.addTransition(state); + if (N) { + if (!BT_ResourceLeak) + BT_ResourceLeak.reset(new BuiltinBug("Resource Leak", + "Opened File never closed. Potential Resource leak.")); + BugReport *R = new BugReport(*BT_ResourceLeak, + BT_ResourceLeak->getDescription(), N); + Ctx.EmitReport(R); + } + } + } +} + +void StreamChecker::checkPreStmt(const ReturnStmt *S, CheckerContext &C) const { + const Expr *RetE = S->getRetValue(); + if (!RetE) + return; + + ProgramStateRef state = C.getState(); + SymbolRef Sym = state->getSVal(RetE, C.getLocationContext()).getAsSymbol(); + + if (!Sym) + return; + + const StreamState *SS = state->get(Sym); + if(!SS) + return; + + if (SS->isOpened()) + state = state->set(Sym, StreamState::getEscaped(S)); + + C.addTransition(state); +} + +void ento::registerStreamChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp new file mode 100644 index 0000000..1133682 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp @@ -0,0 +1,62 @@ +//== TaintTesterChecker.cpp ----------------------------------- -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This checker can be used for testing how taint data is propagated. +// +//===----------------------------------------------------------------------===// +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class TaintTesterChecker : public Checker< check::PostStmt > { + + mutable OwningPtr BT; + void initBugType() const; + + /// Given a pointer argument, get the symbol of the value it contains + /// (points to). + SymbolRef getPointedToSymbol(CheckerContext &C, + const Expr* Arg, + bool IssueWarning = true) const; + +public: + void checkPostStmt(const Expr *E, CheckerContext &C) const; +}; +} + +inline void TaintTesterChecker::initBugType() const { + if (!BT) + BT.reset(new BugType("Tainted data", "General")); +} + +void TaintTesterChecker::checkPostStmt(const Expr *E, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + if (!State) + return; + + if (State->isTainted(E, C.getLocationContext())) { + if (ExplodedNode *N = C.addTransition()) { + initBugType(); + BugReport *report = new BugReport(*BT, "tainted",N); + report->addRange(E->getSourceRange()); + C.EmitReport(report); + } + } +} + +void ento::registerTaintTesterChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp new file mode 100644 index 0000000..a30f6d5 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp @@ -0,0 +1,112 @@ +//=== UndefBranchChecker.cpp -----------------------------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines UndefBranchChecker, which checks for undefined branch +// condition. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { + +class UndefBranchChecker : public Checker { + mutable OwningPtr BT; + + struct FindUndefExpr { + ProgramStateRef St; + const LocationContext *LCtx; + + FindUndefExpr(ProgramStateRef S, const LocationContext *L) + : St(S), LCtx(L) {} + + const Expr *FindExpr(const Expr *Ex) { + if (!MatchesCriteria(Ex)) + return 0; + + for (Stmt::const_child_iterator I = Ex->child_begin(), + E = Ex->child_end();I!=E;++I) + if (const Expr *ExI = dyn_cast_or_null(*I)) { + const Expr *E2 = FindExpr(ExI); + if (E2) return E2; + } + + return Ex; + } + + bool MatchesCriteria(const Expr *Ex) { + return St->getSVal(Ex, LCtx).isUndef(); + } + }; + +public: + void checkBranchCondition(const Stmt *Condition, CheckerContext &Ctx) const; +}; + +} + +void UndefBranchChecker::checkBranchCondition(const Stmt *Condition, + CheckerContext &Ctx) const { + SVal X = Ctx.getState()->getSVal(Condition, Ctx.getLocationContext()); + if (X.isUndef()) { + // Generate a sink node, which implicitly marks both outgoing branches as + // infeasible. + ExplodedNode *N = Ctx.generateSink(); + if (N) { + if (!BT) + BT.reset( + new BuiltinBug("Branch condition evaluates to a garbage value")); + + // What's going on here: we want to highlight the subexpression of the + // condition that is the most likely source of the "uninitialized + // branch condition." We do a recursive walk of the condition's + // subexpressions and roughly look for the most nested subexpression + // that binds to Undefined. We then highlight that expression's range. + + // Get the predecessor node and check if is a PostStmt with the Stmt + // being the terminator condition. We want to inspect the state + // of that node instead because it will contain main information about + // the subexpressions. + + // Note: any predecessor will do. They should have identical state, + // since all the BlockEdge did was act as an error sink since the value + // had to already be undefined. + assert (!N->pred_empty()); + const Expr *Ex = cast(Condition); + ExplodedNode *PrevN = *N->pred_begin(); + ProgramPoint P = PrevN->getLocation(); + ProgramStateRef St = N->getState(); + + if (PostStmt *PS = dyn_cast(&P)) + if (PS->getStmt() == Ex) + St = PrevN->getState(); + + FindUndefExpr FindIt(St, Ctx.getLocationContext()); + Ex = FindIt.FindExpr(Ex); + + // Emit the bug report. + BugReport *R = new BugReport(*BT, BT->getDescription(), N); + R->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, Ex, R)); + R->addRange(Ex->getSourceRange()); + + Ctx.EmitReport(R); + } + } +} + +void ento::registerUndefBranchChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp new file mode 100644 index 0000000..d57767e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp @@ -0,0 +1,105 @@ +// UndefCapturedBlockVarChecker.cpp - Uninitialized captured vars -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This checker detects blocks that capture uninitialized values. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class UndefCapturedBlockVarChecker + : public Checker< check::PostStmt > { + mutable OwningPtr BT; + +public: + void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const; +}; +} // end anonymous namespace + +static const DeclRefExpr *FindBlockDeclRefExpr(const Stmt *S, + const VarDecl *VD) { + if (const DeclRefExpr *BR = dyn_cast(S)) + if (BR->getDecl() == VD) + return BR; + + for (Stmt::const_child_iterator I = S->child_begin(), E = S->child_end(); + I!=E; ++I) + if (const Stmt *child = *I) { + const DeclRefExpr *BR = FindBlockDeclRefExpr(child, VD); + if (BR) + return BR; + } + + return NULL; +} + +void +UndefCapturedBlockVarChecker::checkPostStmt(const BlockExpr *BE, + CheckerContext &C) const { + if (!BE->getBlockDecl()->hasCaptures()) + return; + + ProgramStateRef state = C.getState(); + const BlockDataRegion *R = + cast(state->getSVal(BE, + C.getLocationContext()).getAsRegion()); + + BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(), + E = R->referenced_vars_end(); + + for (; I != E; ++I) { + // This VarRegion is the region associated with the block; we need + // the one associated with the encompassing context. + const VarRegion *VR = *I; + const VarDecl *VD = VR->getDecl(); + + if (VD->getAttr() || !VD->hasLocalStorage()) + continue; + + // Get the VarRegion associated with VD in the local stack frame. + const LocationContext *LC = C.getLocationContext(); + VR = C.getSValBuilder().getRegionManager().getVarRegion(VD, LC); + SVal VRVal = state->getSVal(VR); + + if (VRVal.isUndef()) + if (ExplodedNode *N = C.generateSink()) { + if (!BT) + BT.reset(new BuiltinBug("uninitialized variable captured by block")); + + // Generate a bug report. + SmallString<128> buf; + llvm::raw_svector_ostream os(buf); + + os << "Variable '" << VD->getName() + << "' is uninitialized when captured by block"; + + BugReport *R = new BugReport(*BT, os.str(), N); + if (const Expr *Ex = FindBlockDeclRefExpr(BE->getBody(), VD)) + R->addRange(Ex->getSourceRange()); + R->addVisitor(new FindLastStoreBRVisitor(VRVal, VR)); + // need location of block + C.EmitReport(R); + } + } +} + +void ento::registerUndefCapturedBlockVarChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp new file mode 100644 index 0000000..c3c9ed7 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp @@ -0,0 +1,91 @@ +//=== UndefResultChecker.cpp ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines UndefResultChecker, a builtin check in ExprEngine that +// performs checks for undefined results of non-assignment binary operators. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { +class UndefResultChecker + : public Checker< check::PostStmt > { + + mutable OwningPtr BT; + +public: + void checkPostStmt(const BinaryOperator *B, CheckerContext &C) const; +}; +} // end anonymous namespace + +void UndefResultChecker::checkPostStmt(const BinaryOperator *B, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + if (state->getSVal(B, LCtx).isUndef()) { + // Generate an error node. + ExplodedNode *N = C.generateSink(); + if (!N) + return; + + if (!BT) + BT.reset(new BuiltinBug("Result of operation is garbage or undefined")); + + SmallString<256> sbuf; + llvm::raw_svector_ostream OS(sbuf); + const Expr *Ex = NULL; + bool isLeft = true; + + if (state->getSVal(B->getLHS(), LCtx).isUndef()) { + Ex = B->getLHS()->IgnoreParenCasts(); + isLeft = true; + } + else if (state->getSVal(B->getRHS(), LCtx).isUndef()) { + Ex = B->getRHS()->IgnoreParenCasts(); + isLeft = false; + } + + if (Ex) { + OS << "The " << (isLeft ? "left" : "right") + << " operand of '" + << BinaryOperator::getOpcodeStr(B->getOpcode()) + << "' is a garbage value"; + } + else { + // Neither operand was undefined, but the result is undefined. + OS << "The result of the '" + << BinaryOperator::getOpcodeStr(B->getOpcode()) + << "' expression is undefined"; + } + BugReport *report = new BugReport(*BT, OS.str(), N); + if (Ex) { + report->addRange(Ex->getSourceRange()); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, Ex, + report)); + } + else + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, B, + report)); + C.EmitReport(report); + } +} + +void ento::registerUndefResultChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp new file mode 100644 index 0000000..0297c4e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp @@ -0,0 +1,55 @@ +//===--- UndefinedArraySubscriptChecker.h ----------------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines UndefinedArraySubscriptChecker, a builtin check in ExprEngine +// that performs checks for undefined array subscripts. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class UndefinedArraySubscriptChecker + : public Checker< check::PreStmt > { + mutable OwningPtr BT; + +public: + void checkPreStmt(const ArraySubscriptExpr *A, CheckerContext &C) const; +}; +} // end anonymous namespace + +void +UndefinedArraySubscriptChecker::checkPreStmt(const ArraySubscriptExpr *A, + CheckerContext &C) const { + if (C.getState()->getSVal(A->getIdx(), C.getLocationContext()).isUndef()) { + if (ExplodedNode *N = C.generateSink()) { + if (!BT) + BT.reset(new BuiltinBug("Array subscript is undefined")); + + // Generate a report for this bug. + BugReport *R = new BugReport(*BT, BT->getName(), N); + R->addRange(A->getIdx()->getSourceRange()); + R->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, + A->getIdx(), + R)); + C.EmitReport(R); + } + } +} + +void ento::registerUndefinedArraySubscriptChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp new file mode 100644 index 0000000..78f7fa6 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp @@ -0,0 +1,88 @@ +//===--- UndefinedAssignmentChecker.h ---------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines UndefinedAssignmentChecker, a builtin check in ExprEngine that +// checks for assigning undefined values. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +using namespace clang; +using namespace ento; + +namespace { +class UndefinedAssignmentChecker + : public Checker { + mutable OwningPtr BT; + +public: + void checkBind(SVal location, SVal val, const Stmt *S, + CheckerContext &C) const; +}; +} + +void UndefinedAssignmentChecker::checkBind(SVal location, SVal val, + const Stmt *StoreE, + CheckerContext &C) const { + if (!val.isUndef()) + return; + + ExplodedNode *N = C.generateSink(); + + if (!N) + return; + + const char *str = "Assigned value is garbage or undefined"; + + if (!BT) + BT.reset(new BuiltinBug(str)); + + // Generate a report for this bug. + const Expr *ex = 0; + + while (StoreE) { + if (const BinaryOperator *B = dyn_cast(StoreE)) { + if (B->isCompoundAssignmentOp()) { + ProgramStateRef state = C.getState(); + if (state->getSVal(B->getLHS(), C.getLocationContext()).isUndef()) { + str = "The left expression of the compound assignment is an " + "uninitialized value. The computed value will also be garbage"; + ex = B->getLHS(); + break; + } + } + + ex = B->getRHS(); + break; + } + + if (const DeclStmt *DS = dyn_cast(StoreE)) { + const VarDecl *VD = dyn_cast(DS->getSingleDecl()); + ex = VD->getInit(); + } + + break; + } + + BugReport *R = new BugReport(*BT, str, N); + if (ex) { + R->addRange(ex->getSourceRange()); + R->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, ex, R)); + } + C.EmitReport(R); +} + +void ento::registerUndefinedAssignmentChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp new file mode 100644 index 0000000..60e665f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp @@ -0,0 +1,353 @@ +//= UnixAPIChecker.h - Checks preconditions for various Unix APIs --*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines UnixAPIChecker, which is an assortment of checks on calls +// to various, widely used UNIX/Posix functions. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include + +using namespace clang; +using namespace ento; +using llvm::Optional; + +namespace { +class UnixAPIChecker : public Checker< check::PreStmt > { + mutable OwningPtr BT_open, BT_pthreadOnce, BT_mallocZero; + mutable Optional Val_O_CREAT; + +public: + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + + void CheckOpen(CheckerContext &C, const CallExpr *CE) const; + void CheckPthreadOnce(CheckerContext &C, const CallExpr *CE) const; + void CheckCallocZero(CheckerContext &C, const CallExpr *CE) const; + void CheckMallocZero(CheckerContext &C, const CallExpr *CE) const; + void CheckReallocZero(CheckerContext &C, const CallExpr *CE) const; + void CheckAllocaZero(CheckerContext &C, const CallExpr *CE) const; + void CheckVallocZero(CheckerContext &C, const CallExpr *CE) const; + + typedef void (UnixAPIChecker::*SubChecker)(CheckerContext &, + const CallExpr *) const; +private: + bool ReportZeroByteAllocation(CheckerContext &C, + ProgramStateRef falseState, + const Expr *arg, + const char *fn_name) const; + void BasicAllocationCheck(CheckerContext &C, + const CallExpr *CE, + const unsigned numArgs, + const unsigned sizeArg, + const char *fn) const; +}; +} //end anonymous namespace + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +static inline void LazyInitialize(OwningPtr &BT, + const char *name) { + if (BT) + return; + BT.reset(new BugType(name, categories::UnixAPI)); +} + +//===----------------------------------------------------------------------===// +// "open" (man 2 open) +//===----------------------------------------------------------------------===// + +void UnixAPIChecker::CheckOpen(CheckerContext &C, const CallExpr *CE) const { + // The definition of O_CREAT is platform specific. We need a better way + // of querying this information from the checking environment. + if (!Val_O_CREAT.hasValue()) { + if (C.getASTContext().getTargetInfo().getTriple().getVendor() + == llvm::Triple::Apple) + Val_O_CREAT = 0x0200; + else { + // FIXME: We need a more general way of getting the O_CREAT value. + // We could possibly grovel through the preprocessor state, but + // that would require passing the Preprocessor object to the ExprEngine. + return; + } + } + + // Look at the 'oflags' argument for the O_CREAT flag. + ProgramStateRef state = C.getState(); + + if (CE->getNumArgs() < 2) { + // The frontend should issue a warning for this case, so this is a sanity + // check. + return; + } + + // Now check if oflags has O_CREAT set. + const Expr *oflagsEx = CE->getArg(1); + const SVal V = state->getSVal(oflagsEx, C.getLocationContext()); + if (!isa(V)) { + // The case where 'V' can be a location can only be due to a bad header, + // so in this case bail out. + return; + } + NonLoc oflags = cast(V); + NonLoc ocreateFlag = + cast(C.getSValBuilder().makeIntVal(Val_O_CREAT.getValue(), + oflagsEx->getType())); + SVal maskedFlagsUC = C.getSValBuilder().evalBinOpNN(state, BO_And, + oflags, ocreateFlag, + oflagsEx->getType()); + if (maskedFlagsUC.isUnknownOrUndef()) + return; + DefinedSVal maskedFlags = cast(maskedFlagsUC); + + // Check if maskedFlags is non-zero. + ProgramStateRef trueState, falseState; + llvm::tie(trueState, falseState) = state->assume(maskedFlags); + + // Only emit an error if the value of 'maskedFlags' is properly + // constrained; + if (!(trueState && !falseState)) + return; + + if (CE->getNumArgs() < 3) { + ExplodedNode *N = C.generateSink(trueState); + if (!N) + return; + + LazyInitialize(BT_open, "Improper use of 'open'"); + + BugReport *report = + new BugReport(*BT_open, + "Call to 'open' requires a third argument when " + "the 'O_CREAT' flag is set", N); + report->addRange(oflagsEx->getSourceRange()); + C.EmitReport(report); + } +} + +//===----------------------------------------------------------------------===// +// pthread_once +//===----------------------------------------------------------------------===// + +void UnixAPIChecker::CheckPthreadOnce(CheckerContext &C, + const CallExpr *CE) const { + + // This is similar to 'CheckDispatchOnce' in the MacOSXAPIChecker. + // They can possibly be refactored. + + if (CE->getNumArgs() < 1) + return; + + // Check if the first argument is stack allocated. If so, issue a warning + // because that's likely to be bad news. + ProgramStateRef state = C.getState(); + const MemRegion *R = + state->getSVal(CE->getArg(0), C.getLocationContext()).getAsRegion(); + if (!R || !isa(R->getMemorySpace())) + return; + + ExplodedNode *N = C.generateSink(state); + if (!N) + return; + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + os << "Call to 'pthread_once' uses"; + if (const VarRegion *VR = dyn_cast(R)) + os << " the local variable '" << VR->getDecl()->getName() << '\''; + else + os << " stack allocated memory"; + os << " for the \"control\" value. Using such transient memory for " + "the control value is potentially dangerous."; + if (isa(R) && isa(R->getMemorySpace())) + os << " Perhaps you intended to declare the variable as 'static'?"; + + LazyInitialize(BT_pthreadOnce, "Improper use of 'pthread_once'"); + + BugReport *report = new BugReport(*BT_pthreadOnce, os.str(), N); + report->addRange(CE->getArg(0)->getSourceRange()); + C.EmitReport(report); +} + +//===----------------------------------------------------------------------===// +// "calloc", "malloc", "realloc", "alloca" and "valloc" with allocation size 0 +//===----------------------------------------------------------------------===// +// FIXME: Eventually these should be rolled into the MallocChecker, but right now +// they're more basic and valuable for widespread use. + +// Returns true if we try to do a zero byte allocation, false otherwise. +// Fills in trueState and falseState. +static bool IsZeroByteAllocation(ProgramStateRef state, + const SVal argVal, + ProgramStateRef *trueState, + ProgramStateRef *falseState) { + llvm::tie(*trueState, *falseState) = + state->assume(cast(argVal)); + + return (*falseState && !*trueState); +} + +// Generates an error report, indicating that the function whose name is given +// will perform a zero byte allocation. +// Returns false if an error occured, true otherwise. +bool UnixAPIChecker::ReportZeroByteAllocation(CheckerContext &C, + ProgramStateRef falseState, + const Expr *arg, + const char *fn_name) const { + ExplodedNode *N = C.generateSink(falseState); + if (!N) + return false; + + LazyInitialize(BT_mallocZero, + "Undefined allocation of 0 bytes (CERT MEM04-C; CWE-131)"); + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + os << "Call to '" << fn_name << "' has an allocation size of 0 bytes"; + BugReport *report = new BugReport(*BT_mallocZero, os.str(), N); + + report->addRange(arg->getSourceRange()); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, arg, + report)); + C.EmitReport(report); + + return true; +} + +// Does a basic check for 0-sized allocations suitable for most of the below +// functions (modulo "calloc") +void UnixAPIChecker::BasicAllocationCheck(CheckerContext &C, + const CallExpr *CE, + const unsigned numArgs, + const unsigned sizeArg, + const char *fn) const { + // Sanity check for the correct number of arguments + if (CE->getNumArgs() != numArgs) + return; + + // Check if the allocation size is 0. + ProgramStateRef state = C.getState(); + ProgramStateRef trueState = NULL, falseState = NULL; + const Expr *arg = CE->getArg(sizeArg); + SVal argVal = state->getSVal(arg, C.getLocationContext()); + + if (argVal.isUnknownOrUndef()) + return; + + // Is the value perfectly constrained to zero? + if (IsZeroByteAllocation(state, argVal, &trueState, &falseState)) { + (void) ReportZeroByteAllocation(C, falseState, arg, fn); + return; + } + // Assume the the value is non-zero going forward. + assert(trueState); + if (trueState != state) + C.addTransition(trueState); +} + +void UnixAPIChecker::CheckCallocZero(CheckerContext &C, + const CallExpr *CE) const { + unsigned int nArgs = CE->getNumArgs(); + if (nArgs != 2) + return; + + ProgramStateRef state = C.getState(); + ProgramStateRef trueState = NULL, falseState = NULL; + + unsigned int i; + for (i = 0; i < nArgs; i++) { + const Expr *arg = CE->getArg(i); + SVal argVal = state->getSVal(arg, C.getLocationContext()); + if (argVal.isUnknownOrUndef()) { + if (i == 0) + continue; + else + return; + } + + if (IsZeroByteAllocation(state, argVal, &trueState, &falseState)) { + if (ReportZeroByteAllocation(C, falseState, arg, "calloc")) + return; + else if (i == 0) + continue; + else + return; + } + } + + // Assume the the value is non-zero going forward. + assert(trueState); + if (trueState != state) + C.addTransition(trueState); +} + +void UnixAPIChecker::CheckMallocZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 1, 0, "malloc"); +} + +void UnixAPIChecker::CheckReallocZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 2, 1, "realloc"); +} + +void UnixAPIChecker::CheckAllocaZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 1, 0, "alloca"); +} + +void UnixAPIChecker::CheckVallocZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 1, 0, "valloc"); +} + + +//===----------------------------------------------------------------------===// +// Central dispatch function. +//===----------------------------------------------------------------------===// + +void UnixAPIChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + StringRef FName = C.getCalleeName(CE); + if (FName.empty()) + return; + + SubChecker SC = + llvm::StringSwitch(FName) + .Case("open", &UnixAPIChecker::CheckOpen) + .Case("pthread_once", &UnixAPIChecker::CheckPthreadOnce) + .Case("calloc", &UnixAPIChecker::CheckCallocZero) + .Case("malloc", &UnixAPIChecker::CheckMallocZero) + .Case("realloc", &UnixAPIChecker::CheckReallocZero) + .Cases("alloca", "__builtin_alloca", &UnixAPIChecker::CheckAllocaZero) + .Case("valloc", &UnixAPIChecker::CheckVallocZero) + .Default(NULL); + + if (SC) + (this->*SC)(C, CE); +} + +//===----------------------------------------------------------------------===// +// Registration. +//===----------------------------------------------------------------------===// + +void ento::registerUnixAPIChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp new file mode 100644 index 0000000..5a13ed0 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp @@ -0,0 +1,247 @@ +//==- UnreachableCodeChecker.cpp - Generalized dead code checker -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file implements a generalized unreachable code checker using a +// path-sensitive analysis. We mark any path visited, and then walk the CFG as a +// post-analysis to determine what was never visited. +// +// A similar flow-sensitive only check exists in Analysis/ReachableCode.cpp +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/AST/ParentMap.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallSet.h" + +// The number of CFGBlock pointers we want to reserve memory for. This is used +// once for each function we analyze. +#define DEFAULT_CFGBLOCKS 256 + +using namespace clang; +using namespace ento; + +namespace { +class UnreachableCodeChecker : public Checker { +public: + void checkEndAnalysis(ExplodedGraph &G, BugReporter &B, + ExprEngine &Eng) const; +private: + typedef llvm::SmallSet CFGBlocksSet; + + static inline const Stmt *getUnreachableStmt(const CFGBlock *CB); + static void FindUnreachableEntryPoints(const CFGBlock *CB, + CFGBlocksSet &reachable, + CFGBlocksSet &visited); + static bool isInvalidPath(const CFGBlock *CB, const ParentMap &PM); + static inline bool isEmptyCFGBlock(const CFGBlock *CB); +}; +} + +void UnreachableCodeChecker::checkEndAnalysis(ExplodedGraph &G, + BugReporter &B, + ExprEngine &Eng) const { + CFGBlocksSet reachable, visited; + + if (Eng.hasWorkRemaining()) + return; + + const Decl *D = 0; + CFG *C = 0; + ParentMap *PM = 0; + const LocationContext *LC = 0; + // Iterate over ExplodedGraph + for (ExplodedGraph::node_iterator I = G.nodes_begin(), E = G.nodes_end(); + I != E; ++I) { + const ProgramPoint &P = I->getLocation(); + LC = P.getLocationContext(); + + if (!D) + D = LC->getAnalysisDeclContext()->getDecl(); + // Save the CFG if we don't have it already + if (!C) + C = LC->getAnalysisDeclContext()->getUnoptimizedCFG(); + if (!PM) + PM = &LC->getParentMap(); + + if (const BlockEntrance *BE = dyn_cast(&P)) { + const CFGBlock *CB = BE->getBlock(); + reachable.insert(CB->getBlockID()); + } + } + + // Bail out if we didn't get the CFG or the ParentMap. + if (!D || !C || !PM) + return; + + // Don't do anything for template instantiations. Proving that code + // in a template instantiation is unreachable means proving that it is + // unreachable in all instantiations. + if (const FunctionDecl *FD = dyn_cast(D)) + if (FD->isTemplateInstantiation()) + return; + + // Find CFGBlocks that were not covered by any node + for (CFG::const_iterator I = C->begin(), E = C->end(); I != E; ++I) { + const CFGBlock *CB = *I; + // Check if the block is unreachable + if (reachable.count(CB->getBlockID())) + continue; + + // Check if the block is empty (an artificial block) + if (isEmptyCFGBlock(CB)) + continue; + + // Find the entry points for this block + if (!visited.count(CB->getBlockID())) + FindUnreachableEntryPoints(CB, reachable, visited); + + // This block may have been pruned; check if we still want to report it + if (reachable.count(CB->getBlockID())) + continue; + + // Check for false positives + if (CB->size() > 0 && isInvalidPath(CB, *PM)) + continue; + + // It is good practice to always have a "default" label in a "switch", even + // if we should never get there. It can be used to detect errors, for + // instance. Unreachable code directly under a "default" label is therefore + // likely to be a false positive. + if (const Stmt *label = CB->getLabel()) + if (label->getStmtClass() == Stmt::DefaultStmtClass) + continue; + + // Special case for __builtin_unreachable. + // FIXME: This should be extended to include other unreachable markers, + // such as llvm_unreachable. + if (!CB->empty()) { + bool foundUnreachable = false; + for (CFGBlock::const_iterator ci = CB->begin(), ce = CB->end(); + ci != ce; ++ci) { + if (const CFGStmt *S = (*ci).getAs()) + if (const CallExpr *CE = dyn_cast(S->getStmt())) { + if (CE->isBuiltinCall() == Builtin::BI__builtin_unreachable) { + foundUnreachable = true; + break; + } + } + } + if (foundUnreachable) + continue; + } + + // We found a block that wasn't covered - find the statement to report + SourceRange SR; + PathDiagnosticLocation DL; + SourceLocation SL; + if (const Stmt *S = getUnreachableStmt(CB)) { + SR = S->getSourceRange(); + DL = PathDiagnosticLocation::createBegin(S, B.getSourceManager(), LC); + SL = DL.asLocation(); + if (SR.isInvalid() || !SL.isValid()) + continue; + } + else + continue; + + // Check if the SourceLocation is in a system header + const SourceManager &SM = B.getSourceManager(); + if (SM.isInSystemHeader(SL) || SM.isInExternCSystemHeader(SL)) + continue; + + B.EmitBasicReport(D, "Unreachable code", "Dead code", + "This statement is never executed", DL, SR); + } +} + +// Recursively finds the entry point(s) for this dead CFGBlock. +void UnreachableCodeChecker::FindUnreachableEntryPoints(const CFGBlock *CB, + CFGBlocksSet &reachable, + CFGBlocksSet &visited) { + visited.insert(CB->getBlockID()); + + for (CFGBlock::const_pred_iterator I = CB->pred_begin(), E = CB->pred_end(); + I != E; ++I) { + if (!reachable.count((*I)->getBlockID())) { + // If we find an unreachable predecessor, mark this block as reachable so + // we don't report this block + reachable.insert(CB->getBlockID()); + if (!visited.count((*I)->getBlockID())) + // If we haven't previously visited the unreachable predecessor, recurse + FindUnreachableEntryPoints(*I, reachable, visited); + } + } +} + +// Find the Stmt* in a CFGBlock for reporting a warning +const Stmt *UnreachableCodeChecker::getUnreachableStmt(const CFGBlock *CB) { + for (CFGBlock::const_iterator I = CB->begin(), E = CB->end(); I != E; ++I) { + if (const CFGStmt *S = I->getAs()) + return S->getStmt(); + } + if (const Stmt *S = CB->getTerminator()) + return S; + else + return 0; +} + +// Determines if the path to this CFGBlock contained an element that infers this +// block is a false positive. We assume that FindUnreachableEntryPoints has +// already marked only the entry points to any dead code, so we need only to +// find the condition that led to this block (the predecessor of this block.) +// There will never be more than one predecessor. +bool UnreachableCodeChecker::isInvalidPath(const CFGBlock *CB, + const ParentMap &PM) { + // We only expect a predecessor size of 0 or 1. If it is >1, then an external + // condition has broken our assumption (for example, a sink being placed by + // another check). In these cases, we choose not to report. + if (CB->pred_size() > 1) + return true; + + // If there are no predecessors, then this block is trivially unreachable + if (CB->pred_size() == 0) + return false; + + const CFGBlock *pred = *CB->pred_begin(); + + // Get the predecessor block's terminator conditon + const Stmt *cond = pred->getTerminatorCondition(); + + //assert(cond && "CFGBlock's predecessor has a terminator condition"); + // The previous assertion is invalid in some cases (eg do/while). Leaving + // reporting of these situations on at the moment to help triage these cases. + if (!cond) + return false; + + // Run each of the checks on the conditions + if (containsMacro(cond) || containsEnum(cond) + || containsStaticLocal(cond) || containsBuiltinOffsetOf(cond) + || containsStmt(cond)) + return true; + + return false; +} + +// Returns true if the given CFGBlock is empty +bool UnreachableCodeChecker::isEmptyCFGBlock(const CFGBlock *CB) { + return CB->getLabel() == 0 // No labels + && CB->size() == 0 // No statements + && CB->getTerminator() == 0; // No terminator +} + +void ento::registerUnreachableCodeChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp new file mode 100644 index 0000000..38c9cc1 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -0,0 +1,162 @@ +//=== VLASizeChecker.cpp - Undefined dereference checker --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines VLASizeChecker, a builtin check in ExprEngine that +// performs checks for declaration of VLA of undefined or zero size. +// In addition, VLASizeChecker is responsible for defining the extent +// of the MemRegion that represents a VLA. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/AST/CharUnits.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" + +using namespace clang; +using namespace ento; + +namespace { +class VLASizeChecker : public Checker< check::PreStmt > { + mutable OwningPtr BT; + enum VLASize_Kind { VLA_Garbage, VLA_Zero, VLA_Tainted }; + + void reportBug(VLASize_Kind Kind, + const Expr *SizeE, + ProgramStateRef State, + CheckerContext &C) const; +public: + void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; +}; +} // end anonymous namespace + +void VLASizeChecker::reportBug(VLASize_Kind Kind, + const Expr *SizeE, + ProgramStateRef State, + CheckerContext &C) const { + // Generate an error node. + ExplodedNode *N = C.generateSink(State); + if (!N) + return; + + if (!BT) + BT.reset(new BuiltinBug("Dangerous variable-length array (VLA) declaration")); + + SmallString<256> buf; + llvm::raw_svector_ostream os(buf); + os << "Declared variable-length array (VLA) "; + switch (Kind) { + case VLA_Garbage: + os << "uses a garbage value as its size"; + break; + case VLA_Zero: + os << "has zero size"; + break; + case VLA_Tainted: + os << "has tainted size"; + break; + } + + BugReport *report = new BugReport(*BT, os.str(), N); + report->addRange(SizeE->getSourceRange()); + report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, SizeE, + report)); + C.EmitReport(report); + return; +} + +void VLASizeChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { + if (!DS->isSingleDecl()) + return; + + const VarDecl *VD = dyn_cast(DS->getSingleDecl()); + if (!VD) + return; + + ASTContext &Ctx = C.getASTContext(); + const VariableArrayType *VLA = Ctx.getAsVariableArrayType(VD->getType()); + if (!VLA) + return; + + // FIXME: Handle multi-dimensional VLAs. + const Expr *SE = VLA->getSizeExpr(); + ProgramStateRef state = C.getState(); + SVal sizeV = state->getSVal(SE, C.getLocationContext()); + + if (sizeV.isUndef()) { + reportBug(VLA_Garbage, SE, state, C); + return; + } + + // See if the size value is known. It can't be undefined because we would have + // warned about that already. + if (sizeV.isUnknown()) + return; + + // Check if the size is tainted. + if (state->isTainted(sizeV)) { + reportBug(VLA_Tainted, SE, 0, C); + return; + } + + // Check if the size is zero. + DefinedSVal sizeD = cast(sizeV); + + ProgramStateRef stateNotZero, stateZero; + llvm::tie(stateNotZero, stateZero) = state->assume(sizeD); + + if (stateZero && !stateNotZero) { + reportBug(VLA_Zero, SE, stateZero, C); + return; + } + + // From this point on, assume that the size is not zero. + state = stateNotZero; + + // VLASizeChecker is responsible for defining the extent of the array being + // declared. We do this by multiplying the array length by the element size, + // then matching that with the array region's extent symbol. + + // Convert the array length to size_t. + SValBuilder &svalBuilder = C.getSValBuilder(); + QualType SizeTy = Ctx.getSizeType(); + NonLoc ArrayLength = cast(svalBuilder.evalCast(sizeD, SizeTy, + SE->getType())); + + // Get the element size. + CharUnits EleSize = Ctx.getTypeSizeInChars(VLA->getElementType()); + SVal EleSizeVal = svalBuilder.makeIntVal(EleSize.getQuantity(), SizeTy); + + // Multiply the array length by the element size. + SVal ArraySizeVal = svalBuilder.evalBinOpNN(state, BO_Mul, ArrayLength, + cast(EleSizeVal), SizeTy); + + // Finally, assume that the array's extent matches the given size. + const LocationContext *LC = C.getLocationContext(); + DefinedOrUnknownSVal Extent = + state->getRegion(VD, LC)->getExtent(svalBuilder); + DefinedOrUnknownSVal ArraySize = cast(ArraySizeVal); + DefinedOrUnknownSVal sizeIsKnown = + svalBuilder.evalEQ(state, Extent, ArraySize); + state = state->assume(sizeIsKnown, true); + + // Assume should not fail at this point. + assert(state); + + // Remember our assumptions! + C.addTransition(state); +} + +void ento::registerVLASizeChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp new file mode 100644 index 0000000..f7c7c0c --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp @@ -0,0 +1,241 @@ +//=======- VirtualCallChecker.cpp --------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a checker that checks virtual function calls during +// construction or destruction of C++ objects. +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/Support/SaveAndRestore.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { + +class WalkAST : public StmtVisitor { + BugReporter &BR; + AnalysisDeclContext *AC; + + typedef const CallExpr * WorkListUnit; + typedef SmallVector DFSWorkList; + + /// A vector representing the worklist which has a chain of CallExprs. + DFSWorkList WList; + + // PreVisited : A CallExpr to this FunctionDecl is in the worklist, but the + // body has not been visited yet. + // PostVisited : A CallExpr to this FunctionDecl is in the worklist, and the + // body has been visited. + enum Kind { NotVisited, + PreVisited, /**< A CallExpr to this FunctionDecl is in the + worklist, but the body has not yet been + visited. */ + PostVisited /**< A CallExpr to this FunctionDecl is in the + worklist, and the body has been visited. */ + } K; + + /// A DenseMap that records visited states of FunctionDecls. + llvm::DenseMap VisitedFunctions; + + /// The CallExpr whose body is currently being visited. This is used for + /// generating bug reports. This is null while visiting the body of a + /// constructor or destructor. + const CallExpr *visitingCallExpr; + +public: + WalkAST(BugReporter &br, AnalysisDeclContext *ac) + : BR(br), + AC(ac), + visitingCallExpr(0) {} + + bool hasWork() const { return !WList.empty(); } + + /// This method adds a CallExpr to the worklist and marks the callee as + /// being PreVisited. + void Enqueue(WorkListUnit WLUnit) { + const FunctionDecl *FD = WLUnit->getDirectCallee(); + if (!FD || !FD->getBody()) + return; + Kind &K = VisitedFunctions[FD]; + if (K != NotVisited) + return; + K = PreVisited; + WList.push_back(WLUnit); + } + + /// This method returns an item from the worklist without removing it. + WorkListUnit Dequeue() { + assert(!WList.empty()); + return WList.back(); + } + + void Execute() { + while (hasWork()) { + WorkListUnit WLUnit = Dequeue(); + const FunctionDecl *FD = WLUnit->getDirectCallee(); + assert(FD && FD->getBody()); + + if (VisitedFunctions[FD] == PreVisited) { + // If the callee is PreVisited, walk its body. + // Visit the body. + SaveAndRestore SaveCall(visitingCallExpr, WLUnit); + Visit(FD->getBody()); + + // Mark the function as being PostVisited to indicate we have + // scanned the body. + VisitedFunctions[FD] = PostVisited; + continue; + } + + // Otherwise, the callee is PostVisited. + // Remove it from the worklist. + assert(VisitedFunctions[FD] == PostVisited); + WList.pop_back(); + } + } + + // Stmt visitor methods. + void VisitCallExpr(CallExpr *CE); + void VisitCXXMemberCallExpr(CallExpr *CE); + void VisitStmt(Stmt *S) { VisitChildren(S); } + void VisitChildren(Stmt *S); + + void ReportVirtualCall(const CallExpr *CE, bool isPure); + +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// AST walking. +//===----------------------------------------------------------------------===// + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I) + if (Stmt *child = *I) + Visit(child); +} + +void WalkAST::VisitCallExpr(CallExpr *CE) { + VisitChildren(CE); + Enqueue(CE); +} + +void WalkAST::VisitCXXMemberCallExpr(CallExpr *CE) { + VisitChildren(CE); + bool callIsNonVirtual = false; + + // Several situations to elide for checking. + if (MemberExpr *CME = dyn_cast(CE->getCallee())) { + // If the member access is fully qualified (i.e., X::F), then treat + // this as a non-virtual call and do not warn. + if (CME->getQualifier()) + callIsNonVirtual = true; + + // Elide analyzing the call entirely if the base pointer is not 'this'. + if (Expr *base = CME->getBase()->IgnoreImpCasts()) + if (!isa(base)) + return; + } + + // Get the callee. + const CXXMethodDecl *MD = dyn_cast(CE->getDirectCallee()); + if (MD && MD->isVirtual() && !callIsNonVirtual) + ReportVirtualCall(CE, MD->isPure()); + + Enqueue(CE); +} + +void WalkAST::ReportVirtualCall(const CallExpr *CE, bool isPure) { + SmallString<100> buf; + llvm::raw_svector_ostream os(buf); + + os << "Call Path : "; + // Name of current visiting CallExpr. + os << *CE->getDirectCallee(); + + // Name of the CallExpr whose body is current walking. + if (visitingCallExpr) + os << " <-- " << *visitingCallExpr->getDirectCallee(); + // Names of FunctionDecls in worklist with state PostVisited. + for (SmallVectorImpl::iterator I = WList.end(), + E = WList.begin(); I != E; --I) { + const FunctionDecl *FD = (*(I-1))->getDirectCallee(); + assert(FD); + if (VisitedFunctions[FD] == PostVisited) + os << " <-- " << *FD; + } + + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + SourceRange R = CE->getCallee()->getSourceRange(); + + if (isPure) { + os << "\n" << "Call pure virtual functions during construction or " + << "destruction may leads undefined behaviour"; + BR.EmitBasicReport(AC->getDecl(), + "Call pure virtual function during construction or " + "Destruction", + "Cplusplus", + os.str(), CELoc, &R, 1); + return; + } + else { + os << "\n" << "Call virtual functions during construction or " + << "destruction will never go to a more derived class"; + BR.EmitBasicReport(AC->getDecl(), + "Call virtual function during construction or " + "Destruction", + "Cplusplus", + os.str(), CELoc, &R, 1); + return; + } +} + +//===----------------------------------------------------------------------===// +// VirtualCallChecker +//===----------------------------------------------------------------------===// + +namespace { +class VirtualCallChecker : public Checker > { +public: + void checkASTDecl(const CXXRecordDecl *RD, AnalysisManager& mgr, + BugReporter &BR) const { + WalkAST walker(BR, mgr.getAnalysisDeclContext(RD)); + + // Check the constructors. + for (CXXRecordDecl::ctor_iterator I = RD->ctor_begin(), E = RD->ctor_end(); + I != E; ++I) { + if (!I->isCopyOrMoveConstructor()) + if (Stmt *Body = I->getBody()) { + walker.Visit(Body); + walker.Execute(); + } + } + + // Check the destructor. + if (CXXDestructorDecl *DD = RD->getDestructor()) + if (Stmt *Body = DD->getBody()) { + walker.Visit(Body); + walker.Execute(); + } + } +}; +} + +void ento::registerVirtualCallChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} diff --git a/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp b/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp new file mode 100644 index 0000000..eeaed2d --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp @@ -0,0 +1,78 @@ +//===-- AnalysisManager.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" + +using namespace clang; +using namespace ento; + +void AnalysisManager::anchor() { } + +AnalysisManager::AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags, + const LangOptions &lang, + PathDiagnosticConsumer *pd, + StoreManagerCreator storemgr, + ConstraintManagerCreator constraintmgr, + CheckerManager *checkerMgr, + unsigned maxnodes, unsigned maxvisit, + bool vizdot, bool vizubi, + AnalysisPurgeMode purge, + bool eager, bool trim, + bool useUnoptimizedCFG, + bool addImplicitDtors, bool addInitializers, + bool eagerlyTrimEGraph, + AnalysisIPAMode ipa, + unsigned inlineMaxStack, + unsigned inlineMaxFunctionSize, + AnalysisInliningMode IMode, + bool NoRetry) + : AnaCtxMgr(useUnoptimizedCFG, addImplicitDtors, addInitializers), + Ctx(ctx), Diags(diags), LangOpts(lang), PD(pd), + CreateStoreMgr(storemgr), CreateConstraintMgr(constraintmgr), + CheckerMgr(checkerMgr), + AScope(ScopeDecl), MaxNodes(maxnodes), MaxVisit(maxvisit), + VisualizeEGDot(vizdot), VisualizeEGUbi(vizubi), PurgeDead(purge), + EagerlyAssume(eager), TrimGraph(trim), + EagerlyTrimEGraph(eagerlyTrimEGraph), + IPAMode(ipa), + InlineMaxStackDepth(inlineMaxStack), + InlineMaxFunctionSize(inlineMaxFunctionSize), + InliningMode(IMode), + NoRetryExhausted(NoRetry) +{ + AnaCtxMgr.getCFGBuildOptions().setAllAlwaysAdd(); +} + +AnalysisManager::AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags, + AnalysisManager &ParentAM) + : AnaCtxMgr(ParentAM.AnaCtxMgr.getUseUnoptimizedCFG(), + ParentAM.AnaCtxMgr.getCFGBuildOptions().AddImplicitDtors, + ParentAM.AnaCtxMgr.getCFGBuildOptions().AddInitializers), + Ctx(ctx), Diags(diags), + LangOpts(ParentAM.LangOpts), PD(ParentAM.getPathDiagnosticConsumer()), + CreateStoreMgr(ParentAM.CreateStoreMgr), + CreateConstraintMgr(ParentAM.CreateConstraintMgr), + CheckerMgr(ParentAM.CheckerMgr), + AScope(ScopeDecl), + MaxNodes(ParentAM.MaxNodes), + MaxVisit(ParentAM.MaxVisit), + VisualizeEGDot(ParentAM.VisualizeEGDot), + VisualizeEGUbi(ParentAM.VisualizeEGUbi), + PurgeDead(ParentAM.PurgeDead), + EagerlyAssume(ParentAM.EagerlyAssume), + TrimGraph(ParentAM.TrimGraph), + EagerlyTrimEGraph(ParentAM.EagerlyTrimEGraph), + IPAMode(ParentAM.IPAMode), + InlineMaxStackDepth(ParentAM.InlineMaxStackDepth), + InlineMaxFunctionSize(ParentAM.InlineMaxFunctionSize), + InliningMode(ParentAM.InliningMode), + NoRetryExhausted(ParentAM.NoRetryExhausted) +{ + AnaCtxMgr.getCFGBuildOptions().setAllAlwaysAdd(); +} diff --git a/clang/lib/StaticAnalyzer/Core/BasicConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/BasicConstraintManager.cpp new file mode 100644 index 0000000..2d9addd --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/BasicConstraintManager.cpp @@ -0,0 +1,367 @@ +//== BasicConstraintManager.cpp - Manage basic constraints.------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines BasicConstraintManager, a class that tracks simple +// equality and inequality constraints on symbolic values of ProgramState. +// +//===----------------------------------------------------------------------===// + +#include "SimpleConstraintManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + + +namespace { class ConstNotEq {}; } +namespace { class ConstEq {}; } + +typedef llvm::ImmutableMap ConstNotEqTy; +typedef llvm::ImmutableMap ConstEqTy; + +static int ConstEqIndex = 0; +static int ConstNotEqIndex = 0; + +namespace clang { +namespace ento { +template<> +struct ProgramStateTrait : + public ProgramStatePartialTrait { + static inline void *GDMIndex() { return &ConstNotEqIndex; } +}; + +template<> +struct ProgramStateTrait : public ProgramStatePartialTrait { + static inline void *GDMIndex() { return &ConstEqIndex; } +}; +} +} + +namespace { +// BasicConstraintManager only tracks equality and inequality constraints of +// constants and integer variables. +class BasicConstraintManager + : public SimpleConstraintManager { + ProgramState::IntSetTy::Factory ISetFactory; +public: + BasicConstraintManager(ProgramStateManager &statemgr, SubEngine &subengine) + : SimpleConstraintManager(subengine), + ISetFactory(statemgr.getAllocator()) {} + + ProgramStateRef assumeSymNE(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymEQ(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymLT(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymGT(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymGE(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymLE(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); + + ProgramStateRef AddEQ(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V); + + ProgramStateRef AddNE(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V); + + const llvm::APSInt* getSymVal(ProgramStateRef state, + SymbolRef sym) const; + + bool isNotEqual(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V) const; + + bool isEqual(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V) const; + + ProgramStateRef removeDeadBindings(ProgramStateRef state, + SymbolReaper& SymReaper); + + void print(ProgramStateRef state, + raw_ostream &Out, + const char* nl, + const char *sep); +}; + +} // end anonymous namespace + +ConstraintManager* +ento::CreateBasicConstraintManager(ProgramStateManager& statemgr, + SubEngine &subengine) { + return new BasicConstraintManager(statemgr, subengine); +} + +ProgramStateRef +BasicConstraintManager::assumeSymNE(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // First, determine if sym == X, where X+Adjustment != V. + llvm::APSInt Adjusted = V-Adjustment; + if (const llvm::APSInt* X = getSymVal(state, sym)) { + bool isFeasible = (*X != Adjusted); + return isFeasible ? state : NULL; + } + + // Second, determine if sym+Adjustment != V. + if (isNotEqual(state, sym, Adjusted)) + return state; + + // If we reach here, sym is not a constant and we don't know if it is != V. + // Make that assumption. + return AddNE(state, sym, Adjusted); +} + +ProgramStateRef +BasicConstraintManager::assumeSymEQ(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // First, determine if sym == X, where X+Adjustment != V. + llvm::APSInt Adjusted = V-Adjustment; + if (const llvm::APSInt* X = getSymVal(state, sym)) { + bool isFeasible = (*X == Adjusted); + return isFeasible ? state : NULL; + } + + // Second, determine if sym+Adjustment != V. + if (isNotEqual(state, sym, Adjusted)) + return NULL; + + // If we reach here, sym is not a constant and we don't know if it is == V. + // Make that assumption. + return AddEQ(state, sym, Adjusted); +} + +// The logic for these will be handled in another ConstraintManager. +ProgramStateRef +BasicConstraintManager::assumeSymLT(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // Is 'V' the smallest possible value? + if (V == llvm::APSInt::getMinValue(V.getBitWidth(), V.isUnsigned())) { + // sym cannot be any value less than 'V'. This path is infeasible. + return NULL; + } + + // FIXME: For now have assuming x < y be the same as assuming sym != V; + return assumeSymNE(state, sym, V, Adjustment); +} + +ProgramStateRef +BasicConstraintManager::assumeSymGT(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // Is 'V' the largest possible value? + if (V == llvm::APSInt::getMaxValue(V.getBitWidth(), V.isUnsigned())) { + // sym cannot be any value greater than 'V'. This path is infeasible. + return NULL; + } + + // FIXME: For now have assuming x > y be the same as assuming sym != V; + return assumeSymNE(state, sym, V, Adjustment); +} + +ProgramStateRef +BasicConstraintManager::assumeSymGE(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // Reject a path if the value of sym is a constant X and !(X+Adj >= V). + if (const llvm::APSInt *X = getSymVal(state, sym)) { + bool isFeasible = (*X >= V-Adjustment); + return isFeasible ? state : NULL; + } + + // Sym is not a constant, but it is worth looking to see if V is the + // maximum integer value. + if (V == llvm::APSInt::getMaxValue(V.getBitWidth(), V.isUnsigned())) { + llvm::APSInt Adjusted = V-Adjustment; + + // If we know that sym != V (after adjustment), then this condition + // is infeasible since there is no other value greater than V. + bool isFeasible = !isNotEqual(state, sym, Adjusted); + + // If the path is still feasible then as a consequence we know that + // 'sym+Adjustment == V' because there are no larger values. + // Add this constraint. + return isFeasible ? AddEQ(state, sym, Adjusted) : NULL; + } + + return state; +} + +ProgramStateRef +BasicConstraintManager::assumeSymLE(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // Reject a path if the value of sym is a constant X and !(X+Adj <= V). + if (const llvm::APSInt* X = getSymVal(state, sym)) { + bool isFeasible = (*X <= V-Adjustment); + return isFeasible ? state : NULL; + } + + // Sym is not a constant, but it is worth looking to see if V is the + // minimum integer value. + if (V == llvm::APSInt::getMinValue(V.getBitWidth(), V.isUnsigned())) { + llvm::APSInt Adjusted = V-Adjustment; + + // If we know that sym != V (after adjustment), then this condition + // is infeasible since there is no other value less than V. + bool isFeasible = !isNotEqual(state, sym, Adjusted); + + // If the path is still feasible then as a consequence we know that + // 'sym+Adjustment == V' because there are no smaller values. + // Add this constraint. + return isFeasible ? AddEQ(state, sym, Adjusted) : NULL; + } + + return state; +} + +ProgramStateRef BasicConstraintManager::AddEQ(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V) { + // Create a new state with the old binding replaced. + return state->set(sym, &state->getBasicVals().getValue(V)); +} + +ProgramStateRef BasicConstraintManager::AddNE(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V) { + + // First, retrieve the NE-set associated with the given symbol. + ConstNotEqTy::data_type* T = state->get(sym); + ProgramState::IntSetTy S = T ? *T : ISetFactory.getEmptySet(); + + // Now add V to the NE set. + S = ISetFactory.add(S, &state->getBasicVals().getValue(V)); + + // Create a new state with the old binding replaced. + return state->set(sym, S); +} + +const llvm::APSInt* BasicConstraintManager::getSymVal(ProgramStateRef state, + SymbolRef sym) const { + const ConstEqTy::data_type* T = state->get(sym); + return T ? *T : NULL; +} + +bool BasicConstraintManager::isNotEqual(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V) const { + + // Retrieve the NE-set associated with the given symbol. + const ConstNotEqTy::data_type* T = state->get(sym); + + // See if V is present in the NE-set. + return T ? T->contains(&state->getBasicVals().getValue(V)) : false; +} + +bool BasicConstraintManager::isEqual(ProgramStateRef state, + SymbolRef sym, + const llvm::APSInt& V) const { + // Retrieve the EQ-set associated with the given symbol. + const ConstEqTy::data_type* T = state->get(sym); + // See if V is present in the EQ-set. + return T ? **T == V : false; +} + +/// Scan all symbols referenced by the constraints. If the symbol is not alive +/// as marked in LSymbols, mark it as dead in DSymbols. +ProgramStateRef +BasicConstraintManager::removeDeadBindings(ProgramStateRef state, + SymbolReaper& SymReaper) { + + ConstEqTy CE = state->get(); + ConstEqTy::Factory& CEFactory = state->get_context(); + + for (ConstEqTy::iterator I = CE.begin(), E = CE.end(); I!=E; ++I) { + SymbolRef sym = I.getKey(); + if (SymReaper.maybeDead(sym)) + CE = CEFactory.remove(CE, sym); + } + state = state->set(CE); + + ConstNotEqTy CNE = state->get(); + ConstNotEqTy::Factory& CNEFactory = state->get_context(); + + for (ConstNotEqTy::iterator I = CNE.begin(), E = CNE.end(); I != E; ++I) { + SymbolRef sym = I.getKey(); + if (SymReaper.maybeDead(sym)) + CNE = CNEFactory.remove(CNE, sym); + } + + return state->set(CNE); +} + +void BasicConstraintManager::print(ProgramStateRef state, + raw_ostream &Out, + const char* nl, const char *sep) { + // Print equality constraints. + + ConstEqTy CE = state->get(); + + if (!CE.isEmpty()) { + Out << nl << sep << "'==' constraints:"; + for (ConstEqTy::iterator I = CE.begin(), E = CE.end(); I!=E; ++I) + Out << nl << " $" << I.getKey() << " : " << *I.getData(); + } + + // Print != constraints. + + ConstNotEqTy CNE = state->get(); + + if (!CNE.isEmpty()) { + Out << nl << sep << "'!=' constraints:"; + + for (ConstNotEqTy::iterator I = CNE.begin(), EI = CNE.end(); I!=EI; ++I) { + Out << nl << " $" << I.getKey() << " : "; + bool isFirst = true; + + ProgramState::IntSetTy::iterator J = I.getData().begin(), + EJ = I.getData().end(); + + for ( ; J != EJ; ++J) { + if (isFirst) isFirst = false; + else Out << ", "; + + Out << (*J)->getSExtValue(); // Hack: should print to raw_ostream. + } + } + } +} diff --git a/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp b/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp new file mode 100644 index 0000000..fe96700 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp @@ -0,0 +1,291 @@ +//=== BasicValueFactory.cpp - Basic values for Path Sens analysis --*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines BasicValueFactory, a class that manages the lifetime +// of APSInt objects and symbolic constraints used by ExprEngine +// and related classes. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" + +using namespace clang; +using namespace ento; + +void CompoundValData::Profile(llvm::FoldingSetNodeID& ID, QualType T, + llvm::ImmutableList L) { + T.Profile(ID); + ID.AddPointer(L.getInternalPointer()); +} + +void LazyCompoundValData::Profile(llvm::FoldingSetNodeID& ID, + const StoreRef &store, + const TypedValueRegion *region) { + ID.AddPointer(store.getStore()); + ID.AddPointer(region); +} + +typedef std::pair SValData; +typedef std::pair SValPair; + +namespace llvm { +template<> struct FoldingSetTrait { + static inline void Profile(const SValData& X, llvm::FoldingSetNodeID& ID) { + X.first.Profile(ID); + ID.AddPointer( (void*) X.second); + } +}; + +template<> struct FoldingSetTrait { + static inline void Profile(const SValPair& X, llvm::FoldingSetNodeID& ID) { + X.first.Profile(ID); + X.second.Profile(ID); + } +}; +} + +typedef llvm::FoldingSet > + PersistentSValsTy; + +typedef llvm::FoldingSet > + PersistentSValPairsTy; + +BasicValueFactory::~BasicValueFactory() { + // Note that the dstor for the contents of APSIntSet will never be called, + // so we iterate over the set and invoke the dstor for each APSInt. This + // frees an aux. memory allocated to represent very large constants. + for (APSIntSetTy::iterator I=APSIntSet.begin(), E=APSIntSet.end(); I!=E; ++I) + I->getValue().~APSInt(); + + delete (PersistentSValsTy*) PersistentSVals; + delete (PersistentSValPairsTy*) PersistentSValPairs; +} + +const llvm::APSInt& BasicValueFactory::getValue(const llvm::APSInt& X) { + llvm::FoldingSetNodeID ID; + void *InsertPos; + typedef llvm::FoldingSetNodeWrapper FoldNodeTy; + + X.Profile(ID); + FoldNodeTy* P = APSIntSet.FindNodeOrInsertPos(ID, InsertPos); + + if (!P) { + P = (FoldNodeTy*) BPAlloc.Allocate(); + new (P) FoldNodeTy(X); + APSIntSet.InsertNode(P, InsertPos); + } + + return *P; +} + +const llvm::APSInt& BasicValueFactory::getValue(const llvm::APInt& X, + bool isUnsigned) { + llvm::APSInt V(X, isUnsigned); + return getValue(V); +} + +const llvm::APSInt& BasicValueFactory::getValue(uint64_t X, unsigned BitWidth, + bool isUnsigned) { + llvm::APSInt V(BitWidth, isUnsigned); + V = X; + return getValue(V); +} + +const llvm::APSInt& BasicValueFactory::getValue(uint64_t X, QualType T) { + + unsigned bits = Ctx.getTypeSize(T); + llvm::APSInt V(bits, + T->isUnsignedIntegerOrEnumerationType() || Loc::isLocType(T)); + V = X; + return getValue(V); +} + +const CompoundValData* +BasicValueFactory::getCompoundValData(QualType T, + llvm::ImmutableList Vals) { + + llvm::FoldingSetNodeID ID; + CompoundValData::Profile(ID, T, Vals); + void *InsertPos; + + CompoundValData* D = CompoundValDataSet.FindNodeOrInsertPos(ID, InsertPos); + + if (!D) { + D = (CompoundValData*) BPAlloc.Allocate(); + new (D) CompoundValData(T, Vals); + CompoundValDataSet.InsertNode(D, InsertPos); + } + + return D; +} + +const LazyCompoundValData* +BasicValueFactory::getLazyCompoundValData(const StoreRef &store, + const TypedValueRegion *region) { + llvm::FoldingSetNodeID ID; + LazyCompoundValData::Profile(ID, store, region); + void *InsertPos; + + LazyCompoundValData *D = + LazyCompoundValDataSet.FindNodeOrInsertPos(ID, InsertPos); + + if (!D) { + D = (LazyCompoundValData*) BPAlloc.Allocate(); + new (D) LazyCompoundValData(store, region); + LazyCompoundValDataSet.InsertNode(D, InsertPos); + } + + return D; +} + +const llvm::APSInt* +BasicValueFactory::evalAPSInt(BinaryOperator::Opcode Op, + const llvm::APSInt& V1, const llvm::APSInt& V2) { + + switch (Op) { + default: + assert (false && "Invalid Opcode."); + + case BO_Mul: + return &getValue( V1 * V2 ); + + case BO_Div: + return &getValue( V1 / V2 ); + + case BO_Rem: + return &getValue( V1 % V2 ); + + case BO_Add: + return &getValue( V1 + V2 ); + + case BO_Sub: + return &getValue( V1 - V2 ); + + case BO_Shl: { + + // FIXME: This logic should probably go higher up, where we can + // test these conditions symbolically. + + // FIXME: Expand these checks to include all undefined behavior. + + if (V2.isSigned() && V2.isNegative()) + return NULL; + + uint64_t Amt = V2.getZExtValue(); + + if (Amt > V1.getBitWidth()) + return NULL; + + return &getValue( V1.operator<<( (unsigned) Amt )); + } + + case BO_Shr: { + + // FIXME: This logic should probably go higher up, where we can + // test these conditions symbolically. + + // FIXME: Expand these checks to include all undefined behavior. + + if (V2.isSigned() && V2.isNegative()) + return NULL; + + uint64_t Amt = V2.getZExtValue(); + + if (Amt > V1.getBitWidth()) + return NULL; + + return &getValue( V1.operator>>( (unsigned) Amt )); + } + + case BO_LT: + return &getTruthValue( V1 < V2 ); + + case BO_GT: + return &getTruthValue( V1 > V2 ); + + case BO_LE: + return &getTruthValue( V1 <= V2 ); + + case BO_GE: + return &getTruthValue( V1 >= V2 ); + + case BO_EQ: + return &getTruthValue( V1 == V2 ); + + case BO_NE: + return &getTruthValue( V1 != V2 ); + + // Note: LAnd, LOr, Comma are handled specially by higher-level logic. + + case BO_And: + return &getValue( V1 & V2 ); + + case BO_Or: + return &getValue( V1 | V2 ); + + case BO_Xor: + return &getValue( V1 ^ V2 ); + } +} + + +const std::pair& +BasicValueFactory::getPersistentSValWithData(const SVal& V, uintptr_t Data) { + + // Lazily create the folding set. + if (!PersistentSVals) PersistentSVals = new PersistentSValsTy(); + + llvm::FoldingSetNodeID ID; + void *InsertPos; + V.Profile(ID); + ID.AddPointer((void*) Data); + + PersistentSValsTy& Map = *((PersistentSValsTy*) PersistentSVals); + + typedef llvm::FoldingSetNodeWrapper FoldNodeTy; + FoldNodeTy* P = Map.FindNodeOrInsertPos(ID, InsertPos); + + if (!P) { + P = (FoldNodeTy*) BPAlloc.Allocate(); + new (P) FoldNodeTy(std::make_pair(V, Data)); + Map.InsertNode(P, InsertPos); + } + + return P->getValue(); +} + +const std::pair& +BasicValueFactory::getPersistentSValPair(const SVal& V1, const SVal& V2) { + + // Lazily create the folding set. + if (!PersistentSValPairs) PersistentSValPairs = new PersistentSValPairsTy(); + + llvm::FoldingSetNodeID ID; + void *InsertPos; + V1.Profile(ID); + V2.Profile(ID); + + PersistentSValPairsTy& Map = *((PersistentSValPairsTy*) PersistentSValPairs); + + typedef llvm::FoldingSetNodeWrapper FoldNodeTy; + FoldNodeTy* P = Map.FindNodeOrInsertPos(ID, InsertPos); + + if (!P) { + P = (FoldNodeTy*) BPAlloc.Allocate(); + new (P) FoldNodeTy(std::make_pair(V1, V2)); + Map.InsertNode(P, InsertPos); + } + + return P->getValue(); +} + +const SVal* BasicValueFactory::getPersistentSVal(SVal X) { + return &getPersistentSValWithData(X, 0).first; +} diff --git a/clang/lib/StaticAnalyzer/Core/BlockCounter.cpp b/clang/lib/StaticAnalyzer/Core/BlockCounter.cpp new file mode 100644 index 0000000..74d761e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/BlockCounter.cpp @@ -0,0 +1,86 @@ +//==- BlockCounter.h - ADT for counting block visits -------------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines BlockCounter, an abstract data type used to count +// the number of times a given block has been visited along a path +// analyzed by CoreEngine. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/BlockCounter.h" +#include "llvm/ADT/ImmutableMap.h" + +using namespace clang; +using namespace ento; + +namespace { + +class CountKey { + const StackFrameContext *CallSite; + unsigned BlockID; + +public: + CountKey(const StackFrameContext *CS, unsigned ID) + : CallSite(CS), BlockID(ID) {} + + bool operator==(const CountKey &RHS) const { + return (CallSite == RHS.CallSite) && (BlockID == RHS.BlockID); + } + + bool operator<(const CountKey &RHS) const { + return (CallSite == RHS.CallSite) ? (BlockID < RHS.BlockID) + : (CallSite < RHS.CallSite); + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddPointer(CallSite); + ID.AddInteger(BlockID); + } +}; + +} + +typedef llvm::ImmutableMap CountMap; + +static inline CountMap GetMap(void *D) { + return CountMap(static_cast(D)); +} + +static inline CountMap::Factory& GetFactory(void *F) { + return *static_cast(F); +} + +unsigned BlockCounter::getNumVisited(const StackFrameContext *CallSite, + unsigned BlockID) const { + CountMap M = GetMap(Data); + CountMap::data_type* T = M.lookup(CountKey(CallSite, BlockID)); + return T ? *T : 0; +} + +BlockCounter::Factory::Factory(llvm::BumpPtrAllocator& Alloc) { + F = new CountMap::Factory(Alloc); +} + +BlockCounter::Factory::~Factory() { + delete static_cast(F); +} + +BlockCounter +BlockCounter::Factory::IncrementCount(BlockCounter BC, + const StackFrameContext *CallSite, + unsigned BlockID) { + return BlockCounter(GetFactory(F).add(GetMap(BC.Data), + CountKey(CallSite, BlockID), + BC.getNumVisited(CallSite, BlockID)+1).getRoot()); +} + +BlockCounter +BlockCounter::Factory::GetEmptyCounter() { + return BlockCounter(GetFactory(F).getEmptyMap().getRoot()); +} diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp new file mode 100644 index 0000000..a264212 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -0,0 +1,2056 @@ +// BugReporter.cpp - Generate PathDiagnostics for Bugs ------------*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines BugReporter, a utility class for generating +// PathDiagnostics. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/AST/ASTContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/StmtObjC.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Analysis/ProgramPoint.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include + +using namespace clang; +using namespace ento; + +BugReporterVisitor::~BugReporterVisitor() {} + +void BugReporterContext::anchor() {} + +//===----------------------------------------------------------------------===// +// Helper routines for walking the ExplodedGraph and fetching statements. +//===----------------------------------------------------------------------===// + +static inline const Stmt *GetStmt(const ProgramPoint &P) { + if (const StmtPoint* SP = dyn_cast(&P)) + return SP->getStmt(); + else if (const BlockEdge *BE = dyn_cast(&P)) + return BE->getSrc()->getTerminator(); + + return 0; +} + +static inline const ExplodedNode* +GetPredecessorNode(const ExplodedNode *N) { + return N->pred_empty() ? NULL : *(N->pred_begin()); +} + +static inline const ExplodedNode* +GetSuccessorNode(const ExplodedNode *N) { + return N->succ_empty() ? NULL : *(N->succ_begin()); +} + +static const Stmt *GetPreviousStmt(const ExplodedNode *N) { + for (N = GetPredecessorNode(N); N; N = GetPredecessorNode(N)) + if (const Stmt *S = GetStmt(N->getLocation())) + return S; + + return 0; +} + +static const Stmt *GetNextStmt(const ExplodedNode *N) { + for (N = GetSuccessorNode(N); N; N = GetSuccessorNode(N)) + if (const Stmt *S = GetStmt(N->getLocation())) { + // Check if the statement is '?' or '&&'/'||'. These are "merges", + // not actual statement points. + switch (S->getStmtClass()) { + case Stmt::ChooseExprClass: + case Stmt::BinaryConditionalOperatorClass: continue; + case Stmt::ConditionalOperatorClass: continue; + case Stmt::BinaryOperatorClass: { + BinaryOperatorKind Op = cast(S)->getOpcode(); + if (Op == BO_LAnd || Op == BO_LOr) + continue; + break; + } + default: + break; + } + return S; + } + + return 0; +} + +static inline const Stmt* +GetCurrentOrPreviousStmt(const ExplodedNode *N) { + if (const Stmt *S = GetStmt(N->getLocation())) + return S; + + return GetPreviousStmt(N); +} + +static inline const Stmt* +GetCurrentOrNextStmt(const ExplodedNode *N) { + if (const Stmt *S = GetStmt(N->getLocation())) + return S; + + return GetNextStmt(N); +} + +//===----------------------------------------------------------------------===// +// Diagnostic cleanup. +//===----------------------------------------------------------------------===// + +/// Recursively scan through a path and prune out calls and macros pieces +/// that aren't needed. Return true if afterwards the path contains +/// "interesting stuff" which means it should be pruned from the parent path. +static bool RemoveUneededCalls(PathPieces &pieces) { + bool containsSomethingInteresting = false; + const unsigned N = pieces.size(); + + for (unsigned i = 0 ; i < N ; ++i) { + // Remove the front piece from the path. If it is still something we + // want to keep once we are done, we will push it back on the end. + IntrusiveRefCntPtr piece(pieces.front()); + pieces.pop_front(); + + switch (piece->getKind()) { + case PathDiagnosticPiece::Call: { + PathDiagnosticCallPiece *call = cast(piece); + // Recursively clean out the subclass. Keep this call around if + // it contains any informative diagnostics. + if (!RemoveUneededCalls(call->path)) + continue; + containsSomethingInteresting = true; + break; + } + case PathDiagnosticPiece::Macro: { + PathDiagnosticMacroPiece *macro = cast(piece); + if (!RemoveUneededCalls(macro->subPieces)) + continue; + containsSomethingInteresting = true; + break; + } + case PathDiagnosticPiece::Event: { + PathDiagnosticEventPiece *event = cast(piece); + // We never throw away an event, but we do throw it away wholesale + // as part of a path if we throw the entire path away. + if (event->isPrunable()) + continue; + containsSomethingInteresting = true; + break; + } + case PathDiagnosticPiece::ControlFlow: + break; + } + + pieces.push_back(piece); + } + + return containsSomethingInteresting; +} + +//===----------------------------------------------------------------------===// +// PathDiagnosticBuilder and its associated routines and helper objects. +//===----------------------------------------------------------------------===// + +typedef llvm::DenseMap NodeBackMap; + +namespace { +class NodeMapClosure : public BugReport::NodeResolver { + NodeBackMap& M; +public: + NodeMapClosure(NodeBackMap *m) : M(*m) {} + ~NodeMapClosure() {} + + const ExplodedNode *getOriginalNode(const ExplodedNode *N) { + NodeBackMap::iterator I = M.find(N); + return I == M.end() ? 0 : I->second; + } +}; + +class PathDiagnosticBuilder : public BugReporterContext { + BugReport *R; + PathDiagnosticConsumer *PDC; + OwningPtr PM; + NodeMapClosure NMC; +public: + const LocationContext *LC; + + PathDiagnosticBuilder(GRBugReporter &br, + BugReport *r, NodeBackMap *Backmap, + PathDiagnosticConsumer *pdc) + : BugReporterContext(br), + R(r), PDC(pdc), NMC(Backmap), LC(r->getErrorNode()->getLocationContext()) + {} + + PathDiagnosticLocation ExecutionContinues(const ExplodedNode *N); + + PathDiagnosticLocation ExecutionContinues(llvm::raw_string_ostream &os, + const ExplodedNode *N); + + BugReport *getBugReport() { return R; } + + Decl const &getCodeDecl() { return R->getErrorNode()->getCodeDecl(); } + + ParentMap& getParentMap() { return LC->getParentMap(); } + + const Stmt *getParent(const Stmt *S) { + return getParentMap().getParent(S); + } + + virtual NodeMapClosure& getNodeResolver() { return NMC; } + + PathDiagnosticLocation getEnclosingStmtLocation(const Stmt *S); + + PathDiagnosticConsumer::PathGenerationScheme getGenerationScheme() const { + return PDC ? PDC->getGenerationScheme() : PathDiagnosticConsumer::Extensive; + } + + bool supportsLogicalOpControlFlow() const { + return PDC ? PDC->supportsLogicalOpControlFlow() : true; + } +}; +} // end anonymous namespace + +PathDiagnosticLocation +PathDiagnosticBuilder::ExecutionContinues(const ExplodedNode *N) { + if (const Stmt *S = GetNextStmt(N)) + return PathDiagnosticLocation(S, getSourceManager(), LC); + + return PathDiagnosticLocation::createDeclEnd(N->getLocationContext(), + getSourceManager()); +} + +PathDiagnosticLocation +PathDiagnosticBuilder::ExecutionContinues(llvm::raw_string_ostream &os, + const ExplodedNode *N) { + + // Slow, but probably doesn't matter. + if (os.str().empty()) + os << ' '; + + const PathDiagnosticLocation &Loc = ExecutionContinues(N); + + if (Loc.asStmt()) + os << "Execution continues on line " + << getSourceManager().getExpansionLineNumber(Loc.asLocation()) + << '.'; + else { + os << "Execution jumps to the end of the "; + const Decl *D = N->getLocationContext()->getDecl(); + if (isa(D)) + os << "method"; + else if (isa(D)) + os << "function"; + else { + assert(isa(D)); + os << "anonymous block"; + } + os << '.'; + } + + return Loc; +} + +static bool IsNested(const Stmt *S, ParentMap &PM) { + if (isa(S) && PM.isConsumedExpr(cast(S))) + return true; + + const Stmt *Parent = PM.getParentIgnoreParens(S); + + if (Parent) + switch (Parent->getStmtClass()) { + case Stmt::ForStmtClass: + case Stmt::DoStmtClass: + case Stmt::WhileStmtClass: + return true; + default: + break; + } + + return false; +} + +PathDiagnosticLocation +PathDiagnosticBuilder::getEnclosingStmtLocation(const Stmt *S) { + assert(S && "Null Stmt *passed to getEnclosingStmtLocation"); + ParentMap &P = getParentMap(); + SourceManager &SMgr = getSourceManager(); + + while (IsNested(S, P)) { + const Stmt *Parent = P.getParentIgnoreParens(S); + + if (!Parent) + break; + + switch (Parent->getStmtClass()) { + case Stmt::BinaryOperatorClass: { + const BinaryOperator *B = cast(Parent); + if (B->isLogicalOp()) + return PathDiagnosticLocation(S, SMgr, LC); + break; + } + case Stmt::CompoundStmtClass: + case Stmt::StmtExprClass: + return PathDiagnosticLocation(S, SMgr, LC); + case Stmt::ChooseExprClass: + // Similar to '?' if we are referring to condition, just have the edge + // point to the entire choose expression. + if (cast(Parent)->getCond() == S) + return PathDiagnosticLocation(Parent, SMgr, LC); + else + return PathDiagnosticLocation(S, SMgr, LC); + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: + // For '?', if we are referring to condition, just have the edge point + // to the entire '?' expression. + if (cast(Parent)->getCond() == S) + return PathDiagnosticLocation(Parent, SMgr, LC); + else + return PathDiagnosticLocation(S, SMgr, LC); + case Stmt::DoStmtClass: + return PathDiagnosticLocation(S, SMgr, LC); + case Stmt::ForStmtClass: + if (cast(Parent)->getBody() == S) + return PathDiagnosticLocation(S, SMgr, LC); + break; + case Stmt::IfStmtClass: + if (cast(Parent)->getCond() != S) + return PathDiagnosticLocation(S, SMgr, LC); + break; + case Stmt::ObjCForCollectionStmtClass: + if (cast(Parent)->getBody() == S) + return PathDiagnosticLocation(S, SMgr, LC); + break; + case Stmt::WhileStmtClass: + if (cast(Parent)->getCond() != S) + return PathDiagnosticLocation(S, SMgr, LC); + break; + default: + break; + } + + S = Parent; + } + + assert(S && "Cannot have null Stmt for PathDiagnosticLocation"); + + // Special case: DeclStmts can appear in for statement declarations, in which + // case the ForStmt is the context. + if (isa(S)) { + if (const Stmt *Parent = P.getParent(S)) { + switch (Parent->getStmtClass()) { + case Stmt::ForStmtClass: + case Stmt::ObjCForCollectionStmtClass: + return PathDiagnosticLocation(Parent, SMgr, LC); + default: + break; + } + } + } + else if (isa(S)) { + // Special case: the binary operator represents the initialization + // code in a for statement (this can happen when the variable being + // initialized is an old variable. + if (const ForStmt *FS = + dyn_cast_or_null(P.getParentIgnoreParens(S))) { + if (FS->getInit() == S) + return PathDiagnosticLocation(FS, SMgr, LC); + } + } + + return PathDiagnosticLocation(S, SMgr, LC); +} + +//===----------------------------------------------------------------------===// +// "Minimal" path diagnostic generation algorithm. +//===----------------------------------------------------------------------===// +typedef std::pair StackDiagPair; +typedef SmallVector StackDiagVector; + +static void updateStackPiecesWithMessage(PathDiagnosticPiece *P, + StackDiagVector &CallStack) { + // If the piece contains a special message, add it to all the call + // pieces on the active stack. + if (PathDiagnosticEventPiece *ep = + dyn_cast(P)) { + + if (ep->hasCallStackHint()) + for (StackDiagVector::iterator I = CallStack.begin(), + E = CallStack.end(); I != E; ++I) { + PathDiagnosticCallPiece *CP = I->first; + const ExplodedNode *N = I->second; + std::string stackMsg = ep->getCallStackMessage(N); + + // The last message on the path to final bug is the most important + // one. Since we traverse the path backwards, do not add the message + // if one has been previously added. + if (!CP->hasCallStackMessage()) + CP->setCallStackMessage(stackMsg); + } + } +} + +static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM); + +static void GenerateMinimalPathDiagnostic(PathDiagnostic& PD, + PathDiagnosticBuilder &PDB, + const ExplodedNode *N, + ArrayRef visitors) { + + SourceManager& SMgr = PDB.getSourceManager(); + const LocationContext *LC = PDB.LC; + const ExplodedNode *NextNode = N->pred_empty() + ? NULL : *(N->pred_begin()); + + StackDiagVector CallStack; + + while (NextNode) { + N = NextNode; + PDB.LC = N->getLocationContext(); + NextNode = GetPredecessorNode(N); + + ProgramPoint P = N->getLocation(); + + if (const CallExit *CE = dyn_cast(&P)) { + PathDiagnosticCallPiece *C = + PathDiagnosticCallPiece::construct(N, *CE, SMgr); + PD.getActivePath().push_front(C); + PD.pushActivePath(&C->path); + CallStack.push_back(StackDiagPair(C, N)); + continue; + } + + if (const CallEnter *CE = dyn_cast(&P)) { + PD.popActivePath(); + // The current active path should never be empty. Either we + // just added a bunch of stuff to the top-level path, or + // we have a previous CallExit. If the front of the active + // path is not a PathDiagnosticCallPiece, it means that the + // path terminated within a function call. We must then take the + // current contents of the active path and place it within + // a new PathDiagnosticCallPiece. + assert(!PD.getActivePath().empty()); + PathDiagnosticCallPiece *C = + dyn_cast(PD.getActivePath().front()); + if (!C) { + const Decl *Caller = CE->getLocationContext()->getDecl(); + C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller); + } + C->setCallee(*CE, SMgr); + if (!CallStack.empty()) { + assert(CallStack.back().first == C); + CallStack.pop_back(); + } + continue; + } + + if (const BlockEdge *BE = dyn_cast(&P)) { + const CFGBlock *Src = BE->getSrc(); + const CFGBlock *Dst = BE->getDst(); + const Stmt *T = Src->getTerminator(); + + if (!T) + continue; + + PathDiagnosticLocation Start = + PathDiagnosticLocation::createBegin(T, SMgr, + N->getLocationContext()); + + switch (T->getStmtClass()) { + default: + break; + + case Stmt::GotoStmtClass: + case Stmt::IndirectGotoStmtClass: { + const Stmt *S = GetNextStmt(N); + + if (!S) + continue; + + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + const PathDiagnosticLocation &End = PDB.getEnclosingStmtLocation(S); + + os << "Control jumps to line " + << End.asLocation().getExpansionLineNumber(); + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + break; + } + + case Stmt::SwitchStmtClass: { + // Figure out what case arm we took. + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + if (const Stmt *S = Dst->getLabel()) { + PathDiagnosticLocation End(S, SMgr, LC); + + switch (S->getStmtClass()) { + default: + os << "No cases match in the switch statement. " + "Control jumps to line " + << End.asLocation().getExpansionLineNumber(); + break; + case Stmt::DefaultStmtClass: + os << "Control jumps to the 'default' case at line " + << End.asLocation().getExpansionLineNumber(); + break; + + case Stmt::CaseStmtClass: { + os << "Control jumps to 'case "; + const CaseStmt *Case = cast(S); + const Expr *LHS = Case->getLHS()->IgnoreParenCasts(); + + // Determine if it is an enum. + bool GetRawInt = true; + + if (const DeclRefExpr *DR = dyn_cast(LHS)) { + // FIXME: Maybe this should be an assertion. Are there cases + // were it is not an EnumConstantDecl? + const EnumConstantDecl *D = + dyn_cast(DR->getDecl()); + + if (D) { + GetRawInt = false; + os << *D; + } + } + + if (GetRawInt) + os << LHS->EvaluateKnownConstInt(PDB.getASTContext()); + + os << ":' at line " + << End.asLocation().getExpansionLineNumber(); + break; + } + } + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + } + else { + os << "'Default' branch taken. "; + const PathDiagnosticLocation &End = PDB.ExecutionContinues(os, N); + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + } + + break; + } + + case Stmt::BreakStmtClass: + case Stmt::ContinueStmtClass: { + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + PathDiagnosticLocation End = PDB.ExecutionContinues(os, N); + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + break; + } + + // Determine control-flow for ternary '?'. + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: { + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + os << "'?' condition is "; + + if (*(Src->succ_begin()+1) == Dst) + os << "false"; + else + os << "true"; + + PathDiagnosticLocation End = PDB.ExecutionContinues(N); + + if (const Stmt *S = End.asStmt()) + End = PDB.getEnclosingStmtLocation(S); + + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + break; + } + + // Determine control-flow for short-circuited '&&' and '||'. + case Stmt::BinaryOperatorClass: { + if (!PDB.supportsLogicalOpControlFlow()) + break; + + const BinaryOperator *B = cast(T); + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + os << "Left side of '"; + + if (B->getOpcode() == BO_LAnd) { + os << "&&" << "' is "; + + if (*(Src->succ_begin()+1) == Dst) { + os << "false"; + PathDiagnosticLocation End(B->getLHS(), SMgr, LC); + PathDiagnosticLocation Start = + PathDiagnosticLocation::createOperatorLoc(B, SMgr); + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + } + else { + os << "true"; + PathDiagnosticLocation Start(B->getLHS(), SMgr, LC); + PathDiagnosticLocation End = PDB.ExecutionContinues(N); + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + } + } + else { + assert(B->getOpcode() == BO_LOr); + os << "||" << "' is "; + + if (*(Src->succ_begin()+1) == Dst) { + os << "false"; + PathDiagnosticLocation Start(B->getLHS(), SMgr, LC); + PathDiagnosticLocation End = PDB.ExecutionContinues(N); + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + } + else { + os << "true"; + PathDiagnosticLocation End(B->getLHS(), SMgr, LC); + PathDiagnosticLocation Start = + PathDiagnosticLocation::createOperatorLoc(B, SMgr); + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + } + } + + break; + } + + case Stmt::DoStmtClass: { + if (*(Src->succ_begin()) == Dst) { + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + os << "Loop condition is true. "; + PathDiagnosticLocation End = PDB.ExecutionContinues(os, N); + + if (const Stmt *S = End.asStmt()) + End = PDB.getEnclosingStmtLocation(S); + + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + } + else { + PathDiagnosticLocation End = PDB.ExecutionContinues(N); + + if (const Stmt *S = End.asStmt()) + End = PDB.getEnclosingStmtLocation(S); + + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + "Loop condition is false. Exiting loop")); + } + + break; + } + + case Stmt::WhileStmtClass: + case Stmt::ForStmtClass: { + if (*(Src->succ_begin()+1) == Dst) { + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + os << "Loop condition is false. "; + PathDiagnosticLocation End = PDB.ExecutionContinues(os, N); + if (const Stmt *S = End.asStmt()) + End = PDB.getEnclosingStmtLocation(S); + + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + os.str())); + } + else { + PathDiagnosticLocation End = PDB.ExecutionContinues(N); + if (const Stmt *S = End.asStmt()) + End = PDB.getEnclosingStmtLocation(S); + + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + "Loop condition is true. Entering loop body")); + } + + break; + } + + case Stmt::IfStmtClass: { + PathDiagnosticLocation End = PDB.ExecutionContinues(N); + + if (const Stmt *S = End.asStmt()) + End = PDB.getEnclosingStmtLocation(S); + + if (*(Src->succ_begin()+1) == Dst) + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + "Taking false branch")); + else + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(Start, End, + "Taking true branch")); + + break; + } + } + } + + if (NextNode) { + // Add diagnostic pieces from custom visitors. + BugReport *R = PDB.getBugReport(); + for (ArrayRef::iterator I = visitors.begin(), + E = visitors.end(); + I != E; ++I) { + if (PathDiagnosticPiece *p = (*I)->VisitNode(N, NextNode, PDB, *R)) { + PD.getActivePath().push_front(p); + updateStackPiecesWithMessage(p, CallStack); + } + } + } + } + + // After constructing the full PathDiagnostic, do a pass over it to compact + // PathDiagnosticPieces that occur within a macro. + CompactPathDiagnostic(PD.getMutablePieces(), PDB.getSourceManager()); +} + +//===----------------------------------------------------------------------===// +// "Extensive" PathDiagnostic generation. +//===----------------------------------------------------------------------===// + +static bool IsControlFlowExpr(const Stmt *S) { + const Expr *E = dyn_cast(S); + + if (!E) + return false; + + E = E->IgnoreParenCasts(); + + if (isa(E)) + return true; + + if (const BinaryOperator *B = dyn_cast(E)) + if (B->isLogicalOp()) + return true; + + return false; +} + +namespace { +class ContextLocation : public PathDiagnosticLocation { + bool IsDead; +public: + ContextLocation(const PathDiagnosticLocation &L, bool isdead = false) + : PathDiagnosticLocation(L), IsDead(isdead) {} + + void markDead() { IsDead = true; } + bool isDead() const { return IsDead; } +}; + +class EdgeBuilder { + std::vector CLocs; + typedef std::vector::iterator iterator; + PathDiagnostic &PD; + PathDiagnosticBuilder &PDB; + PathDiagnosticLocation PrevLoc; + + bool IsConsumedExpr(const PathDiagnosticLocation &L); + + bool containsLocation(const PathDiagnosticLocation &Container, + const PathDiagnosticLocation &Containee); + + PathDiagnosticLocation getContextLocation(const PathDiagnosticLocation &L); + + PathDiagnosticLocation cleanUpLocation(PathDiagnosticLocation L, + bool firstCharOnly = false) { + if (const Stmt *S = L.asStmt()) { + const Stmt *Original = S; + while (1) { + // Adjust the location for some expressions that are best referenced + // by one of their subexpressions. + switch (S->getStmtClass()) { + default: + break; + case Stmt::ParenExprClass: + case Stmt::GenericSelectionExprClass: + S = cast(S)->IgnoreParens(); + firstCharOnly = true; + continue; + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: + S = cast(S)->getCond(); + firstCharOnly = true; + continue; + case Stmt::ChooseExprClass: + S = cast(S)->getCond(); + firstCharOnly = true; + continue; + case Stmt::BinaryOperatorClass: + S = cast(S)->getLHS(); + firstCharOnly = true; + continue; + } + + break; + } + + if (S != Original) + L = PathDiagnosticLocation(S, L.getManager(), PDB.LC); + } + + if (firstCharOnly) + L = PathDiagnosticLocation::createSingleLocation(L); + + return L; + } + + void popLocation() { + if (!CLocs.back().isDead() && CLocs.back().asLocation().isFileID()) { + // For contexts, we only one the first character as the range. + rawAddEdge(cleanUpLocation(CLocs.back(), true)); + } + CLocs.pop_back(); + } + +public: + EdgeBuilder(PathDiagnostic &pd, PathDiagnosticBuilder &pdb) + : PD(pd), PDB(pdb) { + + // If the PathDiagnostic already has pieces, add the enclosing statement + // of the first piece as a context as well. + if (!PD.path.empty()) { + PrevLoc = (*PD.path.begin())->getLocation(); + + if (const Stmt *S = PrevLoc.asStmt()) + addExtendedContext(PDB.getEnclosingStmtLocation(S).asStmt()); + } + } + + ~EdgeBuilder() { + while (!CLocs.empty()) popLocation(); + + // Finally, add an initial edge from the start location of the first + // statement (if it doesn't already exist). + PathDiagnosticLocation L = PathDiagnosticLocation::createDeclBegin( + PDB.LC, + PDB.getSourceManager()); + if (L.isValid()) + rawAddEdge(L); + } + + void flushLocations() { + while (!CLocs.empty()) + popLocation(); + PrevLoc = PathDiagnosticLocation(); + } + + void addEdge(PathDiagnosticLocation NewLoc, bool alwaysAdd = false); + + void rawAddEdge(PathDiagnosticLocation NewLoc); + + void addContext(const Stmt *S); + void addExtendedContext(const Stmt *S); +}; +} // end anonymous namespace + + +PathDiagnosticLocation +EdgeBuilder::getContextLocation(const PathDiagnosticLocation &L) { + if (const Stmt *S = L.asStmt()) { + if (IsControlFlowExpr(S)) + return L; + + return PDB.getEnclosingStmtLocation(S); + } + + return L; +} + +bool EdgeBuilder::containsLocation(const PathDiagnosticLocation &Container, + const PathDiagnosticLocation &Containee) { + + if (Container == Containee) + return true; + + if (Container.asDecl()) + return true; + + if (const Stmt *S = Containee.asStmt()) + if (const Stmt *ContainerS = Container.asStmt()) { + while (S) { + if (S == ContainerS) + return true; + S = PDB.getParent(S); + } + return false; + } + + // Less accurate: compare using source ranges. + SourceRange ContainerR = Container.asRange(); + SourceRange ContaineeR = Containee.asRange(); + + SourceManager &SM = PDB.getSourceManager(); + SourceLocation ContainerRBeg = SM.getExpansionLoc(ContainerR.getBegin()); + SourceLocation ContainerREnd = SM.getExpansionLoc(ContainerR.getEnd()); + SourceLocation ContaineeRBeg = SM.getExpansionLoc(ContaineeR.getBegin()); + SourceLocation ContaineeREnd = SM.getExpansionLoc(ContaineeR.getEnd()); + + unsigned ContainerBegLine = SM.getExpansionLineNumber(ContainerRBeg); + unsigned ContainerEndLine = SM.getExpansionLineNumber(ContainerREnd); + unsigned ContaineeBegLine = SM.getExpansionLineNumber(ContaineeRBeg); + unsigned ContaineeEndLine = SM.getExpansionLineNumber(ContaineeREnd); + + assert(ContainerBegLine <= ContainerEndLine); + assert(ContaineeBegLine <= ContaineeEndLine); + + return (ContainerBegLine <= ContaineeBegLine && + ContainerEndLine >= ContaineeEndLine && + (ContainerBegLine != ContaineeBegLine || + SM.getExpansionColumnNumber(ContainerRBeg) <= + SM.getExpansionColumnNumber(ContaineeRBeg)) && + (ContainerEndLine != ContaineeEndLine || + SM.getExpansionColumnNumber(ContainerREnd) >= + SM.getExpansionColumnNumber(ContaineeREnd))); +} + +void EdgeBuilder::rawAddEdge(PathDiagnosticLocation NewLoc) { + if (!PrevLoc.isValid()) { + PrevLoc = NewLoc; + return; + } + + const PathDiagnosticLocation &NewLocClean = cleanUpLocation(NewLoc); + const PathDiagnosticLocation &PrevLocClean = cleanUpLocation(PrevLoc); + + if (NewLocClean.asLocation() == PrevLocClean.asLocation()) + return; + + // FIXME: Ignore intra-macro edges for now. + if (NewLocClean.asLocation().getExpansionLoc() == + PrevLocClean.asLocation().getExpansionLoc()) + return; + + PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(NewLocClean, PrevLocClean)); + PrevLoc = NewLoc; +} + +void EdgeBuilder::addEdge(PathDiagnosticLocation NewLoc, bool alwaysAdd) { + + if (!alwaysAdd && NewLoc.asLocation().isMacroID()) + return; + + const PathDiagnosticLocation &CLoc = getContextLocation(NewLoc); + + while (!CLocs.empty()) { + ContextLocation &TopContextLoc = CLocs.back(); + + // Is the top location context the same as the one for the new location? + if (TopContextLoc == CLoc) { + if (alwaysAdd) { + if (IsConsumedExpr(TopContextLoc) && + !IsControlFlowExpr(TopContextLoc.asStmt())) + TopContextLoc.markDead(); + + rawAddEdge(NewLoc); + } + + return; + } + + if (containsLocation(TopContextLoc, CLoc)) { + if (alwaysAdd) { + rawAddEdge(NewLoc); + + if (IsConsumedExpr(CLoc) && !IsControlFlowExpr(CLoc.asStmt())) { + CLocs.push_back(ContextLocation(CLoc, true)); + return; + } + } + + CLocs.push_back(CLoc); + return; + } + + // Context does not contain the location. Flush it. + popLocation(); + } + + // If we reach here, there is no enclosing context. Just add the edge. + rawAddEdge(NewLoc); +} + +bool EdgeBuilder::IsConsumedExpr(const PathDiagnosticLocation &L) { + if (const Expr *X = dyn_cast_or_null(L.asStmt())) + return PDB.getParentMap().isConsumedExpr(X) && !IsControlFlowExpr(X); + + return false; +} + +void EdgeBuilder::addExtendedContext(const Stmt *S) { + if (!S) + return; + + const Stmt *Parent = PDB.getParent(S); + while (Parent) { + if (isa(Parent)) + Parent = PDB.getParent(Parent); + else + break; + } + + if (Parent) { + switch (Parent->getStmtClass()) { + case Stmt::DoStmtClass: + case Stmt::ObjCAtSynchronizedStmtClass: + addContext(Parent); + default: + break; + } + } + + addContext(S); +} + +void EdgeBuilder::addContext(const Stmt *S) { + if (!S) + return; + + PathDiagnosticLocation L(S, PDB.getSourceManager(), PDB.LC); + + while (!CLocs.empty()) { + const PathDiagnosticLocation &TopContextLoc = CLocs.back(); + + // Is the top location context the same as the one for the new location? + if (TopContextLoc == L) + return; + + if (containsLocation(TopContextLoc, L)) { + CLocs.push_back(L); + return; + } + + // Context does not contain the location. Flush it. + popLocation(); + } + + CLocs.push_back(L); +} + +static void GenerateExtensivePathDiagnostic(PathDiagnostic& PD, + PathDiagnosticBuilder &PDB, + const ExplodedNode *N, + ArrayRef visitors) { + EdgeBuilder EB(PD, PDB); + const SourceManager& SM = PDB.getSourceManager(); + StackDiagVector CallStack; + + const ExplodedNode *NextNode = N->pred_empty() ? NULL : *(N->pred_begin()); + while (NextNode) { + N = NextNode; + NextNode = GetPredecessorNode(N); + ProgramPoint P = N->getLocation(); + + do { + if (const CallExit *CE = dyn_cast(&P)) { + const StackFrameContext *LCtx = + CE->getLocationContext()->getCurrentStackFrame(); + PathDiagnosticLocation Loc(LCtx->getCallSite(), + PDB.getSourceManager(), + LCtx); + EB.addEdge(Loc, true); + EB.flushLocations(); + PathDiagnosticCallPiece *C = + PathDiagnosticCallPiece::construct(N, *CE, SM); + PD.getActivePath().push_front(C); + PD.pushActivePath(&C->path); + CallStack.push_back(StackDiagPair(C, N)); + break; + } + + // Pop the call hierarchy if we are done walking the contents + // of a function call. + if (const CallEnter *CE = dyn_cast(&P)) { + // Add an edge to the start of the function. + const Decl *D = CE->getCalleeContext()->getDecl(); + PathDiagnosticLocation pos = + PathDiagnosticLocation::createBegin(D, SM); + EB.addEdge(pos); + + // Flush all locations, and pop the active path. + EB.flushLocations(); + PD.popActivePath(); + assert(!PD.getActivePath().empty()); + PDB.LC = N->getLocationContext(); + + // The current active path should never be empty. Either we + // just added a bunch of stuff to the top-level path, or + // we have a previous CallExit. If the front of the active + // path is not a PathDiagnosticCallPiece, it means that the + // path terminated within a function call. We must then take the + // current contents of the active path and place it within + // a new PathDiagnosticCallPiece. + PathDiagnosticCallPiece *C = + dyn_cast(PD.getActivePath().front()); + if (!C) { + const Decl * Caller = CE->getLocationContext()->getDecl(); + C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller); + } + C->setCallee(*CE, SM); + EB.addContext(CE->getCallExpr()); + + if (!CallStack.empty()) { + assert(CallStack.back().first == C); + CallStack.pop_back(); + } + break; + } + + // Note that is important that we update the LocationContext + // after looking at CallExits. CallExit basically adds an + // edge in the *caller*, so we don't want to update the LocationContext + // too soon. + PDB.LC = N->getLocationContext(); + + // Block edges. + if (const BlockEdge *BE = dyn_cast(&P)) { + const CFGBlock &Blk = *BE->getSrc(); + const Stmt *Term = Blk.getTerminator(); + + // Are we jumping to the head of a loop? Add a special diagnostic. + if (const Stmt *Loop = BE->getDst()->getLoopTarget()) { + PathDiagnosticLocation L(Loop, SM, PDB.LC); + const CompoundStmt *CS = NULL; + + if (!Term) { + if (const ForStmt *FS = dyn_cast(Loop)) + CS = dyn_cast(FS->getBody()); + else if (const WhileStmt *WS = dyn_cast(Loop)) + CS = dyn_cast(WS->getBody()); + } + + PathDiagnosticEventPiece *p = + new PathDiagnosticEventPiece(L, + "Looping back to the head of the loop"); + p->setPrunable(true); + + EB.addEdge(p->getLocation(), true); + PD.getActivePath().push_front(p); + + if (CS) { + PathDiagnosticLocation BL = + PathDiagnosticLocation::createEndBrace(CS, SM); + EB.addEdge(BL); + } + } + + if (Term) + EB.addContext(Term); + + break; + } + + if (const BlockEntrance *BE = dyn_cast(&P)) { + if (const CFGStmt *S = BE->getFirstElement().getAs()) { + const Stmt *stmt = S->getStmt(); + if (IsControlFlowExpr(stmt)) { + // Add the proper context for '&&', '||', and '?'. + EB.addContext(stmt); + } + else + EB.addExtendedContext(PDB.getEnclosingStmtLocation(stmt).asStmt()); + } + + break; + } + + + } while (0); + + if (!NextNode) + continue; + + // Add pieces from custom visitors. + BugReport *R = PDB.getBugReport(); + for (ArrayRef::iterator I = visitors.begin(), + E = visitors.end(); + I != E; ++I) { + if (PathDiagnosticPiece *p = (*I)->VisitNode(N, NextNode, PDB, *R)) { + const PathDiagnosticLocation &Loc = p->getLocation(); + EB.addEdge(Loc, true); + PD.getActivePath().push_front(p); + updateStackPiecesWithMessage(p, CallStack); + + if (const Stmt *S = Loc.asStmt()) + EB.addExtendedContext(PDB.getEnclosingStmtLocation(S).asStmt()); + } + } + } +} + +//===----------------------------------------------------------------------===// +// Methods for BugType and subclasses. +//===----------------------------------------------------------------------===// +BugType::~BugType() { } + +void BugType::FlushReports(BugReporter &BR) {} + +void BuiltinBug::anchor() {} + +//===----------------------------------------------------------------------===// +// Methods for BugReport and subclasses. +//===----------------------------------------------------------------------===// + +void BugReport::NodeResolver::anchor() {} + +void BugReport::addVisitor(BugReporterVisitor* visitor) { + if (!visitor) + return; + + llvm::FoldingSetNodeID ID; + visitor->Profile(ID); + void *InsertPos; + + if (CallbacksSet.FindNodeOrInsertPos(ID, InsertPos)) { + delete visitor; + return; + } + + CallbacksSet.InsertNode(visitor, InsertPos); + Callbacks.push_back(visitor); + ++ConfigurationChangeToken; +} + +BugReport::~BugReport() { + for (visitor_iterator I = visitor_begin(), E = visitor_end(); I != E; ++I) { + delete *I; + } +} + +const Decl *BugReport::getDeclWithIssue() const { + if (DeclWithIssue) + return DeclWithIssue; + + const ExplodedNode *N = getErrorNode(); + if (!N) + return 0; + + const LocationContext *LC = N->getLocationContext(); + return LC->getCurrentStackFrame()->getDecl(); +} + +void BugReport::Profile(llvm::FoldingSetNodeID& hash) const { + hash.AddPointer(&BT); + hash.AddString(Description); + if (UniqueingLocation.isValid()) { + UniqueingLocation.Profile(hash); + } else if (Location.isValid()) { + Location.Profile(hash); + } else { + assert(ErrorNode); + hash.AddPointer(GetCurrentOrPreviousStmt(ErrorNode)); + } + + for (SmallVectorImpl::const_iterator I = + Ranges.begin(), E = Ranges.end(); I != E; ++I) { + const SourceRange range = *I; + if (!range.isValid()) + continue; + hash.AddInteger(range.getBegin().getRawEncoding()); + hash.AddInteger(range.getEnd().getRawEncoding()); + } +} + +void BugReport::markInteresting(SymbolRef sym) { + if (!sym) + return; + + // If the symbol wasn't already in our set, note a configuration change. + if (interestingSymbols.insert(sym).second) + ++ConfigurationChangeToken; + + if (const SymbolMetadata *meta = dyn_cast(sym)) + interestingRegions.insert(meta->getRegion()); +} + +void BugReport::markInteresting(const MemRegion *R) { + if (!R) + return; + + // If the base region wasn't already in our set, note a configuration change. + R = R->getBaseRegion(); + if (interestingRegions.insert(R).second) + ++ConfigurationChangeToken; + + if (const SymbolicRegion *SR = dyn_cast(R)) + interestingSymbols.insert(SR->getSymbol()); +} + +void BugReport::markInteresting(SVal V) { + markInteresting(V.getAsRegion()); + markInteresting(V.getAsSymbol()); +} + +bool BugReport::isInteresting(SVal V) const { + return isInteresting(V.getAsRegion()) || isInteresting(V.getAsSymbol()); +} + +bool BugReport::isInteresting(SymbolRef sym) const { + if (!sym) + return false; + // We don't currently consider metadata symbols to be interesting + // even if we know their region is interesting. Is that correct behavior? + return interestingSymbols.count(sym); +} + +bool BugReport::isInteresting(const MemRegion *R) const { + if (!R) + return false; + R = R->getBaseRegion(); + bool b = interestingRegions.count(R); + if (b) + return true; + if (const SymbolicRegion *SR = dyn_cast(R)) + return interestingSymbols.count(SR->getSymbol()); + return false; +} + + +const Stmt *BugReport::getStmt() const { + if (!ErrorNode) + return 0; + + ProgramPoint ProgP = ErrorNode->getLocation(); + const Stmt *S = NULL; + + if (BlockEntrance *BE = dyn_cast(&ProgP)) { + CFGBlock &Exit = ProgP.getLocationContext()->getCFG()->getExit(); + if (BE->getBlock() == &Exit) + S = GetPreviousStmt(ErrorNode); + } + if (!S) + S = GetStmt(ProgP); + + return S; +} + +std::pair +BugReport::getRanges() { + // If no custom ranges, add the range of the statement corresponding to + // the error node. + if (Ranges.empty()) { + if (const Expr *E = dyn_cast_or_null(getStmt())) + addRange(E->getSourceRange()); + else + return std::make_pair(ranges_iterator(), ranges_iterator()); + } + + // User-specified absence of range info. + if (Ranges.size() == 1 && !Ranges.begin()->isValid()) + return std::make_pair(ranges_iterator(), ranges_iterator()); + + return std::make_pair(Ranges.begin(), Ranges.end()); +} + +PathDiagnosticLocation BugReport::getLocation(const SourceManager &SM) const { + if (ErrorNode) { + assert(!Location.isValid() && + "Either Location or ErrorNode should be specified but not both."); + + if (const Stmt *S = GetCurrentOrPreviousStmt(ErrorNode)) { + const LocationContext *LC = ErrorNode->getLocationContext(); + + // For member expressions, return the location of the '.' or '->'. + if (const MemberExpr *ME = dyn_cast(S)) + return PathDiagnosticLocation::createMemberLoc(ME, SM); + // For binary operators, return the location of the operator. + if (const BinaryOperator *B = dyn_cast(S)) + return PathDiagnosticLocation::createOperatorLoc(B, SM); + + return PathDiagnosticLocation::createBegin(S, SM, LC); + } + } else { + assert(Location.isValid()); + return Location; + } + + return PathDiagnosticLocation(); +} + +//===----------------------------------------------------------------------===// +// Methods for BugReporter and subclasses. +//===----------------------------------------------------------------------===// + +BugReportEquivClass::~BugReportEquivClass() { } +GRBugReporter::~GRBugReporter() { } +BugReporterData::~BugReporterData() {} + +ExplodedGraph &GRBugReporter::getGraph() { return Eng.getGraph(); } + +ProgramStateManager& +GRBugReporter::getStateManager() { return Eng.getStateManager(); } + +BugReporter::~BugReporter() { + FlushReports(); + + // Free the bug reports we are tracking. + typedef std::vector ContTy; + for (ContTy::iterator I = EQClassesVector.begin(), E = EQClassesVector.end(); + I != E; ++I) { + delete *I; + } +} + +void BugReporter::FlushReports() { + if (BugTypes.isEmpty()) + return; + + // First flush the warnings for each BugType. This may end up creating new + // warnings and new BugTypes. + // FIXME: Only NSErrorChecker needs BugType's FlushReports. + // Turn NSErrorChecker into a proper checker and remove this. + SmallVector bugTypes; + for (BugTypesTy::iterator I=BugTypes.begin(), E=BugTypes.end(); I!=E; ++I) + bugTypes.push_back(*I); + for (SmallVector::iterator + I = bugTypes.begin(), E = bugTypes.end(); I != E; ++I) + const_cast(*I)->FlushReports(*this); + + typedef llvm::FoldingSet SetTy; + for (SetTy::iterator EI=EQClasses.begin(), EE=EQClasses.end(); EI!=EE;++EI){ + BugReportEquivClass& EQ = *EI; + FlushReport(EQ); + } + + // BugReporter owns and deletes only BugTypes created implicitly through + // EmitBasicReport. + // FIXME: There are leaks from checkers that assume that the BugTypes they + // create will be destroyed by the BugReporter. + for (llvm::StringMap::iterator + I = StrBugTypes.begin(), E = StrBugTypes.end(); I != E; ++I) + delete I->second; + + // Remove all references to the BugType objects. + BugTypes = F.getEmptySet(); +} + +//===----------------------------------------------------------------------===// +// PathDiagnostics generation. +//===----------------------------------------------------------------------===// + +static std::pair, + std::pair > +MakeReportGraph(const ExplodedGraph* G, + SmallVectorImpl &nodes) { + + // Create the trimmed graph. It will contain the shortest paths from the + // error nodes to the root. In the new graph we should only have one + // error node unless there are two or more error nodes with the same minimum + // path length. + ExplodedGraph* GTrim; + InterExplodedGraphMap* NMap; + + llvm::DenseMap InverseMap; + llvm::tie(GTrim, NMap) = G->Trim(nodes.data(), nodes.data() + nodes.size(), + &InverseMap); + + // Create owning pointers for GTrim and NMap just to ensure that they are + // released when this function exists. + OwningPtr AutoReleaseGTrim(GTrim); + OwningPtr AutoReleaseNMap(NMap); + + // Find the (first) error node in the trimmed graph. We just need to consult + // the node map (NMap) which maps from nodes in the original graph to nodes + // in the new graph. + + std::queue WS; + typedef llvm::DenseMap IndexMapTy; + IndexMapTy IndexMap; + + for (unsigned nodeIndex = 0 ; nodeIndex < nodes.size(); ++nodeIndex) { + const ExplodedNode *originalNode = nodes[nodeIndex]; + if (const ExplodedNode *N = NMap->getMappedNode(originalNode)) { + WS.push(N); + IndexMap[originalNode] = nodeIndex; + } + } + + assert(!WS.empty() && "No error node found in the trimmed graph."); + + // Create a new (third!) graph with a single path. This is the graph + // that will be returned to the caller. + ExplodedGraph *GNew = new ExplodedGraph(); + + // Sometimes the trimmed graph can contain a cycle. Perform a reverse BFS + // to the root node, and then construct a new graph that contains only + // a single path. + llvm::DenseMap Visited; + + unsigned cnt = 0; + const ExplodedNode *Root = 0; + + while (!WS.empty()) { + const ExplodedNode *Node = WS.front(); + WS.pop(); + + if (Visited.find(Node) != Visited.end()) + continue; + + Visited[Node] = cnt++; + + if (Node->pred_empty()) { + Root = Node; + break; + } + + for (ExplodedNode::const_pred_iterator I=Node->pred_begin(), + E=Node->pred_end(); I!=E; ++I) + WS.push(*I); + } + + assert(Root); + + // Now walk from the root down the BFS path, always taking the successor + // with the lowest number. + ExplodedNode *Last = 0, *First = 0; + NodeBackMap *BM = new NodeBackMap(); + unsigned NodeIndex = 0; + + for ( const ExplodedNode *N = Root ;;) { + // Lookup the number associated with the current node. + llvm::DenseMap::iterator I = Visited.find(N); + assert(I != Visited.end()); + + // Create the equivalent node in the new graph with the same state + // and location. + ExplodedNode *NewN = GNew->getNode(N->getLocation(), N->getState()); + + // Store the mapping to the original node. + llvm::DenseMap::iterator IMitr=InverseMap.find(N); + assert(IMitr != InverseMap.end() && "No mapping to original node."); + (*BM)[NewN] = (const ExplodedNode*) IMitr->second; + + // Link up the new node with the previous node. + if (Last) + NewN->addPredecessor(Last, *GNew); + + Last = NewN; + + // Are we at the final node? + IndexMapTy::iterator IMI = + IndexMap.find((const ExplodedNode*)(IMitr->second)); + if (IMI != IndexMap.end()) { + First = NewN; + NodeIndex = IMI->second; + break; + } + + // Find the next successor node. We choose the node that is marked + // with the lowest DFS number. + ExplodedNode::const_succ_iterator SI = N->succ_begin(); + ExplodedNode::const_succ_iterator SE = N->succ_end(); + N = 0; + + for (unsigned MinVal = 0; SI != SE; ++SI) { + + I = Visited.find(*SI); + + if (I == Visited.end()) + continue; + + if (!N || I->second < MinVal) { + N = *SI; + MinVal = I->second; + } + } + + assert(N); + } + + assert(First); + + return std::make_pair(std::make_pair(GNew, BM), + std::make_pair(First, NodeIndex)); +} + +/// CompactPathDiagnostic - This function postprocesses a PathDiagnostic object +/// and collapses PathDiagosticPieces that are expanded by macros. +static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM) { + typedef std::vector, + SourceLocation> > MacroStackTy; + + typedef std::vector > + PiecesTy; + + MacroStackTy MacroStack; + PiecesTy Pieces; + + for (PathPieces::const_iterator I = path.begin(), E = path.end(); + I!=E; ++I) { + + PathDiagnosticPiece *piece = I->getPtr(); + + // Recursively compact calls. + if (PathDiagnosticCallPiece *call=dyn_cast(piece)){ + CompactPathDiagnostic(call->path, SM); + } + + // Get the location of the PathDiagnosticPiece. + const FullSourceLoc Loc = piece->getLocation().asLocation(); + + // Determine the instantiation location, which is the location we group + // related PathDiagnosticPieces. + SourceLocation InstantiationLoc = Loc.isMacroID() ? + SM.getExpansionLoc(Loc) : + SourceLocation(); + + if (Loc.isFileID()) { + MacroStack.clear(); + Pieces.push_back(piece); + continue; + } + + assert(Loc.isMacroID()); + + // Is the PathDiagnosticPiece within the same macro group? + if (!MacroStack.empty() && InstantiationLoc == MacroStack.back().second) { + MacroStack.back().first->subPieces.push_back(piece); + continue; + } + + // We aren't in the same group. Are we descending into a new macro + // or are part of an old one? + IntrusiveRefCntPtr MacroGroup; + + SourceLocation ParentInstantiationLoc = InstantiationLoc.isMacroID() ? + SM.getExpansionLoc(Loc) : + SourceLocation(); + + // Walk the entire macro stack. + while (!MacroStack.empty()) { + if (InstantiationLoc == MacroStack.back().second) { + MacroGroup = MacroStack.back().first; + break; + } + + if (ParentInstantiationLoc == MacroStack.back().second) { + MacroGroup = MacroStack.back().first; + break; + } + + MacroStack.pop_back(); + } + + if (!MacroGroup || ParentInstantiationLoc == MacroStack.back().second) { + // Create a new macro group and add it to the stack. + PathDiagnosticMacroPiece *NewGroup = + new PathDiagnosticMacroPiece( + PathDiagnosticLocation::createSingleLocation(piece->getLocation())); + + if (MacroGroup) + MacroGroup->subPieces.push_back(NewGroup); + else { + assert(InstantiationLoc.isFileID()); + Pieces.push_back(NewGroup); + } + + MacroGroup = NewGroup; + MacroStack.push_back(std::make_pair(MacroGroup, InstantiationLoc)); + } + + // Finally, add the PathDiagnosticPiece to the group. + MacroGroup->subPieces.push_back(piece); + } + + // Now take the pieces and construct a new PathDiagnostic. + path.clear(); + + for (PiecesTy::iterator I=Pieces.begin(), E=Pieces.end(); I!=E; ++I) + path.push_back(*I); +} + +void GRBugReporter::GeneratePathDiagnostic(PathDiagnostic& PD, + SmallVectorImpl &bugReports) { + + assert(!bugReports.empty()); + SmallVector errorNodes; + for (SmallVectorImpl::iterator I = bugReports.begin(), + E = bugReports.end(); I != E; ++I) { + errorNodes.push_back((*I)->getErrorNode()); + } + + // Construct a new graph that contains only a single path from the error + // node to a root. + const std::pair, + std::pair >& + GPair = MakeReportGraph(&getGraph(), errorNodes); + + // Find the BugReport with the original location. + assert(GPair.second.second < bugReports.size()); + BugReport *R = bugReports[GPair.second.second]; + assert(R && "No original report found for sliced graph."); + + OwningPtr ReportGraph(GPair.first.first); + OwningPtr BackMap(GPair.first.second); + const ExplodedNode *N = GPair.second.first; + + // Start building the path diagnostic... + PathDiagnosticBuilder PDB(*this, R, BackMap.get(), + getPathDiagnosticConsumer()); + + // Register additional node visitors. + R->addVisitor(new NilReceiverBRVisitor()); + R->addVisitor(new ConditionBRVisitor()); + + BugReport::VisitorList visitors; + unsigned originalReportConfigToken, finalReportConfigToken; + + // While generating diagnostics, it's possible the visitors will decide + // new symbols and regions are interesting, or add other visitors based on + // the information they find. If they do, we need to regenerate the path + // based on our new report configuration. + do { + // Get a clean copy of all the visitors. + for (BugReport::visitor_iterator I = R->visitor_begin(), + E = R->visitor_end(); I != E; ++I) + visitors.push_back((*I)->clone()); + + // Clear out the active path from any previous work. + PD.getActivePath().clear(); + originalReportConfigToken = R->getConfigurationChangeToken(); + + // Generate the very last diagnostic piece - the piece is visible before + // the trace is expanded. + PathDiagnosticPiece *LastPiece = 0; + for (BugReport::visitor_iterator I = visitors.begin(), E = visitors.end(); + I != E; ++I) { + if (PathDiagnosticPiece *Piece = (*I)->getEndPath(PDB, N, *R)) { + assert (!LastPiece && + "There can only be one final piece in a diagnostic."); + LastPiece = Piece; + } + } + if (!LastPiece) + LastPiece = BugReporterVisitor::getDefaultEndPath(PDB, N, *R); + if (LastPiece) + PD.getActivePath().push_back(LastPiece); + else + return; + + switch (PDB.getGenerationScheme()) { + case PathDiagnosticConsumer::Extensive: + GenerateExtensivePathDiagnostic(PD, PDB, N, visitors); + break; + case PathDiagnosticConsumer::Minimal: + GenerateMinimalPathDiagnostic(PD, PDB, N, visitors); + break; + } + + // Clean up the visitors we used. + llvm::DeleteContainerPointers(visitors); + + // Did anything change while generating this path? + finalReportConfigToken = R->getConfigurationChangeToken(); + } while(finalReportConfigToken != originalReportConfigToken); + + // Finally, prune the diagnostic path of uninteresting stuff. + bool hasSomethingInteresting = RemoveUneededCalls(PD.getMutablePieces()); + assert(hasSomethingInteresting); + (void) hasSomethingInteresting; +} + +void BugReporter::Register(BugType *BT) { + BugTypes = F.add(BugTypes, BT); +} + +void BugReporter::EmitReport(BugReport* R) { + // Compute the bug report's hash to determine its equivalence class. + llvm::FoldingSetNodeID ID; + R->Profile(ID); + + // Lookup the equivance class. If there isn't one, create it. + BugType& BT = R->getBugType(); + Register(&BT); + void *InsertPos; + BugReportEquivClass* EQ = EQClasses.FindNodeOrInsertPos(ID, InsertPos); + + if (!EQ) { + EQ = new BugReportEquivClass(R); + EQClasses.InsertNode(EQ, InsertPos); + EQClassesVector.push_back(EQ); + } + else + EQ->AddReport(R); +} + + +//===----------------------------------------------------------------------===// +// Emitting reports in equivalence classes. +//===----------------------------------------------------------------------===// + +namespace { +struct FRIEC_WLItem { + const ExplodedNode *N; + ExplodedNode::const_succ_iterator I, E; + + FRIEC_WLItem(const ExplodedNode *n) + : N(n), I(N->succ_begin()), E(N->succ_end()) {} +}; +} + +static BugReport * +FindReportInEquivalenceClass(BugReportEquivClass& EQ, + SmallVectorImpl &bugReports) { + + BugReportEquivClass::iterator I = EQ.begin(), E = EQ.end(); + assert(I != E); + BugType& BT = I->getBugType(); + + // If we don't need to suppress any of the nodes because they are + // post-dominated by a sink, simply add all the nodes in the equivalence class + // to 'Nodes'. Any of the reports will serve as a "representative" report. + if (!BT.isSuppressOnSink()) { + BugReport *R = I; + for (BugReportEquivClass::iterator I=EQ.begin(), E=EQ.end(); I!=E; ++I) { + const ExplodedNode *N = I->getErrorNode(); + if (N) { + R = I; + bugReports.push_back(R); + } + } + return R; + } + + // For bug reports that should be suppressed when all paths are post-dominated + // by a sink node, iterate through the reports in the equivalence class + // until we find one that isn't post-dominated (if one exists). We use a + // DFS traversal of the ExplodedGraph to find a non-sink node. We could write + // this as a recursive function, but we don't want to risk blowing out the + // stack for very long paths. + BugReport *exampleReport = 0; + + for (; I != E; ++I) { + const ExplodedNode *errorNode = I->getErrorNode(); + + if (!errorNode) + continue; + if (errorNode->isSink()) { + llvm_unreachable( + "BugType::isSuppressSink() should not be 'true' for sink end nodes"); + } + // No successors? By definition this nodes isn't post-dominated by a sink. + if (errorNode->succ_empty()) { + bugReports.push_back(I); + if (!exampleReport) + exampleReport = I; + continue; + } + + // At this point we know that 'N' is not a sink and it has at least one + // successor. Use a DFS worklist to find a non-sink end-of-path node. + typedef FRIEC_WLItem WLItem; + typedef SmallVector DFSWorkList; + llvm::DenseMap Visited; + + DFSWorkList WL; + WL.push_back(errorNode); + Visited[errorNode] = 1; + + while (!WL.empty()) { + WLItem &WI = WL.back(); + assert(!WI.N->succ_empty()); + + for (; WI.I != WI.E; ++WI.I) { + const ExplodedNode *Succ = *WI.I; + // End-of-path node? + if (Succ->succ_empty()) { + // If we found an end-of-path node that is not a sink. + if (!Succ->isSink()) { + bugReports.push_back(I); + if (!exampleReport) + exampleReport = I; + WL.clear(); + break; + } + // Found a sink? Continue on to the next successor. + continue; + } + // Mark the successor as visited. If it hasn't been explored, + // enqueue it to the DFS worklist. + unsigned &mark = Visited[Succ]; + if (!mark) { + mark = 1; + WL.push_back(Succ); + break; + } + } + + // The worklist may have been cleared at this point. First + // check if it is empty before checking the last item. + if (!WL.empty() && &WL.back() == &WI) + WL.pop_back(); + } + } + + // ExampleReport will be NULL if all the nodes in the equivalence class + // were post-dominated by sinks. + return exampleReport; +} + +//===----------------------------------------------------------------------===// +// DiagnosticCache. This is a hack to cache analyzer diagnostics. It +// uses global state, which eventually should go elsewhere. +//===----------------------------------------------------------------------===// +namespace { +class DiagCacheItem : public llvm::FoldingSetNode { + llvm::FoldingSetNodeID ID; +public: + DiagCacheItem(BugReport *R, PathDiagnostic *PD) { + R->Profile(ID); + PD->Profile(ID); + } + + void Profile(llvm::FoldingSetNodeID &id) { + id = ID; + } + + llvm::FoldingSetNodeID &getID() { return ID; } +}; +} + +static bool IsCachedDiagnostic(BugReport *R, PathDiagnostic *PD) { + // FIXME: Eventually this diagnostic cache should reside in something + // like AnalysisManager instead of being a static variable. This is + // really unsafe in the long term. + typedef llvm::FoldingSet DiagnosticCache; + static DiagnosticCache DC; + + void *InsertPos; + DiagCacheItem *Item = new DiagCacheItem(R, PD); + + if (DC.FindNodeOrInsertPos(Item->getID(), InsertPos)) { + delete Item; + return true; + } + + DC.InsertNode(Item, InsertPos); + return false; +} + +void BugReporter::FlushReport(BugReportEquivClass& EQ) { + SmallVector bugReports; + BugReport *exampleReport = FindReportInEquivalenceClass(EQ, bugReports); + if (!exampleReport) + return; + + PathDiagnosticConsumer* PD = getPathDiagnosticConsumer(); + + // FIXME: Make sure we use the 'R' for the path that was actually used. + // Probably doesn't make a difference in practice. + BugType& BT = exampleReport->getBugType(); + + OwningPtr + D(new PathDiagnostic(exampleReport->getDeclWithIssue(), + exampleReport->getBugType().getName(), + !PD || PD->useVerboseDescription() + ? exampleReport->getDescription() + : exampleReport->getShortDescription(), + BT.getCategory())); + + if (!bugReports.empty()) + GeneratePathDiagnostic(*D.get(), bugReports); + + // Get the meta data. + const BugReport::ExtraTextList &Meta = + exampleReport->getExtraText(); + for (BugReport::ExtraTextList::const_iterator i = Meta.begin(), + e = Meta.end(); i != e; ++i) { + D->addMeta(*i); + } + + // Emit a summary diagnostic to the regular Diagnostics engine. + BugReport::ranges_iterator Beg, End; + llvm::tie(Beg, End) = exampleReport->getRanges(); + DiagnosticsEngine &Diag = getDiagnostic(); + + if (!IsCachedDiagnostic(exampleReport, D.get())) { + // Search the description for '%', as that will be interpretted as a + // format character by FormatDiagnostics. + StringRef desc = exampleReport->getShortDescription(); + + SmallString<512> TmpStr; + llvm::raw_svector_ostream Out(TmpStr); + for (StringRef::iterator I=desc.begin(), E=desc.end(); I!=E; ++I) { + if (*I == '%') + Out << "%%"; + else + Out << *I; + } + + Out.flush(); + unsigned ErrorDiag = Diag.getCustomDiagID(DiagnosticsEngine::Warning, TmpStr); + + DiagnosticBuilder diagBuilder = Diag.Report( + exampleReport->getLocation(getSourceManager()).asLocation(), ErrorDiag); + for (BugReport::ranges_iterator I = Beg; I != End; ++I) + diagBuilder << *I; + } + + // Emit a full diagnostic for the path if we have a PathDiagnosticConsumer. + if (!PD) + return; + + if (D->path.empty()) { + PathDiagnosticPiece *piece = new PathDiagnosticEventPiece( + exampleReport->getLocation(getSourceManager()), + exampleReport->getDescription()); + for ( ; Beg != End; ++Beg) + piece->addRange(*Beg); + + D->getActivePath().push_back(piece); + } + + PD->HandlePathDiagnostic(D.take()); +} + +void BugReporter::EmitBasicReport(const Decl *DeclWithIssue, + StringRef name, + StringRef category, + StringRef str, PathDiagnosticLocation Loc, + SourceRange* RBeg, unsigned NumRanges) { + + // 'BT' is owned by BugReporter. + BugType *BT = getBugTypeForName(name, category); + BugReport *R = new BugReport(*BT, str, Loc); + R->setDeclWithIssue(DeclWithIssue); + for ( ; NumRanges > 0 ; --NumRanges, ++RBeg) R->addRange(*RBeg); + EmitReport(R); +} + +BugType *BugReporter::getBugTypeForName(StringRef name, + StringRef category) { + SmallString<136> fullDesc; + llvm::raw_svector_ostream(fullDesc) << name << ":" << category; + llvm::StringMapEntry & + entry = StrBugTypes.GetOrCreateValue(fullDesc); + BugType *BT = entry.getValue(); + if (!BT) { + BT = new BugType(name, category); + entry.setValue(BT); + } + return BT; +} diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp new file mode 100644 index 0000000..6532486 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -0,0 +1,784 @@ +// BugReporterVisitors.cpp - Helpers for reporting bugs -----------*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a set of BugReporter "visitors" which can be used to +// enhance the diagnostics reported for a bug. +// +//===----------------------------------------------------------------------===// +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h" + +#include "clang/AST/Expr.h" +#include "clang/AST/ExprObjC.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +const Stmt *bugreporter::GetDerefExpr(const ExplodedNode *N) { + // Pattern match for a few useful cases (do something smarter later): + // a[0], p->f, *p + const Stmt *S = N->getLocationAs()->getStmt(); + + if (const UnaryOperator *U = dyn_cast(S)) { + if (U->getOpcode() == UO_Deref) + return U->getSubExpr()->IgnoreParenCasts(); + } + else if (const MemberExpr *ME = dyn_cast(S)) { + return ME->getBase()->IgnoreParenCasts(); + } + else if (const ArraySubscriptExpr *AE = dyn_cast(S)) { + return AE->getBase(); + } + + return NULL; +} + +const Stmt *bugreporter::GetDenomExpr(const ExplodedNode *N) { + const Stmt *S = N->getLocationAs()->getStmt(); + if (const BinaryOperator *BE = dyn_cast(S)) + return BE->getRHS(); + return NULL; +} + +const Stmt *bugreporter::GetCalleeExpr(const ExplodedNode *N) { + // Callee is checked as a PreVisit to the CallExpr. + const Stmt *S = N->getLocationAs()->getStmt(); + if (const CallExpr *CE = dyn_cast(S)) + return CE->getCallee(); + return NULL; +} + +const Stmt *bugreporter::GetRetValExpr(const ExplodedNode *N) { + const Stmt *S = N->getLocationAs()->getStmt(); + if (const ReturnStmt *RS = dyn_cast(S)) + return RS->getRetValue(); + return NULL; +} + +//===----------------------------------------------------------------------===// +// Definitions for bug reporter visitors. +//===----------------------------------------------------------------------===// + +PathDiagnosticPiece* +BugReporterVisitor::getEndPath(BugReporterContext &BRC, + const ExplodedNode *EndPathNode, + BugReport &BR) { + return 0; +} + +PathDiagnosticPiece* +BugReporterVisitor::getDefaultEndPath(BugReporterContext &BRC, + const ExplodedNode *EndPathNode, + BugReport &BR) { + PathDiagnosticLocation L = + PathDiagnosticLocation::createEndOfPath(EndPathNode,BRC.getSourceManager()); + + BugReport::ranges_iterator Beg, End; + llvm::tie(Beg, End) = BR.getRanges(); + + // Only add the statement itself as a range if we didn't specify any + // special ranges for this report. + PathDiagnosticPiece *P = new PathDiagnosticEventPiece(L, + BR.getDescription(), + Beg == End); + for (; Beg != End; ++Beg) + P->addRange(*Beg); + + return P; +} + + +void FindLastStoreBRVisitor ::Profile(llvm::FoldingSetNodeID &ID) const { + static int tag = 0; + ID.AddPointer(&tag); + ID.AddPointer(R); + ID.Add(V); +} + +PathDiagnosticPiece *FindLastStoreBRVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) { + + if (satisfied) + return NULL; + + if (!StoreSite) { + const ExplodedNode *Node = N, *Last = NULL; + + for ( ; Node ; Node = Node->getFirstPred()) { + + if (const VarRegion *VR = dyn_cast(R)) { + if (const PostStmt *P = Node->getLocationAs()) + if (const DeclStmt *DS = P->getStmtAs()) + if (DS->getSingleDecl() == VR->getDecl()) { + // Record the last seen initialization point. + Last = Node; + break; + } + } + + // Does the region still bind to value V? If not, we are done + // looking for store sites. + if (Node->getState()->getSVal(R) != V) + break; + } + + if (!Node || !Last) { + satisfied = true; + return NULL; + } + + StoreSite = Last; + } + + if (StoreSite != N) + return NULL; + + satisfied = true; + SmallString<256> sbuf; + llvm::raw_svector_ostream os(sbuf); + + if (const PostStmt *PS = N->getLocationAs()) { + if (const DeclStmt *DS = PS->getStmtAs()) { + + if (const VarRegion *VR = dyn_cast(R)) { + os << "Variable '" << *VR->getDecl() << "' "; + } + else + return NULL; + + if (isa(V)) { + bool b = false; + if (R->isBoundable()) { + if (const TypedValueRegion *TR = dyn_cast(R)) { + if (TR->getValueType()->isObjCObjectPointerType()) { + os << "initialized to nil"; + b = true; + } + } + } + + if (!b) + os << "initialized to a null pointer value"; + } + else if (isa(V)) { + os << "initialized to " << cast(V).getValue(); + } + else if (V.isUndef()) { + if (isa(R)) { + const VarDecl *VD = cast(DS->getSingleDecl()); + if (VD->getInit()) + os << "initialized to a garbage value"; + else + os << "declared without an initial value"; + } + } + } + } + + if (os.str().empty()) { + if (isa(V)) { + bool b = false; + if (R->isBoundable()) { + if (const TypedValueRegion *TR = dyn_cast(R)) { + if (TR->getValueType()->isObjCObjectPointerType()) { + os << "nil object reference stored to "; + b = true; + } + } + } + + if (!b) + os << "Null pointer value stored to "; + } + else if (V.isUndef()) { + os << "Uninitialized value stored to "; + } + else if (isa(V)) { + os << "The value " << cast(V).getValue() + << " is assigned to "; + } + else + return NULL; + + if (const VarRegion *VR = dyn_cast(R)) { + os << '\'' << *VR->getDecl() << '\''; + } + else + return NULL; + } + + // Construct a new PathDiagnosticPiece. + ProgramPoint P = N->getLocation(); + PathDiagnosticLocation L = + PathDiagnosticLocation::create(P, BRC.getSourceManager()); + if (!L.isValid()) + return NULL; + return new PathDiagnosticEventPiece(L, os.str()); +} + +void TrackConstraintBRVisitor::Profile(llvm::FoldingSetNodeID &ID) const { + static int tag = 0; + ID.AddPointer(&tag); + ID.AddBoolean(Assumption); + ID.Add(Constraint); +} + +PathDiagnosticPiece * +TrackConstraintBRVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) { + if (isSatisfied) + return NULL; + + // Check if in the previous state it was feasible for this constraint + // to *not* be true. + if (PrevN->getState()->assume(Constraint, !Assumption)) { + + isSatisfied = true; + + // As a sanity check, make sure that the negation of the constraint + // was infeasible in the current state. If it is feasible, we somehow + // missed the transition point. + if (N->getState()->assume(Constraint, !Assumption)) + return NULL; + + // We found the transition point for the constraint. We now need to + // pretty-print the constraint. (work-in-progress) + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + if (isa(Constraint)) { + os << "Assuming pointer value is "; + os << (Assumption ? "non-null" : "null"); + } + + if (os.str().empty()) + return NULL; + + // Construct a new PathDiagnosticPiece. + ProgramPoint P = N->getLocation(); + PathDiagnosticLocation L = + PathDiagnosticLocation::create(P, BRC.getSourceManager()); + if (!L.isValid()) + return NULL; + return new PathDiagnosticEventPiece(L, os.str()); + } + + return NULL; +} + +BugReporterVisitor * +bugreporter::getTrackNullOrUndefValueVisitor(const ExplodedNode *N, + const Stmt *S, + BugReport *report) { + if (!S || !N) + return 0; + + ProgramStateManager &StateMgr = N->getState()->getStateManager(); + + // Walk through nodes until we get one that matches the statement + // exactly. + while (N) { + const ProgramPoint &pp = N->getLocation(); + if (const PostStmt *ps = dyn_cast(&pp)) { + if (ps->getStmt() == S) + break; + } + N = N->getFirstPred(); + } + + if (!N) + return 0; + + ProgramStateRef state = N->getState(); + + // Walk through lvalue-to-rvalue conversions. + const Expr *Ex = dyn_cast(S); + if (Ex) { + Ex = Ex->IgnoreParenLValueCasts(); + if (const DeclRefExpr *DR = dyn_cast(Ex)) { + if (const VarDecl *VD = dyn_cast(DR->getDecl())) { + const VarRegion *R = + StateMgr.getRegionManager().getVarRegion(VD, N->getLocationContext()); + + // What did we load? + SVal V = state->getSVal(loc::MemRegionVal(R)); + report->markInteresting(R); + report->markInteresting(V); + return new FindLastStoreBRVisitor(V, R); + } + } + } + + SVal V = state->getSValAsScalarOrLoc(S, N->getLocationContext()); + + // Uncomment this to find cases where we aren't properly getting the + // base value that was dereferenced. + // assert(!V.isUnknownOrUndef()); + + // Is it a symbolic value? + if (loc::MemRegionVal *L = dyn_cast(&V)) { + const SubRegion *R = cast(L->getRegion()); + while (R && !isa(R)) { + R = dyn_cast(R->getSuperRegion()); + } + + if (R) { + report->markInteresting(R); + return new TrackConstraintBRVisitor(loc::MemRegionVal(R), false); + } + } + + return 0; +} + +BugReporterVisitor * +FindLastStoreBRVisitor::createVisitorObject(const ExplodedNode *N, + const MemRegion *R) { + assert(R && "The memory region is null."); + + ProgramStateRef state = N->getState(); + SVal V = state->getSVal(R); + if (V.isUnknown()) + return 0; + + return new FindLastStoreBRVisitor(V, R); +} + + +PathDiagnosticPiece *NilReceiverBRVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) { + const PostStmt *P = N->getLocationAs(); + if (!P) + return 0; + const ObjCMessageExpr *ME = P->getStmtAs(); + if (!ME) + return 0; + const Expr *Receiver = ME->getInstanceReceiver(); + if (!Receiver) + return 0; + ProgramStateRef state = N->getState(); + const SVal &V = state->getSVal(Receiver, N->getLocationContext()); + const DefinedOrUnknownSVal *DV = dyn_cast(&V); + if (!DV) + return 0; + state = state->assume(*DV, true); + if (state) + return 0; + + // The receiver was nil, and hence the method was skipped. + // Register a BugReporterVisitor to issue a message telling us how + // the receiver was null. + BR.addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, Receiver, &BR)); + // Issue a message saying that the method was skipped. + PathDiagnosticLocation L(Receiver, BRC.getSourceManager(), + N->getLocationContext()); + return new PathDiagnosticEventPiece(L, "No method is called " + "because the receiver is nil"); +} + +// Registers every VarDecl inside a Stmt with a last store visitor. +void FindLastStoreBRVisitor::registerStatementVarDecls(BugReport &BR, + const Stmt *S) { + const ExplodedNode *N = BR.getErrorNode(); + std::deque WorkList; + WorkList.push_back(S); + + while (!WorkList.empty()) { + const Stmt *Head = WorkList.front(); + WorkList.pop_front(); + + ProgramStateRef state = N->getState(); + ProgramStateManager &StateMgr = state->getStateManager(); + + if (const DeclRefExpr *DR = dyn_cast(Head)) { + if (const VarDecl *VD = dyn_cast(DR->getDecl())) { + const VarRegion *R = + StateMgr.getRegionManager().getVarRegion(VD, N->getLocationContext()); + + // What did we load? + SVal V = state->getSVal(S, N->getLocationContext()); + + if (isa(V) || isa(V)) { + // Register a new visitor with the BugReport. + BR.addVisitor(new FindLastStoreBRVisitor(V, R)); + } + } + } + + for (Stmt::const_child_iterator I = Head->child_begin(); + I != Head->child_end(); ++I) + WorkList.push_back(*I); + } +} + +//===----------------------------------------------------------------------===// +// Visitor that tries to report interesting diagnostics from conditions. +//===----------------------------------------------------------------------===// +PathDiagnosticPiece *ConditionBRVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *Prev, + BugReporterContext &BRC, + BugReport &BR) { + PathDiagnosticPiece *piece = VisitNodeImpl(N, Prev, BRC, BR); + if (PathDiagnosticEventPiece *ev = + dyn_cast_or_null(piece)) + ev->setPrunable(true, /* override */ false); + return piece; +} + +PathDiagnosticPiece *ConditionBRVisitor::VisitNodeImpl(const ExplodedNode *N, + const ExplodedNode *Prev, + BugReporterContext &BRC, + BugReport &BR) { + + const ProgramPoint &progPoint = N->getLocation(); + + ProgramStateRef CurrentState = N->getState(); + ProgramStateRef PrevState = Prev->getState(); + + // Compare the GDMs of the state, because that is where constraints + // are managed. Note that ensure that we only look at nodes that + // were generated by the analyzer engine proper, not checkers. + if (CurrentState->getGDM().getRoot() == + PrevState->getGDM().getRoot()) + return 0; + + // If an assumption was made on a branch, it should be caught + // here by looking at the state transition. + if (const BlockEdge *BE = dyn_cast(&progPoint)) { + const CFGBlock *srcBlk = BE->getSrc(); + if (const Stmt *term = srcBlk->getTerminator()) + return VisitTerminator(term, N, srcBlk, BE->getDst(), BR, BRC); + return 0; + } + + if (const PostStmt *PS = dyn_cast(&progPoint)) { + // FIXME: Assuming that BugReporter is a GRBugReporter is a layering + // violation. + const std::pair &tags = + cast(BRC.getBugReporter()). + getEngine().getEagerlyAssumeTags(); + + const ProgramPointTag *tag = PS->getTag(); + if (tag == tags.first) + return VisitTrueTest(cast(PS->getStmt()), true, + BRC, BR, N); + if (tag == tags.second) + return VisitTrueTest(cast(PS->getStmt()), false, + BRC, BR, N); + + return 0; + } + + return 0; +} + +PathDiagnosticPiece * +ConditionBRVisitor::VisitTerminator(const Stmt *Term, + const ExplodedNode *N, + const CFGBlock *srcBlk, + const CFGBlock *dstBlk, + BugReport &R, + BugReporterContext &BRC) { + const Expr *Cond = 0; + + switch (Term->getStmtClass()) { + default: + return 0; + case Stmt::IfStmtClass: + Cond = cast(Term)->getCond(); + break; + case Stmt::ConditionalOperatorClass: + Cond = cast(Term)->getCond(); + break; + } + + assert(Cond); + assert(srcBlk->succ_size() == 2); + const bool tookTrue = *(srcBlk->succ_begin()) == dstBlk; + return VisitTrueTest(Cond->IgnoreParenNoopCasts(BRC.getASTContext()), + tookTrue, BRC, R, N); +} + +PathDiagnosticPiece * +ConditionBRVisitor::VisitTrueTest(const Expr *Cond, + bool tookTrue, + BugReporterContext &BRC, + BugReport &R, + const ExplodedNode *N) { + + const Expr *Ex = Cond; + + while (true) { + Ex = Ex->IgnoreParens(); + switch (Ex->getStmtClass()) { + default: + return 0; + case Stmt::BinaryOperatorClass: + return VisitTrueTest(Cond, cast(Ex), tookTrue, BRC, + R, N); + case Stmt::DeclRefExprClass: + return VisitTrueTest(Cond, cast(Ex), tookTrue, BRC, + R, N); + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UO = cast(Ex); + if (UO->getOpcode() == UO_LNot) { + tookTrue = !tookTrue; + Ex = UO->getSubExpr()->IgnoreParenNoopCasts(BRC.getASTContext()); + continue; + } + return 0; + } + } + } +} + +bool ConditionBRVisitor::patternMatch(const Expr *Ex, llvm::raw_ostream &Out, + BugReporterContext &BRC, + BugReport &report, + const ExplodedNode *N, + llvm::Optional &prunable) { + const Expr *OriginalExpr = Ex; + Ex = Ex->IgnoreParenCasts(); + + if (const DeclRefExpr *DR = dyn_cast(Ex)) { + const bool quotes = isa(DR->getDecl()); + if (quotes) { + Out << '\''; + const LocationContext *LCtx = N->getLocationContext(); + const ProgramState *state = N->getState().getPtr(); + if (const MemRegion *R = state->getLValue(cast(DR->getDecl()), + LCtx).getAsRegion()) { + if (report.isInteresting(R)) + prunable = false; + else { + const ProgramState *state = N->getState().getPtr(); + SVal V = state->getSVal(R); + if (report.isInteresting(V)) + prunable = false; + } + } + } + Out << DR->getDecl()->getDeclName().getAsString(); + if (quotes) + Out << '\''; + return quotes; + } + + if (const IntegerLiteral *IL = dyn_cast(Ex)) { + QualType OriginalTy = OriginalExpr->getType(); + if (OriginalTy->isPointerType()) { + if (IL->getValue() == 0) { + Out << "null"; + return false; + } + } + else if (OriginalTy->isObjCObjectPointerType()) { + if (IL->getValue() == 0) { + Out << "nil"; + return false; + } + } + + Out << IL->getValue(); + return false; + } + + return false; +} + +PathDiagnosticPiece * +ConditionBRVisitor::VisitTrueTest(const Expr *Cond, + const BinaryOperator *BExpr, + const bool tookTrue, + BugReporterContext &BRC, + BugReport &R, + const ExplodedNode *N) { + + bool shouldInvert = false; + llvm::Optional shouldPrune; + + SmallString<128> LhsString, RhsString; + { + llvm::raw_svector_ostream OutLHS(LhsString), OutRHS(RhsString); + const bool isVarLHS = patternMatch(BExpr->getLHS(), OutLHS, BRC, R, N, + shouldPrune); + const bool isVarRHS = patternMatch(BExpr->getRHS(), OutRHS, BRC, R, N, + shouldPrune); + + shouldInvert = !isVarLHS && isVarRHS; + } + + BinaryOperator::Opcode Op = BExpr->getOpcode(); + + if (BinaryOperator::isAssignmentOp(Op)) { + // For assignment operators, all that we care about is that the LHS + // evaluates to "true" or "false". + return VisitConditionVariable(LhsString, BExpr->getLHS(), tookTrue, + BRC, R, N); + } + + // For non-assignment operations, we require that we can understand + // both the LHS and RHS. + if (LhsString.empty() || RhsString.empty()) + return 0; + + // Should we invert the strings if the LHS is not a variable name? + SmallString<256> buf; + llvm::raw_svector_ostream Out(buf); + Out << "Assuming " << (shouldInvert ? RhsString : LhsString) << " is "; + + // Do we need to invert the opcode? + if (shouldInvert) + switch (Op) { + default: break; + case BO_LT: Op = BO_GT; break; + case BO_GT: Op = BO_LT; break; + case BO_LE: Op = BO_GE; break; + case BO_GE: Op = BO_LE; break; + } + + if (!tookTrue) + switch (Op) { + case BO_EQ: Op = BO_NE; break; + case BO_NE: Op = BO_EQ; break; + case BO_LT: Op = BO_GE; break; + case BO_GT: Op = BO_LE; break; + case BO_LE: Op = BO_GT; break; + case BO_GE: Op = BO_LT; break; + default: + return 0; + } + + switch (Op) { + case BO_EQ: + Out << "equal to "; + break; + case BO_NE: + Out << "not equal to "; + break; + default: + Out << BinaryOperator::getOpcodeStr(Op) << ' '; + break; + } + + Out << (shouldInvert ? LhsString : RhsString); + const LocationContext *LCtx = N->getLocationContext(); + PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx); + PathDiagnosticEventPiece *event = + new PathDiagnosticEventPiece(Loc, Out.str()); + if (shouldPrune.hasValue()) + event->setPrunable(shouldPrune.getValue()); + return event; +} + +PathDiagnosticPiece * +ConditionBRVisitor::VisitConditionVariable(StringRef LhsString, + const Expr *CondVarExpr, + const bool tookTrue, + BugReporterContext &BRC, + BugReport &report, + const ExplodedNode *N) { + SmallString<256> buf; + llvm::raw_svector_ostream Out(buf); + Out << "Assuming " << LhsString << " is "; + + QualType Ty = CondVarExpr->getType(); + + if (Ty->isPointerType()) + Out << (tookTrue ? "not null" : "null"); + else if (Ty->isObjCObjectPointerType()) + Out << (tookTrue ? "not nil" : "nil"); + else if (Ty->isBooleanType()) + Out << (tookTrue ? "true" : "false"); + else if (Ty->isIntegerType()) + Out << (tookTrue ? "non-zero" : "zero"); + else + return 0; + + const LocationContext *LCtx = N->getLocationContext(); + PathDiagnosticLocation Loc(CondVarExpr, BRC.getSourceManager(), LCtx); + PathDiagnosticEventPiece *event = + new PathDiagnosticEventPiece(Loc, Out.str()); + + if (const DeclRefExpr *DR = dyn_cast(CondVarExpr)) { + if (const VarDecl *VD = dyn_cast(DR->getDecl())) { + const ProgramState *state = N->getState().getPtr(); + if (const MemRegion *R = state->getLValue(VD, LCtx).getAsRegion()) { + if (report.isInteresting(R)) + event->setPrunable(false); + } + } + } + + return event; +} + +PathDiagnosticPiece * +ConditionBRVisitor::VisitTrueTest(const Expr *Cond, + const DeclRefExpr *DR, + const bool tookTrue, + BugReporterContext &BRC, + BugReport &report, + const ExplodedNode *N) { + + const VarDecl *VD = dyn_cast(DR->getDecl()); + if (!VD) + return 0; + + SmallString<256> Buf; + llvm::raw_svector_ostream Out(Buf); + + Out << "Assuming '"; + VD->getDeclName().printName(Out); + Out << "' is "; + + QualType VDTy = VD->getType(); + + if (VDTy->isPointerType()) + Out << (tookTrue ? "non-null" : "null"); + else if (VDTy->isObjCObjectPointerType()) + Out << (tookTrue ? "non-nil" : "nil"); + else if (VDTy->isScalarType()) + Out << (tookTrue ? "not equal to 0" : "0"); + else + return 0; + + const LocationContext *LCtx = N->getLocationContext(); + PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx); + PathDiagnosticEventPiece *event = + new PathDiagnosticEventPiece(Loc, Out.str()); + + const ProgramState *state = N->getState().getPtr(); + if (const MemRegion *R = state->getLValue(VD, LCtx).getAsRegion()) { + if (report.isInteresting(R)) + event->setPrunable(false); + else { + SVal V = state->getSVal(R); + if (report.isInteresting(V)) + event->setPrunable(false); + } + } + return event; +} + diff --git a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt new file mode 100644 index 0000000..1ea25bd --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt @@ -0,0 +1,45 @@ +set(LLVM_LINK_COMPONENTS support) + +set(LLVM_USED_LIBS clangBasic clangLex clangAST clangFrontend clangRewrite) + +add_clang_library(clangStaticAnalyzerCore + AnalysisManager.cpp + BasicConstraintManager.cpp + BasicValueFactory.cpp + BlockCounter.cpp + BugReporter.cpp + BugReporterVisitors.cpp + Checker.cpp + CheckerContext.cpp + CheckerHelpers.cpp + CheckerManager.cpp + CheckerRegistry.cpp + CoreEngine.cpp + Environment.cpp + ExplodedGraph.cpp + ExprEngine.cpp + ExprEngineC.cpp + ExprEngineCXX.cpp + ExprEngineCallAndReturn.cpp + ExprEngineObjC.cpp + FunctionSummary.cpp + HTMLDiagnostics.cpp + MemRegion.cpp + ObjCMessage.cpp + PathDiagnostic.cpp + PlistDiagnostics.cpp + ProgramState.cpp + RangeConstraintManager.cpp + RegionStore.cpp + SValBuilder.cpp + SVals.cpp + SimpleConstraintManager.cpp + SimpleSValBuilder.cpp + Store.cpp + SubEngine.cpp + SymbolManager.cpp + TextPathDiagnostics.cpp + ) + +add_dependencies(clangStaticAnalyzerCore ClangAttrClasses ClangAttrList ClangDeclNodes + ClangStmtNodes) diff --git a/clang/lib/StaticAnalyzer/Core/Checker.cpp b/clang/lib/StaticAnalyzer/Core/Checker.cpp new file mode 100644 index 0000000..07e0aac --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/Checker.cpp @@ -0,0 +1,31 @@ +//== Checker.cpp - Registration mechanism for checkers -----------*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines Checker, used to create and register checkers. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/Checker.h" + +using namespace clang; +using namespace ento; + +StringRef CheckerBase::getTagDescription() const { + // FIXME: We want to return the package + name of the checker here. + return "A Checker"; +} + +void Checker::anchor() { } diff --git a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp new file mode 100644 index 0000000..0a047d9 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp @@ -0,0 +1,83 @@ +//== CheckerContext.cpp - Context info for path-sensitive checkers-----------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines CheckerContext that provides contextual info for +// path-sensitive checkers. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/Basic/Builtins.h" +#include "clang/Lex/Lexer.h" + +using namespace clang; +using namespace ento; + +const FunctionDecl *CheckerContext::getCalleeDecl(const CallExpr *CE) const { + ProgramStateRef State = getState(); + const Expr *Callee = CE->getCallee(); + SVal L = State->getSVal(Callee, Pred->getLocationContext()); + return L.getAsFunctionDecl(); +} + +StringRef CheckerContext::getCalleeName(const FunctionDecl *FunDecl) const { + if (!FunDecl) + return StringRef(); + IdentifierInfo *funI = FunDecl->getIdentifier(); + if (!funI) + return StringRef(); + return funI->getName(); +} + + +bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD, + StringRef Name) { + return isCLibraryFunction(FD, Name, getASTContext()); +} + +bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD, + StringRef Name, ASTContext &Context) { + // To avoid false positives (Ex: finding user defined functions with + // similar names), only perform fuzzy name matching when it's a builtin. + // Using a string compare is slow, we might want to switch on BuiltinID here. + unsigned BId = FD->getBuiltinID(); + if (BId != 0) { + StringRef BName = Context.BuiltinInfo.GetName(BId); + if (BName.find(Name) != StringRef::npos) + return true; + } + + const IdentifierInfo *II = FD->getIdentifier(); + // If this is a special C++ name without IdentifierInfo, it can't be a + // C library function. + if (!II) + return false; + + StringRef FName = II->getName(); + if (FName.equals(Name)) + return true; + + if (FName.startswith("__inline") && (FName.find(Name) != StringRef::npos)) + return true; + + if (FName.startswith("__") && FName.endswith("_chk") && + FName.find(Name) != StringRef::npos) + return true; + + return false; +} + +StringRef CheckerContext::getMacroNameOrSpelling(SourceLocation &Loc) { + if (Loc.isMacroID()) + return Lexer::getImmediateMacroName(Loc, getSourceManager(), + getLangOpts()); + SmallVector buf; + return Lexer::getSpelling(Loc, buf, getSourceManager(), getLangOpts()); +} + diff --git a/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp b/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp new file mode 100644 index 0000000..28df695 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp @@ -0,0 +1,80 @@ +//===---- CheckerHelpers.cpp - Helper functions for checkers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several static functions for use in checkers. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" +#include "clang/AST/Expr.h" + +// Recursively find any substatements containing macros +bool clang::ento::containsMacro(const Stmt *S) { + if (S->getLocStart().isMacroID()) + return true; + + if (S->getLocEnd().isMacroID()) + return true; + + for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end(); + ++I) + if (const Stmt *child = *I) + if (containsMacro(child)) + return true; + + return false; +} + +// Recursively find any substatements containing enum constants +bool clang::ento::containsEnum(const Stmt *S) { + const DeclRefExpr *DR = dyn_cast(S); + + if (DR && isa(DR->getDecl())) + return true; + + for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end(); + ++I) + if (const Stmt *child = *I) + if (containsEnum(child)) + return true; + + return false; +} + +// Recursively find any substatements containing static vars +bool clang::ento::containsStaticLocal(const Stmt *S) { + const DeclRefExpr *DR = dyn_cast(S); + + if (DR) + if (const VarDecl *VD = dyn_cast(DR->getDecl())) + if (VD->isStaticLocal()) + return true; + + for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end(); + ++I) + if (const Stmt *child = *I) + if (containsStaticLocal(child)) + return true; + + return false; +} + +// Recursively find any substatements containing __builtin_offsetof +bool clang::ento::containsBuiltinOffsetOf(const Stmt *S) { + if (isa(S)) + return true; + + for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end(); + ++I) + if (const Stmt *child = *I) + if (containsBuiltinOffsetOf(child)) + return true; + + return false; +} diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp new file mode 100644 index 0000000..0bcc343 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp @@ -0,0 +1,678 @@ +//===--- CheckerManager.cpp - Static Analyzer Checker Manager -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines the Static Analyzer Checker Manager. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/Analysis/ProgramPoint.h" +#include "clang/AST/DeclBase.h" + +using namespace clang; +using namespace ento; + +bool CheckerManager::hasPathSensitiveCheckers() const { + return !StmtCheckers.empty() || + !PreObjCMessageCheckers.empty() || + !PostObjCMessageCheckers.empty() || + !LocationCheckers.empty() || + !BindCheckers.empty() || + !EndAnalysisCheckers.empty() || + !EndPathCheckers.empty() || + !BranchConditionCheckers.empty() || + !LiveSymbolsCheckers.empty() || + !DeadSymbolsCheckers.empty() || + !RegionChangesCheckers.empty() || + !EvalAssumeCheckers.empty() || + !EvalCallCheckers.empty() || + !InlineCallCheckers.empty(); +} + +void CheckerManager::finishedCheckerRegistration() { +#ifndef NDEBUG + // Make sure that for every event that has listeners, there is at least + // one dispatcher registered for it. + for (llvm::DenseMap::iterator + I = Events.begin(), E = Events.end(); I != E; ++I) + assert(I->second.HasDispatcher && "No dispatcher registered for an event"); +#endif +} + +//===----------------------------------------------------------------------===// +// Functions for running checkers for AST traversing.. +//===----------------------------------------------------------------------===// + +void CheckerManager::runCheckersOnASTDecl(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) { + assert(D); + + unsigned DeclKind = D->getKind(); + CachedDeclCheckers *checkers = 0; + CachedDeclCheckersMapTy::iterator CCI = CachedDeclCheckersMap.find(DeclKind); + if (CCI != CachedDeclCheckersMap.end()) { + checkers = &(CCI->second); + } else { + // Find the checkers that should run for this Decl and cache them. + checkers = &CachedDeclCheckersMap[DeclKind]; + for (unsigned i = 0, e = DeclCheckers.size(); i != e; ++i) { + DeclCheckerInfo &info = DeclCheckers[i]; + if (info.IsForDeclFn(D)) + checkers->push_back(info.CheckFn); + } + } + + assert(checkers); + for (CachedDeclCheckers::iterator + I = checkers->begin(), E = checkers->end(); I != E; ++I) + (*I)(D, mgr, BR); +} + +void CheckerManager::runCheckersOnASTBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) { + assert(D && D->hasBody()); + + for (unsigned i = 0, e = BodyCheckers.size(); i != e; ++i) + BodyCheckers[i](D, mgr, BR); +} + +//===----------------------------------------------------------------------===// +// Functions for running checkers for path-sensitive checking. +//===----------------------------------------------------------------------===// + +template +static void expandGraphWithCheckers(CHECK_CTX checkCtx, + ExplodedNodeSet &Dst, + const ExplodedNodeSet &Src) { + const NodeBuilderContext &BldrCtx = checkCtx.Eng.getBuilderContext(); + if (Src.empty()) + return; + + typename CHECK_CTX::CheckersTy::const_iterator + I = checkCtx.checkers_begin(), E = checkCtx.checkers_end(); + if (I == E) { + Dst.insert(Src); + return; + } + + ExplodedNodeSet Tmp1, Tmp2; + const ExplodedNodeSet *PrevSet = &Src; + + for (; I != E; ++I) { + ExplodedNodeSet *CurrSet = 0; + if (I+1 == E) + CurrSet = &Dst; + else { + CurrSet = (PrevSet == &Tmp1) ? &Tmp2 : &Tmp1; + CurrSet->clear(); + } + + NodeBuilder B(*PrevSet, *CurrSet, BldrCtx); + for (ExplodedNodeSet::iterator NI = PrevSet->begin(), NE = PrevSet->end(); + NI != NE; ++NI) { + checkCtx.runChecker(*I, B, *NI); + } + + // If all the produced transitions are sinks, stop. + if (CurrSet->empty()) + return; + + // Update which NodeSet is the current one. + PrevSet = CurrSet; + } +} + +namespace { + struct CheckStmtContext { + typedef SmallVectorImpl CheckersTy; + bool IsPreVisit; + const CheckersTy &Checkers; + const Stmt *S; + ExprEngine &Eng; + bool wasInlined; + + CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); } + CheckersTy::const_iterator checkers_end() { return Checkers.end(); } + + CheckStmtContext(bool isPreVisit, const CheckersTy &checkers, + const Stmt *s, ExprEngine &eng, bool wasInlined = false) + : IsPreVisit(isPreVisit), Checkers(checkers), S(s), Eng(eng), + wasInlined(wasInlined) {} + + void runChecker(CheckerManager::CheckStmtFunc checkFn, + NodeBuilder &Bldr, ExplodedNode *Pred) { + // FIXME: Remove respondsToCallback from CheckerContext; + ProgramPoint::Kind K = IsPreVisit ? ProgramPoint::PreStmtKind : + ProgramPoint::PostStmtKind; + const ProgramPoint &L = ProgramPoint::getProgramPoint(S, K, + Pred->getLocationContext(), checkFn.Checker); + CheckerContext C(Bldr, Eng, Pred, L, wasInlined); + checkFn(S, C); + } + }; +} + +/// \brief Run checkers for visiting Stmts. +void CheckerManager::runCheckersForStmt(bool isPreVisit, + ExplodedNodeSet &Dst, + const ExplodedNodeSet &Src, + const Stmt *S, + ExprEngine &Eng, + bool wasInlined) { + CheckStmtContext C(isPreVisit, *getCachedStmtCheckersFor(S, isPreVisit), + S, Eng, wasInlined); + expandGraphWithCheckers(C, Dst, Src); +} + +namespace { + struct CheckObjCMessageContext { + typedef std::vector CheckersTy; + bool IsPreVisit; + const CheckersTy &Checkers; + const ObjCMessage &Msg; + ExprEngine &Eng; + + CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); } + CheckersTy::const_iterator checkers_end() { return Checkers.end(); } + + CheckObjCMessageContext(bool isPreVisit, const CheckersTy &checkers, + const ObjCMessage &msg, ExprEngine &eng) + : IsPreVisit(isPreVisit), Checkers(checkers), Msg(msg), Eng(eng) { } + + void runChecker(CheckerManager::CheckObjCMessageFunc checkFn, + NodeBuilder &Bldr, ExplodedNode *Pred) { + ProgramPoint::Kind K = IsPreVisit ? ProgramPoint::PreStmtKind : + ProgramPoint::PostStmtKind; + const ProgramPoint &L = + ProgramPoint::getProgramPoint(Msg.getMessageExpr(), + K, Pred->getLocationContext(), + checkFn.Checker); + CheckerContext C(Bldr, Eng, Pred, L); + + checkFn(Msg, C); + } + }; +} + +/// \brief Run checkers for visiting obj-c messages. +void CheckerManager::runCheckersForObjCMessage(bool isPreVisit, + ExplodedNodeSet &Dst, + const ExplodedNodeSet &Src, + const ObjCMessage &msg, + ExprEngine &Eng) { + CheckObjCMessageContext C(isPreVisit, + isPreVisit ? PreObjCMessageCheckers + : PostObjCMessageCheckers, + msg, Eng); + expandGraphWithCheckers(C, Dst, Src); +} + +namespace { + struct CheckLocationContext { + typedef std::vector CheckersTy; + const CheckersTy &Checkers; + SVal Loc; + bool IsLoad; + const Stmt *NodeEx; /* Will become a CFGStmt */ + const Stmt *BoundEx; + ExprEngine &Eng; + + CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); } + CheckersTy::const_iterator checkers_end() { return Checkers.end(); } + + CheckLocationContext(const CheckersTy &checkers, + SVal loc, bool isLoad, const Stmt *NodeEx, + const Stmt *BoundEx, + ExprEngine &eng) + : Checkers(checkers), Loc(loc), IsLoad(isLoad), NodeEx(NodeEx), + BoundEx(BoundEx), Eng(eng) {} + + void runChecker(CheckerManager::CheckLocationFunc checkFn, + NodeBuilder &Bldr, ExplodedNode *Pred) { + ProgramPoint::Kind K = IsLoad ? ProgramPoint::PreLoadKind : + ProgramPoint::PreStoreKind; + const ProgramPoint &L = + ProgramPoint::getProgramPoint(NodeEx, K, + Pred->getLocationContext(), + checkFn.Checker); + CheckerContext C(Bldr, Eng, Pred, L); + checkFn(Loc, IsLoad, BoundEx, C); + } + }; +} + +/// \brief Run checkers for load/store of a location. + +void CheckerManager::runCheckersForLocation(ExplodedNodeSet &Dst, + const ExplodedNodeSet &Src, + SVal location, bool isLoad, + const Stmt *NodeEx, + const Stmt *BoundEx, + ExprEngine &Eng) { + CheckLocationContext C(LocationCheckers, location, isLoad, NodeEx, + BoundEx, Eng); + expandGraphWithCheckers(C, Dst, Src); +} + +namespace { + struct CheckBindContext { + typedef std::vector CheckersTy; + const CheckersTy &Checkers; + SVal Loc; + SVal Val; + const Stmt *S; + ExprEngine &Eng; + ProgramPoint::Kind PointKind; + + CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); } + CheckersTy::const_iterator checkers_end() { return Checkers.end(); } + + CheckBindContext(const CheckersTy &checkers, + SVal loc, SVal val, const Stmt *s, ExprEngine &eng, + ProgramPoint::Kind PK) + : Checkers(checkers), Loc(loc), Val(val), S(s), Eng(eng), PointKind(PK) {} + + void runChecker(CheckerManager::CheckBindFunc checkFn, + NodeBuilder &Bldr, ExplodedNode *Pred) { + const ProgramPoint &L = ProgramPoint::getProgramPoint(S, PointKind, + Pred->getLocationContext(), checkFn.Checker); + CheckerContext C(Bldr, Eng, Pred, L); + + checkFn(Loc, Val, S, C); + } + }; +} + +/// \brief Run checkers for binding of a value to a location. +void CheckerManager::runCheckersForBind(ExplodedNodeSet &Dst, + const ExplodedNodeSet &Src, + SVal location, SVal val, + const Stmt *S, ExprEngine &Eng, + ProgramPoint::Kind PointKind) { + CheckBindContext C(BindCheckers, location, val, S, Eng, PointKind); + expandGraphWithCheckers(C, Dst, Src); +} + +void CheckerManager::runCheckersForEndAnalysis(ExplodedGraph &G, + BugReporter &BR, + ExprEngine &Eng) { + for (unsigned i = 0, e = EndAnalysisCheckers.size(); i != e; ++i) + EndAnalysisCheckers[i](G, BR, Eng); +} + +/// \brief Run checkers for end of path. +// Note, We do not chain the checker output (like in expandGraphWithCheckers) +// for this callback since end of path nodes are expected to be final. +void CheckerManager::runCheckersForEndPath(NodeBuilderContext &BC, + ExplodedNodeSet &Dst, + ExprEngine &Eng) { + ExplodedNode *Pred = BC.Pred; + + // We define the builder outside of the loop bacause if at least one checkers + // creates a sucsessor for Pred, we do not need to generate an + // autotransition for it. + NodeBuilder Bldr(Pred, Dst, BC); + for (unsigned i = 0, e = EndPathCheckers.size(); i != e; ++i) { + CheckEndPathFunc checkFn = EndPathCheckers[i]; + + const ProgramPoint &L = BlockEntrance(BC.Block, + Pred->getLocationContext(), + checkFn.Checker); + CheckerContext C(Bldr, Eng, Pred, L); + checkFn(C); + } +} + +namespace { + struct CheckBranchConditionContext { + typedef std::vector CheckersTy; + const CheckersTy &Checkers; + const Stmt *Condition; + ExprEngine &Eng; + + CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); } + CheckersTy::const_iterator checkers_end() { return Checkers.end(); } + + CheckBranchConditionContext(const CheckersTy &checkers, + const Stmt *Cond, ExprEngine &eng) + : Checkers(checkers), Condition(Cond), Eng(eng) {} + + void runChecker(CheckerManager::CheckBranchConditionFunc checkFn, + NodeBuilder &Bldr, ExplodedNode *Pred) { + ProgramPoint L = PostCondition(Condition, Pred->getLocationContext(), + checkFn.Checker); + CheckerContext C(Bldr, Eng, Pred, L); + checkFn(Condition, C); + } + }; +} + +/// \brief Run checkers for branch condition. +void CheckerManager::runCheckersForBranchCondition(const Stmt *Condition, + ExplodedNodeSet &Dst, + ExplodedNode *Pred, + ExprEngine &Eng) { + ExplodedNodeSet Src; + Src.insert(Pred); + CheckBranchConditionContext C(BranchConditionCheckers, Condition, Eng); + expandGraphWithCheckers(C, Dst, Src); +} + +/// \brief Run checkers for live symbols. +void CheckerManager::runCheckersForLiveSymbols(ProgramStateRef state, + SymbolReaper &SymReaper) { + for (unsigned i = 0, e = LiveSymbolsCheckers.size(); i != e; ++i) + LiveSymbolsCheckers[i](state, SymReaper); +} + +namespace { + struct CheckDeadSymbolsContext { + typedef std::vector CheckersTy; + const CheckersTy &Checkers; + SymbolReaper &SR; + const Stmt *S; + ExprEngine &Eng; + + CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); } + CheckersTy::const_iterator checkers_end() { return Checkers.end(); } + + CheckDeadSymbolsContext(const CheckersTy &checkers, SymbolReaper &sr, + const Stmt *s, ExprEngine &eng) + : Checkers(checkers), SR(sr), S(s), Eng(eng) { } + + void runChecker(CheckerManager::CheckDeadSymbolsFunc checkFn, + NodeBuilder &Bldr, ExplodedNode *Pred) { + ProgramPoint::Kind K = ProgramPoint::PostPurgeDeadSymbolsKind; + const ProgramPoint &L = ProgramPoint::getProgramPoint(S, K, + Pred->getLocationContext(), checkFn.Checker); + CheckerContext C(Bldr, Eng, Pred, L); + + checkFn(SR, C); + } + }; +} + +/// \brief Run checkers for dead symbols. +void CheckerManager::runCheckersForDeadSymbols(ExplodedNodeSet &Dst, + const ExplodedNodeSet &Src, + SymbolReaper &SymReaper, + const Stmt *S, + ExprEngine &Eng) { + CheckDeadSymbolsContext C(DeadSymbolsCheckers, SymReaper, S, Eng); + expandGraphWithCheckers(C, Dst, Src); +} + +/// \brief True if at least one checker wants to check region changes. +bool CheckerManager::wantsRegionChangeUpdate(ProgramStateRef state) { + for (unsigned i = 0, e = RegionChangesCheckers.size(); i != e; ++i) + if (RegionChangesCheckers[i].WantUpdateFn(state)) + return true; + + return false; +} + +/// \brief Run checkers for region changes. +ProgramStateRef +CheckerManager::runCheckersForRegionChanges(ProgramStateRef state, + const StoreManager::InvalidatedSymbols *invalidated, + ArrayRef ExplicitRegions, + ArrayRef Regions, + const CallOrObjCMessage *Call) { + for (unsigned i = 0, e = RegionChangesCheckers.size(); i != e; ++i) { + // If any checker declares the state infeasible (or if it starts that way), + // bail out. + if (!state) + return NULL; + state = RegionChangesCheckers[i].CheckFn(state, invalidated, + ExplicitRegions, Regions, Call); + } + return state; +} + +/// \brief Run checkers for handling assumptions on symbolic values. +ProgramStateRef +CheckerManager::runCheckersForEvalAssume(ProgramStateRef state, + SVal Cond, bool Assumption) { + for (unsigned i = 0, e = EvalAssumeCheckers.size(); i != e; ++i) { + // If any checker declares the state infeasible (or if it starts that way), + // bail out. + if (!state) + return NULL; + state = EvalAssumeCheckers[i](state, Cond, Assumption); + } + return state; +} + +/// \brief Run checkers for evaluating a call. +/// Only one checker will evaluate the call. +void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst, + const ExplodedNodeSet &Src, + const CallExpr *CE, + ExprEngine &Eng, + GraphExpander *defaultEval) { + if (EvalCallCheckers.empty() && + InlineCallCheckers.empty() && + defaultEval == 0) { + Dst.insert(Src); + return; + } + + for (ExplodedNodeSet::iterator + NI = Src.begin(), NE = Src.end(); NI != NE; ++NI) { + + ExplodedNode *Pred = *NI; + bool anyEvaluated = false; + + // First, check if any of the InlineCall callbacks can evaluate the call. + assert(InlineCallCheckers.size() <= 1 && + "InlineCall is a special hacky callback to allow intrusive" + "evaluation of the call (which simulates inlining). It is " + "currently only used by OSAtomicChecker and should go away " + "at some point."); + for (std::vector::iterator + EI = InlineCallCheckers.begin(), EE = InlineCallCheckers.end(); + EI != EE; ++EI) { + ExplodedNodeSet checkDst; + bool evaluated = (*EI)(CE, Eng, Pred, checkDst); + assert(!(evaluated && anyEvaluated) + && "There are more than one checkers evaluating the call"); + if (evaluated) { + anyEvaluated = true; + Dst.insert(checkDst); +#ifdef NDEBUG + break; // on release don't check that no other checker also evals. +#endif + } + } + +#ifdef NDEBUG // on release don't check that no other checker also evals. + if (anyEvaluated) { + break; + } +#endif + + ExplodedNodeSet checkDst; + NodeBuilder B(Pred, checkDst, Eng.getBuilderContext()); + // Next, check if any of the EvalCall callbacks can evaluate the call. + for (std::vector::iterator + EI = EvalCallCheckers.begin(), EE = EvalCallCheckers.end(); + EI != EE; ++EI) { + ProgramPoint::Kind K = ProgramPoint::PostStmtKind; + const ProgramPoint &L = ProgramPoint::getProgramPoint(CE, K, + Pred->getLocationContext(), EI->Checker); + bool evaluated = false; + { // CheckerContext generates transitions(populates checkDest) on + // destruction, so introduce the scope to make sure it gets properly + // populated. + CheckerContext C(B, Eng, Pred, L); + evaluated = (*EI)(CE, C); + } + assert(!(evaluated && anyEvaluated) + && "There are more than one checkers evaluating the call"); + if (evaluated) { + anyEvaluated = true; + Dst.insert(checkDst); +#ifdef NDEBUG + break; // on release don't check that no other checker also evals. +#endif + } + } + + // If none of the checkers evaluated the call, ask ExprEngine to handle it. + if (!anyEvaluated) { + if (defaultEval) + defaultEval->expandGraph(Dst, Pred); + else + Dst.insert(Pred); + } + } +} + +/// \brief Run checkers for the entire Translation Unit. +void CheckerManager::runCheckersOnEndOfTranslationUnit( + const TranslationUnitDecl *TU, + AnalysisManager &mgr, + BugReporter &BR) { + for (unsigned i = 0, e = EndOfTranslationUnitCheckers.size(); i != e; ++i) + EndOfTranslationUnitCheckers[i](TU, mgr, BR); +} + +void CheckerManager::runCheckersForPrintState(raw_ostream &Out, + ProgramStateRef State, + const char *NL, const char *Sep) { + for (llvm::DenseMap::iterator + I = CheckerTags.begin(), E = CheckerTags.end(); I != E; ++I) + I->second->printState(Out, State, NL, Sep); +} + +//===----------------------------------------------------------------------===// +// Internal registration functions for AST traversing. +//===----------------------------------------------------------------------===// + +void CheckerManager::_registerForDecl(CheckDeclFunc checkfn, + HandlesDeclFunc isForDeclFn) { + DeclCheckerInfo info = { checkfn, isForDeclFn }; + DeclCheckers.push_back(info); +} + +void CheckerManager::_registerForBody(CheckDeclFunc checkfn) { + BodyCheckers.push_back(checkfn); +} + +//===----------------------------------------------------------------------===// +// Internal registration functions for path-sensitive checking. +//===----------------------------------------------------------------------===// + +void CheckerManager::_registerForPreStmt(CheckStmtFunc checkfn, + HandlesStmtFunc isForStmtFn) { + StmtCheckerInfo info = { checkfn, isForStmtFn, /*IsPreVisit*/true }; + StmtCheckers.push_back(info); +} +void CheckerManager::_registerForPostStmt(CheckStmtFunc checkfn, + HandlesStmtFunc isForStmtFn) { + StmtCheckerInfo info = { checkfn, isForStmtFn, /*IsPreVisit*/false }; + StmtCheckers.push_back(info); +} + +void CheckerManager::_registerForPreObjCMessage(CheckObjCMessageFunc checkfn) { + PreObjCMessageCheckers.push_back(checkfn); +} +void CheckerManager::_registerForPostObjCMessage(CheckObjCMessageFunc checkfn) { + PostObjCMessageCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForLocation(CheckLocationFunc checkfn) { + LocationCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForBind(CheckBindFunc checkfn) { + BindCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForEndAnalysis(CheckEndAnalysisFunc checkfn) { + EndAnalysisCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForEndPath(CheckEndPathFunc checkfn) { + EndPathCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForBranchCondition( + CheckBranchConditionFunc checkfn) { + BranchConditionCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForLiveSymbols(CheckLiveSymbolsFunc checkfn) { + LiveSymbolsCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForDeadSymbols(CheckDeadSymbolsFunc checkfn) { + DeadSymbolsCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForRegionChanges(CheckRegionChangesFunc checkfn, + WantsRegionChangeUpdateFunc wantUpdateFn) { + RegionChangesCheckerInfo info = {checkfn, wantUpdateFn}; + RegionChangesCheckers.push_back(info); +} + +void CheckerManager::_registerForEvalAssume(EvalAssumeFunc checkfn) { + EvalAssumeCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForEvalCall(EvalCallFunc checkfn) { + EvalCallCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForInlineCall(InlineCallFunc checkfn) { + InlineCallCheckers.push_back(checkfn); +} + +void CheckerManager::_registerForEndOfTranslationUnit( + CheckEndOfTranslationUnit checkfn) { + EndOfTranslationUnitCheckers.push_back(checkfn); +} + +//===----------------------------------------------------------------------===// +// Implementation details. +//===----------------------------------------------------------------------===// + +CheckerManager::CachedStmtCheckers * +CheckerManager::getCachedStmtCheckersFor(const Stmt *S, bool isPreVisit) { + assert(S); + + CachedStmtCheckersKey key(S->getStmtClass(), isPreVisit); + CachedStmtCheckers *checkers = 0; + CachedStmtCheckersMapTy::iterator CCI = CachedStmtCheckersMap.find(key); + if (CCI != CachedStmtCheckersMap.end()) { + checkers = &(CCI->second); + } else { + // Find the checkers that should run for this Stmt and cache them. + checkers = &CachedStmtCheckersMap[key]; + for (unsigned i = 0, e = StmtCheckers.size(); i != e; ++i) { + StmtCheckerInfo &info = StmtCheckers[i]; + if (info.IsPreVisit == isPreVisit && info.IsForStmtFn(S)) + checkers->push_back(info.CheckFn); + } + } + + assert(checkers); + return checkers; +} + +CheckerManager::~CheckerManager() { + for (unsigned i = 0, e = CheckerDtors.size(); i != e; ++i) + CheckerDtors[i](); +} + +// Anchor for the vtable. +GraphExpander::~GraphExpander() { } diff --git a/clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp b/clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp new file mode 100644 index 0000000..9791e2e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp @@ -0,0 +1,150 @@ +//===--- CheckerRegistry.cpp - Maintains all available checkers -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/CheckerRegistry.h" +#include "clang/StaticAnalyzer/Core/CheckerOptInfo.h" +#include "llvm/ADT/SetVector.h" + +using namespace clang; +using namespace ento; + +static const char PackageSeparator = '.'; +typedef llvm::SetVector CheckerInfoSet; + + +static bool checkerNameLT(const CheckerRegistry::CheckerInfo &a, + const CheckerRegistry::CheckerInfo &b) { + return a.FullName < b.FullName; +} + +static bool isInPackage(const CheckerRegistry::CheckerInfo &checker, + StringRef packageName) { + // Does the checker's full name have the package as a prefix? + if (!checker.FullName.startswith(packageName)) + return false; + + // Is the package actually just the name of a specific checker? + if (checker.FullName.size() == packageName.size()) + return true; + + // Is the checker in the package (or a subpackage)? + if (checker.FullName[packageName.size()] == PackageSeparator) + return true; + + return false; +} + +static void collectCheckers(const CheckerRegistry::CheckerInfoList &checkers, + const llvm::StringMap &packageSizes, + CheckerOptInfo &opt, CheckerInfoSet &collected) { + // Use a binary search to find the possible start of the package. + CheckerRegistry::CheckerInfo packageInfo(NULL, opt.getName(), ""); + CheckerRegistry::CheckerInfoList::const_iterator e = checkers.end(); + CheckerRegistry::CheckerInfoList::const_iterator i = + std::lower_bound(checkers.begin(), e, packageInfo, checkerNameLT); + + // If we didn't even find a possible package, give up. + if (i == e) + return; + + // If what we found doesn't actually start the package, give up. + if (!isInPackage(*i, opt.getName())) + return; + + // There is at least one checker in the package; claim the option. + opt.claim(); + + // See how large the package is. + // If the package doesn't exist, assume the option refers to a single checker. + size_t size = 1; + llvm::StringMap::const_iterator packageSize = + packageSizes.find(opt.getName()); + if (packageSize != packageSizes.end()) + size = packageSize->getValue(); + + // Step through all the checkers in the package. + for (e = i+size; i != e; ++i) { + if (opt.isEnabled()) + collected.insert(&*i); + else + collected.remove(&*i); + } +} + +void CheckerRegistry::addChecker(InitializationFunction fn, StringRef name, + StringRef desc) { + Checkers.push_back(CheckerInfo(fn, name, desc)); + + // Record the presence of the checker in its packages. + StringRef packageName, leafName; + llvm::tie(packageName, leafName) = name.rsplit(PackageSeparator); + while (!leafName.empty()) { + Packages[packageName] += 1; + llvm::tie(packageName, leafName) = packageName.rsplit(PackageSeparator); + } +} + +void CheckerRegistry::initializeManager(CheckerManager &checkerMgr, + SmallVectorImpl &opts) const { + // Sort checkers for efficient collection. + std::sort(Checkers.begin(), Checkers.end(), checkerNameLT); + + // Collect checkers enabled by the options. + CheckerInfoSet enabledCheckers; + for (SmallVectorImpl::iterator + i = opts.begin(), e = opts.end(); i != e; ++i) { + collectCheckers(Checkers, Packages, *i, enabledCheckers); + } + + // Initialize the CheckerManager with all enabled checkers. + for (CheckerInfoSet::iterator + i = enabledCheckers.begin(), e = enabledCheckers.end(); i != e; ++i) { + (*i)->Initialize(checkerMgr); + } +} + +void CheckerRegistry::printHelp(llvm::raw_ostream &out, + size_t maxNameChars) const { + // FIXME: Alphabetical sort puts 'experimental' in the middle. + // Would it be better to name it '~experimental' or something else + // that's ASCIIbetically last? + std::sort(Checkers.begin(), Checkers.end(), checkerNameLT); + + // FIXME: Print available packages. + + out << "CHECKERS:\n"; + + // Find the maximum option length. + size_t optionFieldWidth = 0; + for (CheckerInfoList::const_iterator i = Checkers.begin(), e = Checkers.end(); + i != e; ++i) { + // Limit the amount of padding we are willing to give up for alignment. + // Package.Name Description [Hidden] + size_t nameLength = i->FullName.size(); + if (nameLength <= maxNameChars) + optionFieldWidth = std::max(optionFieldWidth, nameLength); + } + + const size_t initialPad = 2; + for (CheckerInfoList::const_iterator i = Checkers.begin(), e = Checkers.end(); + i != e; ++i) { + out.indent(initialPad) << i->FullName; + + int pad = optionFieldWidth - i->FullName.size(); + + // Break on long option names. + if (pad < 0) { + out << '\n'; + pad = optionFieldWidth + initialPad; + } + out.indent(pad + 2) << i->Desc; + + out << '\n'; + } +} diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp new file mode 100644 index 0000000..ca662c7 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp @@ -0,0 +1,688 @@ +//==- CoreEngine.cpp - Path-Sensitive Dataflow Engine ------------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic engine for intraprocedural, path-sensitive, +// dataflow analysis via graph reachability engine. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "CoreEngine" + +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtCXX.h" +#include "llvm/Support/Casting.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" + +using namespace clang; +using namespace ento; + +STATISTIC(NumReachedMaxSteps, + "The # of times we reached the max number of steps."); +STATISTIC(NumPathsExplored, + "The # of paths explored by the analyzer."); + +//===----------------------------------------------------------------------===// +// Worklist classes for exploration of reachable states. +//===----------------------------------------------------------------------===// + +WorkList::Visitor::~Visitor() {} + +namespace { +class DFS : public WorkList { + SmallVector Stack; +public: + virtual bool hasWork() const { + return !Stack.empty(); + } + + virtual void enqueue(const WorkListUnit& U) { + Stack.push_back(U); + } + + virtual WorkListUnit dequeue() { + assert (!Stack.empty()); + const WorkListUnit& U = Stack.back(); + Stack.pop_back(); // This technically "invalidates" U, but we are fine. + return U; + } + + virtual bool visitItemsInWorkList(Visitor &V) { + for (SmallVectorImpl::iterator + I = Stack.begin(), E = Stack.end(); I != E; ++I) { + if (V.visit(*I)) + return true; + } + return false; + } +}; + +class BFS : public WorkList { + std::deque Queue; +public: + virtual bool hasWork() const { + return !Queue.empty(); + } + + virtual void enqueue(const WorkListUnit& U) { + Queue.push_front(U); + } + + virtual WorkListUnit dequeue() { + WorkListUnit U = Queue.front(); + Queue.pop_front(); + return U; + } + + virtual bool visitItemsInWorkList(Visitor &V) { + for (std::deque::iterator + I = Queue.begin(), E = Queue.end(); I != E; ++I) { + if (V.visit(*I)) + return true; + } + return false; + } +}; + +} // end anonymous namespace + +// Place the dstor for WorkList here because it contains virtual member +// functions, and we the code for the dstor generated in one compilation unit. +WorkList::~WorkList() {} + +WorkList *WorkList::makeDFS() { return new DFS(); } +WorkList *WorkList::makeBFS() { return new BFS(); } + +namespace { + class BFSBlockDFSContents : public WorkList { + std::deque Queue; + SmallVector Stack; + public: + virtual bool hasWork() const { + return !Queue.empty() || !Stack.empty(); + } + + virtual void enqueue(const WorkListUnit& U) { + if (isa(U.getNode()->getLocation())) + Queue.push_front(U); + else + Stack.push_back(U); + } + + virtual WorkListUnit dequeue() { + // Process all basic blocks to completion. + if (!Stack.empty()) { + const WorkListUnit& U = Stack.back(); + Stack.pop_back(); // This technically "invalidates" U, but we are fine. + return U; + } + + assert(!Queue.empty()); + // Don't use const reference. The subsequent pop_back() might make it + // unsafe. + WorkListUnit U = Queue.front(); + Queue.pop_front(); + return U; + } + virtual bool visitItemsInWorkList(Visitor &V) { + for (SmallVectorImpl::iterator + I = Stack.begin(), E = Stack.end(); I != E; ++I) { + if (V.visit(*I)) + return true; + } + for (std::deque::iterator + I = Queue.begin(), E = Queue.end(); I != E; ++I) { + if (V.visit(*I)) + return true; + } + return false; + } + + }; +} // end anonymous namespace + +WorkList* WorkList::makeBFSBlockDFSContents() { + return new BFSBlockDFSContents(); +} + +//===----------------------------------------------------------------------===// +// Core analysis engine. +//===----------------------------------------------------------------------===// + +/// ExecuteWorkList - Run the worklist algorithm for a maximum number of steps. +bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned Steps, + ProgramStateRef InitState) { + + if (G->num_roots() == 0) { // Initialize the analysis by constructing + // the root if none exists. + + const CFGBlock *Entry = &(L->getCFG()->getEntry()); + + assert (Entry->empty() && + "Entry block must be empty."); + + assert (Entry->succ_size() == 1 && + "Entry block must have 1 successor."); + + // Mark the entry block as visited. + FunctionSummaries->markVisitedBasicBlock(Entry->getBlockID(), + L->getDecl(), + L->getCFG()->getNumBlockIDs()); + + // Get the solitary successor. + const CFGBlock *Succ = *(Entry->succ_begin()); + + // Construct an edge representing the + // starting location in the function. + BlockEdge StartLoc(Entry, Succ, L); + + // Set the current block counter to being empty. + WList->setBlockCounter(BCounterFactory.GetEmptyCounter()); + + if (!InitState) + // Generate the root. + generateNode(StartLoc, SubEng.getInitialState(L), 0); + else + generateNode(StartLoc, InitState, 0); + } + + // Check if we have a steps limit + bool UnlimitedSteps = Steps == 0; + + while (WList->hasWork()) { + if (!UnlimitedSteps) { + if (Steps == 0) { + NumReachedMaxSteps++; + break; + } + --Steps; + } + + const WorkListUnit& WU = WList->dequeue(); + + // Set the current block counter. + WList->setBlockCounter(WU.getBlockCounter()); + + // Retrieve the node. + ExplodedNode *Node = WU.getNode(); + + dispatchWorkItem(Node, Node->getLocation(), WU); + } + SubEng.processEndWorklist(hasWorkRemaining()); + return WList->hasWork(); +} + +void CoreEngine::dispatchWorkItem(ExplodedNode* Pred, ProgramPoint Loc, + const WorkListUnit& WU) { + // Dispatch on the location type. + switch (Loc.getKind()) { + case ProgramPoint::BlockEdgeKind: + HandleBlockEdge(cast(Loc), Pred); + break; + + case ProgramPoint::BlockEntranceKind: + HandleBlockEntrance(cast(Loc), Pred); + break; + + case ProgramPoint::BlockExitKind: + assert (false && "BlockExit location never occur in forward analysis."); + break; + + case ProgramPoint::CallEnterKind: { + CallEnter CEnter = cast(Loc); + if (AnalyzedCallees) + if (const CallExpr* CE = + dyn_cast_or_null(CEnter.getCallExpr())) + if (const Decl *CD = CE->getCalleeDecl()) + AnalyzedCallees->insert(CD); + SubEng.processCallEnter(CEnter, Pred); + break; + } + + case ProgramPoint::CallExitKind: + SubEng.processCallExit(Pred); + break; + + case ProgramPoint::EpsilonKind: { + assert(Pred->hasSinglePred() && + "Assume epsilon has exactly one predecessor by construction"); + ExplodedNode *PNode = Pred->getFirstPred(); + dispatchWorkItem(Pred, PNode->getLocation(), WU); + break; + } + default: + assert(isa(Loc) || + isa(Loc)); + HandlePostStmt(WU.getBlock(), WU.getIndex(), Pred); + break; + } +} + +bool CoreEngine::ExecuteWorkListWithInitialState(const LocationContext *L, + unsigned Steps, + ProgramStateRef InitState, + ExplodedNodeSet &Dst) { + bool DidNotFinish = ExecuteWorkList(L, Steps, InitState); + for (ExplodedGraph::eop_iterator I = G->eop_begin(), + E = G->eop_end(); I != E; ++I) { + Dst.Add(*I); + } + return DidNotFinish; +} + +void CoreEngine::HandleBlockEdge(const BlockEdge &L, ExplodedNode *Pred) { + + const CFGBlock *Blk = L.getDst(); + NodeBuilderContext BuilderCtx(*this, Blk, Pred); + + // Mark this block as visited. + const LocationContext *LC = Pred->getLocationContext(); + FunctionSummaries->markVisitedBasicBlock(Blk->getBlockID(), + LC->getDecl(), + LC->getCFG()->getNumBlockIDs()); + + // Check if we are entering the EXIT block. + if (Blk == &(L.getLocationContext()->getCFG()->getExit())) { + + assert (L.getLocationContext()->getCFG()->getExit().size() == 0 + && "EXIT block cannot contain Stmts."); + + // Process the final state transition. + SubEng.processEndOfFunction(BuilderCtx); + + // This path is done. Don't enqueue any more nodes. + return; + } + + // Call into the SubEngine to process entering the CFGBlock. + ExplodedNodeSet dstNodes; + BlockEntrance BE(Blk, Pred->getLocationContext()); + NodeBuilderWithSinks nodeBuilder(Pred, dstNodes, BuilderCtx, BE); + SubEng.processCFGBlockEntrance(L, nodeBuilder); + + // Auto-generate a node. + if (!nodeBuilder.hasGeneratedNodes()) { + nodeBuilder.generateNode(Pred->State, Pred); + } + + // Enqueue nodes onto the worklist. + enqueue(dstNodes); +} + +void CoreEngine::HandleBlockEntrance(const BlockEntrance &L, + ExplodedNode *Pred) { + + // Increment the block counter. + const LocationContext *LC = Pred->getLocationContext(); + unsigned BlockId = L.getBlock()->getBlockID(); + BlockCounter Counter = WList->getBlockCounter(); + Counter = BCounterFactory.IncrementCount(Counter, LC->getCurrentStackFrame(), + BlockId); + WList->setBlockCounter(Counter); + + // Process the entrance of the block. + if (CFGElement E = L.getFirstElement()) { + NodeBuilderContext Ctx(*this, L.getBlock(), Pred); + SubEng.processCFGElement(E, Pred, 0, &Ctx); + } + else + HandleBlockExit(L.getBlock(), Pred); +} + +void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) { + + if (const Stmt *Term = B->getTerminator()) { + switch (Term->getStmtClass()) { + default: + llvm_unreachable("Analysis for this terminator not implemented."); + + case Stmt::BinaryOperatorClass: // '&&' and '||' + HandleBranch(cast(Term)->getLHS(), Term, B, Pred); + return; + + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: + HandleBranch(cast(Term)->getCond(), + Term, B, Pred); + return; + + // FIXME: Use constant-folding in CFG construction to simplify this + // case. + + case Stmt::ChooseExprClass: + HandleBranch(cast(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::CXXTryStmtClass: { + // Generate a node for each of the successors. + // Our logic for EH analysis can certainly be improved. + for (CFGBlock::const_succ_iterator it = B->succ_begin(), + et = B->succ_end(); it != et; ++it) { + if (const CFGBlock *succ = *it) { + generateNode(BlockEdge(B, succ, Pred->getLocationContext()), + Pred->State, Pred); + } + } + return; + } + + case Stmt::DoStmtClass: + HandleBranch(cast(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::CXXForRangeStmtClass: + HandleBranch(cast(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::ForStmtClass: + HandleBranch(cast(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::ContinueStmtClass: + case Stmt::BreakStmtClass: + case Stmt::GotoStmtClass: + break; + + case Stmt::IfStmtClass: + HandleBranch(cast(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::IndirectGotoStmtClass: { + // Only 1 successor: the indirect goto dispatch block. + assert (B->succ_size() == 1); + + IndirectGotoNodeBuilder + builder(Pred, B, cast(Term)->getTarget(), + *(B->succ_begin()), this); + + SubEng.processIndirectGoto(builder); + return; + } + + case Stmt::ObjCForCollectionStmtClass: { + // In the case of ObjCForCollectionStmt, it appears twice in a CFG: + // + // (1) inside a basic block, which represents the binding of the + // 'element' variable to a value. + // (2) in a terminator, which represents the branch. + // + // For (1), subengines will bind a value (i.e., 0 or 1) indicating + // whether or not collection contains any more elements. We cannot + // just test to see if the element is nil because a container can + // contain nil elements. + HandleBranch(Term, Term, B, Pred); + return; + } + + case Stmt::SwitchStmtClass: { + SwitchNodeBuilder builder(Pred, B, cast(Term)->getCond(), + this); + + SubEng.processSwitch(builder); + return; + } + + case Stmt::WhileStmtClass: + HandleBranch(cast(Term)->getCond(), Term, B, Pred); + return; + } + } + + assert (B->succ_size() == 1 && + "Blocks with no terminator should have at most 1 successor."); + + generateNode(BlockEdge(B, *(B->succ_begin()), Pred->getLocationContext()), + Pred->State, Pred); +} + +void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term, + const CFGBlock * B, ExplodedNode *Pred) { + assert(B->succ_size() == 2); + NodeBuilderContext Ctx(*this, B, Pred); + ExplodedNodeSet Dst; + SubEng.processBranch(Cond, Term, Ctx, Pred, Dst, + *(B->succ_begin()), *(B->succ_begin()+1)); + // Enqueue the new frontier onto the worklist. + enqueue(Dst); +} + +void CoreEngine::HandlePostStmt(const CFGBlock *B, unsigned StmtIdx, + ExplodedNode *Pred) { + assert(B); + assert(!B->empty()); + + if (StmtIdx == B->size()) + HandleBlockExit(B, Pred); + else { + NodeBuilderContext Ctx(*this, B, Pred); + SubEng.processCFGElement((*B)[StmtIdx], Pred, StmtIdx, &Ctx); + } +} + +/// generateNode - Utility method to generate nodes, hook up successors, +/// and add nodes to the worklist. +void CoreEngine::generateNode(const ProgramPoint &Loc, + ProgramStateRef State, + ExplodedNode *Pred) { + + bool IsNew; + ExplodedNode *Node = G->getNode(Loc, State, false, &IsNew); + + if (Pred) + Node->addPredecessor(Pred, *G); // Link 'Node' with its predecessor. + else { + assert (IsNew); + G->addRoot(Node); // 'Node' has no predecessor. Make it a root. + } + + // Only add 'Node' to the worklist if it was freshly generated. + if (IsNew) WList->enqueue(Node); +} + +void CoreEngine::enqueueStmtNode(ExplodedNode *N, + const CFGBlock *Block, unsigned Idx) { + assert(Block); + assert (!N->isSink()); + + // Check if this node entered a callee. + if (isa(N->getLocation())) { + // Still use the index of the CallExpr. It's needed to create the callee + // StackFrameContext. + WList->enqueue(N, Block, Idx); + return; + } + + // Do not create extra nodes. Move to the next CFG element. + if (isa(N->getLocation())) { + WList->enqueue(N, Block, Idx+1); + return; + } + + if (isa(N->getLocation())) { + WList->enqueue(N, Block, Idx); + return; + } + + const CFGStmt *CS = (*Block)[Idx].getAs(); + const Stmt *St = CS ? CS->getStmt() : 0; + PostStmt Loc(St, N->getLocationContext()); + + if (Loc == N->getLocation()) { + // Note: 'N' should be a fresh node because otherwise it shouldn't be + // a member of Deferred. + WList->enqueue(N, Block, Idx+1); + return; + } + + bool IsNew; + ExplodedNode *Succ = G->getNode(Loc, N->getState(), false, &IsNew); + Succ->addPredecessor(N, *G); + + if (IsNew) + WList->enqueue(Succ, Block, Idx+1); +} + +ExplodedNode *CoreEngine::generateCallExitNode(ExplodedNode *N) { + // Create a CallExit node and enqueue it. + const StackFrameContext *LocCtx + = cast(N->getLocationContext()); + const Stmt *CE = LocCtx->getCallSite(); + + // Use the the callee location context. + CallExit Loc(CE, LocCtx); + + bool isNew; + ExplodedNode *Node = G->getNode(Loc, N->getState(), false, &isNew); + Node->addPredecessor(N, *G); + return isNew ? Node : 0; +} + + +void CoreEngine::enqueue(ExplodedNodeSet &Set) { + for (ExplodedNodeSet::iterator I = Set.begin(), + E = Set.end(); I != E; ++I) { + WList->enqueue(*I); + } +} + +void CoreEngine::enqueue(ExplodedNodeSet &Set, + const CFGBlock *Block, unsigned Idx) { + for (ExplodedNodeSet::iterator I = Set.begin(), + E = Set.end(); I != E; ++I) { + enqueueStmtNode(*I, Block, Idx); + } +} + +void CoreEngine::enqueueEndOfFunction(ExplodedNodeSet &Set) { + for (ExplodedNodeSet::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { + ExplodedNode *N = *I; + // If we are in an inlined call, generate CallExit node. + if (N->getLocationContext()->getParent()) { + N = generateCallExitNode(N); + if (N) + WList->enqueue(N); + } else { + G->addEndOfPath(N); + NumPathsExplored++; + } + } +} + + +void NodeBuilder::anchor() { } + +ExplodedNode* NodeBuilder::generateNodeImpl(const ProgramPoint &Loc, + ProgramStateRef State, + ExplodedNode *FromN, + bool MarkAsSink) { + HasGeneratedNodes = true; + bool IsNew; + ExplodedNode *N = C.Eng.G->getNode(Loc, State, MarkAsSink, &IsNew); + N->addPredecessor(FromN, *C.Eng.G); + Frontier.erase(FromN); + + if (!IsNew) + return 0; + + if (!MarkAsSink) + Frontier.Add(N); + + return N; +} + +void NodeBuilderWithSinks::anchor() { } + +StmtNodeBuilder::~StmtNodeBuilder() { + if (EnclosingBldr) + for (ExplodedNodeSet::iterator I = Frontier.begin(), + E = Frontier.end(); I != E; ++I ) + EnclosingBldr->addNodes(*I); +} + +void BranchNodeBuilder::anchor() { } + +ExplodedNode *BranchNodeBuilder::generateNode(ProgramStateRef State, + bool branch, + ExplodedNode *NodePred) { + // If the branch has been marked infeasible we should not generate a node. + if (!isFeasible(branch)) + return NULL; + + ProgramPoint Loc = BlockEdge(C.Block, branch ? DstT:DstF, + NodePred->getLocationContext()); + ExplodedNode *Succ = generateNodeImpl(Loc, State, NodePred); + return Succ; +} + +ExplodedNode* +IndirectGotoNodeBuilder::generateNode(const iterator &I, + ProgramStateRef St, + bool IsSink) { + bool IsNew; + ExplodedNode *Succ = Eng.G->getNode(BlockEdge(Src, I.getBlock(), + Pred->getLocationContext()), St, + IsSink, &IsNew); + Succ->addPredecessor(Pred, *Eng.G); + + if (!IsNew) + return 0; + + if (!IsSink) + Eng.WList->enqueue(Succ); + + return Succ; +} + + +ExplodedNode* +SwitchNodeBuilder::generateCaseStmtNode(const iterator &I, + ProgramStateRef St) { + + bool IsNew; + ExplodedNode *Succ = Eng.G->getNode(BlockEdge(Src, I.getBlock(), + Pred->getLocationContext()), St, + false, &IsNew); + Succ->addPredecessor(Pred, *Eng.G); + if (!IsNew) + return 0; + + Eng.WList->enqueue(Succ); + return Succ; +} + + +ExplodedNode* +SwitchNodeBuilder::generateDefaultCaseNode(ProgramStateRef St, + bool IsSink) { + // Get the block for the default case. + assert(Src->succ_rbegin() != Src->succ_rend()); + CFGBlock *DefaultBlock = *Src->succ_rbegin(); + + // Sanity check for default blocks that are unreachable and not caught + // by earlier stages. + if (!DefaultBlock) + return NULL; + + bool IsNew; + ExplodedNode *Succ = Eng.G->getNode(BlockEdge(Src, DefaultBlock, + Pred->getLocationContext()), St, + IsSink, &IsNew); + Succ->addPredecessor(Pred, *Eng.G); + + if (!IsNew) + return 0; + + if (!IsSink) + Eng.WList->enqueue(Succ); + + return Succ; +} diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp new file mode 100644 index 0000000..b5ea3db --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp @@ -0,0 +1,295 @@ +//== Environment.cpp - Map from Stmt* to Locations/Values -------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defined the Environment and EnvironmentManager classes. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprObjC.h" +#include "clang/Analysis/AnalysisContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" + +using namespace clang; +using namespace ento; + +SVal Environment::lookupExpr(const EnvironmentEntry &E) const { + const SVal* X = ExprBindings.lookup(E); + if (X) { + SVal V = *X; + return V; + } + return UnknownVal(); +} + +SVal Environment::getSVal(const EnvironmentEntry &Entry, + SValBuilder& svalBuilder, + bool useOnlyDirectBindings) const { + + if (useOnlyDirectBindings) { + // This branch is rarely taken, but can be exercised by + // checkers that explicitly bind values to arbitrary + // expressions. It is crucial that we do not ignore any + // expression here, and do a direct lookup. + return lookupExpr(Entry); + } + + const Stmt *E = Entry.getStmt(); + const LocationContext *LCtx = Entry.getLocationContext(); + + for (;;) { + if (const Expr *Ex = dyn_cast(E)) + E = Ex->IgnoreParens(); + + switch (E->getStmtClass()) { + case Stmt::AddrLabelExprClass: + return svalBuilder.makeLoc(cast(E)); + case Stmt::OpaqueValueExprClass: { + const OpaqueValueExpr *ope = cast(E); + E = ope->getSourceExpr(); + continue; + } + case Stmt::ParenExprClass: + case Stmt::GenericSelectionExprClass: + llvm_unreachable("ParenExprs and GenericSelectionExprs should " + "have been handled by IgnoreParens()"); + case Stmt::CharacterLiteralClass: { + const CharacterLiteral* C = cast(E); + return svalBuilder.makeIntVal(C->getValue(), C->getType()); + } + case Stmt::CXXBoolLiteralExprClass: { + const SVal *X = ExprBindings.lookup(EnvironmentEntry(E, LCtx)); + if (X) + return *X; + else + return svalBuilder.makeBoolVal(cast(E)); + } + case Stmt::IntegerLiteralClass: { + // In C++, this expression may have been bound to a temporary object. + SVal const *X = ExprBindings.lookup(EnvironmentEntry(E, LCtx)); + if (X) + return *X; + else + return svalBuilder.makeIntVal(cast(E)); + } + case Stmt::ObjCBoolLiteralExprClass: + return svalBuilder.makeBoolVal(cast(E)); + + // For special C0xx nullptr case, make a null pointer SVal. + case Stmt::CXXNullPtrLiteralExprClass: + return svalBuilder.makeNull(); + case Stmt::ExprWithCleanupsClass: + E = cast(E)->getSubExpr(); + continue; + case Stmt::CXXBindTemporaryExprClass: + E = cast(E)->getSubExpr(); + continue; + case Stmt::ObjCPropertyRefExprClass: + return loc::ObjCPropRef(cast(E)); + case Stmt::ObjCStringLiteralClass: { + MemRegionManager &MRMgr = svalBuilder.getRegionManager(); + const ObjCStringLiteral *SL = cast(E); + return svalBuilder.makeLoc(MRMgr.getObjCStringRegion(SL)); + } + case Stmt::StringLiteralClass: { + MemRegionManager &MRMgr = svalBuilder.getRegionManager(); + const StringLiteral *SL = cast(E); + return svalBuilder.makeLoc(MRMgr.getStringRegion(SL)); + } + case Stmt::ReturnStmtClass: { + const ReturnStmt *RS = cast(E); + if (const Expr *RE = RS->getRetValue()) { + E = RE; + continue; + } + return UndefinedVal(); + } + + // Handle all other Stmt* using a lookup. + default: + break; + }; + break; + } + return lookupExpr(EnvironmentEntry(E, LCtx)); +} + +Environment EnvironmentManager::bindExpr(Environment Env, + const EnvironmentEntry &E, + SVal V, + bool Invalidate) { + if (V.isUnknown()) { + if (Invalidate) + return Environment(F.remove(Env.ExprBindings, E)); + else + return Env; + } + return Environment(F.add(Env.ExprBindings, E, V)); +} + +static inline EnvironmentEntry MakeLocation(const EnvironmentEntry &E) { + const Stmt *S = E.getStmt(); + S = (const Stmt*) (((uintptr_t) S) | 0x1); + return EnvironmentEntry(S, E.getLocationContext()); +} + +Environment EnvironmentManager::bindExprAndLocation(Environment Env, + const EnvironmentEntry &E, + SVal location, SVal V) { + return Environment(F.add(F.add(Env.ExprBindings, MakeLocation(E), location), + E, V)); +} + +namespace { +class MarkLiveCallback : public SymbolVisitor { + SymbolReaper &SymReaper; +public: + MarkLiveCallback(SymbolReaper &symreaper) : SymReaper(symreaper) {} + bool VisitSymbol(SymbolRef sym) { + SymReaper.markLive(sym); + return true; + } + bool VisitMemRegion(const MemRegion *R) { + SymReaper.markLive(R); + return true; + } +}; +} // end anonymous namespace + +// In addition to mapping from EnvironmentEntry - > SVals in the Environment, +// we also maintain a mapping from EnvironmentEntry -> SVals (locations) +// that were used during a load and store. +static inline bool IsLocation(const EnvironmentEntry &E) { + const Stmt *S = E.getStmt(); + return (bool) (((uintptr_t) S) & 0x1); +} + +// removeDeadBindings: +// - Remove subexpression bindings. +// - Remove dead block expression bindings. +// - Keep live block expression bindings: +// - Mark their reachable symbols live in SymbolReaper, +// see ScanReachableSymbols. +// - Mark the region in DRoots if the binding is a loc::MemRegionVal. +Environment +EnvironmentManager::removeDeadBindings(Environment Env, + SymbolReaper &SymReaper, + ProgramStateRef ST) { + + // We construct a new Environment object entirely, as this is cheaper than + // individually removing all the subexpression bindings (which will greatly + // outnumber block-level expression bindings). + Environment NewEnv = getInitialEnvironment(); + + SmallVector, 10> deferredLocations; + + MarkLiveCallback CB(SymReaper); + ScanReachableSymbols RSScaner(ST, CB); + + llvm::ImmutableMapRef + EBMapRef(NewEnv.ExprBindings.getRootWithoutRetain(), + F.getTreeFactory()); + + // Iterate over the block-expr bindings. + for (Environment::iterator I = Env.begin(), E = Env.end(); + I != E; ++I) { + + const EnvironmentEntry &BlkExpr = I.getKey(); + // For recorded locations (used when evaluating loads and stores), we + // consider them live only when their associated normal expression is + // also live. + // NOTE: This assumes that loads/stores that evaluated to UnknownVal + // still have an entry in the map. + if (IsLocation(BlkExpr)) { + deferredLocations.push_back(std::make_pair(BlkExpr, I.getData())); + continue; + } + const SVal &X = I.getData(); + + if (SymReaper.isLive(BlkExpr.getStmt(), BlkExpr.getLocationContext())) { + // Copy the binding to the new map. + EBMapRef = EBMapRef.add(BlkExpr, X); + + // If the block expr's value is a memory region, then mark that region. + if (isa(X)) { + const MemRegion *R = cast(X).getRegion(); + SymReaper.markLive(R); + } + + // Mark all symbols in the block expr's value live. + RSScaner.scan(X); + continue; + } + + // Otherwise the expression is dead with a couple exceptions. + // Do not misclean LogicalExpr or ConditionalOperator. It is dead at the + // beginning of itself, but we need its UndefinedVal to determine its + // SVal. + if (X.isUndef() && cast(X).getData()) + EBMapRef = EBMapRef.add(BlkExpr, X); + } + + // Go through he deferred locations and add them to the new environment if + // the correspond Stmt* is in the map as well. + for (SmallVectorImpl >::iterator + I = deferredLocations.begin(), E = deferredLocations.end(); I != E; ++I) { + const EnvironmentEntry &En = I->first; + const Stmt *S = (Stmt*) (((uintptr_t) En.getStmt()) & (uintptr_t) ~0x1); + if (EBMapRef.lookup(EnvironmentEntry(S, En.getLocationContext()))) + EBMapRef = EBMapRef.add(En, I->second); + } + + NewEnv.ExprBindings = EBMapRef.asImmutableMap(); + return NewEnv; +} + +void Environment::print(raw_ostream &Out, const char *NL, + const char *Sep) const { + printAux(Out, false, NL, Sep); + printAux(Out, true, NL, Sep); +} + +void Environment::printAux(raw_ostream &Out, bool printLocations, + const char *NL, + const char *Sep) const{ + + bool isFirst = true; + + for (Environment::iterator I = begin(), E = end(); I != E; ++I) { + const EnvironmentEntry &En = I.getKey(); + if (IsLocation(En)) { + if (!printLocations) + continue; + } + else { + if (printLocations) + continue; + } + + if (isFirst) { + Out << NL << NL + << (printLocations ? "Load/Store locations:" : "Expressions:") + << NL; + isFirst = false; + } else { + Out << NL; + } + + const Stmt *S = En.getStmt(); + if (printLocations) { + S = (Stmt*) (((uintptr_t) S) & ((uintptr_t) ~0x1)); + } + + Out << " (" << (void*) En.getLocationContext() << ',' << (void*) S << ") "; + LangOptions LO; // FIXME. + S->printPretty(Out, 0, PrintingPolicy(LO)); + Out << " : " << I.getData(); + } +} diff --git a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp new file mode 100644 index 0000000..0dcbe1f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp @@ -0,0 +1,405 @@ +//=-- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -*- C++ -*------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the template classes ExplodedNode and ExplodedGraph, +// which represent a path-sensitive, intra-procedural "exploded graph." +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/ParentMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// Node auditing. +//===----------------------------------------------------------------------===// + +// An out of line virtual method to provide a home for the class vtable. +ExplodedNode::Auditor::~Auditor() {} + +#ifndef NDEBUG +static ExplodedNode::Auditor* NodeAuditor = 0; +#endif + +void ExplodedNode::SetAuditor(ExplodedNode::Auditor* A) { +#ifndef NDEBUG + NodeAuditor = A; +#endif +} + +//===----------------------------------------------------------------------===// +// Cleanup. +//===----------------------------------------------------------------------===// + +static const unsigned CounterTop = 1000; + +ExplodedGraph::ExplodedGraph() + : NumNodes(0), reclaimNodes(false), reclaimCounter(CounterTop) {} + +ExplodedGraph::~ExplodedGraph() {} + +//===----------------------------------------------------------------------===// +// Node reclamation. +//===----------------------------------------------------------------------===// + +bool ExplodedGraph::shouldCollect(const ExplodedNode *node) { + // Reclaimn all nodes that match *all* the following criteria: + // + // (1) 1 predecessor (that has one successor) + // (2) 1 successor (that has one predecessor) + // (3) The ProgramPoint is for a PostStmt. + // (4) There is no 'tag' for the ProgramPoint. + // (5) The 'store' is the same as the predecessor. + // (6) The 'GDM' is the same as the predecessor. + // (7) The LocationContext is the same as the predecessor. + // (8) The PostStmt is for a non-consumed Stmt or Expr. + + // Conditions 1 and 2. + if (node->pred_size() != 1 || node->succ_size() != 1) + return false; + + const ExplodedNode *pred = *(node->pred_begin()); + if (pred->succ_size() != 1) + return false; + + const ExplodedNode *succ = *(node->succ_begin()); + if (succ->pred_size() != 1) + return false; + + // Condition 3. + ProgramPoint progPoint = node->getLocation(); + if (!isa(progPoint) || + (isa(progPoint) || isa(progPoint))) + return false; + + // Condition 4. + PostStmt ps = cast(progPoint); + if (ps.getTag()) + return false; + + if (isa(ps.getStmt())) + return false; + + // Conditions 5, 6, and 7. + ProgramStateRef state = node->getState(); + ProgramStateRef pred_state = pred->getState(); + if (state->store != pred_state->store || state->GDM != pred_state->GDM || + progPoint.getLocationContext() != pred->getLocationContext()) + return false; + + // Condition 8. + if (const Expr *Ex = dyn_cast(ps.getStmt())) { + ParentMap &PM = progPoint.getLocationContext()->getParentMap(); + if (!PM.isConsumedExpr(Ex)) + return false; + } + + return true; +} + +void ExplodedGraph::collectNode(ExplodedNode *node) { + // Removing a node means: + // (a) changing the predecessors successor to the successor of this node + // (b) changing the successors predecessor to the predecessor of this node + // (c) Putting 'node' onto freeNodes. + assert(node->pred_size() == 1 || node->succ_size() == 1); + ExplodedNode *pred = *(node->pred_begin()); + ExplodedNode *succ = *(node->succ_begin()); + pred->replaceSuccessor(succ); + succ->replacePredecessor(pred); + FreeNodes.push_back(node); + Nodes.RemoveNode(node); + --NumNodes; + node->~ExplodedNode(); +} + +void ExplodedGraph::reclaimRecentlyAllocatedNodes() { + if (ChangedNodes.empty()) + return; + + // Only periodically relcaim nodes so that we can build up a set of + // nodes that meet the reclamation criteria. Freshly created nodes + // by definition have no successor, and thus cannot be reclaimed (see below). + assert(reclaimCounter > 0); + if (--reclaimCounter != 0) + return; + reclaimCounter = CounterTop; + + for (NodeVector::iterator it = ChangedNodes.begin(), et = ChangedNodes.end(); + it != et; ++it) { + ExplodedNode *node = *it; + if (shouldCollect(node)) + collectNode(node); + } + ChangedNodes.clear(); +} + +//===----------------------------------------------------------------------===// +// ExplodedNode. +//===----------------------------------------------------------------------===// + +static inline BumpVector& getVector(void *P) { + return *reinterpret_cast*>(P); +} + +void ExplodedNode::addPredecessor(ExplodedNode *V, ExplodedGraph &G) { + assert (!V->isSink()); + Preds.addNode(V, G); + V->Succs.addNode(this, G); +#ifndef NDEBUG + if (NodeAuditor) NodeAuditor->AddEdge(V, this); +#endif +} + +void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) { + assert(getKind() == Size1); + P = reinterpret_cast(node); + assert(getKind() == Size1); +} + +void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) { + assert((reinterpret_cast(N) & Mask) == 0x0); + assert(!getFlag()); + + if (getKind() == Size1) { + if (ExplodedNode *NOld = getNode()) { + BumpVectorContext &Ctx = G.getNodeAllocator(); + BumpVector *V = + G.getAllocator().Allocate >(); + new (V) BumpVector(Ctx, 4); + + assert((reinterpret_cast(V) & Mask) == 0x0); + V->push_back(NOld, Ctx); + V->push_back(N, Ctx); + P = reinterpret_cast(V) | SizeOther; + assert(getPtr() == (void*) V); + assert(getKind() == SizeOther); + } + else { + P = reinterpret_cast(N); + assert(getKind() == Size1); + } + } + else { + assert(getKind() == SizeOther); + getVector(getPtr()).push_back(N, G.getNodeAllocator()); + } +} + +unsigned ExplodedNode::NodeGroup::size() const { + if (getFlag()) + return 0; + + if (getKind() == Size1) + return getNode() ? 1 : 0; + else + return getVector(getPtr()).size(); +} + +ExplodedNode **ExplodedNode::NodeGroup::begin() const { + if (getFlag()) + return NULL; + + if (getKind() == Size1) + return (ExplodedNode**) (getPtr() ? &P : NULL); + else + return const_cast(&*(getVector(getPtr()).begin())); +} + +ExplodedNode** ExplodedNode::NodeGroup::end() const { + if (getFlag()) + return NULL; + + if (getKind() == Size1) + return (ExplodedNode**) (getPtr() ? &P+1 : NULL); + else { + // Dereferencing end() is undefined behaviour. The vector is not empty, so + // we can dereference the last elem and then add 1 to the result. + return const_cast(getVector(getPtr()).end()); + } +} + +ExplodedNode *ExplodedGraph::getNode(const ProgramPoint &L, + ProgramStateRef State, + bool IsSink, + bool* IsNew) { + // Profile 'State' to determine if we already have an existing node. + llvm::FoldingSetNodeID profile; + void *InsertPos = 0; + + NodeTy::Profile(profile, L, State, IsSink); + NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos); + + if (!V) { + if (!FreeNodes.empty()) { + V = FreeNodes.back(); + FreeNodes.pop_back(); + } + else { + // Allocate a new node. + V = (NodeTy*) getAllocator().Allocate(); + } + + new (V) NodeTy(L, State, IsSink); + + if (reclaimNodes) + ChangedNodes.push_back(V); + + // Insert the node into the node set and return it. + Nodes.InsertNode(V, InsertPos); + ++NumNodes; + + if (IsNew) *IsNew = true; + } + else + if (IsNew) *IsNew = false; + + return V; +} + +std::pair +ExplodedGraph::Trim(const NodeTy* const* NBeg, const NodeTy* const* NEnd, + llvm::DenseMap *InverseMap) const { + + if (NBeg == NEnd) + return std::make_pair((ExplodedGraph*) 0, + (InterExplodedGraphMap*) 0); + + assert (NBeg < NEnd); + + OwningPtr M(new InterExplodedGraphMap()); + + ExplodedGraph* G = TrimInternal(NBeg, NEnd, M.get(), InverseMap); + + return std::make_pair(static_cast(G), M.take()); +} + +ExplodedGraph* +ExplodedGraph::TrimInternal(const ExplodedNode* const* BeginSources, + const ExplodedNode* const* EndSources, + InterExplodedGraphMap* M, + llvm::DenseMap *InverseMap) const { + + typedef llvm::DenseSet Pass1Ty; + Pass1Ty Pass1; + + typedef llvm::DenseMap Pass2Ty; + Pass2Ty& Pass2 = M->M; + + SmallVector WL1, WL2; + + // ===- Pass 1 (reverse DFS) -=== + for (const ExplodedNode* const* I = BeginSources; I != EndSources; ++I) { + assert(*I); + WL1.push_back(*I); + } + + // Process the first worklist until it is empty. Because it is a std::list + // it acts like a FIFO queue. + while (!WL1.empty()) { + const ExplodedNode *N = WL1.back(); + WL1.pop_back(); + + // Have we already visited this node? If so, continue to the next one. + if (Pass1.count(N)) + continue; + + // Otherwise, mark this node as visited. + Pass1.insert(N); + + // If this is a root enqueue it to the second worklist. + if (N->Preds.empty()) { + WL2.push_back(N); + continue; + } + + // Visit our predecessors and enqueue them. + for (ExplodedNode** I=N->Preds.begin(), **E=N->Preds.end(); I!=E; ++I) + WL1.push_back(*I); + } + + // We didn't hit a root? Return with a null pointer for the new graph. + if (WL2.empty()) + return 0; + + // Create an empty graph. + ExplodedGraph* G = MakeEmptyGraph(); + + // ===- Pass 2 (forward DFS to construct the new graph) -=== + while (!WL2.empty()) { + const ExplodedNode *N = WL2.back(); + WL2.pop_back(); + + // Skip this node if we have already processed it. + if (Pass2.find(N) != Pass2.end()) + continue; + + // Create the corresponding node in the new graph and record the mapping + // from the old node to the new node. + ExplodedNode *NewN = G->getNode(N->getLocation(), N->State, N->isSink(), 0); + Pass2[N] = NewN; + + // Also record the reverse mapping from the new node to the old node. + if (InverseMap) (*InverseMap)[NewN] = N; + + // If this node is a root, designate it as such in the graph. + if (N->Preds.empty()) + G->addRoot(NewN); + + // In the case that some of the intended predecessors of NewN have already + // been created, we should hook them up as predecessors. + + // Walk through the predecessors of 'N' and hook up their corresponding + // nodes in the new graph (if any) to the freshly created node. + for (ExplodedNode **I=N->Preds.begin(), **E=N->Preds.end(); I!=E; ++I) { + Pass2Ty::iterator PI = Pass2.find(*I); + if (PI == Pass2.end()) + continue; + + NewN->addPredecessor(PI->second, *G); + } + + // In the case that some of the intended successors of NewN have already + // been created, we should hook them up as successors. Otherwise, enqueue + // the new nodes from the original graph that should have nodes created + // in the new graph. + for (ExplodedNode **I=N->Succs.begin(), **E=N->Succs.end(); I!=E; ++I) { + Pass2Ty::iterator PI = Pass2.find(*I); + if (PI != Pass2.end()) { + PI->second->addPredecessor(NewN, *G); + continue; + } + + // Enqueue nodes to the worklist that were marked during pass 1. + if (Pass1.count(*I)) + WL2.push_back(*I); + } + } + + return G; +} + +void InterExplodedGraphMap::anchor() { } + +ExplodedNode* +InterExplodedGraphMap::getMappedNode(const ExplodedNode *N) const { + llvm::DenseMap::const_iterator I = + M.find(N); + + return I == M.end() ? 0 : I->second; +} + diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp new file mode 100644 index 0000000..1fd9068 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -0,0 +1,2076 @@ +//=-- ExprEngine.cpp - Path-Sensitive Expression-Level Dataflow ---*- C++ -*-= +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a meta-engine for path-sensitive dataflow analysis that +// is built on GREngine, but provides the boilerplate to execute transfer +// functions and build the ExplodedGraph at the expression level. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ExprEngine" + +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/DeclCXX.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/PrettyStackTrace.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/ImmutableList.h" +#include "llvm/ADT/Statistic.h" + +#ifndef NDEBUG +#include "llvm/Support/GraphWriter.h" +#endif + +using namespace clang; +using namespace ento; +using llvm::APSInt; + +STATISTIC(NumRemoveDeadBindings, + "The # of times RemoveDeadBindings is called"); +STATISTIC(NumRemoveDeadBindingsSkipped, + "The # of times RemoveDeadBindings is skipped"); +STATISTIC(NumMaxBlockCountReached, + "The # of aborted paths due to reaching the maximum block count in " + "a top level function"); +STATISTIC(NumMaxBlockCountReachedInInlined, + "The # of aborted paths due to reaching the maximum block count in " + "an inlined function"); +STATISTIC(NumTimesRetriedWithoutInlining, + "The # of times we re-evaluated a call without inlining"); + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +static inline Selector GetNullarySelector(const char* name, ASTContext &Ctx) { + IdentifierInfo* II = &Ctx.Idents.get(name); + return Ctx.Selectors.getSelector(0, &II); +} + +//===----------------------------------------------------------------------===// +// Engine construction and deletion. +//===----------------------------------------------------------------------===// + +ExprEngine::ExprEngine(AnalysisManager &mgr, bool gcEnabled, + SetOfConstDecls *VisitedCallees, + FunctionSummariesTy *FS) + : AMgr(mgr), + AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()), + Engine(*this, VisitedCallees, FS), + G(Engine.getGraph()), + StateMgr(getContext(), mgr.getStoreManagerCreator(), + mgr.getConstraintManagerCreator(), G.getAllocator(), + *this), + SymMgr(StateMgr.getSymbolManager()), + svalBuilder(StateMgr.getSValBuilder()), + EntryNode(NULL), + currentStmt(NULL), currentStmtIdx(0), currentBuilderContext(0), + NSExceptionII(NULL), NSExceptionInstanceRaiseSelectors(NULL), + RaiseSel(GetNullarySelector("raise", getContext())), + ObjCGCEnabled(gcEnabled), BR(mgr, *this) { + + if (mgr.shouldEagerlyTrimExplodedGraph()) { + // Enable eager node reclaimation when constructing the ExplodedGraph. + G.enableNodeReclamation(); + } +} + +ExprEngine::~ExprEngine() { + BR.FlushReports(); + delete [] NSExceptionInstanceRaiseSelectors; +} + +//===----------------------------------------------------------------------===// +// Utility methods. +//===----------------------------------------------------------------------===// + +ProgramStateRef ExprEngine::getInitialState(const LocationContext *InitLoc) { + ProgramStateRef state = StateMgr.getInitialState(InitLoc); + const Decl *D = InitLoc->getDecl(); + + // Preconditions. + // FIXME: It would be nice if we had a more general mechanism to add + // such preconditions. Some day. + do { + + if (const FunctionDecl *FD = dyn_cast(D)) { + // Precondition: the first argument of 'main' is an integer guaranteed + // to be > 0. + const IdentifierInfo *II = FD->getIdentifier(); + if (!II || !(II->getName() == "main" && FD->getNumParams() > 0)) + break; + + const ParmVarDecl *PD = FD->getParamDecl(0); + QualType T = PD->getType(); + if (!T->isIntegerType()) + break; + + const MemRegion *R = state->getRegion(PD, InitLoc); + if (!R) + break; + + SVal V = state->getSVal(loc::MemRegionVal(R)); + SVal Constraint_untested = evalBinOp(state, BO_GT, V, + svalBuilder.makeZeroVal(T), + getContext().IntTy); + + DefinedOrUnknownSVal *Constraint = + dyn_cast(&Constraint_untested); + + if (!Constraint) + break; + + if (ProgramStateRef newState = state->assume(*Constraint, true)) + state = newState; + } + break; + } + while (0); + + if (const ObjCMethodDecl *MD = dyn_cast(D)) { + // Precondition: 'self' is always non-null upon entry to an Objective-C + // method. + const ImplicitParamDecl *SelfD = MD->getSelfDecl(); + const MemRegion *R = state->getRegion(SelfD, InitLoc); + SVal V = state->getSVal(loc::MemRegionVal(R)); + + if (const Loc *LV = dyn_cast(&V)) { + // Assume that the pointer value in 'self' is non-null. + state = state->assume(*LV, true); + assert(state && "'self' cannot be null"); + } + } + + if (const CXXMethodDecl *MD = dyn_cast(D)) { + if (!MD->isStatic()) { + // Precondition: 'this' is always non-null upon entry to the + // top-level function. This is our starting assumption for + // analyzing an "open" program. + const StackFrameContext *SFC = InitLoc->getCurrentStackFrame(); + if (SFC->getParent() == 0) { + loc::MemRegionVal L(getCXXThisRegion(MD, SFC)); + SVal V = state->getSVal(L); + if (const Loc *LV = dyn_cast(&V)) { + state = state->assume(*LV, true); + assert(state && "'this' cannot be null"); + } + } + } + } + + return state; +} + +//===----------------------------------------------------------------------===// +// Top-level transfer function logic (Dispatcher). +//===----------------------------------------------------------------------===// + +/// evalAssume - Called by ConstraintManager. Used to call checker-specific +/// logic for handling assumptions on symbolic values. +ProgramStateRef ExprEngine::processAssume(ProgramStateRef state, + SVal cond, bool assumption) { + return getCheckerManager().runCheckersForEvalAssume(state, cond, assumption); +} + +bool ExprEngine::wantsRegionChangeUpdate(ProgramStateRef state) { + return getCheckerManager().wantsRegionChangeUpdate(state); +} + +ProgramStateRef +ExprEngine::processRegionChanges(ProgramStateRef state, + const StoreManager::InvalidatedSymbols *invalidated, + ArrayRef Explicits, + ArrayRef Regions, + const CallOrObjCMessage *Call) { + return getCheckerManager().runCheckersForRegionChanges(state, invalidated, + Explicits, Regions, Call); +} + +void ExprEngine::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) { + getCheckerManager().runCheckersForPrintState(Out, State, NL, Sep); +} + +void ExprEngine::processEndWorklist(bool hasWorkRemaining) { + getCheckerManager().runCheckersForEndAnalysis(G, BR, *this); +} + +void ExprEngine::processCFGElement(const CFGElement E, ExplodedNode *Pred, + unsigned StmtIdx, NodeBuilderContext *Ctx) { + currentStmtIdx = StmtIdx; + currentBuilderContext = Ctx; + + switch (E.getKind()) { + case CFGElement::Invalid: + llvm_unreachable("Unexpected CFGElement kind."); + case CFGElement::Statement: + ProcessStmt(const_cast(E.getAs()->getStmt()), Pred); + return; + case CFGElement::Initializer: + ProcessInitializer(E.getAs()->getInitializer(), Pred); + return; + case CFGElement::AutomaticObjectDtor: + case CFGElement::BaseDtor: + case CFGElement::MemberDtor: + case CFGElement::TemporaryDtor: + ProcessImplicitDtor(*E.getAs(), Pred); + return; + } +} + +static bool shouldRemoveDeadBindings(AnalysisManager &AMgr, + const CFGStmt S, + const ExplodedNode *Pred, + const LocationContext *LC) { + + // Are we never purging state values? + if (AMgr.getPurgeMode() == PurgeNone) + return false; + + // Is this the beginning of a basic block? + if (isa(Pred->getLocation())) + return true; + + // Is this on a non-expression? + if (!isa(S.getStmt())) + return true; + + // Run before processing a call. + if (isa(S.getStmt())) + return true; + + // Is this an expression that is consumed by another expression? If so, + // postpone cleaning out the state. + ParentMap &PM = LC->getAnalysisDeclContext()->getParentMap(); + return !PM.isConsumedExpr(cast(S.getStmt())); +} + +void ExprEngine::ProcessStmt(const CFGStmt S, + ExplodedNode *Pred) { + // Reclaim any unnecessary nodes in the ExplodedGraph. + G.reclaimRecentlyAllocatedNodes(); + + currentStmt = S.getStmt(); + PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(), + currentStmt->getLocStart(), + "Error evaluating statement"); + + EntryNode = Pred; + + ProgramStateRef EntryState = EntryNode->getState(); + CleanedState = EntryState; + + // Create the cleaned state. + const LocationContext *LC = EntryNode->getLocationContext(); + SymbolReaper SymReaper(LC, currentStmt, SymMgr, getStoreManager()); + + if (shouldRemoveDeadBindings(AMgr, S, Pred, LC)) { + NumRemoveDeadBindings++; + getCheckerManager().runCheckersForLiveSymbols(CleanedState, SymReaper); + + const StackFrameContext *SFC = LC->getCurrentStackFrame(); + + // Create a state in which dead bindings are removed from the environment + // and the store. TODO: The function should just return new env and store, + // not a new state. + CleanedState = StateMgr.removeDeadBindings(CleanedState, SFC, SymReaper); + } else { + NumRemoveDeadBindingsSkipped++; + } + + // Process any special transfer function for dead symbols. + ExplodedNodeSet Tmp; + // A tag to track convenience transitions, which can be removed at cleanup. + static SimpleProgramPointTag cleanupTag("ExprEngine : Clean Node"); + + if (!SymReaper.hasDeadSymbols()) { + // Generate a CleanedNode that has the environment and store cleaned + // up. Since no symbols are dead, we can optimize and not clean out + // the constraint manager. + StmtNodeBuilder Bldr(Pred, Tmp, *currentBuilderContext); + Bldr.generateNode(currentStmt, EntryNode, CleanedState, false, &cleanupTag); + + } else { + // Call checkers with the non-cleaned state so that they could query the + // values of the soon to be dead symbols. + ExplodedNodeSet CheckedSet; + getCheckerManager().runCheckersForDeadSymbols(CheckedSet, EntryNode, + SymReaper, currentStmt, *this); + + // For each node in CheckedSet, generate CleanedNodes that have the + // environment, the store, and the constraints cleaned up but have the + // user-supplied states as the predecessors. + StmtNodeBuilder Bldr(CheckedSet, Tmp, *currentBuilderContext); + for (ExplodedNodeSet::const_iterator + I = CheckedSet.begin(), E = CheckedSet.end(); I != E; ++I) { + ProgramStateRef CheckerState = (*I)->getState(); + + // The constraint manager has not been cleaned up yet, so clean up now. + CheckerState = getConstraintManager().removeDeadBindings(CheckerState, + SymReaper); + + assert(StateMgr.haveEqualEnvironments(CheckerState, EntryState) && + "Checkers are not allowed to modify the Environment as a part of " + "checkDeadSymbols processing."); + assert(StateMgr.haveEqualStores(CheckerState, EntryState) && + "Checkers are not allowed to modify the Store as a part of " + "checkDeadSymbols processing."); + + // Create a state based on CleanedState with CheckerState GDM and + // generate a transition to that state. + ProgramStateRef CleanedCheckerSt = + StateMgr.getPersistentStateWithGDM(CleanedState, CheckerState); + Bldr.generateNode(currentStmt, *I, CleanedCheckerSt, false, &cleanupTag, + ProgramPoint::PostPurgeDeadSymbolsKind); + } + } + + ExplodedNodeSet Dst; + for (ExplodedNodeSet::iterator I=Tmp.begin(), E=Tmp.end(); I!=E; ++I) { + ExplodedNodeSet DstI; + // Visit the statement. + Visit(currentStmt, *I, DstI); + Dst.insert(DstI); + } + + // Enqueue the new nodes onto the work list. + Engine.enqueue(Dst, currentBuilderContext->getBlock(), currentStmtIdx); + + // NULL out these variables to cleanup. + CleanedState = NULL; + EntryNode = NULL; + currentStmt = 0; +} + +void ExprEngine::ProcessInitializer(const CFGInitializer Init, + ExplodedNode *Pred) { + ExplodedNodeSet Dst; + + // We don't set EntryNode and currentStmt. And we don't clean up state. + const CXXCtorInitializer *BMI = Init.getInitializer(); + const StackFrameContext *stackFrame = + cast(Pred->getLocationContext()); + const CXXConstructorDecl *decl = + cast(stackFrame->getDecl()); + const CXXThisRegion *thisReg = getCXXThisRegion(decl, stackFrame); + + SVal thisVal = Pred->getState()->getSVal(thisReg); + + if (BMI->isAnyMemberInitializer()) { + // Evaluate the initializer. + + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + ProgramStateRef state = Pred->getState(); + + const FieldDecl *FD = BMI->getAnyMember(); + + SVal FieldLoc = state->getLValue(FD, thisVal); + SVal InitVal = state->getSVal(BMI->getInit(), Pred->getLocationContext()); + state = state->bindLoc(FieldLoc, InitVal); + + // Use a custom node building process. + PostInitializer PP(BMI, stackFrame); + // Builder automatically add the generated node to the deferred set, + // which are processed in the builder's dtor. + Bldr.generateNode(PP, Pred, state); + } else { + assert(BMI->isBaseInitializer()); + + // Get the base class declaration. + const CXXConstructExpr *ctorExpr = cast(BMI->getInit()); + + // Create the base object region. + SVal baseVal = + getStoreManager().evalDerivedToBase(thisVal, ctorExpr->getType()); + const MemRegion *baseReg = baseVal.getAsRegion(); + assert(baseReg); + + VisitCXXConstructExpr(ctorExpr, baseReg, Pred, Dst); + } + + // Enqueue the new nodes onto the work list. + Engine.enqueue(Dst, currentBuilderContext->getBlock(), currentStmtIdx); +} + +void ExprEngine::ProcessImplicitDtor(const CFGImplicitDtor D, + ExplodedNode *Pred) { + ExplodedNodeSet Dst; + switch (D.getKind()) { + case CFGElement::AutomaticObjectDtor: + ProcessAutomaticObjDtor(cast(D), Pred, Dst); + break; + case CFGElement::BaseDtor: + ProcessBaseDtor(cast(D), Pred, Dst); + break; + case CFGElement::MemberDtor: + ProcessMemberDtor(cast(D), Pred, Dst); + break; + case CFGElement::TemporaryDtor: + ProcessTemporaryDtor(cast(D), Pred, Dst); + break; + default: + llvm_unreachable("Unexpected dtor kind."); + } + + // Enqueue the new nodes onto the work list. + Engine.enqueue(Dst, currentBuilderContext->getBlock(), currentStmtIdx); +} + +void ExprEngine::ProcessAutomaticObjDtor(const CFGAutomaticObjDtor Dtor, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + ProgramStateRef state = Pred->getState(); + const VarDecl *varDecl = Dtor.getVarDecl(); + + QualType varType = varDecl->getType(); + + if (const ReferenceType *refType = varType->getAs()) + varType = refType->getPointeeType(); + + const CXXRecordDecl *recordDecl = varType->getAsCXXRecordDecl(); + assert(recordDecl && "get CXXRecordDecl fail"); + const CXXDestructorDecl *dtorDecl = recordDecl->getDestructor(); + + Loc dest = state->getLValue(varDecl, Pred->getLocationContext()); + + VisitCXXDestructor(dtorDecl, cast(dest).getRegion(), + Dtor.getTriggerStmt(), Pred, Dst); +} + +void ExprEngine::ProcessBaseDtor(const CFGBaseDtor D, + ExplodedNode *Pred, ExplodedNodeSet &Dst) {} + +void ExprEngine::ProcessMemberDtor(const CFGMemberDtor D, + ExplodedNode *Pred, ExplodedNodeSet &Dst) {} + +void ExprEngine::ProcessTemporaryDtor(const CFGTemporaryDtor D, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) {} + +void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, + ExplodedNodeSet &DstTop) { + PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(), + S->getLocStart(), + "Error evaluating statement"); + ExplodedNodeSet Dst; + StmtNodeBuilder Bldr(Pred, DstTop, *currentBuilderContext); + + // Expressions to ignore. + if (const Expr *Ex = dyn_cast(S)) + S = Ex->IgnoreParens(); + + // FIXME: add metadata to the CFG so that we can disable + // this check when we KNOW that there is no block-level subexpression. + // The motivation is that this check requires a hashtable lookup. + + if (S != currentStmt && Pred->getLocationContext()->getCFG()->isBlkExpr(S)) + return; + + switch (S->getStmtClass()) { + // C++ and ARC stuff we don't support yet. + case Expr::ObjCIndirectCopyRestoreExprClass: + case Stmt::CXXDependentScopeMemberExprClass: + case Stmt::CXXPseudoDestructorExprClass: + case Stmt::CXXTryStmtClass: + case Stmt::CXXTypeidExprClass: + case Stmt::CXXUuidofExprClass: + case Stmt::CXXUnresolvedConstructExprClass: + case Stmt::CXXScalarValueInitExprClass: + case Stmt::DependentScopeDeclRefExprClass: + case Stmt::UnaryTypeTraitExprClass: + case Stmt::BinaryTypeTraitExprClass: + case Stmt::TypeTraitExprClass: + case Stmt::ArrayTypeTraitExprClass: + case Stmt::ExpressionTraitExprClass: + case Stmt::UnresolvedLookupExprClass: + case Stmt::UnresolvedMemberExprClass: + case Stmt::CXXNoexceptExprClass: + case Stmt::PackExpansionExprClass: + case Stmt::SubstNonTypeTemplateParmPackExprClass: + case Stmt::SEHTryStmtClass: + case Stmt::SEHExceptStmtClass: + case Stmt::LambdaExprClass: + case Stmt::SEHFinallyStmtClass: { + const ExplodedNode *node = Bldr.generateNode(S, Pred, Pred->getState(), + /* sink */ true); + Engine.addAbortedBlock(node, currentBuilderContext->getBlock()); + break; + } + + // We don't handle default arguments either yet, but we can fake it + // for now by just skipping them. + case Stmt::SubstNonTypeTemplateParmExprClass: + case Stmt::CXXDefaultArgExprClass: + break; + + case Stmt::ParenExprClass: + llvm_unreachable("ParenExprs already handled."); + case Stmt::GenericSelectionExprClass: + llvm_unreachable("GenericSelectionExprs already handled."); + // Cases that should never be evaluated simply because they shouldn't + // appear in the CFG. + case Stmt::BreakStmtClass: + case Stmt::CaseStmtClass: + case Stmt::CompoundStmtClass: + case Stmt::ContinueStmtClass: + case Stmt::CXXForRangeStmtClass: + case Stmt::DefaultStmtClass: + case Stmt::DoStmtClass: + case Stmt::ForStmtClass: + case Stmt::GotoStmtClass: + case Stmt::IfStmtClass: + case Stmt::IndirectGotoStmtClass: + case Stmt::LabelStmtClass: + case Stmt::AttributedStmtClass: + case Stmt::NoStmtClass: + case Stmt::NullStmtClass: + case Stmt::SwitchStmtClass: + case Stmt::WhileStmtClass: + case Expr::MSDependentExistsStmtClass: + llvm_unreachable("Stmt should not be in analyzer evaluation loop"); + + case Stmt::GNUNullExprClass: { + // GNU __null is a pointer-width integer, not an actual pointer. + ProgramStateRef state = Pred->getState(); + state = state->BindExpr(S, Pred->getLocationContext(), + svalBuilder.makeIntValWithPtrWidth(0, false)); + Bldr.generateNode(S, Pred, state); + break; + } + + case Stmt::ObjCAtSynchronizedStmtClass: + Bldr.takeNodes(Pred); + VisitObjCAtSynchronizedStmt(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + // FIXME. + case Stmt::ObjCSubscriptRefExprClass: + break; + + case Stmt::ObjCPropertyRefExprClass: + // Implicitly handled by Environment::getSVal(). + break; + + case Stmt::ImplicitValueInitExprClass: { + ProgramStateRef state = Pred->getState(); + QualType ty = cast(S)->getType(); + SVal val = svalBuilder.makeZeroVal(ty); + Bldr.generateNode(S, Pred, state->BindExpr(S, Pred->getLocationContext(), + val)); + break; + } + + case Stmt::ExprWithCleanupsClass: + // Handled due to fully linearised CFG. + break; + + // Cases not handled yet; but will handle some day. + case Stmt::DesignatedInitExprClass: + case Stmt::ExtVectorElementExprClass: + case Stmt::ImaginaryLiteralClass: + case Stmt::ObjCAtCatchStmtClass: + case Stmt::ObjCAtFinallyStmtClass: + case Stmt::ObjCAtTryStmtClass: + case Stmt::ObjCAutoreleasePoolStmtClass: + case Stmt::ObjCEncodeExprClass: + case Stmt::ObjCIsaExprClass: + case Stmt::ObjCProtocolExprClass: + case Stmt::ObjCSelectorExprClass: + case Expr::ObjCNumericLiteralClass: + case Stmt::ParenListExprClass: + case Stmt::PredefinedExprClass: + case Stmt::ShuffleVectorExprClass: + case Stmt::VAArgExprClass: + case Stmt::CUDAKernelCallExprClass: + case Stmt::OpaqueValueExprClass: + case Stmt::AsTypeExprClass: + case Stmt::AtomicExprClass: + // Fall through. + + // Currently all handling of 'throw' just falls to the CFG. We + // can consider doing more if necessary. + case Stmt::CXXThrowExprClass: + // Fall through. + + // Cases we intentionally don't evaluate, since they don't need + // to be explicitly evaluated. + case Stmt::AddrLabelExprClass: + case Stmt::IntegerLiteralClass: + case Stmt::CharacterLiteralClass: + case Stmt::CXXBoolLiteralExprClass: + case Stmt::ObjCBoolLiteralExprClass: + case Stmt::FloatingLiteralClass: + case Stmt::SizeOfPackExprClass: + case Stmt::StringLiteralClass: + case Stmt::ObjCStringLiteralClass: + case Stmt::CXXBindTemporaryExprClass: + case Stmt::CXXNullPtrLiteralExprClass: { + Bldr.takeNodes(Pred); + ExplodedNodeSet preVisit; + getCheckerManager().runCheckersForPreStmt(preVisit, Pred, S, *this); + getCheckerManager().runCheckersForPostStmt(Dst, preVisit, S, *this); + Bldr.addNodes(Dst); + break; + } + + case Expr::ObjCArrayLiteralClass: + case Expr::ObjCDictionaryLiteralClass: { + Bldr.takeNodes(Pred); + + ExplodedNodeSet preVisit; + getCheckerManager().runCheckersForPreStmt(preVisit, Pred, S, *this); + + // FIXME: explicitly model with a region and the actual contents + // of the container. For now, conjure a symbol. + ExplodedNodeSet Tmp; + StmtNodeBuilder Bldr2(preVisit, Tmp, *currentBuilderContext); + + for (ExplodedNodeSet::iterator it = preVisit.begin(), et = preVisit.end(); + it != et; ++it) { + ExplodedNode *N = *it; + const Expr *Ex = cast(S); + QualType resultType = Ex->getType(); + const LocationContext *LCtx = N->getLocationContext(); + SVal result = + svalBuilder.getConjuredSymbolVal(0, Ex, LCtx, resultType, + currentBuilderContext->getCurrentBlockCount()); + ProgramStateRef state = N->getState()->BindExpr(Ex, LCtx, result); + Bldr2.generateNode(S, N, state); + } + + getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this); + Bldr.addNodes(Dst); + break; + } + + case Stmt::ArraySubscriptExprClass: + Bldr.takeNodes(Pred); + VisitLvalArraySubscriptExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::AsmStmtClass: + Bldr.takeNodes(Pred); + VisitAsmStmt(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::BlockExprClass: + Bldr.takeNodes(Pred); + VisitBlockExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::BinaryOperatorClass: { + const BinaryOperator* B = cast(S); + if (B->isLogicalOp()) { + Bldr.takeNodes(Pred); + VisitLogicalExpr(B, Pred, Dst); + Bldr.addNodes(Dst); + break; + } + else if (B->getOpcode() == BO_Comma) { + ProgramStateRef state = Pred->getState(); + Bldr.generateNode(B, Pred, + state->BindExpr(B, Pred->getLocationContext(), + state->getSVal(B->getRHS(), + Pred->getLocationContext()))); + break; + } + + Bldr.takeNodes(Pred); + + if (AMgr.shouldEagerlyAssume() && + (B->isRelationalOp() || B->isEqualityOp())) { + ExplodedNodeSet Tmp; + VisitBinaryOperator(cast(S), Pred, Tmp); + evalEagerlyAssume(Dst, Tmp, cast(S)); + } + else + VisitBinaryOperator(cast(S), Pred, Dst); + + Bldr.addNodes(Dst); + break; + } + + case Stmt::CallExprClass: + case Stmt::CXXOperatorCallExprClass: + case Stmt::CXXMemberCallExprClass: + case Stmt::UserDefinedLiteralClass: { + Bldr.takeNodes(Pred); + VisitCallExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::CXXCatchStmtClass: { + Bldr.takeNodes(Pred); + VisitCXXCatchStmt(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::CXXTemporaryObjectExprClass: + case Stmt::CXXConstructExprClass: { + const CXXConstructExpr *C = cast(S); + // For block-level CXXConstructExpr, we don't have a destination region. + // Let VisitCXXConstructExpr() create one. + Bldr.takeNodes(Pred); + VisitCXXConstructExpr(C, 0, Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::CXXNewExprClass: { + Bldr.takeNodes(Pred); + const CXXNewExpr *NE = cast(S); + VisitCXXNewExpr(NE, Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::CXXDeleteExprClass: { + Bldr.takeNodes(Pred); + const CXXDeleteExpr *CDE = cast(S); + VisitCXXDeleteExpr(CDE, Pred, Dst); + Bldr.addNodes(Dst); + break; + } + // FIXME: ChooseExpr is really a constant. We need to fix + // the CFG do not model them as explicit control-flow. + + case Stmt::ChooseExprClass: { // __builtin_choose_expr + Bldr.takeNodes(Pred); + const ChooseExpr *C = cast(S); + VisitGuardedExpr(C, C->getLHS(), C->getRHS(), Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::CompoundAssignOperatorClass: + Bldr.takeNodes(Pred); + VisitBinaryOperator(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::CompoundLiteralExprClass: + Bldr.takeNodes(Pred); + VisitCompoundLiteralExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: { // '?' operator + Bldr.takeNodes(Pred); + const AbstractConditionalOperator *C + = cast(S); + VisitGuardedExpr(C, C->getTrueExpr(), C->getFalseExpr(), Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::CXXThisExprClass: + Bldr.takeNodes(Pred); + VisitCXXThisExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::DeclRefExprClass: { + Bldr.takeNodes(Pred); + const DeclRefExpr *DE = cast(S); + VisitCommonDeclRefExpr(DE, DE->getDecl(), Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::DeclStmtClass: + Bldr.takeNodes(Pred); + VisitDeclStmt(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::ImplicitCastExprClass: + case Stmt::CStyleCastExprClass: + case Stmt::CXXStaticCastExprClass: + case Stmt::CXXDynamicCastExprClass: + case Stmt::CXXReinterpretCastExprClass: + case Stmt::CXXConstCastExprClass: + case Stmt::CXXFunctionalCastExprClass: + case Stmt::ObjCBridgedCastExprClass: { + Bldr.takeNodes(Pred); + const CastExpr *C = cast(S); + // Handle the previsit checks. + ExplodedNodeSet dstPrevisit; + getCheckerManager().runCheckersForPreStmt(dstPrevisit, Pred, C, *this); + + // Handle the expression itself. + ExplodedNodeSet dstExpr; + for (ExplodedNodeSet::iterator i = dstPrevisit.begin(), + e = dstPrevisit.end(); i != e ; ++i) { + VisitCast(C, C->getSubExpr(), *i, dstExpr); + } + + // Handle the postvisit checks. + getCheckerManager().runCheckersForPostStmt(Dst, dstExpr, C, *this); + Bldr.addNodes(Dst); + break; + } + + case Expr::MaterializeTemporaryExprClass: { + Bldr.takeNodes(Pred); + const MaterializeTemporaryExpr *Materialize + = cast(S); + if (Materialize->getType()->isRecordType()) + Dst.Add(Pred); + else + CreateCXXTemporaryObject(Materialize, Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::InitListExprClass: + Bldr.takeNodes(Pred); + VisitInitListExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::MemberExprClass: + Bldr.takeNodes(Pred); + VisitMemberExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::ObjCIvarRefExprClass: + Bldr.takeNodes(Pred); + VisitLvalObjCIvarRefExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::ObjCForCollectionStmtClass: + Bldr.takeNodes(Pred); + VisitObjCForCollectionStmt(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::ObjCMessageExprClass: { + Bldr.takeNodes(Pred); + // Is this a property access? + const ParentMap &PM = Pred->getLocationContext()->getParentMap(); + const ObjCMessageExpr *ME = cast(S); + bool evaluated = false; + + if (const PseudoObjectExpr *PO = + dyn_cast_or_null(PM.getParent(S))) { + const Expr *syntactic = PO->getSyntacticForm(); + if (const ObjCPropertyRefExpr *PR = + dyn_cast(syntactic)) { + bool isSetter = ME->getNumArgs() > 0; + VisitObjCMessage(ObjCMessage(ME, PR, isSetter), Pred, Dst); + evaluated = true; + } + else if (isa(syntactic)) { + VisitObjCMessage(ObjCMessage(ME, 0, true), Pred, Dst); + } + } + + if (!evaluated) + VisitObjCMessage(ME, Pred, Dst); + + Bldr.addNodes(Dst); + break; + } + + case Stmt::ObjCAtThrowStmtClass: { + // FIXME: This is not complete. We basically treat @throw as + // an abort. + Bldr.generateNode(S, Pred, Pred->getState()); + break; + } + + case Stmt::ReturnStmtClass: + Bldr.takeNodes(Pred); + VisitReturnStmt(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::OffsetOfExprClass: + Bldr.takeNodes(Pred); + VisitOffsetOfExpr(cast(S), Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::UnaryExprOrTypeTraitExprClass: + Bldr.takeNodes(Pred); + VisitUnaryExprOrTypeTraitExpr(cast(S), + Pred, Dst); + Bldr.addNodes(Dst); + break; + + case Stmt::StmtExprClass: { + const StmtExpr *SE = cast(S); + + if (SE->getSubStmt()->body_empty()) { + // Empty statement expression. + assert(SE->getType() == getContext().VoidTy + && "Empty statement expression must have void type."); + break; + } + + if (Expr *LastExpr = dyn_cast(*SE->getSubStmt()->body_rbegin())) { + ProgramStateRef state = Pred->getState(); + Bldr.generateNode(SE, Pred, + state->BindExpr(SE, Pred->getLocationContext(), + state->getSVal(LastExpr, + Pred->getLocationContext()))); + } + break; + } + + case Stmt::UnaryOperatorClass: { + Bldr.takeNodes(Pred); + const UnaryOperator *U = cast(S); + if (AMgr.shouldEagerlyAssume() && (U->getOpcode() == UO_LNot)) { + ExplodedNodeSet Tmp; + VisitUnaryOperator(U, Pred, Tmp); + evalEagerlyAssume(Dst, Tmp, U); + } + else + VisitUnaryOperator(U, Pred, Dst); + Bldr.addNodes(Dst); + break; + } + + case Stmt::PseudoObjectExprClass: { + Bldr.takeNodes(Pred); + ProgramStateRef state = Pred->getState(); + const PseudoObjectExpr *PE = cast(S); + if (const Expr *Result = PE->getResultExpr()) { + SVal V = state->getSVal(Result, Pred->getLocationContext()); + Bldr.generateNode(S, Pred, + state->BindExpr(S, Pred->getLocationContext(), V)); + } + else + Bldr.generateNode(S, Pred, + state->BindExpr(S, Pred->getLocationContext(), + UnknownVal())); + + Bldr.addNodes(Dst); + break; + } + } +} + +bool ExprEngine::replayWithoutInlining(ExplodedNode *N, + const LocationContext *CalleeLC) { + const StackFrameContext *CalleeSF = CalleeLC->getCurrentStackFrame(); + const StackFrameContext *CallerSF = CalleeSF->getParent()->getCurrentStackFrame(); + assert(CalleeSF && CallerSF); + ExplodedNode *BeforeProcessingCall = 0; + + // Find the first node before we started processing the call expression. + while (N) { + ProgramPoint L = N->getLocation(); + BeforeProcessingCall = N; + N = N->pred_empty() ? NULL : *(N->pred_begin()); + + // Skip the nodes corresponding to the inlined code. + if (L.getLocationContext()->getCurrentStackFrame() != CallerSF) + continue; + // We reached the caller. Find the node right before we started + // processing the CallExpr. + if (isa(L)) + continue; + if (const StmtPoint *SP = dyn_cast(&L)) + if (SP->getStmt() == CalleeSF->getCallSite()) + continue; + break; + } + + if (!BeforeProcessingCall) + return false; + + // TODO: Clean up the unneeded nodes. + + // Build an Epsilon node from which we will restart the analyzes. + const Stmt *CE = CalleeSF->getCallSite(); + ProgramPoint NewNodeLoc = + EpsilonPoint(BeforeProcessingCall->getLocationContext(), CE); + // Add the special flag to GDM to signal retrying with no inlining. + // Note, changing the state ensures that we are not going to cache out. + ProgramStateRef NewNodeState = BeforeProcessingCall->getState(); + NewNodeState = NewNodeState->set((void*)CE); + + // Make the new node a successor of BeforeProcessingCall. + bool IsNew = false; + ExplodedNode *NewNode = G.getNode(NewNodeLoc, NewNodeState, false, &IsNew); + // We cached out at this point. Caching out is common due to us backtracking + // from the inlined function, which might spawn several paths. + if (!IsNew) + return true; + + NewNode->addPredecessor(BeforeProcessingCall, G); + + // Add the new node to the work list. + Engine.enqueueStmtNode(NewNode, CalleeSF->getCallSiteBlock(), + CalleeSF->getIndex()); + NumTimesRetriedWithoutInlining++; + return true; +} + +/// Block entrance. (Update counters). +void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, + NodeBuilderWithSinks &nodeBuilder) { + + // FIXME: Refactor this into a checker. + ExplodedNode *pred = nodeBuilder.getContext().getPred(); + + if (nodeBuilder.getContext().getCurrentBlockCount() >= AMgr.getMaxVisit()) { + static SimpleProgramPointTag tag("ExprEngine : Block count exceeded"); + const ExplodedNode *Sink = + nodeBuilder.generateNode(pred->getState(), pred, &tag, true); + + // Check if we stopped at the top level function or not. + // Root node should have the location context of the top most function. + const LocationContext *CalleeLC = pred->getLocation().getLocationContext(); + const LocationContext *CalleeSF = CalleeLC->getCurrentStackFrame(); + const LocationContext *RootLC = + (*G.roots_begin())->getLocation().getLocationContext(); + if (RootLC->getCurrentStackFrame() != CalleeSF) { + Engine.FunctionSummaries->markReachedMaxBlockCount(CalleeSF->getDecl()); + + // Re-run the call evaluation without inlining it, by storing the + // no-inlining policy in the state and enqueuing the new work item on + // the list. Replay should almost never fail. Use the stats to catch it + // if it does. + if ((!AMgr.NoRetryExhausted && replayWithoutInlining(pred, CalleeLC))) + return; + NumMaxBlockCountReachedInInlined++; + } else + NumMaxBlockCountReached++; + + // Make sink nodes as exhausted(for stats) only if retry failed. + Engine.blocksExhausted.push_back(std::make_pair(L, Sink)); + } +} + +//===----------------------------------------------------------------------===// +// Branch processing. +//===----------------------------------------------------------------------===// + +ProgramStateRef ExprEngine::MarkBranch(ProgramStateRef state, + const Stmt *Terminator, + const LocationContext *LCtx, + bool branchTaken) { + + switch (Terminator->getStmtClass()) { + default: + return state; + + case Stmt::BinaryOperatorClass: { // '&&' and '||' + + const BinaryOperator* B = cast(Terminator); + BinaryOperator::Opcode Op = B->getOpcode(); + + assert (Op == BO_LAnd || Op == BO_LOr); + + // For &&, if we take the true branch, then the value of the whole + // expression is that of the RHS expression. + // + // For ||, if we take the false branch, then the value of the whole + // expression is that of the RHS expression. + + const Expr *Ex = (Op == BO_LAnd && branchTaken) || + (Op == BO_LOr && !branchTaken) + ? B->getRHS() : B->getLHS(); + + return state->BindExpr(B, LCtx, UndefinedVal(Ex)); + } + + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: { // ?: + const AbstractConditionalOperator* C + = cast(Terminator); + + // For ?, if branchTaken == true then the value is either the LHS or + // the condition itself. (GNU extension). + + const Expr *Ex; + + if (branchTaken) + Ex = C->getTrueExpr(); + else + Ex = C->getFalseExpr(); + + return state->BindExpr(C, LCtx, UndefinedVal(Ex)); + } + + case Stmt::ChooseExprClass: { // ?: + + const ChooseExpr *C = cast(Terminator); + + const Expr *Ex = branchTaken ? C->getLHS() : C->getRHS(); + return state->BindExpr(C, LCtx, UndefinedVal(Ex)); + } + } +} + +/// RecoverCastedSymbol - A helper function for ProcessBranch that is used +/// to try to recover some path-sensitivity for casts of symbolic +/// integers that promote their values (which are currently not tracked well). +/// This function returns the SVal bound to Condition->IgnoreCasts if all the +// cast(s) did was sign-extend the original value. +static SVal RecoverCastedSymbol(ProgramStateManager& StateMgr, + ProgramStateRef state, + const Stmt *Condition, + const LocationContext *LCtx, + ASTContext &Ctx) { + + const Expr *Ex = dyn_cast(Condition); + if (!Ex) + return UnknownVal(); + + uint64_t bits = 0; + bool bitsInit = false; + + while (const CastExpr *CE = dyn_cast(Ex)) { + QualType T = CE->getType(); + + if (!T->isIntegerType()) + return UnknownVal(); + + uint64_t newBits = Ctx.getTypeSize(T); + if (!bitsInit || newBits < bits) { + bitsInit = true; + bits = newBits; + } + + Ex = CE->getSubExpr(); + } + + // We reached a non-cast. Is it a symbolic value? + QualType T = Ex->getType(); + + if (!bitsInit || !T->isIntegerType() || Ctx.getTypeSize(T) > bits) + return UnknownVal(); + + return state->getSVal(Ex, LCtx); +} + +void ExprEngine::processBranch(const Stmt *Condition, const Stmt *Term, + NodeBuilderContext& BldCtx, + ExplodedNode *Pred, + ExplodedNodeSet &Dst, + const CFGBlock *DstT, + const CFGBlock *DstF) { + currentBuilderContext = &BldCtx; + + // Check for NULL conditions; e.g. "for(;;)" + if (!Condition) { + BranchNodeBuilder NullCondBldr(Pred, Dst, BldCtx, DstT, DstF); + NullCondBldr.markInfeasible(false); + NullCondBldr.generateNode(Pred->getState(), true, Pred); + return; + } + + PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(), + Condition->getLocStart(), + "Error evaluating branch"); + + ExplodedNodeSet CheckersOutSet; + getCheckerManager().runCheckersForBranchCondition(Condition, CheckersOutSet, + Pred, *this); + // We generated only sinks. + if (CheckersOutSet.empty()) + return; + + BranchNodeBuilder builder(CheckersOutSet, Dst, BldCtx, DstT, DstF); + for (NodeBuilder::iterator I = CheckersOutSet.begin(), + E = CheckersOutSet.end(); E != I; ++I) { + ExplodedNode *PredI = *I; + + if (PredI->isSink()) + continue; + + ProgramStateRef PrevState = Pred->getState(); + SVal X = PrevState->getSVal(Condition, Pred->getLocationContext()); + + if (X.isUnknownOrUndef()) { + // Give it a chance to recover from unknown. + if (const Expr *Ex = dyn_cast(Condition)) { + if (Ex->getType()->isIntegerType()) { + // Try to recover some path-sensitivity. Right now casts of symbolic + // integers that promote their values are currently not tracked well. + // If 'Condition' is such an expression, try and recover the + // underlying value and use that instead. + SVal recovered = RecoverCastedSymbol(getStateManager(), + PrevState, Condition, + Pred->getLocationContext(), + getContext()); + + if (!recovered.isUnknown()) { + X = recovered; + } + } + } + } + + const LocationContext *LCtx = PredI->getLocationContext(); + + // If the condition is still unknown, give up. + if (X.isUnknownOrUndef()) { + builder.generateNode(MarkBranch(PrevState, Term, LCtx, true), + true, PredI); + builder.generateNode(MarkBranch(PrevState, Term, LCtx, false), + false, PredI); + continue; + } + + DefinedSVal V = cast(X); + + // Process the true branch. + if (builder.isFeasible(true)) { + if (ProgramStateRef state = PrevState->assume(V, true)) + builder.generateNode(MarkBranch(state, Term, LCtx, true), + true, PredI); + else + builder.markInfeasible(true); + } + + // Process the false branch. + if (builder.isFeasible(false)) { + if (ProgramStateRef state = PrevState->assume(V, false)) + builder.generateNode(MarkBranch(state, Term, LCtx, false), + false, PredI); + else + builder.markInfeasible(false); + } + } + currentBuilderContext = 0; +} + +/// processIndirectGoto - Called by CoreEngine. Used to generate successor +/// nodes by processing the 'effects' of a computed goto jump. +void ExprEngine::processIndirectGoto(IndirectGotoNodeBuilder &builder) { + + ProgramStateRef state = builder.getState(); + SVal V = state->getSVal(builder.getTarget(), builder.getLocationContext()); + + // Three possibilities: + // + // (1) We know the computed label. + // (2) The label is NULL (or some other constant), or Undefined. + // (3) We have no clue about the label. Dispatch to all targets. + // + + typedef IndirectGotoNodeBuilder::iterator iterator; + + if (isa(V)) { + const LabelDecl *L = cast(V).getLabel(); + + for (iterator I = builder.begin(), E = builder.end(); I != E; ++I) { + if (I.getLabel() == L) { + builder.generateNode(I, state); + return; + } + } + + llvm_unreachable("No block with label."); + } + + if (isa(V) || isa(V)) { + // Dispatch to the first target and mark it as a sink. + //ExplodedNode* N = builder.generateNode(builder.begin(), state, true); + // FIXME: add checker visit. + // UndefBranches.insert(N); + return; + } + + // This is really a catch-all. We don't support symbolics yet. + // FIXME: Implement dispatch for symbolic pointers. + + for (iterator I=builder.begin(), E=builder.end(); I != E; ++I) + builder.generateNode(I, state); +} + +/// ProcessEndPath - Called by CoreEngine. Used to generate end-of-path +/// nodes when the control reaches the end of a function. +void ExprEngine::processEndOfFunction(NodeBuilderContext& BC) { + StateMgr.EndPath(BC.Pred->getState()); + ExplodedNodeSet Dst; + getCheckerManager().runCheckersForEndPath(BC, Dst, *this); + Engine.enqueueEndOfFunction(Dst); +} + +/// ProcessSwitch - Called by CoreEngine. Used to generate successor +/// nodes by processing the 'effects' of a switch statement. +void ExprEngine::processSwitch(SwitchNodeBuilder& builder) { + typedef SwitchNodeBuilder::iterator iterator; + ProgramStateRef state = builder.getState(); + const Expr *CondE = builder.getCondition(); + SVal CondV_untested = state->getSVal(CondE, builder.getLocationContext()); + + if (CondV_untested.isUndef()) { + //ExplodedNode* N = builder.generateDefaultCaseNode(state, true); + // FIXME: add checker + //UndefBranches.insert(N); + + return; + } + DefinedOrUnknownSVal CondV = cast(CondV_untested); + + ProgramStateRef DefaultSt = state; + + iterator I = builder.begin(), EI = builder.end(); + bool defaultIsFeasible = I == EI; + + for ( ; I != EI; ++I) { + // Successor may be pruned out during CFG construction. + if (!I.getBlock()) + continue; + + const CaseStmt *Case = I.getCase(); + + // Evaluate the LHS of the case value. + llvm::APSInt V1 = Case->getLHS()->EvaluateKnownConstInt(getContext()); + assert(V1.getBitWidth() == getContext().getTypeSize(CondE->getType())); + + // Get the RHS of the case, if it exists. + llvm::APSInt V2; + if (const Expr *E = Case->getRHS()) + V2 = E->EvaluateKnownConstInt(getContext()); + else + V2 = V1; + + // FIXME: Eventually we should replace the logic below with a range + // comparison, rather than concretize the values within the range. + // This should be easy once we have "ranges" for NonLVals. + + do { + nonloc::ConcreteInt CaseVal(getBasicVals().getValue(V1)); + DefinedOrUnknownSVal Res = svalBuilder.evalEQ(DefaultSt ? DefaultSt : state, + CondV, CaseVal); + + // Now "assume" that the case matches. + if (ProgramStateRef stateNew = state->assume(Res, true)) { + builder.generateCaseStmtNode(I, stateNew); + + // If CondV evaluates to a constant, then we know that this + // is the *only* case that we can take, so stop evaluating the + // others. + if (isa(CondV)) + return; + } + + // Now "assume" that the case doesn't match. Add this state + // to the default state (if it is feasible). + if (DefaultSt) { + if (ProgramStateRef stateNew = DefaultSt->assume(Res, false)) { + defaultIsFeasible = true; + DefaultSt = stateNew; + } + else { + defaultIsFeasible = false; + DefaultSt = NULL; + } + } + + // Concretize the next value in the range. + if (V1 == V2) + break; + + ++V1; + assert (V1 <= V2); + + } while (true); + } + + if (!defaultIsFeasible) + return; + + // If we have switch(enum value), the default branch is not + // feasible if all of the enum constants not covered by 'case:' statements + // are not feasible values for the switch condition. + // + // Note that this isn't as accurate as it could be. Even if there isn't + // a case for a particular enum value as long as that enum value isn't + // feasible then it shouldn't be considered for making 'default:' reachable. + const SwitchStmt *SS = builder.getSwitch(); + const Expr *CondExpr = SS->getCond()->IgnoreParenImpCasts(); + if (CondExpr->getType()->getAs()) { + if (SS->isAllEnumCasesCovered()) + return; + } + + builder.generateDefaultCaseNode(DefaultSt); +} + +//===----------------------------------------------------------------------===// +// Transfer functions: Loads and stores. +//===----------------------------------------------------------------------===// + +void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + + if (const VarDecl *VD = dyn_cast(D)) { + assert(Ex->isLValue()); + SVal V = state->getLValue(VD, Pred->getLocationContext()); + + // For references, the 'lvalue' is the pointer address stored in the + // reference region. + if (VD->getType()->isReferenceType()) { + if (const MemRegion *R = V.getAsRegion()) + V = state->getSVal(R); + else + V = UnknownVal(); + } + + Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), false, 0, + ProgramPoint::PostLValueKind); + return; + } + if (const EnumConstantDecl *ED = dyn_cast(D)) { + assert(!Ex->isLValue()); + SVal V = svalBuilder.makeIntVal(ED->getInitVal()); + Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V)); + return; + } + if (const FunctionDecl *FD = dyn_cast(D)) { + SVal V = svalBuilder.getFunctionPointer(FD); + Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), false, 0, + ProgramPoint::PostLValueKind); + return; + } + if (isa(D)) { + // FIXME: Compute lvalue of fields. + Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, UnknownVal()), + false, 0, ProgramPoint::PostLValueKind); + return; + } + + assert (false && + "ValueDecl support for this ValueDecl not implemented."); +} + +/// VisitArraySubscriptExpr - Transfer function for array accesses +void ExprEngine::VisitLvalArraySubscriptExpr(const ArraySubscriptExpr *A, + ExplodedNode *Pred, + ExplodedNodeSet &Dst){ + + const Expr *Base = A->getBase()->IgnoreParens(); + const Expr *Idx = A->getIdx()->IgnoreParens(); + + + ExplodedNodeSet checkerPreStmt; + getCheckerManager().runCheckersForPreStmt(checkerPreStmt, Pred, A, *this); + + StmtNodeBuilder Bldr(checkerPreStmt, Dst, *currentBuilderContext); + + for (ExplodedNodeSet::iterator it = checkerPreStmt.begin(), + ei = checkerPreStmt.end(); it != ei; ++it) { + const LocationContext *LCtx = (*it)->getLocationContext(); + ProgramStateRef state = (*it)->getState(); + SVal V = state->getLValue(A->getType(), + state->getSVal(Idx, LCtx), + state->getSVal(Base, LCtx)); + assert(A->isLValue()); + Bldr.generateNode(A, *it, state->BindExpr(A, LCtx, V), + false, 0, ProgramPoint::PostLValueKind); + } +} + +/// VisitMemberExpr - Transfer function for member expressions. +void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred, + ExplodedNodeSet &TopDst) { + + StmtNodeBuilder Bldr(Pred, TopDst, *currentBuilderContext); + ExplodedNodeSet Dst; + Decl *member = M->getMemberDecl(); + if (VarDecl *VD = dyn_cast(member)) { + assert(M->isLValue()); + Bldr.takeNodes(Pred); + VisitCommonDeclRefExpr(M, VD, Pred, Dst); + Bldr.addNodes(Dst); + return; + } + + FieldDecl *field = dyn_cast(member); + if (!field) // FIXME: skipping member expressions for non-fields + return; + + Expr *baseExpr = M->getBase()->IgnoreParens(); + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal baseExprVal = state->getSVal(baseExpr, Pred->getLocationContext()); + if (isa(baseExprVal) || + isa(baseExprVal) || + // FIXME: This can originate by conjuring a symbol for an unknown + // temporary struct object, see test/Analysis/fields.c: + // (p = getit()).x + isa(baseExprVal)) { + Bldr.generateNode(M, Pred, state->BindExpr(M, LCtx, UnknownVal())); + return; + } + + // FIXME: Should we insert some assumption logic in here to determine + // if "Base" is a valid piece of memory? Before we put this assumption + // later when using FieldOffset lvals (which we no longer have). + + // For all other cases, compute an lvalue. + SVal L = state->getLValue(field, baseExprVal); + if (M->isLValue()) + Bldr.generateNode(M, Pred, state->BindExpr(M, LCtx, L), false, 0, + ProgramPoint::PostLValueKind); + else { + Bldr.takeNodes(Pred); + evalLoad(Dst, M, M, Pred, state, L); + Bldr.addNodes(Dst); + } +} + +/// evalBind - Handle the semantics of binding a value to a specific location. +/// This method is used by evalStore and (soon) VisitDeclStmt, and others. +void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE, + ExplodedNode *Pred, + SVal location, SVal Val, bool atDeclInit) { + + // Do a previsit of the bind. + ExplodedNodeSet CheckedSet; + getCheckerManager().runCheckersForBind(CheckedSet, Pred, location, Val, + StoreE, *this, + ProgramPoint::PostStmtKind); + + ExplodedNodeSet TmpDst; + StmtNodeBuilder Bldr(CheckedSet, TmpDst, *currentBuilderContext); + + const LocationContext *LC = Pred->getLocationContext(); + for (ExplodedNodeSet::iterator I = CheckedSet.begin(), E = CheckedSet.end(); + I!=E; ++I) { + ExplodedNode *PredI = *I; + ProgramStateRef state = PredI->getState(); + + if (atDeclInit) { + const VarRegion *VR = + cast(cast(location).getRegion()); + + state = state->bindDecl(VR, Val); + } else { + state = state->bindLoc(location, Val); + } + + const MemRegion *LocReg = 0; + if (loc::MemRegionVal *LocRegVal = dyn_cast(&location)) + LocReg = LocRegVal->getRegion(); + + const ProgramPoint L = PostStore(StoreE, LC, LocReg, 0); + Bldr.generateNode(L, PredI, state, false); + } + + Dst.insert(TmpDst); +} + +/// evalStore - Handle the semantics of a store via an assignment. +/// @param Dst The node set to store generated state nodes +/// @param AssignE The assignment expression if the store happens in an +/// assignment. +/// @param LocatioinE The location expression that is stored to. +/// @param state The current simulation state +/// @param location The location to store the value +/// @param Val The value to be stored +void ExprEngine::evalStore(ExplodedNodeSet &Dst, const Expr *AssignE, + const Expr *LocationE, + ExplodedNode *Pred, + ProgramStateRef state, SVal location, SVal Val, + const ProgramPointTag *tag) { + // Proceed with the store. We use AssignE as the anchor for the PostStore + // ProgramPoint if it is non-NULL, and LocationE otherwise. + const Expr *StoreE = AssignE ? AssignE : LocationE; + + if (isa(location)) { + assert(false); + } + + // Evaluate the location (checks for bad dereferences). + ExplodedNodeSet Tmp; + evalLocation(Tmp, AssignE, LocationE, Pred, state, location, tag, false); + + if (Tmp.empty()) + return; + + if (location.isUndef()) + return; + + for (ExplodedNodeSet::iterator NI=Tmp.begin(), NE=Tmp.end(); NI!=NE; ++NI) + evalBind(Dst, StoreE, *NI, location, Val, false); +} + +void ExprEngine::evalLoad(ExplodedNodeSet &Dst, + const Expr *NodeEx, + const Expr *BoundEx, + ExplodedNode *Pred, + ProgramStateRef state, + SVal location, + const ProgramPointTag *tag, + QualType LoadTy) +{ + assert(!isa(location) && "location cannot be a NonLoc."); + assert(!isa(location)); + + // Are we loading from a region? This actually results in two loads; one + // to fetch the address of the referenced value and one to fetch the + // referenced value. + if (const TypedValueRegion *TR = + dyn_cast_or_null(location.getAsRegion())) { + + QualType ValTy = TR->getValueType(); + if (const ReferenceType *RT = ValTy->getAs()) { + static SimpleProgramPointTag + loadReferenceTag("ExprEngine : Load Reference"); + ExplodedNodeSet Tmp; + evalLoadCommon(Tmp, NodeEx, BoundEx, Pred, state, + location, &loadReferenceTag, + getContext().getPointerType(RT->getPointeeType())); + + // Perform the load from the referenced value. + for (ExplodedNodeSet::iterator I=Tmp.begin(), E=Tmp.end() ; I!=E; ++I) { + state = (*I)->getState(); + location = state->getSVal(BoundEx, (*I)->getLocationContext()); + evalLoadCommon(Dst, NodeEx, BoundEx, *I, state, location, tag, LoadTy); + } + return; + } + } + + evalLoadCommon(Dst, NodeEx, BoundEx, Pred, state, location, tag, LoadTy); +} + +void ExprEngine::evalLoadCommon(ExplodedNodeSet &Dst, + const Expr *NodeEx, + const Expr *BoundEx, + ExplodedNode *Pred, + ProgramStateRef state, + SVal location, + const ProgramPointTag *tag, + QualType LoadTy) { + assert(NodeEx); + assert(BoundEx); + // Evaluate the location (checks for bad dereferences). + ExplodedNodeSet Tmp; + evalLocation(Tmp, NodeEx, BoundEx, Pred, state, location, tag, true); + if (Tmp.empty()) + return; + + StmtNodeBuilder Bldr(Tmp, Dst, *currentBuilderContext); + if (location.isUndef()) + return; + + // Proceed with the load. + for (ExplodedNodeSet::iterator NI=Tmp.begin(), NE=Tmp.end(); NI!=NE; ++NI) { + state = (*NI)->getState(); + const LocationContext *LCtx = (*NI)->getLocationContext(); + + if (location.isUnknown()) { + // This is important. We must nuke the old binding. + Bldr.generateNode(NodeEx, *NI, + state->BindExpr(BoundEx, LCtx, UnknownVal()), + false, tag, + ProgramPoint::PostLoadKind); + } + else { + if (LoadTy.isNull()) + LoadTy = BoundEx->getType(); + SVal V = state->getSVal(cast(location), LoadTy); + Bldr.generateNode(NodeEx, *NI, + state->bindExprAndLocation(BoundEx, LCtx, location, V), + false, tag, ProgramPoint::PostLoadKind); + } + } +} + +void ExprEngine::evalLocation(ExplodedNodeSet &Dst, + const Stmt *NodeEx, + const Stmt *BoundEx, + ExplodedNode *Pred, + ProgramStateRef state, + SVal location, + const ProgramPointTag *tag, + bool isLoad) { + StmtNodeBuilder BldrTop(Pred, Dst, *currentBuilderContext); + // Early checks for performance reason. + if (location.isUnknown()) { + return; + } + + ExplodedNodeSet Src; + BldrTop.takeNodes(Pred); + StmtNodeBuilder Bldr(Pred, Src, *currentBuilderContext); + if (Pred->getState() != state) { + // Associate this new state with an ExplodedNode. + // FIXME: If I pass null tag, the graph is incorrect, e.g for + // int *p; + // p = 0; + // *p = 0xDEADBEEF; + // "p = 0" is not noted as "Null pointer value stored to 'p'" but + // instead "int *p" is noted as + // "Variable 'p' initialized to a null pointer value" + + // FIXME: why is 'tag' not used instead of etag? + static SimpleProgramPointTag etag("ExprEngine: Location"); + Bldr.generateNode(NodeEx, Pred, state, false, &etag); + } + ExplodedNodeSet Tmp; + getCheckerManager().runCheckersForLocation(Tmp, Src, location, isLoad, + NodeEx, BoundEx, *this); + BldrTop.addNodes(Tmp); +} + +std::pair +ExprEngine::getEagerlyAssumeTags() { + static SimpleProgramPointTag + EagerlyAssumeTrue("ExprEngine : Eagerly Assume True"), + EagerlyAssumeFalse("ExprEngine : Eagerly Assume False"); + return std::make_pair(&EagerlyAssumeTrue, &EagerlyAssumeFalse); +} + +void ExprEngine::evalEagerlyAssume(ExplodedNodeSet &Dst, ExplodedNodeSet &Src, + const Expr *Ex) { + StmtNodeBuilder Bldr(Src, Dst, *currentBuilderContext); + + for (ExplodedNodeSet::iterator I=Src.begin(), E=Src.end(); I!=E; ++I) { + ExplodedNode *Pred = *I; + // Test if the previous node was as the same expression. This can happen + // when the expression fails to evaluate to anything meaningful and + // (as an optimization) we don't generate a node. + ProgramPoint P = Pred->getLocation(); + if (!isa(P) || cast(P).getStmt() != Ex) { + continue; + } + + ProgramStateRef state = Pred->getState(); + SVal V = state->getSVal(Ex, Pred->getLocationContext()); + nonloc::SymbolVal *SEV = dyn_cast(&V); + if (SEV && SEV->isExpression()) { + const std::pair &tags = + getEagerlyAssumeTags(); + + // First assume that the condition is true. + if (ProgramStateRef StateTrue = state->assume(*SEV, true)) { + SVal Val = svalBuilder.makeIntVal(1U, Ex->getType()); + StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val); + Bldr.generateNode(Ex, Pred, StateTrue, false, tags.first); + } + + // Next, assume that the condition is false. + if (ProgramStateRef StateFalse = state->assume(*SEV, false)) { + SVal Val = svalBuilder.makeIntVal(0U, Ex->getType()); + StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val); + Bldr.generateNode(Ex, Pred, StateFalse, false, tags.second); + } + } + } +} + +void ExprEngine::VisitAsmStmt(const AsmStmt *A, ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + // We have processed both the inputs and the outputs. All of the outputs + // should evaluate to Locs. Nuke all of their values. + + // FIXME: Some day in the future it would be nice to allow a "plug-in" + // which interprets the inline asm and stores proper results in the + // outputs. + + ProgramStateRef state = Pred->getState(); + + for (AsmStmt::const_outputs_iterator OI = A->begin_outputs(), + OE = A->end_outputs(); OI != OE; ++OI) { + SVal X = state->getSVal(*OI, Pred->getLocationContext()); + assert (!isa(X)); // Should be an Lval, or unknown, undef. + + if (isa(X)) + state = state->bindLoc(cast(X), UnknownVal()); + } + + Bldr.generateNode(A, Pred, state); +} + +//===----------------------------------------------------------------------===// +// Visualization. +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +static ExprEngine* GraphPrintCheckerState; +static SourceManager* GraphPrintSourceManager; + +namespace llvm { +template<> +struct DOTGraphTraits : + public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + // FIXME: Since we do not cache error nodes in ExprEngine now, this does not + // work. + static std::string getNodeAttributes(const ExplodedNode *N, void*) { + +#if 0 + // FIXME: Replace with a general scheme to tell if the node is + // an error node. + if (GraphPrintCheckerState->isImplicitNullDeref(N) || + GraphPrintCheckerState->isExplicitNullDeref(N) || + GraphPrintCheckerState->isUndefDeref(N) || + GraphPrintCheckerState->isUndefStore(N) || + GraphPrintCheckerState->isUndefControlFlow(N) || + GraphPrintCheckerState->isUndefResult(N) || + GraphPrintCheckerState->isBadCall(N) || + GraphPrintCheckerState->isUndefArg(N)) + return "color=\"red\",style=\"filled\""; + + if (GraphPrintCheckerState->isNoReturnCall(N)) + return "color=\"blue\",style=\"filled\""; +#endif + return ""; + } + + static std::string getNodeLabel(const ExplodedNode *N, void*){ + + std::string sbuf; + llvm::raw_string_ostream Out(sbuf); + + // Program Location. + ProgramPoint Loc = N->getLocation(); + + switch (Loc.getKind()) { + case ProgramPoint::BlockEntranceKind: + Out << "Block Entrance: B" + << cast(Loc).getBlock()->getBlockID(); + break; + + case ProgramPoint::BlockExitKind: + assert (false); + break; + + case ProgramPoint::CallEnterKind: + Out << "CallEnter"; + break; + + case ProgramPoint::CallExitKind: + Out << "CallExit"; + break; + + case ProgramPoint::EpsilonKind: + Out << "Epsilon Point"; + break; + + default: { + if (StmtPoint *L = dyn_cast(&Loc)) { + const Stmt *S = L->getStmt(); + SourceLocation SLoc = S->getLocStart(); + + Out << S->getStmtClassName() << ' ' << (void*) S << ' '; + LangOptions LO; // FIXME. + S->printPretty(Out, 0, PrintingPolicy(LO)); + + if (SLoc.isFileID()) { + Out << "\\lline=" + << GraphPrintSourceManager->getExpansionLineNumber(SLoc) + << " col=" + << GraphPrintSourceManager->getExpansionColumnNumber(SLoc) + << "\\l"; + } + + if (isa(Loc)) + Out << "\\lPreStmt\\l;"; + else if (isa(Loc)) + Out << "\\lPostLoad\\l;"; + else if (isa(Loc)) + Out << "\\lPostStore\\l"; + else if (isa(Loc)) + Out << "\\lPostLValue\\l"; + +#if 0 + // FIXME: Replace with a general scheme to determine + // the name of the check. + if (GraphPrintCheckerState->isImplicitNullDeref(N)) + Out << "\\|Implicit-Null Dereference.\\l"; + else if (GraphPrintCheckerState->isExplicitNullDeref(N)) + Out << "\\|Explicit-Null Dereference.\\l"; + else if (GraphPrintCheckerState->isUndefDeref(N)) + Out << "\\|Dereference of undefialied value.\\l"; + else if (GraphPrintCheckerState->isUndefStore(N)) + Out << "\\|Store to Undefined Loc."; + else if (GraphPrintCheckerState->isUndefResult(N)) + Out << "\\|Result of operation is undefined."; + else if (GraphPrintCheckerState->isNoReturnCall(N)) + Out << "\\|Call to function marked \"noreturn\"."; + else if (GraphPrintCheckerState->isBadCall(N)) + Out << "\\|Call to NULL/Undefined."; + else if (GraphPrintCheckerState->isUndefArg(N)) + Out << "\\|Argument in call is undefined"; +#endif + + break; + } + + const BlockEdge &E = cast(Loc); + Out << "Edge: (B" << E.getSrc()->getBlockID() << ", B" + << E.getDst()->getBlockID() << ')'; + + if (const Stmt *T = E.getSrc()->getTerminator()) { + + SourceLocation SLoc = T->getLocStart(); + + Out << "\\|Terminator: "; + LangOptions LO; // FIXME. + E.getSrc()->printTerminator(Out, LO); + + if (SLoc.isFileID()) { + Out << "\\lline=" + << GraphPrintSourceManager->getExpansionLineNumber(SLoc) + << " col=" + << GraphPrintSourceManager->getExpansionColumnNumber(SLoc); + } + + if (isa(T)) { + const Stmt *Label = E.getDst()->getLabel(); + + if (Label) { + if (const CaseStmt *C = dyn_cast(Label)) { + Out << "\\lcase "; + LangOptions LO; // FIXME. + C->getLHS()->printPretty(Out, 0, PrintingPolicy(LO)); + + if (const Stmt *RHS = C->getRHS()) { + Out << " .. "; + RHS->printPretty(Out, 0, PrintingPolicy(LO)); + } + + Out << ":"; + } + else { + assert (isa(Label)); + Out << "\\ldefault:"; + } + } + else + Out << "\\l(implicit) default:"; + } + else if (isa(T)) { + // FIXME + } + else { + Out << "\\lCondition: "; + if (*E.getSrc()->succ_begin() == E.getDst()) + Out << "true"; + else + Out << "false"; + } + + Out << "\\l"; + } + +#if 0 + // FIXME: Replace with a general scheme to determine + // the name of the check. + if (GraphPrintCheckerState->isUndefControlFlow(N)) { + Out << "\\|Control-flow based on\\lUndefined value.\\l"; + } +#endif + } + } + + ProgramStateRef state = N->getState(); + Out << "\\|StateID: " << (void*) state.getPtr() + << " NodeID: " << (void*) N << "\\|"; + state->printDOT(Out); + + Out << "\\l"; + + if (const ProgramPointTag *tag = Loc.getTag()) { + Out << "\\|Tag: " << tag->getTagDescription(); + Out << "\\l"; + } + return Out.str(); + } +}; +} // end llvm namespace +#endif + +#ifndef NDEBUG +template +ExplodedNode *GetGraphNode(ITERATOR I) { return *I; } + +template <> ExplodedNode* +GetGraphNode::iterator> + (llvm::DenseMap::iterator I) { + return I->first; +} +#endif + +void ExprEngine::ViewGraph(bool trim) { +#ifndef NDEBUG + if (trim) { + std::vector Src; + + // Flush any outstanding reports to make sure we cover all the nodes. + // This does not cause them to get displayed. + for (BugReporter::iterator I=BR.begin(), E=BR.end(); I!=E; ++I) + const_cast(*I)->FlushReports(BR); + + // Iterate through the reports and get their nodes. + for (BugReporter::EQClasses_iterator + EI = BR.EQClasses_begin(), EE = BR.EQClasses_end(); EI != EE; ++EI) { + ExplodedNode *N = const_cast(EI->begin()->getErrorNode()); + if (N) Src.push_back(N); + } + + ViewGraph(&Src[0], &Src[0]+Src.size()); + } + else { + GraphPrintCheckerState = this; + GraphPrintSourceManager = &getContext().getSourceManager(); + + llvm::ViewGraph(*G.roots_begin(), "ExprEngine"); + + GraphPrintCheckerState = NULL; + GraphPrintSourceManager = NULL; + } +#endif +} + +void ExprEngine::ViewGraph(ExplodedNode** Beg, ExplodedNode** End) { +#ifndef NDEBUG + GraphPrintCheckerState = this; + GraphPrintSourceManager = &getContext().getSourceManager(); + + std::auto_ptr TrimmedG(G.Trim(Beg, End).first); + + if (!TrimmedG.get()) + llvm::errs() << "warning: Trimmed ExplodedGraph is empty.\n"; + else + llvm::ViewGraph(*TrimmedG->roots_begin(), "TrimmedExprEngine"); + + GraphPrintCheckerState = NULL; + GraphPrintSourceManager = NULL; +#endif +} diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp new file mode 100644 index 0000000..93e598a --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -0,0 +1,811 @@ +//=-- ExprEngineC.cpp - ExprEngine support for C expressions ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ExprEngine's support for C expressions. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; +using llvm::APSInt; + +void ExprEngine::VisitBinaryOperator(const BinaryOperator* B, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + + Expr *LHS = B->getLHS()->IgnoreParens(); + Expr *RHS = B->getRHS()->IgnoreParens(); + + // FIXME: Prechecks eventually go in ::Visit(). + ExplodedNodeSet CheckedSet; + ExplodedNodeSet Tmp2; + getCheckerManager().runCheckersForPreStmt(CheckedSet, Pred, B, *this); + + // With both the LHS and RHS evaluated, process the operation itself. + for (ExplodedNodeSet::iterator it=CheckedSet.begin(), ei=CheckedSet.end(); + it != ei; ++it) { + + ProgramStateRef state = (*it)->getState(); + const LocationContext *LCtx = (*it)->getLocationContext(); + SVal LeftV = state->getSVal(LHS, LCtx); + SVal RightV = state->getSVal(RHS, LCtx); + + BinaryOperator::Opcode Op = B->getOpcode(); + + if (Op == BO_Assign) { + // EXPERIMENTAL: "Conjured" symbols. + // FIXME: Handle structs. + if (RightV.isUnknown()) { + unsigned Count = currentBuilderContext->getCurrentBlockCount(); + RightV = svalBuilder.getConjuredSymbolVal(NULL, B->getRHS(), LCtx, Count); + } + // Simulate the effects of a "store": bind the value of the RHS + // to the L-Value represented by the LHS. + SVal ExprVal = B->isLValue() ? LeftV : RightV; + evalStore(Tmp2, B, LHS, *it, state->BindExpr(B, LCtx, ExprVal), + LeftV, RightV); + continue; + } + + if (!B->isAssignmentOp()) { + StmtNodeBuilder Bldr(*it, Tmp2, *currentBuilderContext); + // Process non-assignments except commas or short-circuited + // logical expressions (LAnd and LOr). + SVal Result = evalBinOp(state, Op, LeftV, RightV, B->getType()); + if (Result.isUnknown()) { + Bldr.generateNode(B, *it, state); + continue; + } + + state = state->BindExpr(B, LCtx, Result); + Bldr.generateNode(B, *it, state); + continue; + } + + assert (B->isCompoundAssignmentOp()); + + switch (Op) { + default: + llvm_unreachable("Invalid opcode for compound assignment."); + case BO_MulAssign: Op = BO_Mul; break; + case BO_DivAssign: Op = BO_Div; break; + case BO_RemAssign: Op = BO_Rem; break; + case BO_AddAssign: Op = BO_Add; break; + case BO_SubAssign: Op = BO_Sub; break; + case BO_ShlAssign: Op = BO_Shl; break; + case BO_ShrAssign: Op = BO_Shr; break; + case BO_AndAssign: Op = BO_And; break; + case BO_XorAssign: Op = BO_Xor; break; + case BO_OrAssign: Op = BO_Or; break; + } + + // Perform a load (the LHS). This performs the checks for + // null dereferences, and so on. + ExplodedNodeSet Tmp; + SVal location = LeftV; + evalLoad(Tmp, B, LHS, *it, state, location); + + for (ExplodedNodeSet::iterator I = Tmp.begin(), E = Tmp.end(); I != E; + ++I) { + + state = (*I)->getState(); + const LocationContext *LCtx = (*I)->getLocationContext(); + SVal V = state->getSVal(LHS, LCtx); + + // Get the computation type. + QualType CTy = + cast(B)->getComputationResultType(); + CTy = getContext().getCanonicalType(CTy); + + QualType CLHSTy = + cast(B)->getComputationLHSType(); + CLHSTy = getContext().getCanonicalType(CLHSTy); + + QualType LTy = getContext().getCanonicalType(LHS->getType()); + + // Promote LHS. + V = svalBuilder.evalCast(V, CLHSTy, LTy); + + // Compute the result of the operation. + SVal Result = svalBuilder.evalCast(evalBinOp(state, Op, V, RightV, CTy), + B->getType(), CTy); + + // EXPERIMENTAL: "Conjured" symbols. + // FIXME: Handle structs. + + SVal LHSVal; + + if (Result.isUnknown()) { + + unsigned Count = currentBuilderContext->getCurrentBlockCount(); + + // The symbolic value is actually for the type of the left-hand side + // expression, not the computation type, as this is the value the + // LValue on the LHS will bind to. + LHSVal = svalBuilder.getConjuredSymbolVal(NULL, B->getRHS(), LCtx, + LTy, Count); + + // However, we need to convert the symbol to the computation type. + Result = svalBuilder.evalCast(LHSVal, CTy, LTy); + } + else { + // The left-hand side may bind to a different value then the + // computation type. + LHSVal = svalBuilder.evalCast(Result, LTy, CTy); + } + + // In C++, assignment and compound assignment operators return an + // lvalue. + if (B->isLValue()) + state = state->BindExpr(B, LCtx, location); + else + state = state->BindExpr(B, LCtx, Result); + + evalStore(Tmp2, B, LHS, *I, state, location, LHSVal); + } + } + + // FIXME: postvisits eventually go in ::Visit() + getCheckerManager().runCheckersForPostStmt(Dst, Tmp2, B, *this); +} + +void ExprEngine::VisitBlockExpr(const BlockExpr *BE, ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + + CanQualType T = getContext().getCanonicalType(BE->getType()); + SVal V = svalBuilder.getBlockPointer(BE->getBlockDecl(), T, + Pred->getLocationContext()); + + ExplodedNodeSet Tmp; + StmtNodeBuilder Bldr(Pred, Tmp, *currentBuilderContext); + Bldr.generateNode(BE, Pred, + Pred->getState()->BindExpr(BE, Pred->getLocationContext(), + V), + false, 0, + ProgramPoint::PostLValueKind); + + // FIXME: Move all post/pre visits to ::Visit(). + getCheckerManager().runCheckersForPostStmt(Dst, Tmp, BE, *this); +} + +void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, + ExplodedNode *Pred, ExplodedNodeSet &Dst) { + + ExplodedNodeSet dstPreStmt; + getCheckerManager().runCheckersForPreStmt(dstPreStmt, Pred, CastE, *this); + + if (CastE->getCastKind() == CK_LValueToRValue) { + for (ExplodedNodeSet::iterator I = dstPreStmt.begin(), E = dstPreStmt.end(); + I!=E; ++I) { + ExplodedNode *subExprNode = *I; + ProgramStateRef state = subExprNode->getState(); + const LocationContext *LCtx = subExprNode->getLocationContext(); + evalLoad(Dst, CastE, CastE, subExprNode, state, state->getSVal(Ex, LCtx)); + } + return; + } + + // All other casts. + QualType T = CastE->getType(); + QualType ExTy = Ex->getType(); + + if (const ExplicitCastExpr *ExCast=dyn_cast_or_null(CastE)) + T = ExCast->getTypeAsWritten(); + + StmtNodeBuilder Bldr(dstPreStmt, Dst, *currentBuilderContext); + for (ExplodedNodeSet::iterator I = dstPreStmt.begin(), E = dstPreStmt.end(); + I != E; ++I) { + + Pred = *I; + + switch (CastE->getCastKind()) { + case CK_LValueToRValue: + llvm_unreachable("LValueToRValue casts handled earlier."); + case CK_ToVoid: + continue; + // The analyzer doesn't do anything special with these casts, + // since it understands retain/release semantics already. + case CK_ARCProduceObject: + case CK_ARCConsumeObject: + case CK_ARCReclaimReturnedObject: + case CK_ARCExtendBlockObject: // Fall-through. + case CK_CopyAndAutoreleaseBlockObject: + // The analyser can ignore atomic casts for now, although some future + // checkers may want to make certain that you're not modifying the same + // value through atomic and nonatomic pointers. + case CK_AtomicToNonAtomic: + case CK_NonAtomicToAtomic: + // True no-ops. + case CK_NoOp: + case CK_FunctionToPointerDecay: { + // Copy the SVal of Ex to CastE. + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal V = state->getSVal(Ex, LCtx); + state = state->BindExpr(CastE, LCtx, V); + Bldr.generateNode(CastE, Pred, state); + continue; + } + case CK_Dependent: + case CK_ArrayToPointerDecay: + case CK_BitCast: + case CK_LValueBitCast: + case CK_IntegralCast: + case CK_NullToPointer: + case CK_IntegralToPointer: + case CK_PointerToIntegral: + case CK_PointerToBoolean: + case CK_IntegralToBoolean: + case CK_IntegralToFloating: + case CK_FloatingToIntegral: + case CK_FloatingToBoolean: + case CK_FloatingCast: + case CK_FloatingRealToComplex: + case CK_FloatingComplexToReal: + case CK_FloatingComplexToBoolean: + case CK_FloatingComplexCast: + case CK_FloatingComplexToIntegralComplex: + case CK_IntegralRealToComplex: + case CK_IntegralComplexToReal: + case CK_IntegralComplexToBoolean: + case CK_IntegralComplexCast: + case CK_IntegralComplexToFloatingComplex: + case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_ObjCObjectLValueCast: { + // Delegate to SValBuilder to process. + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal V = state->getSVal(Ex, LCtx); + V = svalBuilder.evalCast(V, T, ExTy); + state = state->BindExpr(CastE, LCtx, V); + Bldr.generateNode(CastE, Pred, state); + continue; + } + case CK_DerivedToBase: + case CK_UncheckedDerivedToBase: { + // For DerivedToBase cast, delegate to the store manager. + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal val = state->getSVal(Ex, LCtx); + val = getStoreManager().evalDerivedToBase(val, T); + state = state->BindExpr(CastE, LCtx, val); + Bldr.generateNode(CastE, Pred, state); + continue; + } + // Handle C++ dyn_cast. + case CK_Dynamic: { + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal val = state->getSVal(Ex, LCtx); + + // Compute the type of the result. + QualType resultType = CastE->getType(); + if (CastE->isLValue()) + resultType = getContext().getPointerType(resultType); + + bool Failed = false; + + // Check if the value being cast evaluates to 0. + if (val.isZeroConstant()) + Failed = true; + // Else, evaluate the cast. + else + val = getStoreManager().evalDynamicCast(val, T, Failed); + + if (Failed) { + if (T->isReferenceType()) { + // A bad_cast exception is thrown if input value is a reference. + // Currently, we model this, by generating a sink. + Bldr.generateNode(CastE, Pred, state, true); + continue; + } else { + // If the cast fails on a pointer, bind to 0. + state = state->BindExpr(CastE, LCtx, svalBuilder.makeNull()); + } + } else { + // If we don't know if the cast succeeded, conjure a new symbol. + if (val.isUnknown()) { + DefinedOrUnknownSVal NewSym = svalBuilder.getConjuredSymbolVal(NULL, + CastE, LCtx, resultType, + currentBuilderContext->getCurrentBlockCount()); + state = state->BindExpr(CastE, LCtx, NewSym); + } else + // Else, bind to the derived region value. + state = state->BindExpr(CastE, LCtx, val); + } + Bldr.generateNode(CastE, Pred, state); + continue; + } + // Various C++ casts that are not handled yet. + case CK_ToUnion: + case CK_BaseToDerived: + case CK_NullToMemberPointer: + case CK_BaseToDerivedMemberPointer: + case CK_DerivedToBaseMemberPointer: + case CK_ReinterpretMemberPointer: + case CK_UserDefinedConversion: + case CK_ConstructorConversion: + case CK_VectorSplat: + case CK_MemberPointerToBoolean: { + // Recover some path-sensitivty by conjuring a new value. + QualType resultType = CastE->getType(); + if (CastE->isLValue()) + resultType = getContext().getPointerType(resultType); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal result = svalBuilder.getConjuredSymbolVal(NULL, CastE, LCtx, + resultType, currentBuilderContext->getCurrentBlockCount()); + ProgramStateRef state = Pred->getState()->BindExpr(CastE, LCtx, + result); + Bldr.generateNode(CastE, Pred, state); + continue; + } + } + } +} + +void ExprEngine::VisitCompoundLiteralExpr(const CompoundLiteralExpr *CL, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder B(Pred, Dst, *currentBuilderContext); + + const InitListExpr *ILE + = cast(CL->getInitializer()->IgnoreParens()); + + ProgramStateRef state = Pred->getState(); + SVal ILV = state->getSVal(ILE, Pred->getLocationContext()); + const LocationContext *LC = Pred->getLocationContext(); + state = state->bindCompoundLiteral(CL, LC, ILV); + + if (CL->isLValue()) + B.generateNode(CL, Pred, state->BindExpr(CL, LC, state->getLValue(CL, LC))); + else + B.generateNode(CL, Pred, state->BindExpr(CL, LC, ILV)); +} + +void ExprEngine::VisitDeclStmt(const DeclStmt *DS, ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + + // FIXME: static variables may have an initializer, but the second + // time a function is called those values may not be current. + // This may need to be reflected in the CFG. + + // Assumption: The CFG has one DeclStmt per Decl. + const Decl *D = *DS->decl_begin(); + + if (!D || !isa(D)) { + //TODO:AZ: remove explicit insertion after refactoring is done. + Dst.insert(Pred); + return; + } + + // FIXME: all pre/post visits should eventually be handled by ::Visit(). + ExplodedNodeSet dstPreVisit; + getCheckerManager().runCheckersForPreStmt(dstPreVisit, Pred, DS, *this); + + StmtNodeBuilder B(dstPreVisit, Dst, *currentBuilderContext); + const VarDecl *VD = dyn_cast(D); + for (ExplodedNodeSet::iterator I = dstPreVisit.begin(), E = dstPreVisit.end(); + I!=E; ++I) { + ExplodedNode *N = *I; + ProgramStateRef state = N->getState(); + + // Decls without InitExpr are not initialized explicitly. + const LocationContext *LC = N->getLocationContext(); + + if (const Expr *InitEx = VD->getInit()) { + SVal InitVal = state->getSVal(InitEx, Pred->getLocationContext()); + + // We bound the temp obj region to the CXXConstructExpr. Now recover + // the lazy compound value when the variable is not a reference. + if (AMgr.getLangOpts().CPlusPlus && VD->getType()->isRecordType() && + !VD->getType()->isReferenceType() && isa(InitVal)){ + InitVal = state->getSVal(cast(InitVal).getRegion()); + assert(isa(InitVal)); + } + + // Recover some path-sensitivity if a scalar value evaluated to + // UnknownVal. + if (InitVal.isUnknown()) { + QualType Ty = InitEx->getType(); + if (InitEx->isLValue()) { + Ty = getContext().getPointerType(Ty); + } + + InitVal = svalBuilder.getConjuredSymbolVal(NULL, InitEx, LC, Ty, + currentBuilderContext->getCurrentBlockCount()); + } + B.takeNodes(N); + ExplodedNodeSet Dst2; + evalBind(Dst2, DS, N, state->getLValue(VD, LC), InitVal, true); + B.addNodes(Dst2); + } + else { + B.generateNode(DS, N,state->bindDeclWithNoInit(state->getRegion(VD, LC))); + } + } +} + +void ExprEngine::VisitLogicalExpr(const BinaryOperator* B, ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + assert(B->getOpcode() == BO_LAnd || + B->getOpcode() == BO_LOr); + + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal X = state->getSVal(B, LCtx); + assert(X.isUndef()); + + const Expr *Ex = (const Expr*) cast(X).getData(); + assert(Ex); + + if (Ex == B->getRHS()) { + X = state->getSVal(Ex, LCtx); + + // Handle undefined values. + if (X.isUndef()) { + Bldr.generateNode(B, Pred, state->BindExpr(B, LCtx, X)); + return; + } + + DefinedOrUnknownSVal XD = cast(X); + + // We took the RHS. Because the value of the '&&' or '||' expression must + // evaluate to 0 or 1, we must assume the value of the RHS evaluates to 0 + // or 1. Alternatively, we could take a lazy approach, and calculate this + // value later when necessary. We don't have the machinery in place for + // this right now, and since most logical expressions are used for branches, + // the payoff is not likely to be large. Instead, we do eager evaluation. + if (ProgramStateRef newState = state->assume(XD, true)) + Bldr.generateNode(B, Pred, + newState->BindExpr(B, LCtx, + svalBuilder.makeIntVal(1U, B->getType()))); + + if (ProgramStateRef newState = state->assume(XD, false)) + Bldr.generateNode(B, Pred, + newState->BindExpr(B, LCtx, + svalBuilder.makeIntVal(0U, B->getType()))); + } + else { + // We took the LHS expression. Depending on whether we are '&&' or + // '||' we know what the value of the expression is via properties of + // the short-circuiting. + X = svalBuilder.makeIntVal(B->getOpcode() == BO_LAnd ? 0U : 1U, + B->getType()); + Bldr.generateNode(B, Pred, state->BindExpr(B, LCtx, X)); + } +} + +void ExprEngine::VisitInitListExpr(const InitListExpr *IE, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder B(Pred, Dst, *currentBuilderContext); + + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + QualType T = getContext().getCanonicalType(IE->getType()); + unsigned NumInitElements = IE->getNumInits(); + + if (T->isArrayType() || T->isRecordType() || T->isVectorType()) { + llvm::ImmutableList vals = getBasicVals().getEmptySValList(); + + // Handle base case where the initializer has no elements. + // e.g: static int* myArray[] = {}; + if (NumInitElements == 0) { + SVal V = svalBuilder.makeCompoundVal(T, vals); + B.generateNode(IE, Pred, state->BindExpr(IE, LCtx, V)); + return; + } + + for (InitListExpr::const_reverse_iterator it = IE->rbegin(), + ei = IE->rend(); it != ei; ++it) { + vals = getBasicVals().consVals(state->getSVal(cast(*it), LCtx), + vals); + } + + B.generateNode(IE, Pred, + state->BindExpr(IE, LCtx, + svalBuilder.makeCompoundVal(T, vals))); + return; + } + + if (Loc::isLocType(T) || T->isIntegerType()) { + assert(IE->getNumInits() == 1); + const Expr *initEx = IE->getInit(0); + B.generateNode(IE, Pred, state->BindExpr(IE, LCtx, + state->getSVal(initEx, LCtx))); + return; + } + + llvm_unreachable("unprocessed InitListExpr type"); +} + +void ExprEngine::VisitGuardedExpr(const Expr *Ex, + const Expr *L, + const Expr *R, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder B(Pred, Dst, *currentBuilderContext); + + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal X = state->getSVal(Ex, LCtx); + assert (X.isUndef()); + const Expr *SE = (Expr*) cast(X).getData(); + assert(SE); + X = state->getSVal(SE, LCtx); + + // Make sure that we invalidate the previous binding. + B.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, X, true)); +} + +void ExprEngine:: +VisitOffsetOfExpr(const OffsetOfExpr *OOE, + ExplodedNode *Pred, ExplodedNodeSet &Dst) { + StmtNodeBuilder B(Pred, Dst, *currentBuilderContext); + APSInt IV; + if (OOE->EvaluateAsInt(IV, getContext())) { + assert(IV.getBitWidth() == getContext().getTypeSize(OOE->getType())); + assert(OOE->getType()->isIntegerType()); + assert(IV.isSigned() == OOE->getType()->isSignedIntegerOrEnumerationType()); + SVal X = svalBuilder.makeIntVal(IV); + B.generateNode(OOE, Pred, + Pred->getState()->BindExpr(OOE, Pred->getLocationContext(), + X)); + } + // FIXME: Handle the case where __builtin_offsetof is not a constant. +} + + +void ExprEngine:: +VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *Ex, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + + QualType T = Ex->getTypeOfArgument(); + + if (Ex->getKind() == UETT_SizeOf) { + if (!T->isIncompleteType() && !T->isConstantSizeType()) { + assert(T->isVariableArrayType() && "Unknown non-constant-sized type."); + + // FIXME: Add support for VLA type arguments and VLA expressions. + // When that happens, we should probably refactor VLASizeChecker's code. + return; + } + else if (T->getAs()) { + // Some code tries to take the sizeof an ObjCObjectType, relying that + // the compiler has laid out its representation. Just report Unknown + // for these. + return; + } + } + + APSInt Value = Ex->EvaluateKnownConstInt(getContext()); + CharUnits amt = CharUnits::fromQuantity(Value.getZExtValue()); + + ProgramStateRef state = Pred->getState(); + state = state->BindExpr(Ex, Pred->getLocationContext(), + svalBuilder.makeIntVal(amt.getQuantity(), + Ex->getType())); + Bldr.generateNode(Ex, Pred, state); +} + +void ExprEngine::VisitUnaryOperator(const UnaryOperator* U, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + switch (U->getOpcode()) { + default: { + Bldr.takeNodes(Pred); + ExplodedNodeSet Tmp; + VisitIncrementDecrementOperator(U, Pred, Tmp); + Bldr.addNodes(Tmp); + } + break; + case UO_Real: { + const Expr *Ex = U->getSubExpr()->IgnoreParens(); + + // FIXME: We don't have complex SValues yet. + if (Ex->getType()->isAnyComplexType()) { + // Just report "Unknown." + break; + } + + // For all other types, UO_Real is an identity operation. + assert (U->getType() == Ex->getType()); + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + Bldr.generateNode(U, Pred, state->BindExpr(U, LCtx, + state->getSVal(Ex, LCtx))); + break; + } + + case UO_Imag: { + const Expr *Ex = U->getSubExpr()->IgnoreParens(); + // FIXME: We don't have complex SValues yet. + if (Ex->getType()->isAnyComplexType()) { + // Just report "Unknown." + break; + } + // For all other types, UO_Imag returns 0. + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal X = svalBuilder.makeZeroVal(Ex->getType()); + Bldr.generateNode(U, Pred, state->BindExpr(U, LCtx, X)); + break; + } + + case UO_Plus: + assert(!U->isLValue()); + // FALL-THROUGH. + case UO_Deref: + case UO_AddrOf: + case UO_Extension: { + // FIXME: We can probably just have some magic in Environment::getSVal() + // that propagates values, instead of creating a new node here. + // + // Unary "+" is a no-op, similar to a parentheses. We still have places + // where it may be a block-level expression, so we need to + // generate an extra node that just propagates the value of the + // subexpression. + const Expr *Ex = U->getSubExpr()->IgnoreParens(); + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + Bldr.generateNode(U, Pred, state->BindExpr(U, LCtx, + state->getSVal(Ex, LCtx))); + break; + } + + case UO_LNot: + case UO_Minus: + case UO_Not: { + assert (!U->isLValue()); + const Expr *Ex = U->getSubExpr()->IgnoreParens(); + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + + // Get the value of the subexpression. + SVal V = state->getSVal(Ex, LCtx); + + if (V.isUnknownOrUndef()) { + Bldr.generateNode(U, Pred, state->BindExpr(U, LCtx, V)); + break; + } + + switch (U->getOpcode()) { + default: + llvm_unreachable("Invalid Opcode."); + case UO_Not: + // FIXME: Do we need to handle promotions? + state = state->BindExpr(U, LCtx, evalComplement(cast(V))); + break; + case UO_Minus: + // FIXME: Do we need to handle promotions? + state = state->BindExpr(U, LCtx, evalMinus(cast(V))); + break; + case UO_LNot: + // C99 6.5.3.3: "The expression !E is equivalent to (0==E)." + // + // Note: technically we do "E == 0", but this is the same in the + // transfer functions as "0 == E". + SVal Result; + if (isa(V)) { + Loc X = svalBuilder.makeNull(); + Result = evalBinOp(state, BO_EQ, cast(V), X, + U->getType()); + } + else { + nonloc::ConcreteInt X(getBasicVals().getValue(0, Ex->getType())); + Result = evalBinOp(state, BO_EQ, cast(V), X, + U->getType()); + } + + state = state->BindExpr(U, LCtx, Result); + break; + } + Bldr.generateNode(U, Pred, state); + break; + } + } + +} + +void ExprEngine::VisitIncrementDecrementOperator(const UnaryOperator* U, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + // Handle ++ and -- (both pre- and post-increment). + assert (U->isIncrementDecrementOp()); + const Expr *Ex = U->getSubExpr()->IgnoreParens(); + + const LocationContext *LCtx = Pred->getLocationContext(); + ProgramStateRef state = Pred->getState(); + SVal loc = state->getSVal(Ex, LCtx); + + // Perform a load. + ExplodedNodeSet Tmp; + evalLoad(Tmp, U, Ex, Pred, state, loc); + + ExplodedNodeSet Dst2; + StmtNodeBuilder Bldr(Tmp, Dst2, *currentBuilderContext); + for (ExplodedNodeSet::iterator I=Tmp.begin(), E=Tmp.end();I!=E;++I) { + + state = (*I)->getState(); + assert(LCtx == (*I)->getLocationContext()); + SVal V2_untested = state->getSVal(Ex, LCtx); + + // Propagate unknown and undefined values. + if (V2_untested.isUnknownOrUndef()) { + Bldr.generateNode(U, *I, state->BindExpr(U, LCtx, V2_untested)); + continue; + } + DefinedSVal V2 = cast(V2_untested); + + // Handle all other values. + BinaryOperator::Opcode Op = U->isIncrementOp() ? BO_Add : BO_Sub; + + // If the UnaryOperator has non-location type, use its type to create the + // constant value. If the UnaryOperator has location type, create the + // constant with int type and pointer width. + SVal RHS; + + if (U->getType()->isAnyPointerType()) + RHS = svalBuilder.makeArrayIndex(1); + else + RHS = svalBuilder.makeIntVal(1, U->getType()); + + SVal Result = evalBinOp(state, Op, V2, RHS, U->getType()); + + // Conjure a new symbol if necessary to recover precision. + if (Result.isUnknown()){ + DefinedOrUnknownSVal SymVal = + svalBuilder.getConjuredSymbolVal(NULL, Ex, LCtx, + currentBuilderContext->getCurrentBlockCount()); + Result = SymVal; + + // If the value is a location, ++/-- should always preserve + // non-nullness. Check if the original value was non-null, and if so + // propagate that constraint. + if (Loc::isLocType(U->getType())) { + DefinedOrUnknownSVal Constraint = + svalBuilder.evalEQ(state, V2,svalBuilder.makeZeroVal(U->getType())); + + if (!state->assume(Constraint, true)) { + // It isn't feasible for the original value to be null. + // Propagate this constraint. + Constraint = svalBuilder.evalEQ(state, SymVal, + svalBuilder.makeZeroVal(U->getType())); + + + state = state->assume(Constraint, false); + assert(state); + } + } + } + + // Since the lvalue-to-rvalue conversion is explicit in the AST, + // we bind an l-value if the operator is prefix and an lvalue (in C++). + if (U->isLValue()) + state = state->BindExpr(U, LCtx, loc); + else + state = state->BindExpr(U, LCtx, U->isPostfix() ? V2 : Result); + + // Perform the store. + Bldr.takeNodes(*I); + ExplodedNodeSet Dst3; + evalStore(Dst3, U, U, *I, state, loc, Result); + Bldr.addNodes(Dst3); + } + Dst.insert(Dst2); +} diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp new file mode 100644 index 0000000..a14a491 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -0,0 +1,300 @@ +//===- ExprEngineCXX.cpp - ExprEngine support for C++ -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the C++ expression evaluation engine. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/StmtCXX.h" + +using namespace clang; +using namespace ento; + +const CXXThisRegion *ExprEngine::getCXXThisRegion(const CXXRecordDecl *D, + const StackFrameContext *SFC) { + const Type *T = D->getTypeForDecl(); + QualType PT = getContext().getPointerType(QualType(T, 0)); + return svalBuilder.getRegionManager().getCXXThisRegion(PT, SFC); +} + +const CXXThisRegion *ExprEngine::getCXXThisRegion(const CXXMethodDecl *decl, + const StackFrameContext *frameCtx) { + return svalBuilder.getRegionManager(). + getCXXThisRegion(decl->getThisType(getContext()), frameCtx); +} + +void ExprEngine::CreateCXXTemporaryObject(const MaterializeTemporaryExpr *ME, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + const Expr *tempExpr = ME->GetTemporaryExpr()->IgnoreParens(); + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + + // Bind the temporary object to the value of the expression. Then bind + // the expression to the location of the object. + SVal V = state->getSVal(tempExpr, Pred->getLocationContext()); + + const MemRegion *R = + svalBuilder.getRegionManager().getCXXTempObjectRegion(ME, LCtx); + + state = state->bindLoc(loc::MemRegionVal(R), V); + Bldr.generateNode(ME, Pred, state->BindExpr(ME, LCtx, loc::MemRegionVal(R))); +} + +void ExprEngine::VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *expr, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + VisitCXXConstructExpr(expr, 0, Pred, Dst); +} + +void ExprEngine::VisitCXXConstructExpr(const CXXConstructExpr *E, + const MemRegion *Dest, + ExplodedNode *Pred, + ExplodedNodeSet &destNodes) { + +#if 0 + const CXXConstructorDecl *CD = E->getConstructor(); + assert(CD); +#endif + +#if 0 + if (!(CD->doesThisDeclarationHaveABody() && AMgr.shouldInlineCall())) + // FIXME: invalidate the object. + return; +#endif + +#if 0 + // Is the constructor elidable? + if (E->isElidable()) { + destNodes.Add(Pred); + return; + } +#endif + + // Perform the previsit of the constructor. + ExplodedNodeSet SrcNodes; + SrcNodes.Add(Pred); + ExplodedNodeSet TmpNodes; + getCheckerManager().runCheckersForPreStmt(TmpNodes, SrcNodes, E, *this); + + // Evaluate the constructor. Currently we don't now allow checker-specific + // implementations of specific constructors (as we do with ordinary + // function calls. We can re-evaluate this in the future. + +#if 0 + // Inlining currently isn't fully implemented. + + if (AMgr.shouldInlineCall()) { + if (!Dest) + Dest = + svalBuilder.getRegionManager().getCXXTempObjectRegion(E, + Pred->getLocationContext()); + + // The callee stack frame context used to create the 'this' + // parameter region. + const StackFrameContext *SFC = + AMgr.getStackFrame(CD, Pred->getLocationContext(), + E, currentBuilderContext->getBlock(), + currentStmtIdx); + + // Create the 'this' region. + const CXXThisRegion *ThisR = + getCXXThisRegion(E->getConstructor()->getParent(), SFC); + + CallEnter Loc(E, SFC, Pred->getLocationContext()); + + StmtNodeBuilder Bldr(SrcNodes, TmpNodes, *currentBuilderContext); + for (ExplodedNodeSet::iterator NI = SrcNodes.begin(), + NE = SrcNodes.end(); NI != NE; ++NI) { + ProgramStateRef state = (*NI)->getState(); + // Setup 'this' region, so that the ctor is evaluated on the object pointed + // by 'Dest'. + state = state->bindLoc(loc::MemRegionVal(ThisR), loc::MemRegionVal(Dest)); + Bldr.generateNode(Loc, *NI, state); + } + } +#endif + + // Default semantics: invalidate all regions passed as arguments. + ExplodedNodeSet destCall; + { + StmtNodeBuilder Bldr(TmpNodes, destCall, *currentBuilderContext); + for (ExplodedNodeSet::iterator i = TmpNodes.begin(), e = TmpNodes.end(); + i != e; ++i) + { + ExplodedNode *Pred = *i; + const LocationContext *LC = Pred->getLocationContext(); + ProgramStateRef state = Pred->getState(); + + state = invalidateArguments(state, CallOrObjCMessage(E, state, LC), LC); + Bldr.generateNode(E, Pred, state); + } + } + // Do the post visit. + getCheckerManager().runCheckersForPostStmt(destNodes, destCall, E, *this); +} + +void ExprEngine::VisitCXXDestructor(const CXXDestructorDecl *DD, + const MemRegion *Dest, + const Stmt *S, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + if (!(DD->doesThisDeclarationHaveABody() && AMgr.shouldInlineCall())) + return; + + // Create the context for 'this' region. + const StackFrameContext *SFC = + AnalysisDeclContexts.getContext(DD)-> + getStackFrame(Pred->getLocationContext(), S, + currentBuilderContext->getBlock(), currentStmtIdx); + + const CXXThisRegion *ThisR = getCXXThisRegion(DD->getParent(), SFC); + + CallEnter PP(S, SFC, Pred->getLocationContext()); + + ProgramStateRef state = Pred->getState(); + state = state->bindLoc(loc::MemRegionVal(ThisR), loc::MemRegionVal(Dest)); + Bldr.generateNode(PP, Pred, state); +} + +void ExprEngine::VisitCXXNewExpr(const CXXNewExpr *CNE, ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + + unsigned blockCount = currentBuilderContext->getCurrentBlockCount(); + const LocationContext *LCtx = Pred->getLocationContext(); + DefinedOrUnknownSVal symVal = + svalBuilder.getConjuredSymbolVal(NULL, CNE, LCtx, CNE->getType(), blockCount); + const MemRegion *NewReg = cast(symVal).getRegion(); + QualType ObjTy = CNE->getType()->getAs()->getPointeeType(); + const ElementRegion *EleReg = + getStoreManager().GetElementZeroRegion(NewReg, ObjTy); + + if (CNE->isArray()) { + // FIXME: allocating an array requires simulating the constructors. + // For now, just return a symbolicated region. + ProgramStateRef state = Pred->getState(); + state = state->BindExpr(CNE, Pred->getLocationContext(), + loc::MemRegionVal(EleReg)); + Bldr.generateNode(CNE, Pred, state); + return; + } + + // FIXME: Update for AST changes. +#if 0 + // Evaluate constructor arguments. + const FunctionProtoType *FnType = NULL; + const CXXConstructorDecl *CD = CNE->getConstructor(); + if (CD) + FnType = CD->getType()->getAs(); + ExplodedNodeSet argsEvaluated; + Bldr.takeNodes(Pred); + evalArguments(CNE->constructor_arg_begin(), CNE->constructor_arg_end(), + FnType, Pred, argsEvaluated); + Bldr.addNodes(argsEvaluated); + + // Initialize the object region and bind the 'new' expression. + for (ExplodedNodeSet::iterator I = argsEvaluated.begin(), + E = argsEvaluated.end(); I != E; ++I) { + + ProgramStateRef state = (*I)->getState(); + + // Accumulate list of regions that are invalidated. + // FIXME: Eventually we should unify the logic for constructor + // processing in one place. + SmallVector regionsToInvalidate; + for (CXXNewExpr::const_arg_iterator + ai = CNE->constructor_arg_begin(), ae = CNE->constructor_arg_end(); + ai != ae; ++ai) + { + SVal val = state->getSVal(*ai, (*I)->getLocationContext()); + if (const MemRegion *region = val.getAsRegion()) + regionsToInvalidate.push_back(region); + } + + if (ObjTy->isRecordType()) { + regionsToInvalidate.push_back(EleReg); + // Invalidate the regions. + // TODO: Pass the call to new information as the last argument, to limit + // the globals which will get invalidated. + state = state->invalidateRegions(regionsToInvalidate, + CNE, blockCount, 0, 0); + + } else { + // Invalidate the regions. + // TODO: Pass the call to new information as the last argument, to limit + // the globals which will get invalidated. + state = state->invalidateRegions(regionsToInvalidate, + CNE, blockCount, 0, 0); + + if (CNE->hasInitializer()) { + SVal V = state->getSVal(*CNE->constructor_arg_begin(), + (*I)->getLocationContext()); + state = state->bindLoc(loc::MemRegionVal(EleReg), V); + } else { + // Explicitly set to undefined, because currently we retrieve symbolic + // value from symbolic region. + state = state->bindLoc(loc::MemRegionVal(EleReg), UndefinedVal()); + } + } + state = state->BindExpr(CNE, (*I)->getLocationContext(), + loc::MemRegionVal(EleReg)); + Bldr.generateNode(CNE, *I, state); + } +#endif +} + +void ExprEngine::VisitCXXDeleteExpr(const CXXDeleteExpr *CDE, + ExplodedNode *Pred, ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + ProgramStateRef state = Pred->getState(); + Bldr.generateNode(CDE, Pred, state); +} + +void ExprEngine::VisitCXXCatchStmt(const CXXCatchStmt *CS, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + const VarDecl *VD = CS->getExceptionDecl(); + if (!VD) { + Dst.Add(Pred); + return; + } + + const LocationContext *LCtx = Pred->getLocationContext(); + SVal V = svalBuilder.getConjuredSymbolVal(CS, LCtx, VD->getType(), + currentBuilderContext->getCurrentBlockCount()); + ProgramStateRef state = Pred->getState(); + state = state->bindLoc(state->getLValue(VD, LCtx), V); + + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + Bldr.generateNode(CS, Pred, state); +} + +void ExprEngine::VisitCXXThisExpr(const CXXThisExpr *TE, ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + + // Get the this object region from StoreManager. + const LocationContext *LCtx = Pred->getLocationContext(); + const MemRegion *R = + svalBuilder.getRegionManager().getCXXThisRegion( + getContext().getCanonicalType(TE->getType()), + LCtx); + + ProgramStateRef state = Pred->getState(); + SVal V = state->getSVal(loc::MemRegionVal(R)); + Bldr.generateNode(TE, Pred, state->BindExpr(TE, LCtx, V)); +} diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp new file mode 100644 index 0000000..b9f4e15 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -0,0 +1,487 @@ +//=-- ExprEngineCallAndReturn.cpp - Support for call/return -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ExprEngine's support for calls and returns. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/AST/DeclCXX.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/SaveAndRestore.h" + +using namespace clang; +using namespace ento; + +void ExprEngine::processCallEnter(CallEnter CE, ExplodedNode *Pred) { + // Get the entry block in the CFG of the callee. + const StackFrameContext *calleeCtx = CE.getCalleeContext(); + const CFG *CalleeCFG = calleeCtx->getCFG(); + const CFGBlock *Entry = &(CalleeCFG->getEntry()); + + // Validate the CFG. + assert(Entry->empty()); + assert(Entry->succ_size() == 1); + + // Get the solitary sucessor. + const CFGBlock *Succ = *(Entry->succ_begin()); + + // Construct an edge representing the starting location in the callee. + BlockEdge Loc(Entry, Succ, calleeCtx); + + // Construct a new state which contains the mapping from actual to + // formal arguments. + const LocationContext *callerCtx = Pred->getLocationContext(); + ProgramStateRef state = Pred->getState()->enterStackFrame(callerCtx, + calleeCtx); + + // Construct a new node and add it to the worklist. + bool isNew; + ExplodedNode *Node = G.getNode(Loc, state, false, &isNew); + Node->addPredecessor(Pred, G); + if (isNew) + Engine.getWorkList()->enqueue(Node); +} + +static const ReturnStmt *getReturnStmt(const ExplodedNode *Node) { + while (Node) { + const ProgramPoint &PP = Node->getLocation(); + // Skip any BlockEdges. + if (isa(PP) || isa(PP)) { + assert(Node->pred_size() == 1); + Node = *Node->pred_begin(); + continue; + } + if (const StmtPoint *SP = dyn_cast(&PP)) { + const Stmt *S = SP->getStmt(); + return dyn_cast(S); + } + break; + } + return 0; +} + +void ExprEngine::processCallExit(ExplodedNode *Pred) { + ProgramStateRef state = Pred->getState(); + const StackFrameContext *calleeCtx = + Pred->getLocationContext()->getCurrentStackFrame(); + const LocationContext *callerCtx = calleeCtx->getParent(); + const Stmt *CE = calleeCtx->getCallSite(); + + // If the callee returns an expression, bind its value to CallExpr. + if (const ReturnStmt *RS = getReturnStmt(Pred)) { + const LocationContext *LCtx = Pred->getLocationContext(); + SVal V = state->getSVal(RS, LCtx); + state = state->BindExpr(CE, callerCtx, V); + } + + // Bind the constructed object value to CXXConstructExpr. + if (const CXXConstructExpr *CCE = dyn_cast(CE)) { + const CXXThisRegion *ThisR = + getCXXThisRegion(CCE->getConstructor()->getParent(), calleeCtx); + + SVal ThisV = state->getSVal(ThisR); + // Always bind the region to the CXXConstructExpr. + state = state->BindExpr(CCE, Pred->getLocationContext(), ThisV); + } + + static SimpleProgramPointTag returnTag("ExprEngine : Call Return"); + PostStmt Loc(CE, callerCtx, &returnTag); + bool isNew; + ExplodedNode *N = G.getNode(Loc, state, false, &isNew); + N->addPredecessor(Pred, G); + if (!isNew) + return; + + // Perform the post-condition check of the CallExpr. + ExplodedNodeSet Dst; + NodeBuilderContext Ctx(Engine, calleeCtx->getCallSiteBlock(), N); + SaveAndRestore NBCSave(currentBuilderContext, + &Ctx); + SaveAndRestore CBISave(currentStmtIdx, calleeCtx->getIndex()); + + getCheckerManager().runCheckersForPostStmt(Dst, N, CE, *this, + /* wasInlined */ true); + + // Enqueue the next element in the block. + for (ExplodedNodeSet::iterator I = Dst.begin(), E = Dst.end(); I != E; ++I) { + Engine.getWorkList()->enqueue(*I, + calleeCtx->getCallSiteBlock(), + calleeCtx->getIndex()+1); + } +} + +static unsigned getNumberStackFrames(const LocationContext *LCtx) { + unsigned count = 0; + while (LCtx) { + if (isa(LCtx)) + ++count; + LCtx = LCtx->getParent(); + } + return count; +} + +// Determine if we should inline the call. +bool ExprEngine::shouldInlineDecl(const FunctionDecl *FD, ExplodedNode *Pred) { + AnalysisDeclContext *CalleeADC = AMgr.getAnalysisDeclContext(FD); + const CFG *CalleeCFG = CalleeADC->getCFG(); + + // It is possible that the CFG cannot be constructed. + // Be safe, and check if the CalleeCFG is valid. + if (!CalleeCFG) + return false; + + if (getNumberStackFrames(Pred->getLocationContext()) + == AMgr.InlineMaxStackDepth) + return false; + + if (Engine.FunctionSummaries->hasReachedMaxBlockCount(FD)) + return false; + + if (CalleeCFG->getNumBlockIDs() > AMgr.InlineMaxFunctionSize) + return false; + + return true; +} + +// For now, skip inlining variadic functions. +// We also don't inline blocks. +static bool shouldInlineCallExpr(const CallExpr *CE, ExprEngine *E) { + if (!E->getAnalysisManager().shouldInlineCall()) + return false; + QualType callee = CE->getCallee()->getType(); + const FunctionProtoType *FT = 0; + if (const PointerType *PT = callee->getAs()) + FT = dyn_cast(PT->getPointeeType()); + else if (const BlockPointerType *BT = callee->getAs()) { + // FIXME: inline blocks. + // FT = dyn_cast(BT->getPointeeType()); + (void) BT; + return false; + } + // If we have no prototype, assume the function is okay. + if (!FT) + return true; + + // Skip inlining of variadic functions. + return !FT->isVariadic(); +} + +bool ExprEngine::InlineCall(ExplodedNodeSet &Dst, + const CallExpr *CE, + ExplodedNode *Pred) { + if (!shouldInlineCallExpr(CE, this)) + return false; + + ProgramStateRef state = Pred->getState(); + const Expr *Callee = CE->getCallee(); + const FunctionDecl *FD = + state->getSVal(Callee, Pred->getLocationContext()).getAsFunctionDecl(); + if (!FD || !FD->hasBody(FD)) + return false; + + switch (CE->getStmtClass()) { + default: + // FIXME: Handle C++. + break; + case Stmt::CallExprClass: { + if (!shouldInlineDecl(FD, Pred)) + return false; + + // Construct a new stack frame for the callee. + AnalysisDeclContext *CalleeADC = AMgr.getAnalysisDeclContext(FD); + const StackFrameContext *CallerSFC = + Pred->getLocationContext()->getCurrentStackFrame(); + const StackFrameContext *CalleeSFC = + CalleeADC->getStackFrame(CallerSFC, CE, + currentBuilderContext->getBlock(), + currentStmtIdx); + + CallEnter Loc(CE, CalleeSFC, Pred->getLocationContext()); + bool isNew; + if (ExplodedNode *N = G.getNode(Loc, state, false, &isNew)) { + N->addPredecessor(Pred, G); + if (isNew) + Engine.getWorkList()->enqueue(N); + } + return true; + } + } + return false; +} + +static bool isPointerToConst(const ParmVarDecl *ParamDecl) { + QualType PointeeTy = ParamDecl->getOriginalType()->getPointeeType(); + if (PointeeTy != QualType() && PointeeTy.isConstQualified() && + !PointeeTy->isAnyPointerType() && !PointeeTy->isReferenceType()) { + return true; + } + return false; +} + +// Try to retrieve the function declaration and find the function parameter +// types which are pointers/references to a non-pointer const. +// We do not invalidate the corresponding argument regions. +static void findPtrToConstParams(llvm::SmallSet &PreserveArgs, + const CallOrObjCMessage &Call) { + const Decl *CallDecl = Call.getDecl(); + if (!CallDecl) + return; + + if (const FunctionDecl *FDecl = dyn_cast(CallDecl)) { + const IdentifierInfo *II = FDecl->getIdentifier(); + + // List the cases, where the region should be invalidated even if the + // argument is const. + if (II) { + StringRef FName = II->getName(); + // - 'int pthread_setspecific(ptheread_key k, const void *)' stores a + // value into thread local storage. The value can later be retrieved with + // 'void *ptheread_getspecific(pthread_key)'. So even thought the + // parameter is 'const void *', the region escapes through the call. + // - funopen - sets a buffer for future IO calls. + // - ObjC functions that end with "NoCopy" can free memory, of the passed + // in buffer. + // - Many CF containers allow objects to escape through custom + // allocators/deallocators upon container construction. + // - NSXXInsertXX, for example NSMapInsertIfAbsent, since they can + // be deallocated by NSMapRemove. + if (FName == "pthread_setspecific" || + FName == "funopen" || + FName.endswith("NoCopy") || + (FName.startswith("NS") && + (FName.find("Insert") != StringRef::npos)) || + Call.isCFCGAllowingEscape(FName)) + return; + } + + for (unsigned Idx = 0, E = Call.getNumArgs(); Idx != E; ++Idx) { + if (FDecl && Idx < FDecl->getNumParams()) { + if (isPointerToConst(FDecl->getParamDecl(Idx))) + PreserveArgs.insert(Idx); + } + } + return; + } + + if (const ObjCMethodDecl *MDecl = dyn_cast(CallDecl)) { + assert(MDecl->param_size() <= Call.getNumArgs()); + unsigned Idx = 0; + for (clang::ObjCMethodDecl::param_const_iterator + I = MDecl->param_begin(), E = MDecl->param_end(); I != E; ++I, ++Idx) { + if (isPointerToConst(*I)) + PreserveArgs.insert(Idx); + } + return; + } +} + +ProgramStateRef +ExprEngine::invalidateArguments(ProgramStateRef State, + const CallOrObjCMessage &Call, + const LocationContext *LC) { + SmallVector RegionsToInvalidate; + + if (Call.isObjCMessage()) { + // Invalidate all instance variables of the receiver of an ObjC message. + // FIXME: We should be able to do better with inter-procedural analysis. + if (const MemRegion *MR = Call.getInstanceMessageReceiver(LC).getAsRegion()) + RegionsToInvalidate.push_back(MR); + + } else if (Call.isCXXCall()) { + // Invalidate all instance variables for the callee of a C++ method call. + // FIXME: We should be able to do better with inter-procedural analysis. + // FIXME: We can probably do better for const versus non-const methods. + if (const MemRegion *Callee = Call.getCXXCallee().getAsRegion()) + RegionsToInvalidate.push_back(Callee); + + } else if (Call.isFunctionCall()) { + // Block calls invalidate all captured-by-reference values. + SVal CalleeVal = Call.getFunctionCallee(); + if (const MemRegion *Callee = CalleeVal.getAsRegion()) { + if (isa(Callee)) + RegionsToInvalidate.push_back(Callee); + } + } + + // Indexes of arguments whose values will be preserved by the call. + llvm::SmallSet PreserveArgs; + findPtrToConstParams(PreserveArgs, Call); + + for (unsigned idx = 0, e = Call.getNumArgs(); idx != e; ++idx) { + if (PreserveArgs.count(idx)) + continue; + + SVal V = Call.getArgSVal(idx); + + // If we are passing a location wrapped as an integer, unwrap it and + // invalidate the values referred by the location. + if (nonloc::LocAsInteger *Wrapped = dyn_cast(&V)) + V = Wrapped->getLoc(); + else if (!isa(V)) + continue; + + if (const MemRegion *R = V.getAsRegion()) { + // Invalidate the value of the variable passed by reference. + + // Are we dealing with an ElementRegion? If the element type is + // a basic integer type (e.g., char, int) and the underlying region + // is a variable region then strip off the ElementRegion. + // FIXME: We really need to think about this for the general case + // as sometimes we are reasoning about arrays and other times + // about (char*), etc., is just a form of passing raw bytes. + // e.g., void *p = alloca(); foo((char*)p); + if (const ElementRegion *ER = dyn_cast(R)) { + // Checking for 'integral type' is probably too promiscuous, but + // we'll leave it in for now until we have a systematic way of + // handling all of these cases. Eventually we need to come up + // with an interface to StoreManager so that this logic can be + // appropriately delegated to the respective StoreManagers while + // still allowing us to do checker-specific logic (e.g., + // invalidating reference counts), probably via callbacks. + if (ER->getElementType()->isIntegralOrEnumerationType()) { + const MemRegion *superReg = ER->getSuperRegion(); + if (isa(superReg) || isa(superReg) || + isa(superReg)) + R = cast(superReg); + } + // FIXME: What about layers of ElementRegions? + } + + // Mark this region for invalidation. We batch invalidate regions + // below for efficiency. + RegionsToInvalidate.push_back(R); + } else { + // Nuke all other arguments passed by reference. + // FIXME: is this necessary or correct? This handles the non-Region + // cases. Is it ever valid to store to these? + State = State->unbindLoc(cast(V)); + } + } + + // Invalidate designated regions using the batch invalidation API. + + // FIXME: We can have collisions on the conjured symbol if the + // expression *I also creates conjured symbols. We probably want + // to identify conjured symbols by an expression pair: the enclosing + // expression (the context) and the expression itself. This should + // disambiguate conjured symbols. + unsigned Count = currentBuilderContext->getCurrentBlockCount(); + StoreManager::InvalidatedSymbols IS; + + // NOTE: Even if RegionsToInvalidate is empty, we may still invalidate + // global variables. + return State->invalidateRegions(RegionsToInvalidate, + Call.getOriginExpr(), Count, LC, + &IS, &Call); + +} + +static ProgramStateRef getReplayWithoutInliningState(ExplodedNode *&N, + const CallExpr *CE) { + void *ReplayState = N->getState()->get(); + if (!ReplayState) + return 0; + const CallExpr *ReplayCE = reinterpret_cast(ReplayState); + if (CE == ReplayCE) { + return N->getState()->remove(); + } + return 0; +} + +void ExprEngine::VisitCallExpr(const CallExpr *CE, ExplodedNode *Pred, + ExplodedNodeSet &dst) { + // Perform the previsit of the CallExpr. + ExplodedNodeSet dstPreVisit; + getCheckerManager().runCheckersForPreStmt(dstPreVisit, Pred, CE, *this); + + // Now evaluate the call itself. + class DefaultEval : public GraphExpander { + ExprEngine &Eng; + const CallExpr *CE; + public: + + DefaultEval(ExprEngine &eng, const CallExpr *ce) + : Eng(eng), CE(ce) {} + virtual void expandGraph(ExplodedNodeSet &Dst, ExplodedNode *Pred) { + + ProgramStateRef state = getReplayWithoutInliningState(Pred, CE); + + // First, try to inline the call. + if (state == 0 && Eng.InlineCall(Dst, CE, Pred)) + return; + + // First handle the return value. + StmtNodeBuilder Bldr(Pred, Dst, *Eng.currentBuilderContext); + + // Get the callee. + const Expr *Callee = CE->getCallee()->IgnoreParens(); + if (state == 0) + state = Pred->getState(); + SVal L = state->getSVal(Callee, Pred->getLocationContext()); + + // Figure out the result type. We do this dance to handle references. + QualType ResultTy; + if (const FunctionDecl *FD = L.getAsFunctionDecl()) + ResultTy = FD->getResultType(); + else + ResultTy = CE->getType(); + + if (CE->isLValue()) + ResultTy = Eng.getContext().getPointerType(ResultTy); + + // Conjure a symbol value to use as the result. + SValBuilder &SVB = Eng.getSValBuilder(); + unsigned Count = Eng.currentBuilderContext->getCurrentBlockCount(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal RetVal = SVB.getConjuredSymbolVal(0, CE, LCtx, ResultTy, Count); + + // Generate a new state with the return value set. + state = state->BindExpr(CE, LCtx, RetVal); + + // Invalidate the arguments. + state = Eng.invalidateArguments(state, CallOrObjCMessage(CE, state, LCtx), + LCtx); + + // And make the result node. + Bldr.generateNode(CE, Pred, state); + } + }; + + // Finally, evaluate the function call. We try each of the checkers + // to see if the can evaluate the function call. + ExplodedNodeSet dstCallEvaluated; + DefaultEval defEval(*this, CE); + getCheckerManager().runCheckersForEvalCall(dstCallEvaluated, + dstPreVisit, + CE, *this, &defEval); + + // Finally, perform the post-condition check of the CallExpr and store + // the created nodes in 'Dst'. + getCheckerManager().runCheckersForPostStmt(dst, dstCallEvaluated, CE, + *this); +} + +void ExprEngine::VisitReturnStmt(const ReturnStmt *RS, ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + + ExplodedNodeSet dstPreVisit; + getCheckerManager().runCheckersForPreStmt(dstPreVisit, Pred, RS, *this); + + StmtNodeBuilder B(dstPreVisit, Dst, *currentBuilderContext); + + if (RS->getRetValue()) { + for (ExplodedNodeSet::iterator it = dstPreVisit.begin(), + ei = dstPreVisit.end(); it != ei; ++it) { + B.generateNode(RS, *it, (*it)->getState()); + } + } +} diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp new file mode 100644 index 0000000..c8ad70a --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineObjC.cpp @@ -0,0 +1,273 @@ +//=-- ExprEngineObjC.cpp - ExprEngine support for Objective-C ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ExprEngine's support for Objective-C expressions. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/StmtObjC.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" + +using namespace clang; +using namespace ento; + +void ExprEngine::VisitLvalObjCIvarRefExpr(const ObjCIvarRefExpr *Ex, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + SVal baseVal = state->getSVal(Ex->getBase(), LCtx); + SVal location = state->getLValue(Ex->getDecl(), baseVal); + + ExplodedNodeSet dstIvar; + StmtNodeBuilder Bldr(Pred, dstIvar, *currentBuilderContext); + Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, location)); + + // Perform the post-condition check of the ObjCIvarRefExpr and store + // the created nodes in 'Dst'. + getCheckerManager().runCheckersForPostStmt(Dst, dstIvar, Ex, *this); +} + +void ExprEngine::VisitObjCAtSynchronizedStmt(const ObjCAtSynchronizedStmt *S, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + getCheckerManager().runCheckersForPreStmt(Dst, Pred, S, *this); +} + +void ExprEngine::VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + + // ObjCForCollectionStmts are processed in two places. This method + // handles the case where an ObjCForCollectionStmt* occurs as one of the + // statements within a basic block. This transfer function does two things: + // + // (1) binds the next container value to 'element'. This creates a new + // node in the ExplodedGraph. + // + // (2) binds the value 0/1 to the ObjCForCollectionStmt* itself, indicating + // whether or not the container has any more elements. This value + // will be tested in ProcessBranch. We need to explicitly bind + // this value because a container can contain nil elements. + // + // FIXME: Eventually this logic should actually do dispatches to + // 'countByEnumeratingWithState:objects:count:' (NSFastEnumeration). + // This will require simulating a temporary NSFastEnumerationState, either + // through an SVal or through the use of MemRegions. This value can + // be affixed to the ObjCForCollectionStmt* instead of 0/1; when the loop + // terminates we reclaim the temporary (it goes out of scope) and we + // we can test if the SVal is 0 or if the MemRegion is null (depending + // on what approach we take). + // + // For now: simulate (1) by assigning either a symbol or nil if the + // container is empty. Thus this transfer function will by default + // result in state splitting. + + const Stmt *elem = S->getElement(); + ProgramStateRef state = Pred->getState(); + SVal elementV; + StmtNodeBuilder Bldr(Pred, Dst, *currentBuilderContext); + + if (const DeclStmt *DS = dyn_cast(elem)) { + const VarDecl *elemD = cast(DS->getSingleDecl()); + assert(elemD->getInit() == 0); + elementV = state->getLValue(elemD, Pred->getLocationContext()); + } + else { + elementV = state->getSVal(elem, Pred->getLocationContext()); + } + + ExplodedNodeSet dstLocation; + Bldr.takeNodes(Pred); + evalLocation(dstLocation, S, elem, Pred, state, elementV, NULL, false); + Bldr.addNodes(dstLocation); + + for (ExplodedNodeSet::iterator NI = dstLocation.begin(), + NE = dstLocation.end(); NI!=NE; ++NI) { + Pred = *NI; + ProgramStateRef state = Pred->getState(); + const LocationContext *LCtx = Pred->getLocationContext(); + + // Handle the case where the container still has elements. + SVal TrueV = svalBuilder.makeTruthVal(1); + ProgramStateRef hasElems = state->BindExpr(S, LCtx, TrueV); + + // Handle the case where the container has no elements. + SVal FalseV = svalBuilder.makeTruthVal(0); + ProgramStateRef noElems = state->BindExpr(S, LCtx, FalseV); + + if (loc::MemRegionVal *MV = dyn_cast(&elementV)) + if (const TypedValueRegion *R = + dyn_cast(MV->getRegion())) { + // FIXME: The proper thing to do is to really iterate over the + // container. We will do this with dispatch logic to the store. + // For now, just 'conjure' up a symbolic value. + QualType T = R->getValueType(); + assert(Loc::isLocType(T)); + unsigned Count = currentBuilderContext->getCurrentBlockCount(); + SymbolRef Sym = SymMgr.getConjuredSymbol(elem, LCtx, T, Count); + SVal V = svalBuilder.makeLoc(Sym); + hasElems = hasElems->bindLoc(elementV, V); + + // Bind the location to 'nil' on the false branch. + SVal nilV = svalBuilder.makeIntVal(0, T); + noElems = noElems->bindLoc(elementV, nilV); + } + + // Create the new nodes. + Bldr.generateNode(S, Pred, hasElems); + Bldr.generateNode(S, Pred, noElems); + } +} + +void ExprEngine::VisitObjCMessage(const ObjCMessage &msg, + ExplodedNode *Pred, + ExplodedNodeSet &Dst) { + + // Handle the previsits checks. + ExplodedNodeSet dstPrevisit; + getCheckerManager().runCheckersForPreObjCMessage(dstPrevisit, Pred, + msg, *this); + + // Proceed with evaluate the message expression. + ExplodedNodeSet dstEval; + StmtNodeBuilder Bldr(dstPrevisit, dstEval, *currentBuilderContext); + + for (ExplodedNodeSet::iterator DI = dstPrevisit.begin(), + DE = dstPrevisit.end(); DI != DE; ++DI) { + + ExplodedNode *Pred = *DI; + bool RaisesException = false; + + if (const Expr *Receiver = msg.getInstanceReceiver()) { + ProgramStateRef state = Pred->getState(); + SVal recVal = state->getSVal(Receiver, Pred->getLocationContext()); + if (!recVal.isUndef()) { + // Bifurcate the state into nil and non-nil ones. + DefinedOrUnknownSVal receiverVal = cast(recVal); + + ProgramStateRef notNilState, nilState; + llvm::tie(notNilState, nilState) = state->assume(receiverVal); + + // There are three cases: can be nil or non-nil, must be nil, must be + // non-nil. We ignore must be nil, and merge the rest two into non-nil. + if (nilState && !notNilState) { + continue; + } + + // Check if the "raise" message was sent. + assert(notNilState); + if (msg.getSelector() == RaiseSel) + RaisesException = true; + + // If we raise an exception, for now treat it as a sink. + // Eventually we will want to handle exceptions properly. + // Dispatch to plug-in transfer function. + evalObjCMessage(Bldr, msg, Pred, notNilState, RaisesException); + } + } + else if (const ObjCInterfaceDecl *Iface = msg.getReceiverInterface()) { + IdentifierInfo* ClsName = Iface->getIdentifier(); + Selector S = msg.getSelector(); + + // Check for special instance methods. + if (!NSExceptionII) { + ASTContext &Ctx = getContext(); + NSExceptionII = &Ctx.Idents.get("NSException"); + } + + if (ClsName == NSExceptionII) { + enum { NUM_RAISE_SELECTORS = 2 }; + + // Lazily create a cache of the selectors. + if (!NSExceptionInstanceRaiseSelectors) { + ASTContext &Ctx = getContext(); + NSExceptionInstanceRaiseSelectors = + new Selector[NUM_RAISE_SELECTORS]; + SmallVector II; + unsigned idx = 0; + + // raise:format: + II.push_back(&Ctx.Idents.get("raise")); + II.push_back(&Ctx.Idents.get("format")); + NSExceptionInstanceRaiseSelectors[idx++] = + Ctx.Selectors.getSelector(II.size(), &II[0]); + + // raise:format::arguments: + II.push_back(&Ctx.Idents.get("arguments")); + NSExceptionInstanceRaiseSelectors[idx++] = + Ctx.Selectors.getSelector(II.size(), &II[0]); + } + + for (unsigned i = 0; i < NUM_RAISE_SELECTORS; ++i) + if (S == NSExceptionInstanceRaiseSelectors[i]) { + RaisesException = true; + break; + } + } + + // If we raise an exception, for now treat it as a sink. + // Eventually we will want to handle exceptions properly. + // Dispatch to plug-in transfer function. + evalObjCMessage(Bldr, msg, Pred, Pred->getState(), RaisesException); + } + } + + // Finally, perform the post-condition check of the ObjCMessageExpr and store + // the created nodes in 'Dst'. + getCheckerManager().runCheckersForPostObjCMessage(Dst, dstEval, msg, *this); +} + +void ExprEngine::evalObjCMessage(StmtNodeBuilder &Bldr, + const ObjCMessage &msg, + ExplodedNode *Pred, + ProgramStateRef state, + bool GenSink) { + // First handle the return value. + SVal ReturnValue = UnknownVal(); + + // Some method families have known return values. + switch (msg.getMethodFamily()) { + default: + break; + case OMF_autorelease: + case OMF_retain: + case OMF_self: { + // These methods return their receivers. + const Expr *ReceiverE = msg.getInstanceReceiver(); + if (ReceiverE) + ReturnValue = state->getSVal(ReceiverE, Pred->getLocationContext()); + break; + } + } + + // If we failed to figure out the return value, use a conjured value instead. + if (ReturnValue.isUnknown()) { + SValBuilder &SVB = getSValBuilder(); + QualType ResultTy = msg.getResultType(getContext()); + unsigned Count = currentBuilderContext->getCurrentBlockCount(); + const Expr *CurrentE = cast(currentStmt); + const LocationContext *LCtx = Pred->getLocationContext(); + ReturnValue = SVB.getConjuredSymbolVal(NULL, CurrentE, LCtx, ResultTy, Count); + } + + // Bind the return value. + const LocationContext *LCtx = Pred->getLocationContext(); + state = state->BindExpr(currentStmt, LCtx, ReturnValue); + + // Invalidate the arguments (and the receiver) + state = invalidateArguments(state, CallOrObjCMessage(msg, state, LCtx), LCtx); + + // And create the new node. + Bldr.generateNode(msg.getMessageExpr(), Pred, state, GenSink); + assert(Bldr.hasGeneratedNodes()); +} + diff --git a/clang/lib/StaticAnalyzer/Core/FunctionSummary.cpp b/clang/lib/StaticAnalyzer/Core/FunctionSummary.cpp new file mode 100644 index 0000000..c227aac --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/FunctionSummary.cpp @@ -0,0 +1,38 @@ +//== FunctionSummary.h - Stores summaries of functions. ------------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a summary of a function gathered/used by static analyzes. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h" +using namespace clang; +using namespace ento; + +FunctionSummariesTy::~FunctionSummariesTy() { + for (MapTy::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { + delete(I->second); + } +} + +unsigned FunctionSummariesTy::getTotalNumBasicBlocks() { + unsigned Total = 0; + for (MapTy::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { + Total += I->second->TotalBasicBlocks; + } + return Total; +} + +unsigned FunctionSummariesTy::getTotalNumVisitedBasicBlocks() { + unsigned Total = 0; + for (MapTy::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { + Total += I->second->VisitedBasicBlocks.count(); + } + return Total; +} diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp new file mode 100644 index 0000000..629f1ea --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -0,0 +1,578 @@ +//===--- HTMLDiagnostics.cpp - HTML Diagnostics for Paths ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the HTMLDiagnostics object. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/FileManager.h" +#include "clang/Rewrite/Rewriter.h" +#include "clang/Rewrite/HTMLRewrite.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Path.h" + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// Boilerplate. +//===----------------------------------------------------------------------===// + +namespace { + +class HTMLDiagnostics : public PathDiagnosticConsumer { + llvm::sys::Path Directory, FilePrefix; + bool createdDir, noDir; + const Preprocessor &PP; +public: + HTMLDiagnostics(const std::string& prefix, const Preprocessor &pp); + + virtual ~HTMLDiagnostics() { FlushDiagnostics(NULL); } + + virtual void FlushDiagnosticsImpl(std::vector &Diags, + SmallVectorImpl *FilesMade); + + virtual StringRef getName() const { + return "HTMLDiagnostics"; + } + + unsigned ProcessMacroPiece(raw_ostream &os, + const PathDiagnosticMacroPiece& P, + unsigned num); + + void HandlePiece(Rewriter& R, FileID BugFileID, + const PathDiagnosticPiece& P, unsigned num, unsigned max); + + void HighlightRange(Rewriter& R, FileID BugFileID, SourceRange Range, + const char *HighlightStart = "", + const char *HighlightEnd = ""); + + void ReportDiag(const PathDiagnostic& D, + SmallVectorImpl *FilesMade); +}; + +} // end anonymous namespace + +HTMLDiagnostics::HTMLDiagnostics(const std::string& prefix, + const Preprocessor &pp) + : Directory(prefix), FilePrefix(prefix), createdDir(false), noDir(false), + PP(pp) { + // All html files begin with "report" + FilePrefix.appendComponent("report"); +} + +PathDiagnosticConsumer* +ento::createHTMLDiagnosticConsumer(const std::string& prefix, + const Preprocessor &PP) { + return new HTMLDiagnostics(prefix, PP); +} + +//===----------------------------------------------------------------------===// +// Report processing. +//===----------------------------------------------------------------------===// + +void HTMLDiagnostics::FlushDiagnosticsImpl( + std::vector &Diags, + SmallVectorImpl *FilesMade) { + for (std::vector::iterator it = Diags.begin(), + et = Diags.end(); it != et; ++it) { + ReportDiag(**it, FilesMade); + } +} + +static void flattenPath(PathPieces &primaryPath, PathPieces ¤tPath, + const PathPieces &oldPath) { + for (PathPieces::const_iterator it = oldPath.begin(), et = oldPath.end(); + it != et; ++it ) { + PathDiagnosticPiece *piece = it->getPtr(); + if (const PathDiagnosticCallPiece *call = + dyn_cast(piece)) { + IntrusiveRefCntPtr callEnter = + call->getCallEnterEvent(); + if (callEnter) + currentPath.push_back(callEnter); + flattenPath(primaryPath, primaryPath, call->path); + IntrusiveRefCntPtr callExit = + call->getCallExitEvent(); + if (callExit) + currentPath.push_back(callExit); + continue; + } + if (PathDiagnosticMacroPiece *macro = + dyn_cast(piece)) { + currentPath.push_back(piece); + PathPieces newPath; + flattenPath(primaryPath, newPath, macro->subPieces); + macro->subPieces = newPath; + continue; + } + + currentPath.push_back(piece); + } +} + +void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, + SmallVectorImpl *FilesMade) { + + // Create the HTML directory if it is missing. + if (!createdDir) { + createdDir = true; + std::string ErrorMsg; + Directory.createDirectoryOnDisk(true, &ErrorMsg); + + bool IsDirectory; + if (llvm::sys::fs::is_directory(Directory.str(), IsDirectory) || + !IsDirectory) { + llvm::errs() << "warning: could not create directory '" + << Directory.str() << "'\n" + << "reason: " << ErrorMsg << '\n'; + + noDir = true; + + return; + } + } + + if (noDir) + return; + + // First flatten out the entire path to make it easier to use. + PathPieces path; + flattenPath(path, path, D.path); + + // The path as already been prechecked that all parts of the path are + // from the same file and that it is non-empty. + const SourceManager &SMgr = (*path.begin())->getLocation().getManager(); + assert(!path.empty()); + FileID FID = + (*path.begin())->getLocation().asLocation().getExpansionLoc().getFileID(); + assert(!FID.isInvalid()); + + // Create a new rewriter to generate HTML. + Rewriter R(const_cast(SMgr), PP.getLangOpts()); + + // Process the path. + unsigned n = path.size(); + unsigned max = n; + + for (PathPieces::const_reverse_iterator I = path.rbegin(), + E = path.rend(); + I != E; ++I, --n) + HandlePiece(R, FID, **I, n, max); + + // Add line numbers, header, footer, etc. + + // unsigned FID = R.getSourceMgr().getMainFileID(); + html::EscapeText(R, FID); + html::AddLineNumbers(R, FID); + + // If we have a preprocessor, relex the file and syntax highlight. + // We might not have a preprocessor if we come from a deserialized AST file, + // for example. + + html::SyntaxHighlight(R, FID, PP); + html::HighlightMacros(R, FID, PP); + + // Get the full directory name of the analyzed file. + + const FileEntry* Entry = SMgr.getFileEntryForID(FID); + + // This is a cludge; basically we want to append either the full + // working directory if we have no directory information. This is + // a work in progress. + + std::string DirName = ""; + + if (llvm::sys::path::is_relative(Entry->getName())) { + llvm::sys::Path P = llvm::sys::Path::GetCurrentDirectory(); + DirName = P.str() + "/"; + } + + // Add the name of the file as an

tag. + + { + std::string s; + llvm::raw_string_ostream os(s); + + os << "\n" + << "

Bug Summary

\n\n" + "\n\n" + "\n"; + + // Output any other meta data. + + for (PathDiagnostic::meta_iterator I=D.meta_begin(), E=D.meta_end(); + I!=E; ++I) { + os << "\n"; + } + + os << "
File:" + << html::EscapeText(DirName) + << html::EscapeText(Entry->getName()) + << "
Location:" + "line " + << (*path.rbegin())->getLocation().asLocation().getExpansionLineNumber() + << ", column " + << (*path.rbegin())->getLocation().asLocation().getExpansionColumnNumber() + << "
Description:" + << D.getDescription() << "
" << html::EscapeText(*I) << "
\n\n" + "

Annotated Source Code

\n"; + + R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), os.str()); + } + + // Embed meta-data tags. + { + std::string s; + llvm::raw_string_ostream os(s); + + const std::string& BugDesc = D.getDescription(); + if (!BugDesc.empty()) + os << "\n\n"; + + const std::string& BugType = D.getBugType(); + if (!BugType.empty()) + os << "\n\n"; + + const std::string& BugCategory = D.getCategory(); + if (!BugCategory.empty()) + os << "\n\n"; + + os << "\n\n"; + + os << "\n\n"; + + os << "\n\n"; + + // Mark the end of the tags. + os << "\n\n"; + + // Insert the text. + R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), os.str()); + } + + // Add CSS, header, and footer. + + html::AddHeaderFooterInternalBuiltinCSS(R, FID, Entry->getName()); + + // Get the rewrite buffer. + const RewriteBuffer *Buf = R.getRewriteBufferFor(FID); + + if (!Buf) { + llvm::errs() << "warning: no diagnostics generated for main file.\n"; + return; + } + + // Create a path for the target HTML file. + llvm::sys::Path F(FilePrefix); + F.makeUnique(false, NULL); + + // Rename the file with an HTML extension. + llvm::sys::Path H(F); + H.appendSuffix("html"); + F.renamePathOnDisk(H, NULL); + + std::string ErrorMsg; + llvm::raw_fd_ostream os(H.c_str(), ErrorMsg); + + if (!ErrorMsg.empty()) { + llvm::errs() << "warning: could not create file '" << F.str() + << "'\n"; + return; + } + + if (FilesMade) + FilesMade->push_back(llvm::sys::path::filename(H.str())); + + // Emit the HTML to disk. + for (RewriteBuffer::iterator I = Buf->begin(), E = Buf->end(); I!=E; ++I) + os << *I; +} + +void HTMLDiagnostics::HandlePiece(Rewriter& R, FileID BugFileID, + const PathDiagnosticPiece& P, + unsigned num, unsigned max) { + + // For now, just draw a box above the line in question, and emit the + // warning. + FullSourceLoc Pos = P.getLocation().asLocation(); + + if (!Pos.isValid()) + return; + + SourceManager &SM = R.getSourceMgr(); + assert(&Pos.getManager() == &SM && "SourceManagers are different!"); + std::pair LPosInfo = SM.getDecomposedExpansionLoc(Pos); + + if (LPosInfo.first != BugFileID) + return; + + const llvm::MemoryBuffer *Buf = SM.getBuffer(LPosInfo.first); + const char* FileStart = Buf->getBufferStart(); + + // Compute the column number. Rewind from the current position to the start + // of the line. + unsigned ColNo = SM.getColumnNumber(LPosInfo.first, LPosInfo.second); + const char *TokInstantiationPtr =Pos.getExpansionLoc().getCharacterData(); + const char *LineStart = TokInstantiationPtr-ColNo; + + // Compute LineEnd. + const char *LineEnd = TokInstantiationPtr; + const char* FileEnd = Buf->getBufferEnd(); + while (*LineEnd != '\n' && LineEnd != FileEnd) + ++LineEnd; + + // Compute the margin offset by counting tabs and non-tabs. + unsigned PosNo = 0; + for (const char* c = LineStart; c != TokInstantiationPtr; ++c) + PosNo += *c == '\t' ? 8 : 1; + + // Create the html for the message. + + const char *Kind = 0; + switch (P.getKind()) { + case PathDiagnosticPiece::Call: + llvm_unreachable("Calls should already be handled"); + case PathDiagnosticPiece::Event: Kind = "Event"; break; + case PathDiagnosticPiece::ControlFlow: Kind = "Control"; break; + // Setting Kind to "Control" is intentional. + case PathDiagnosticPiece::Macro: Kind = "Control"; break; + } + + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + os << "\n
(P)) { + // Get the string and determining its maximum substring. + const std::string& Msg = P.getString(); + unsigned max_token = 0; + unsigned cnt = 0; + unsigned len = Msg.size(); + + for (std::string::const_iterator I=Msg.begin(), E=Msg.end(); I!=E; ++I) + switch (*I) { + default: + ++cnt; + continue; + case ' ': + case '\t': + case '\n': + if (cnt > max_token) max_token = cnt; + cnt = 0; + } + + if (cnt > max_token) + max_token = cnt; + + // Determine the approximate size of the message bubble in em. + unsigned em; + const unsigned max_line = 120; + + if (max_token >= max_line) + em = max_token / 2; + else { + unsigned characters = max_line; + unsigned lines = len / max_line; + + if (lines > 0) { + for (; characters > max_token; --characters) + if (len / characters > lines) { + ++characters; + break; + } + } + + em = characters / 2; + } + + if (em < max_line/2) + os << "; max-width:" << em << "em"; + } + else + os << "; max-width:100em"; + + os << "\">"; + + if (max > 1) { + os << "
"; + os << "
" << num << "
"; + os << "
"; + } + + if (const PathDiagnosticMacroPiece *MP = + dyn_cast(&P)) { + + os << "Within the expansion of the macro '"; + + // Get the name of the macro by relexing it. + { + FullSourceLoc L = MP->getLocation().asLocation().getExpansionLoc(); + assert(L.isFileID()); + StringRef BufferInfo = L.getBufferData(); + const char* MacroName = L.getDecomposedLoc().second + BufferInfo.data(); + Lexer rawLexer(L, PP.getLangOpts(), BufferInfo.begin(), + MacroName, BufferInfo.end()); + + Token TheTok; + rawLexer.LexFromRawLexer(TheTok); + for (unsigned i = 0, n = TheTok.getLength(); i < n; ++i) + os << MacroName[i]; + } + + os << "':\n"; + + if (max > 1) + os << "
"; + + // Within a macro piece. Write out each event. + ProcessMacroPiece(os, *MP, 0); + } + else { + os << html::EscapeText(P.getString()); + + if (max > 1) + os << ""; + } + + os << "
"; + + // Insert the new html. + unsigned DisplayPos = LineEnd - FileStart; + SourceLocation Loc = + SM.getLocForStartOfFile(LPosInfo.first).getLocWithOffset(DisplayPos); + + R.InsertTextBefore(Loc, os.str()); + + // Now highlight the ranges. + for (const SourceRange *I = P.ranges_begin(), *E = P.ranges_end(); + I != E; ++I) + HighlightRange(R, LPosInfo.first, *I); + +#if 0 + // If there is a code insertion hint, insert that code. + // FIXME: This code is disabled because it seems to mangle the HTML + // output. I'm leaving it here because it's generally the right idea, + // but needs some help from someone more familiar with the rewriter. + for (const FixItHint *Hint = P.fixit_begin(), *HintEnd = P.fixit_end(); + Hint != HintEnd; ++Hint) { + if (Hint->RemoveRange.isValid()) { + HighlightRange(R, LPosInfo.first, Hint->RemoveRange, + "", ""); + } + if (Hint->InsertionLoc.isValid()) { + std::string EscapedCode = html::EscapeText(Hint->CodeToInsert, true); + EscapedCode = "" + EscapedCode + + ""; + R.InsertTextBefore(Hint->InsertionLoc, EscapedCode); + } + } +#endif +} + +static void EmitAlphaCounter(raw_ostream &os, unsigned n) { + unsigned x = n % ('z' - 'a'); + n /= 'z' - 'a'; + + if (n > 0) + EmitAlphaCounter(os, n); + + os << char('a' + x); +} + +unsigned HTMLDiagnostics::ProcessMacroPiece(raw_ostream &os, + const PathDiagnosticMacroPiece& P, + unsigned num) { + + for (PathPieces::const_iterator I = P.subPieces.begin(), E=P.subPieces.end(); + I!=E; ++I) { + + if (const PathDiagnosticMacroPiece *MP = + dyn_cast(*I)) { + num = ProcessMacroPiece(os, *MP, num); + continue; + } + + if (PathDiagnosticEventPiece *EP = dyn_cast(*I)) { + os << "
" + "" + "
"; + EmitAlphaCounter(os, num++); + os << "
" + << html::EscapeText(EP->getString()) + << "
\n"; + } + } + + return num; +} + +void HTMLDiagnostics::HighlightRange(Rewriter& R, FileID BugFileID, + SourceRange Range, + const char *HighlightStart, + const char *HighlightEnd) { + SourceManager &SM = R.getSourceMgr(); + const LangOptions &LangOpts = R.getLangOpts(); + + SourceLocation InstantiationStart = SM.getExpansionLoc(Range.getBegin()); + unsigned StartLineNo = SM.getExpansionLineNumber(InstantiationStart); + + SourceLocation InstantiationEnd = SM.getExpansionLoc(Range.getEnd()); + unsigned EndLineNo = SM.getExpansionLineNumber(InstantiationEnd); + + if (EndLineNo < StartLineNo) + return; + + if (SM.getFileID(InstantiationStart) != BugFileID || + SM.getFileID(InstantiationEnd) != BugFileID) + return; + + // Compute the column number of the end. + unsigned EndColNo = SM.getExpansionColumnNumber(InstantiationEnd); + unsigned OldEndColNo = EndColNo; + + if (EndColNo) { + // Add in the length of the token, so that we cover multi-char tokens. + EndColNo += Lexer::MeasureTokenLength(Range.getEnd(), SM, LangOpts)-1; + } + + // Highlight the range. Make the span tag the outermost tag for the + // selected range. + + SourceLocation E = + InstantiationEnd.getLocWithOffset(EndColNo - OldEndColNo); + + html::HighlightRange(R, InstantiationStart, E, HighlightStart, HighlightEnd); +} diff --git a/clang/lib/StaticAnalyzer/Core/Makefile b/clang/lib/StaticAnalyzer/Core/Makefile new file mode 100644 index 0000000..4aebc16 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/Makefile @@ -0,0 +1,17 @@ +##===- clang/lib/StaticAnalyzer/Core/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements analyses built on top of source-level CFGs. +# +##===----------------------------------------------------------------------===## + +CLANG_LEVEL := ../../.. +LIBRARYNAME := clangStaticAnalyzerCore + +include $(CLANG_LEVEL)/Makefile diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp new file mode 100644 index 0000000..ed94c79 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -0,0 +1,1101 @@ +//== MemRegion.cpp - Abstract memory regions for static analysis --*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines MemRegion and its subclasses. MemRegion defines a +// partially-typed abstraction of memory useful for path-sensitive dataflow +// analyses. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" +#include "clang/Analysis/AnalysisContext.h" +#include "clang/Analysis/Support/BumpVector.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/RecordLayout.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// MemRegion Construction. +//===----------------------------------------------------------------------===// + +template struct MemRegionManagerTrait; + +template +RegionTy* MemRegionManager::getRegion(const A1 a1) { + + const typename MemRegionManagerTrait::SuperRegionTy *superRegion = + MemRegionManagerTrait::getSuperRegion(*this, a1); + + llvm::FoldingSetNodeID ID; + RegionTy::ProfileRegion(ID, a1, superRegion); + void *InsertPos; + RegionTy* R = cast_or_null(Regions.FindNodeOrInsertPos(ID, + InsertPos)); + + if (!R) { + R = (RegionTy*) A.Allocate(); + new (R) RegionTy(a1, superRegion); + Regions.InsertNode(R, InsertPos); + } + + return R; +} + +template +RegionTy* MemRegionManager::getSubRegion(const A1 a1, + const MemRegion *superRegion) { + llvm::FoldingSetNodeID ID; + RegionTy::ProfileRegion(ID, a1, superRegion); + void *InsertPos; + RegionTy* R = cast_or_null(Regions.FindNodeOrInsertPos(ID, + InsertPos)); + + if (!R) { + R = (RegionTy*) A.Allocate(); + new (R) RegionTy(a1, superRegion); + Regions.InsertNode(R, InsertPos); + } + + return R; +} + +template +RegionTy* MemRegionManager::getRegion(const A1 a1, const A2 a2) { + + const typename MemRegionManagerTrait::SuperRegionTy *superRegion = + MemRegionManagerTrait::getSuperRegion(*this, a1, a2); + + llvm::FoldingSetNodeID ID; + RegionTy::ProfileRegion(ID, a1, a2, superRegion); + void *InsertPos; + RegionTy* R = cast_or_null(Regions.FindNodeOrInsertPos(ID, + InsertPos)); + + if (!R) { + R = (RegionTy*) A.Allocate(); + new (R) RegionTy(a1, a2, superRegion); + Regions.InsertNode(R, InsertPos); + } + + return R; +} + +template +RegionTy* MemRegionManager::getSubRegion(const A1 a1, const A2 a2, + const MemRegion *superRegion) { + + llvm::FoldingSetNodeID ID; + RegionTy::ProfileRegion(ID, a1, a2, superRegion); + void *InsertPos; + RegionTy* R = cast_or_null(Regions.FindNodeOrInsertPos(ID, + InsertPos)); + + if (!R) { + R = (RegionTy*) A.Allocate(); + new (R) RegionTy(a1, a2, superRegion); + Regions.InsertNode(R, InsertPos); + } + + return R; +} + +template +RegionTy* MemRegionManager::getSubRegion(const A1 a1, const A2 a2, const A3 a3, + const MemRegion *superRegion) { + + llvm::FoldingSetNodeID ID; + RegionTy::ProfileRegion(ID, a1, a2, a3, superRegion); + void *InsertPos; + RegionTy* R = cast_or_null(Regions.FindNodeOrInsertPos(ID, + InsertPos)); + + if (!R) { + R = (RegionTy*) A.Allocate(); + new (R) RegionTy(a1, a2, a3, superRegion); + Regions.InsertNode(R, InsertPos); + } + + return R; +} + +//===----------------------------------------------------------------------===// +// Object destruction. +//===----------------------------------------------------------------------===// + +MemRegion::~MemRegion() {} + +MemRegionManager::~MemRegionManager() { + // All regions and their data are BumpPtrAllocated. No need to call + // their destructors. +} + +//===----------------------------------------------------------------------===// +// Basic methods. +//===----------------------------------------------------------------------===// + +bool SubRegion::isSubRegionOf(const MemRegion* R) const { + const MemRegion* r = getSuperRegion(); + while (r != 0) { + if (r == R) + return true; + if (const SubRegion* sr = dyn_cast(r)) + r = sr->getSuperRegion(); + else + break; + } + return false; +} + +MemRegionManager* SubRegion::getMemRegionManager() const { + const SubRegion* r = this; + do { + const MemRegion *superRegion = r->getSuperRegion(); + if (const SubRegion *sr = dyn_cast(superRegion)) { + r = sr; + continue; + } + return superRegion->getMemRegionManager(); + } while (1); +} + +const StackFrameContext *VarRegion::getStackFrame() const { + const StackSpaceRegion *SSR = dyn_cast(getMemorySpace()); + return SSR ? SSR->getStackFrame() : NULL; +} + +//===----------------------------------------------------------------------===// +// Region extents. +//===----------------------------------------------------------------------===// + +DefinedOrUnknownSVal DeclRegion::getExtent(SValBuilder &svalBuilder) const { + ASTContext &Ctx = svalBuilder.getContext(); + QualType T = getDesugaredValueType(Ctx); + + if (isa(T)) + return nonloc::SymbolVal(svalBuilder.getSymbolManager().getExtentSymbol(this)); + if (isa(T)) + return UnknownVal(); + + CharUnits size = Ctx.getTypeSizeInChars(T); + QualType sizeTy = svalBuilder.getArrayIndexType(); + return svalBuilder.makeIntVal(size.getQuantity(), sizeTy); +} + +DefinedOrUnknownSVal FieldRegion::getExtent(SValBuilder &svalBuilder) const { + DefinedOrUnknownSVal Extent = DeclRegion::getExtent(svalBuilder); + + // A zero-length array at the end of a struct often stands for dynamically- + // allocated extra memory. + if (Extent.isZeroConstant()) { + QualType T = getDesugaredValueType(svalBuilder.getContext()); + + if (isa(T)) + return UnknownVal(); + } + + return Extent; +} + +DefinedOrUnknownSVal AllocaRegion::getExtent(SValBuilder &svalBuilder) const { + return nonloc::SymbolVal(svalBuilder.getSymbolManager().getExtentSymbol(this)); +} + +DefinedOrUnknownSVal SymbolicRegion::getExtent(SValBuilder &svalBuilder) const { + return nonloc::SymbolVal(svalBuilder.getSymbolManager().getExtentSymbol(this)); +} + +DefinedOrUnknownSVal StringRegion::getExtent(SValBuilder &svalBuilder) const { + return svalBuilder.makeIntVal(getStringLiteral()->getByteLength()+1, + svalBuilder.getArrayIndexType()); +} + +ObjCIvarRegion::ObjCIvarRegion(const ObjCIvarDecl *ivd, const MemRegion* sReg) + : DeclRegion(ivd, sReg, ObjCIvarRegionKind) {} + +const ObjCIvarDecl *ObjCIvarRegion::getDecl() const { + return cast(D); +} + +QualType ObjCIvarRegion::getValueType() const { + return getDecl()->getType(); +} + +QualType CXXBaseObjectRegion::getValueType() const { + return QualType(decl->getTypeForDecl(), 0); +} + +//===----------------------------------------------------------------------===// +// FoldingSet profiling. +//===----------------------------------------------------------------------===// + +void MemSpaceRegion::Profile(llvm::FoldingSetNodeID& ID) const { + ID.AddInteger((unsigned)getKind()); +} + +void StackSpaceRegion::Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger((unsigned)getKind()); + ID.AddPointer(getStackFrame()); +} + +void StaticGlobalSpaceRegion::Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger((unsigned)getKind()); + ID.AddPointer(getCodeRegion()); +} + +void StringRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + const StringLiteral* Str, + const MemRegion* superRegion) { + ID.AddInteger((unsigned) StringRegionKind); + ID.AddPointer(Str); + ID.AddPointer(superRegion); +} + +void ObjCStringRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + const ObjCStringLiteral* Str, + const MemRegion* superRegion) { + ID.AddInteger((unsigned) ObjCStringRegionKind); + ID.AddPointer(Str); + ID.AddPointer(superRegion); +} + +void AllocaRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + const Expr *Ex, unsigned cnt, + const MemRegion *) { + ID.AddInteger((unsigned) AllocaRegionKind); + ID.AddPointer(Ex); + ID.AddInteger(cnt); +} + +void AllocaRegion::Profile(llvm::FoldingSetNodeID& ID) const { + ProfileRegion(ID, Ex, Cnt, superRegion); +} + +void CompoundLiteralRegion::Profile(llvm::FoldingSetNodeID& ID) const { + CompoundLiteralRegion::ProfileRegion(ID, CL, superRegion); +} + +void CompoundLiteralRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + const CompoundLiteralExpr *CL, + const MemRegion* superRegion) { + ID.AddInteger((unsigned) CompoundLiteralRegionKind); + ID.AddPointer(CL); + ID.AddPointer(superRegion); +} + +void CXXThisRegion::ProfileRegion(llvm::FoldingSetNodeID &ID, + const PointerType *PT, + const MemRegion *sRegion) { + ID.AddInteger((unsigned) CXXThisRegionKind); + ID.AddPointer(PT); + ID.AddPointer(sRegion); +} + +void CXXThisRegion::Profile(llvm::FoldingSetNodeID &ID) const { + CXXThisRegion::ProfileRegion(ID, ThisPointerTy, superRegion); +} + +void ObjCIvarRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + const ObjCIvarDecl *ivd, + const MemRegion* superRegion) { + DeclRegion::ProfileRegion(ID, ivd, superRegion, ObjCIvarRegionKind); +} + +void DeclRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, const Decl *D, + const MemRegion* superRegion, Kind k) { + ID.AddInteger((unsigned) k); + ID.AddPointer(D); + ID.AddPointer(superRegion); +} + +void DeclRegion::Profile(llvm::FoldingSetNodeID& ID) const { + DeclRegion::ProfileRegion(ID, D, superRegion, getKind()); +} + +void VarRegion::Profile(llvm::FoldingSetNodeID &ID) const { + VarRegion::ProfileRegion(ID, getDecl(), superRegion); +} + +void SymbolicRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, SymbolRef sym, + const MemRegion *sreg) { + ID.AddInteger((unsigned) MemRegion::SymbolicRegionKind); + ID.Add(sym); + ID.AddPointer(sreg); +} + +void SymbolicRegion::Profile(llvm::FoldingSetNodeID& ID) const { + SymbolicRegion::ProfileRegion(ID, sym, getSuperRegion()); +} + +void ElementRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + QualType ElementType, SVal Idx, + const MemRegion* superRegion) { + ID.AddInteger(MemRegion::ElementRegionKind); + ID.Add(ElementType); + ID.AddPointer(superRegion); + Idx.Profile(ID); +} + +void ElementRegion::Profile(llvm::FoldingSetNodeID& ID) const { + ElementRegion::ProfileRegion(ID, ElementType, Index, superRegion); +} + +void FunctionTextRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + const FunctionDecl *FD, + const MemRegion*) { + ID.AddInteger(MemRegion::FunctionTextRegionKind); + ID.AddPointer(FD); +} + +void FunctionTextRegion::Profile(llvm::FoldingSetNodeID& ID) const { + FunctionTextRegion::ProfileRegion(ID, FD, superRegion); +} + +void BlockTextRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + const BlockDecl *BD, CanQualType, + const AnalysisDeclContext *AC, + const MemRegion*) { + ID.AddInteger(MemRegion::BlockTextRegionKind); + ID.AddPointer(BD); +} + +void BlockTextRegion::Profile(llvm::FoldingSetNodeID& ID) const { + BlockTextRegion::ProfileRegion(ID, BD, locTy, AC, superRegion); +} + +void BlockDataRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, + const BlockTextRegion *BC, + const LocationContext *LC, + const MemRegion *sReg) { + ID.AddInteger(MemRegion::BlockDataRegionKind); + ID.AddPointer(BC); + ID.AddPointer(LC); + ID.AddPointer(sReg); +} + +void BlockDataRegion::Profile(llvm::FoldingSetNodeID& ID) const { + BlockDataRegion::ProfileRegion(ID, BC, LC, getSuperRegion()); +} + +void CXXTempObjectRegion::ProfileRegion(llvm::FoldingSetNodeID &ID, + Expr const *Ex, + const MemRegion *sReg) { + ID.AddPointer(Ex); + ID.AddPointer(sReg); +} + +void CXXTempObjectRegion::Profile(llvm::FoldingSetNodeID &ID) const { + ProfileRegion(ID, Ex, getSuperRegion()); +} + +void CXXBaseObjectRegion::ProfileRegion(llvm::FoldingSetNodeID &ID, + const CXXRecordDecl *decl, + const MemRegion *sReg) { + ID.AddPointer(decl); + ID.AddPointer(sReg); +} + +void CXXBaseObjectRegion::Profile(llvm::FoldingSetNodeID &ID) const { + ProfileRegion(ID, decl, superRegion); +} + +//===----------------------------------------------------------------------===// +// Region anchors. +//===----------------------------------------------------------------------===// + +void GlobalsSpaceRegion::anchor() { } +void HeapSpaceRegion::anchor() { } +void UnknownSpaceRegion::anchor() { } +void StackLocalsSpaceRegion::anchor() { } +void StackArgumentsSpaceRegion::anchor() { } +void TypedRegion::anchor() { } +void TypedValueRegion::anchor() { } +void CodeTextRegion::anchor() { } +void SubRegion::anchor() { } + +//===----------------------------------------------------------------------===// +// Region pretty-printing. +//===----------------------------------------------------------------------===// + +void MemRegion::dump() const { + dumpToStream(llvm::errs()); +} + +std::string MemRegion::getString() const { + std::string s; + llvm::raw_string_ostream os(s); + dumpToStream(os); + return os.str(); +} + +void MemRegion::dumpToStream(raw_ostream &os) const { + os << ""; +} + +void AllocaRegion::dumpToStream(raw_ostream &os) const { + os << "alloca{" << (void*) Ex << ',' << Cnt << '}'; +} + +void FunctionTextRegion::dumpToStream(raw_ostream &os) const { + os << "code{" << getDecl()->getDeclName().getAsString() << '}'; +} + +void BlockTextRegion::dumpToStream(raw_ostream &os) const { + os << "block_code{" << (void*) this << '}'; +} + +void BlockDataRegion::dumpToStream(raw_ostream &os) const { + os << "block_data{" << BC << '}'; +} + +void CompoundLiteralRegion::dumpToStream(raw_ostream &os) const { + // FIXME: More elaborate pretty-printing. + os << "{ " << (void*) CL << " }"; +} + +void CXXTempObjectRegion::dumpToStream(raw_ostream &os) const { + os << "temp_object{" << getValueType().getAsString() << ',' + << (void*) Ex << '}'; +} + +void CXXBaseObjectRegion::dumpToStream(raw_ostream &os) const { + os << "base " << decl->getName(); +} + +void CXXThisRegion::dumpToStream(raw_ostream &os) const { + os << "this"; +} + +void ElementRegion::dumpToStream(raw_ostream &os) const { + os << "element{" << superRegion << ',' + << Index << ',' << getElementType().getAsString() << '}'; +} + +void FieldRegion::dumpToStream(raw_ostream &os) const { + os << superRegion << "->" << *getDecl(); +} + +void ObjCIvarRegion::dumpToStream(raw_ostream &os) const { + os << "ivar{" << superRegion << ',' << *getDecl() << '}'; +} + +void StringRegion::dumpToStream(raw_ostream &os) const { + Str->printPretty(os, 0, PrintingPolicy(getContext().getLangOpts())); +} + +void ObjCStringRegion::dumpToStream(raw_ostream &os) const { + Str->printPretty(os, 0, PrintingPolicy(getContext().getLangOpts())); +} + +void SymbolicRegion::dumpToStream(raw_ostream &os) const { + os << "SymRegion{" << sym << '}'; +} + +void VarRegion::dumpToStream(raw_ostream &os) const { + os << *cast(D); +} + +void RegionRawOffset::dump() const { + dumpToStream(llvm::errs()); +} + +void RegionRawOffset::dumpToStream(raw_ostream &os) const { + os << "raw_offset{" << getRegion() << ',' << getOffset().getQuantity() << '}'; +} + +void StaticGlobalSpaceRegion::dumpToStream(raw_ostream &os) const { + os << "StaticGlobalsMemSpace{" << CR << '}'; +} + +void NonStaticGlobalSpaceRegion::dumpToStream(raw_ostream &os) const { + os << "NonStaticGlobalSpaceRegion"; +} + +void GlobalInternalSpaceRegion::dumpToStream(raw_ostream &os) const { + os << "GlobalInternalSpaceRegion"; +} + +void GlobalSystemSpaceRegion::dumpToStream(raw_ostream &os) const { + os << "GlobalSystemSpaceRegion"; +} + +void GlobalImmutableSpaceRegion::dumpToStream(raw_ostream &os) const { + os << "GlobalImmutableSpaceRegion"; +} + +void MemRegion::dumpPretty(raw_ostream &os) const { + return; +} + +void VarRegion::dumpPretty(raw_ostream &os) const { + os << getDecl()->getName(); +} + +void FieldRegion::dumpPretty(raw_ostream &os) const { + superRegion->dumpPretty(os); + os << "->" << getDecl(); +} + +//===----------------------------------------------------------------------===// +// MemRegionManager methods. +//===----------------------------------------------------------------------===// + +template +const REG *MemRegionManager::LazyAllocate(REG*& region) { + if (!region) { + region = (REG*) A.Allocate(); + new (region) REG(this); + } + + return region; +} + +template +const REG *MemRegionManager::LazyAllocate(REG*& region, ARG a) { + if (!region) { + region = (REG*) A.Allocate(); + new (region) REG(this, a); + } + + return region; +} + +const StackLocalsSpaceRegion* +MemRegionManager::getStackLocalsRegion(const StackFrameContext *STC) { + assert(STC); + StackLocalsSpaceRegion *&R = StackLocalsSpaceRegions[STC]; + + if (R) + return R; + + R = A.Allocate(); + new (R) StackLocalsSpaceRegion(this, STC); + return R; +} + +const StackArgumentsSpaceRegion * +MemRegionManager::getStackArgumentsRegion(const StackFrameContext *STC) { + assert(STC); + StackArgumentsSpaceRegion *&R = StackArgumentsSpaceRegions[STC]; + + if (R) + return R; + + R = A.Allocate(); + new (R) StackArgumentsSpaceRegion(this, STC); + return R; +} + +const GlobalsSpaceRegion +*MemRegionManager::getGlobalsRegion(MemRegion::Kind K, + const CodeTextRegion *CR) { + if (!CR) { + if (K == MemRegion::GlobalSystemSpaceRegionKind) + return LazyAllocate(SystemGlobals); + if (K == MemRegion::GlobalImmutableSpaceRegionKind) + return LazyAllocate(ImmutableGlobals); + assert(K == MemRegion::GlobalInternalSpaceRegionKind); + return LazyAllocate(InternalGlobals); + } + + assert(K == MemRegion::StaticGlobalSpaceRegionKind); + StaticGlobalSpaceRegion *&R = StaticsGlobalSpaceRegions[CR]; + if (R) + return R; + + R = A.Allocate(); + new (R) StaticGlobalSpaceRegion(this, CR); + return R; +} + +const HeapSpaceRegion *MemRegionManager::getHeapRegion() { + return LazyAllocate(heap); +} + +const MemSpaceRegion *MemRegionManager::getUnknownRegion() { + return LazyAllocate(unknown); +} + +const MemSpaceRegion *MemRegionManager::getCodeRegion() { + return LazyAllocate(code); +} + +//===----------------------------------------------------------------------===// +// Constructing regions. +//===----------------------------------------------------------------------===// +const StringRegion* MemRegionManager::getStringRegion(const StringLiteral* Str){ + return getSubRegion(Str, getGlobalsRegion()); +} + +const ObjCStringRegion * +MemRegionManager::getObjCStringRegion(const ObjCStringLiteral* Str){ + return getSubRegion(Str, getGlobalsRegion()); +} + +const VarRegion* MemRegionManager::getVarRegion(const VarDecl *D, + const LocationContext *LC) { + const MemRegion *sReg = 0; + + if (D->hasGlobalStorage() && !D->isStaticLocal()) { + + // First handle the globals defined in system headers. + if (C.getSourceManager().isInSystemHeader(D->getLocation())) { + // Whitelist the system globals which often DO GET modified, assume the + // rest are immutable. + if (D->getName().find("errno") != StringRef::npos) + sReg = getGlobalsRegion(MemRegion::GlobalSystemSpaceRegionKind); + else + sReg = getGlobalsRegion(MemRegion::GlobalImmutableSpaceRegionKind); + + // Treat other globals as GlobalInternal unless they are constants. + } else { + QualType GQT = D->getType(); + const Type *GT = GQT.getTypePtrOrNull(); + // TODO: We could walk the complex types here and see if everything is + // constified. + if (GT && GQT.isConstQualified() && GT->isArithmeticType()) + sReg = getGlobalsRegion(MemRegion::GlobalImmutableSpaceRegionKind); + else + sReg = getGlobalsRegion(); + } + + // Finally handle static locals. + } else { + // FIXME: Once we implement scope handling, we will need to properly lookup + // 'D' to the proper LocationContext. + const DeclContext *DC = D->getDeclContext(); + const StackFrameContext *STC = LC->getStackFrameForDeclContext(DC); + + if (!STC) + sReg = getUnknownRegion(); + else { + if (D->hasLocalStorage()) { + sReg = isa(D) || isa(D) + ? static_cast(getStackArgumentsRegion(STC)) + : static_cast(getStackLocalsRegion(STC)); + } + else { + assert(D->isStaticLocal()); + const Decl *D = STC->getDecl(); + if (const FunctionDecl *FD = dyn_cast(D)) + sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind, + getFunctionTextRegion(FD)); + else if (const BlockDecl *BD = dyn_cast(D)) { + const BlockTextRegion *BTR = + getBlockTextRegion(BD, + C.getCanonicalType(BD->getSignatureAsWritten()->getType()), + STC->getAnalysisDeclContext()); + sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind, + BTR); + } + else { + // FIXME: For ObjC-methods, we need a new CodeTextRegion. For now + // just use the main global memspace. + sReg = getGlobalsRegion(); + } + } + } + } + + return getSubRegion(D, sReg); +} + +const VarRegion *MemRegionManager::getVarRegion(const VarDecl *D, + const MemRegion *superR) { + return getSubRegion(D, superR); +} + +const BlockDataRegion * +MemRegionManager::getBlockDataRegion(const BlockTextRegion *BC, + const LocationContext *LC) { + const MemRegion *sReg = 0; + const BlockDecl *BD = BC->getDecl(); + if (!BD->hasCaptures()) { + // This handles 'static' blocks. + sReg = getGlobalsRegion(MemRegion::GlobalImmutableSpaceRegionKind); + } + else { + if (LC) { + // FIXME: Once we implement scope handling, we want the parent region + // to be the scope. + const StackFrameContext *STC = LC->getCurrentStackFrame(); + assert(STC); + sReg = getStackLocalsRegion(STC); + } + else { + // We allow 'LC' to be NULL for cases where want BlockDataRegions + // without context-sensitivity. + sReg = getUnknownRegion(); + } + } + + return getSubRegion(BC, LC, sReg); +} + +const CompoundLiteralRegion* +MemRegionManager::getCompoundLiteralRegion(const CompoundLiteralExpr *CL, + const LocationContext *LC) { + + const MemRegion *sReg = 0; + + if (CL->isFileScope()) + sReg = getGlobalsRegion(); + else { + const StackFrameContext *STC = LC->getCurrentStackFrame(); + assert(STC); + sReg = getStackLocalsRegion(STC); + } + + return getSubRegion(CL, sReg); +} + +const ElementRegion* +MemRegionManager::getElementRegion(QualType elementType, NonLoc Idx, + const MemRegion* superRegion, + ASTContext &Ctx){ + + QualType T = Ctx.getCanonicalType(elementType).getUnqualifiedType(); + + llvm::FoldingSetNodeID ID; + ElementRegion::ProfileRegion(ID, T, Idx, superRegion); + + void *InsertPos; + MemRegion* data = Regions.FindNodeOrInsertPos(ID, InsertPos); + ElementRegion* R = cast_or_null(data); + + if (!R) { + R = (ElementRegion*) A.Allocate(); + new (R) ElementRegion(T, Idx, superRegion); + Regions.InsertNode(R, InsertPos); + } + + return R; +} + +const FunctionTextRegion * +MemRegionManager::getFunctionTextRegion(const FunctionDecl *FD) { + return getSubRegion(FD, getCodeRegion()); +} + +const BlockTextRegion * +MemRegionManager::getBlockTextRegion(const BlockDecl *BD, CanQualType locTy, + AnalysisDeclContext *AC) { + return getSubRegion(BD, locTy, AC, getCodeRegion()); +} + + +/// getSymbolicRegion - Retrieve or create a "symbolic" memory region. +const SymbolicRegion *MemRegionManager::getSymbolicRegion(SymbolRef sym) { + return getSubRegion(sym, getUnknownRegion()); +} + +const FieldRegion* +MemRegionManager::getFieldRegion(const FieldDecl *d, + const MemRegion* superRegion){ + return getSubRegion(d, superRegion); +} + +const ObjCIvarRegion* +MemRegionManager::getObjCIvarRegion(const ObjCIvarDecl *d, + const MemRegion* superRegion) { + return getSubRegion(d, superRegion); +} + +const CXXTempObjectRegion* +MemRegionManager::getCXXTempObjectRegion(Expr const *E, + LocationContext const *LC) { + const StackFrameContext *SFC = LC->getCurrentStackFrame(); + assert(SFC); + return getSubRegion(E, getStackLocalsRegion(SFC)); +} + +const CXXBaseObjectRegion * +MemRegionManager::getCXXBaseObjectRegion(const CXXRecordDecl *decl, + const MemRegion *superRegion) { + return getSubRegion(decl, superRegion); +} + +const CXXThisRegion* +MemRegionManager::getCXXThisRegion(QualType thisPointerTy, + const LocationContext *LC) { + const StackFrameContext *STC = LC->getCurrentStackFrame(); + assert(STC); + const PointerType *PT = thisPointerTy->getAs(); + assert(PT); + return getSubRegion(PT, getStackArgumentsRegion(STC)); +} + +const AllocaRegion* +MemRegionManager::getAllocaRegion(const Expr *E, unsigned cnt, + const LocationContext *LC) { + const StackFrameContext *STC = LC->getCurrentStackFrame(); + assert(STC); + return getSubRegion(E, cnt, getStackLocalsRegion(STC)); +} + +const MemSpaceRegion *MemRegion::getMemorySpace() const { + const MemRegion *R = this; + const SubRegion* SR = dyn_cast(this); + + while (SR) { + R = SR->getSuperRegion(); + SR = dyn_cast(R); + } + + return dyn_cast(R); +} + +bool MemRegion::hasStackStorage() const { + return isa(getMemorySpace()); +} + +bool MemRegion::hasStackNonParametersStorage() const { + return isa(getMemorySpace()); +} + +bool MemRegion::hasStackParametersStorage() const { + return isa(getMemorySpace()); +} + +bool MemRegion::hasGlobalsOrParametersStorage() const { + const MemSpaceRegion *MS = getMemorySpace(); + return isa(MS) || + isa(MS); +} + +// getBaseRegion strips away all elements and fields, and get the base region +// of them. +const MemRegion *MemRegion::getBaseRegion() const { + const MemRegion *R = this; + while (true) { + switch (R->getKind()) { + case MemRegion::ElementRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + case MemRegion::CXXBaseObjectRegionKind: + R = cast(R)->getSuperRegion(); + continue; + default: + break; + } + break; + } + return R; +} + +//===----------------------------------------------------------------------===// +// View handling. +//===----------------------------------------------------------------------===// + +const MemRegion *MemRegion::StripCasts() const { + const MemRegion *R = this; + while (true) { + if (const ElementRegion *ER = dyn_cast(R)) { + // FIXME: generalize. Essentially we want to strip away ElementRegions + // that were layered on a symbolic region because of casts. We only + // want to strip away ElementRegions, however, where the index is 0. + SVal index = ER->getIndex(); + if (nonloc::ConcreteInt *CI = dyn_cast(&index)) { + if (CI->getValue().getSExtValue() == 0) { + R = ER->getSuperRegion(); + continue; + } + } + } + break; + } + return R; +} + +// FIXME: Merge with the implementation of the same method in Store.cpp +static bool IsCompleteType(ASTContext &Ctx, QualType Ty) { + if (const RecordType *RT = Ty->getAs()) { + const RecordDecl *D = RT->getDecl(); + if (!D->getDefinition()) + return false; + } + + return true; +} + +RegionRawOffset ElementRegion::getAsArrayOffset() const { + CharUnits offset = CharUnits::Zero(); + const ElementRegion *ER = this; + const MemRegion *superR = NULL; + ASTContext &C = getContext(); + + // FIXME: Handle multi-dimensional arrays. + + while (ER) { + superR = ER->getSuperRegion(); + + // FIXME: generalize to symbolic offsets. + SVal index = ER->getIndex(); + if (nonloc::ConcreteInt *CI = dyn_cast(&index)) { + // Update the offset. + int64_t i = CI->getValue().getSExtValue(); + + if (i != 0) { + QualType elemType = ER->getElementType(); + + // If we are pointing to an incomplete type, go no further. + if (!IsCompleteType(C, elemType)) { + superR = ER; + break; + } + + CharUnits size = C.getTypeSizeInChars(elemType); + offset += (i * size); + } + + // Go to the next ElementRegion (if any). + ER = dyn_cast(superR); + continue; + } + + return NULL; + } + + assert(superR && "super region cannot be NULL"); + return RegionRawOffset(superR, offset); +} + +RegionOffset MemRegion::getAsOffset() const { + const MemRegion *R = this; + int64_t Offset = 0; + + while (1) { + switch (R->getKind()) { + default: + return RegionOffset(0); + case SymbolicRegionKind: + case AllocaRegionKind: + case CompoundLiteralRegionKind: + case CXXThisRegionKind: + case StringRegionKind: + case VarRegionKind: + case CXXTempObjectRegionKind: + goto Finish; + case ElementRegionKind: { + const ElementRegion *ER = cast(R); + QualType EleTy = ER->getValueType(); + + if (!IsCompleteType(getContext(), EleTy)) + return RegionOffset(0); + + SVal Index = ER->getIndex(); + if (const nonloc::ConcreteInt *CI=dyn_cast(&Index)) { + int64_t i = CI->getValue().getSExtValue(); + CharUnits Size = getContext().getTypeSizeInChars(EleTy); + Offset += i * Size.getQuantity() * 8; + } else { + // We cannot compute offset for non-concrete index. + return RegionOffset(0); + } + R = ER->getSuperRegion(); + break; + } + case FieldRegionKind: { + const FieldRegion *FR = cast(R); + const RecordDecl *RD = FR->getDecl()->getParent(); + if (!RD->isCompleteDefinition()) + // We cannot compute offset for incomplete type. + return RegionOffset(0); + // Get the field number. + unsigned idx = 0; + for (RecordDecl::field_iterator FI = RD->field_begin(), + FE = RD->field_end(); FI != FE; ++FI, ++idx) + if (FR->getDecl() == *FI) + break; + + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + // This is offset in bits. + Offset += Layout.getFieldOffset(idx); + R = FR->getSuperRegion(); + break; + } + } + } + + Finish: + return RegionOffset(R, Offset); +} + +//===----------------------------------------------------------------------===// +// BlockDataRegion +//===----------------------------------------------------------------------===// + +void BlockDataRegion::LazyInitializeReferencedVars() { + if (ReferencedVars) + return; + + AnalysisDeclContext *AC = getCodeRegion()->getAnalysisDeclContext(); + AnalysisDeclContext::referenced_decls_iterator I, E; + llvm::tie(I, E) = AC->getReferencedBlockVars(BC->getDecl()); + + if (I == E) { + ReferencedVars = (void*) 0x1; + return; + } + + MemRegionManager &MemMgr = *getMemRegionManager(); + llvm::BumpPtrAllocator &A = MemMgr.getAllocator(); + BumpVectorContext BC(A); + + typedef BumpVector VarVec; + VarVec *BV = (VarVec*) A.Allocate(); + new (BV) VarVec(BC, E - I); + + for ( ; I != E; ++I) { + const VarDecl *VD = *I; + const VarRegion *VR = 0; + + if (!VD->getAttr() && VD->hasLocalStorage()) + VR = MemMgr.getVarRegion(VD, this); + else { + if (LC) + VR = MemMgr.getVarRegion(VD, LC); + else { + VR = MemMgr.getVarRegion(VD, MemMgr.getUnknownRegion()); + } + } + + assert(VR); + BV->push_back(VR, BC); + } + + ReferencedVars = BV; +} + +BlockDataRegion::referenced_vars_iterator +BlockDataRegion::referenced_vars_begin() const { + const_cast(this)->LazyInitializeReferencedVars(); + + BumpVector *Vec = + static_cast*>(ReferencedVars); + + return BlockDataRegion::referenced_vars_iterator(Vec == (void*) 0x1 ? + NULL : Vec->begin()); +} + +BlockDataRegion::referenced_vars_iterator +BlockDataRegion::referenced_vars_end() const { + const_cast(this)->LazyInitializeReferencedVars(); + + BumpVector *Vec = + static_cast*>(ReferencedVars); + + return BlockDataRegion::referenced_vars_iterator(Vec == (void*) 0x1 ? + NULL : Vec->end()); +} diff --git a/clang/lib/StaticAnalyzer/Core/ObjCMessage.cpp b/clang/lib/StaticAnalyzer/Core/ObjCMessage.cpp new file mode 100644 index 0000000..65cdcd9 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/ObjCMessage.cpp @@ -0,0 +1,90 @@ +//===- ObjCMessage.cpp - Wrapper for ObjC messages and dot syntax -*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ObjCMessage which serves as a common wrapper for ObjC +// message expressions or implicit messages for loading/storing ObjC properties. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/AST/DeclCXX.h" + +using namespace clang; +using namespace ento; + +QualType CallOrObjCMessage::getResultType(ASTContext &ctx) const { + QualType resultTy; + bool isLVal = false; + + if (isObjCMessage()) { + resultTy = Msg.getResultType(ctx); + } else if (const CXXConstructExpr *Ctor = + CallE.dyn_cast()) { + resultTy = Ctor->getType(); + } else { + const CallExpr *FunctionCall = CallE.get(); + + isLVal = FunctionCall->isLValue(); + const Expr *Callee = FunctionCall->getCallee(); + if (const FunctionDecl *FD = State->getSVal(Callee, LCtx).getAsFunctionDecl()) + resultTy = FD->getResultType(); + else + resultTy = FunctionCall->getType(); + } + + if (isLVal) + resultTy = ctx.getPointerType(resultTy); + + return resultTy; +} + +SVal CallOrObjCMessage::getFunctionCallee() const { + assert(isFunctionCall()); + assert(!isCXXCall()); + const Expr *Fun = CallE.get()->getCallee()->IgnoreParens(); + return State->getSVal(Fun, LCtx); +} + +SVal CallOrObjCMessage::getCXXCallee() const { + assert(isCXXCall()); + const CallExpr *ActualCall = CallE.get(); + const Expr *callee = + cast(ActualCall)->getImplicitObjectArgument(); + + // FIXME: Will eventually need to cope with member pointers. This is + // a limitation in getImplicitObjectArgument(). + if (!callee) + return UnknownVal(); + + return State->getSVal(callee, LCtx); +} + +SVal +CallOrObjCMessage::getInstanceMessageReceiver(const LocationContext *LC) const { + assert(isObjCMessage()); + return Msg.getInstanceReceiverSVal(State, LC); +} + +const Decl *CallOrObjCMessage::getDecl() const { + if (isCXXCall()) { + const CXXMemberCallExpr *CE = + cast(CallE.dyn_cast()); + assert(CE); + return CE->getMethodDecl(); + } else if (isObjCMessage()) { + return Msg.getMethodDecl(); + } else if (isFunctionCall()) { + // In case of a C style call, use the path sensitive information to find + // the function declaration. + SVal CalleeVal = getFunctionCallee(); + return CalleeVal.getAsFunctionDecl(); + } + return 0; +} + diff --git a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp new file mode 100644 index 0000000..01dd965 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp @@ -0,0 +1,755 @@ +//===--- PathDiagnostic.cpp - Path-Specific Diagnostic Handling -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PathDiagnostic-related interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/Basic/SourceManager.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/StmtCXX.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +bool PathDiagnosticMacroPiece::containsEvent() const { + for (PathPieces::const_iterator I = subPieces.begin(), E = subPieces.end(); + I!=E; ++I) { + if (isa(*I)) + return true; + if (PathDiagnosticMacroPiece *MP = dyn_cast(*I)) + if (MP->containsEvent()) + return true; + } + return false; +} + +static StringRef StripTrailingDots(StringRef s) { + for (StringRef::size_type i = s.size(); i != 0; --i) + if (s[i - 1] != '.') + return s.substr(0, i); + return ""; +} + +PathDiagnosticPiece::PathDiagnosticPiece(StringRef s, + Kind k, DisplayHint hint) + : str(StripTrailingDots(s)), kind(k), Hint(hint) {} + +PathDiagnosticPiece::PathDiagnosticPiece(Kind k, DisplayHint hint) + : kind(k), Hint(hint) {} + +PathDiagnosticPiece::~PathDiagnosticPiece() {} +PathDiagnosticEventPiece::~PathDiagnosticEventPiece() {} +PathDiagnosticCallPiece::~PathDiagnosticCallPiece() {} +PathDiagnosticControlFlowPiece::~PathDiagnosticControlFlowPiece() {} +PathDiagnosticMacroPiece::~PathDiagnosticMacroPiece() {} + + +PathPieces::~PathPieces() {} +PathDiagnostic::~PathDiagnostic() {} + +PathDiagnostic::PathDiagnostic(const Decl *declWithIssue, + StringRef bugtype, StringRef desc, + StringRef category) + : DeclWithIssue(declWithIssue), + BugType(StripTrailingDots(bugtype)), + Desc(StripTrailingDots(desc)), + Category(StripTrailingDots(category)), + path(pathImpl) {} + +void PathDiagnosticConsumer::anchor() { } + +PathDiagnosticConsumer::~PathDiagnosticConsumer() { + // Delete the contents of the FoldingSet if it isn't empty already. + for (llvm::FoldingSet::iterator it = + Diags.begin(), et = Diags.end() ; it != et ; ++it) { + delete &*it; + } +} + +void PathDiagnosticConsumer::HandlePathDiagnostic(PathDiagnostic *D) { + llvm::OwningPtr OwningD(D); + + if (!D || D->path.empty()) + return; + + // We need to flatten the locations (convert Stmt* to locations) because + // the referenced statements may be freed by the time the diagnostics + // are emitted. + D->flattenLocations(); + + // If the PathDiagnosticConsumer does not support diagnostics that + // cross file boundaries, prune out such diagnostics now. + if (!supportsCrossFileDiagnostics()) { + // Verify that the entire path is from the same FileID. + FileID FID; + const SourceManager &SMgr = (*D->path.begin())->getLocation().getManager(); + llvm::SmallVector WorkList; + WorkList.push_back(&D->path); + + while (!WorkList.empty()) { + const PathPieces &path = *WorkList.back(); + WorkList.pop_back(); + + for (PathPieces::const_iterator I = path.begin(), E = path.end(); + I != E; ++I) { + const PathDiagnosticPiece *piece = I->getPtr(); + FullSourceLoc L = piece->getLocation().asLocation().getExpansionLoc(); + + if (FID.isInvalid()) { + FID = SMgr.getFileID(L); + } else if (SMgr.getFileID(L) != FID) + return; // FIXME: Emit a warning? + + // Check the source ranges. + for (PathDiagnosticPiece::range_iterator RI = piece->ranges_begin(), + RE = piece->ranges_end(); + RI != RE; ++RI) { + SourceLocation L = SMgr.getExpansionLoc(RI->getBegin()); + if (!L.isFileID() || SMgr.getFileID(L) != FID) + return; // FIXME: Emit a warning? + L = SMgr.getExpansionLoc(RI->getEnd()); + if (!L.isFileID() || SMgr.getFileID(L) != FID) + return; // FIXME: Emit a warning? + } + + if (const PathDiagnosticCallPiece *call = + dyn_cast(piece)) { + WorkList.push_back(&call->path); + } + else if (const PathDiagnosticMacroPiece *macro = + dyn_cast(piece)) { + WorkList.push_back(¯o->subPieces); + } + } + } + + if (FID.isInvalid()) + return; // FIXME: Emit a warning? + } + + // Profile the node to see if we already have something matching it + llvm::FoldingSetNodeID profile; + D->Profile(profile); + void *InsertPos = 0; + + if (PathDiagnostic *orig = Diags.FindNodeOrInsertPos(profile, InsertPos)) { + // Keep the PathDiagnostic with the shorter path. + const unsigned orig_size = orig->full_size(); + const unsigned new_size = D->full_size(); + + if (orig_size <= new_size) { + bool shouldKeepOriginal = true; + if (orig_size == new_size) { + // Here we break ties in a fairly arbitrary, but deterministic, way. + llvm::FoldingSetNodeID fullProfile, fullProfileOrig; + D->FullProfile(fullProfile); + orig->FullProfile(fullProfileOrig); + if (fullProfile.ComputeHash() < fullProfileOrig.ComputeHash()) + shouldKeepOriginal = false; + } + + if (shouldKeepOriginal) + return; + } + Diags.RemoveNode(orig); + delete orig; + } + + Diags.InsertNode(OwningD.take()); +} + + +namespace { +struct CompareDiagnostics { + // Compare if 'X' is "<" than 'Y'. + bool operator()(const PathDiagnostic *X, const PathDiagnostic *Y) const { + // First compare by location + const FullSourceLoc &XLoc = X->getLocation().asLocation(); + const FullSourceLoc &YLoc = Y->getLocation().asLocation(); + if (XLoc < YLoc) + return true; + if (XLoc != YLoc) + return false; + + // Next, compare by bug type. + StringRef XBugType = X->getBugType(); + StringRef YBugType = Y->getBugType(); + if (XBugType < YBugType) + return true; + if (XBugType != YBugType) + return false; + + // Next, compare by bug description. + StringRef XDesc = X->getDescription(); + StringRef YDesc = Y->getDescription(); + if (XDesc < YDesc) + return true; + if (XDesc != YDesc) + return false; + + // FIXME: Further refine by comparing PathDiagnosticPieces? + return false; + } +}; +} + +void +PathDiagnosticConsumer::FlushDiagnostics(SmallVectorImpl *Files) { + if (flushed) + return; + + flushed = true; + + std::vector BatchDiags; + for (llvm::FoldingSet::iterator it = Diags.begin(), + et = Diags.end(); it != et; ++it) { + BatchDiags.push_back(&*it); + } + + // Clear out the FoldingSet. + Diags.clear(); + + // Sort the diagnostics so that they are always emitted in a deterministic + // order. + if (!BatchDiags.empty()) + std::sort(BatchDiags.begin(), BatchDiags.end(), CompareDiagnostics()); + + FlushDiagnosticsImpl(BatchDiags, Files); + + // Delete the flushed diagnostics. + for (std::vector::iterator it = BatchDiags.begin(), + et = BatchDiags.end(); it != et; ++it) { + const PathDiagnostic *D = *it; + delete D; + } +} + +//===----------------------------------------------------------------------===// +// PathDiagnosticLocation methods. +//===----------------------------------------------------------------------===// + +static SourceLocation getValidSourceLocation(const Stmt* S, + LocationOrAnalysisDeclContext LAC) { + SourceLocation L = S->getLocStart(); + assert(!LAC.isNull() && "A valid LocationContext or AnalysisDeclContext should " + "be passed to PathDiagnosticLocation upon creation."); + + // S might be a temporary statement that does not have a location in the + // source code, so find an enclosing statement and use it's location. + if (!L.isValid()) { + + ParentMap *PM = 0; + if (LAC.is()) + PM = &LAC.get()->getParentMap(); + else + PM = &LAC.get()->getParentMap(); + + while (!L.isValid()) { + S = PM->getParent(S); + L = S->getLocStart(); + } + } + + return L; +} + +PathDiagnosticLocation + PathDiagnosticLocation::createBegin(const Decl *D, + const SourceManager &SM) { + return PathDiagnosticLocation(D->getLocStart(), SM, SingleLocK); +} + +PathDiagnosticLocation + PathDiagnosticLocation::createBegin(const Stmt *S, + const SourceManager &SM, + LocationOrAnalysisDeclContext LAC) { + return PathDiagnosticLocation(getValidSourceLocation(S, LAC), + SM, SingleLocK); +} + +PathDiagnosticLocation + PathDiagnosticLocation::createOperatorLoc(const BinaryOperator *BO, + const SourceManager &SM) { + return PathDiagnosticLocation(BO->getOperatorLoc(), SM, SingleLocK); +} + +PathDiagnosticLocation + PathDiagnosticLocation::createMemberLoc(const MemberExpr *ME, + const SourceManager &SM) { + return PathDiagnosticLocation(ME->getMemberLoc(), SM, SingleLocK); +} + +PathDiagnosticLocation + PathDiagnosticLocation::createBeginBrace(const CompoundStmt *CS, + const SourceManager &SM) { + SourceLocation L = CS->getLBracLoc(); + return PathDiagnosticLocation(L, SM, SingleLocK); +} + +PathDiagnosticLocation + PathDiagnosticLocation::createEndBrace(const CompoundStmt *CS, + const SourceManager &SM) { + SourceLocation L = CS->getRBracLoc(); + return PathDiagnosticLocation(L, SM, SingleLocK); +} + +PathDiagnosticLocation + PathDiagnosticLocation::createDeclBegin(const LocationContext *LC, + const SourceManager &SM) { + // FIXME: Should handle CXXTryStmt if analyser starts supporting C++. + if (const CompoundStmt *CS = + dyn_cast_or_null(LC->getDecl()->getBody())) + if (!CS->body_empty()) { + SourceLocation Loc = (*CS->body_begin())->getLocStart(); + return PathDiagnosticLocation(Loc, SM, SingleLocK); + } + + return PathDiagnosticLocation(); +} + +PathDiagnosticLocation + PathDiagnosticLocation::createDeclEnd(const LocationContext *LC, + const SourceManager &SM) { + SourceLocation L = LC->getDecl()->getBodyRBrace(); + return PathDiagnosticLocation(L, SM, SingleLocK); +} + +PathDiagnosticLocation + PathDiagnosticLocation::create(const ProgramPoint& P, + const SourceManager &SMng) { + + const Stmt* S = 0; + if (const BlockEdge *BE = dyn_cast(&P)) { + const CFGBlock *BSrc = BE->getSrc(); + S = BSrc->getTerminatorCondition(); + } + else if (const PostStmt *PS = dyn_cast(&P)) { + S = PS->getStmt(); + } + + return PathDiagnosticLocation(S, SMng, P.getLocationContext()); +} + +PathDiagnosticLocation + PathDiagnosticLocation::createEndOfPath(const ExplodedNode* N, + const SourceManager &SM) { + assert(N && "Cannot create a location with a null node."); + + const ExplodedNode *NI = N; + + while (NI) { + ProgramPoint P = NI->getLocation(); + const LocationContext *LC = P.getLocationContext(); + if (const StmtPoint *PS = dyn_cast(&P)) + return PathDiagnosticLocation(PS->getStmt(), SM, LC); + else if (const BlockEdge *BE = dyn_cast(&P)) { + const Stmt *Term = BE->getSrc()->getTerminator(); + if (Term) { + return PathDiagnosticLocation(Term, SM, LC); + } + } + NI = NI->succ_empty() ? 0 : *(NI->succ_begin()); + } + + return createDeclEnd(N->getLocationContext(), SM); +} + +PathDiagnosticLocation PathDiagnosticLocation::createSingleLocation( + const PathDiagnosticLocation &PDL) { + FullSourceLoc L = PDL.asLocation(); + return PathDiagnosticLocation(L, L.getManager(), SingleLocK); +} + +FullSourceLoc + PathDiagnosticLocation::genLocation(SourceLocation L, + LocationOrAnalysisDeclContext LAC) const { + assert(isValid()); + // Note that we want a 'switch' here so that the compiler can warn us in + // case we add more cases. + switch (K) { + case SingleLocK: + case RangeK: + break; + case StmtK: + // Defensive checking. + if (!S) + break; + return FullSourceLoc(getValidSourceLocation(S, LAC), + const_cast(*SM)); + case DeclK: + // Defensive checking. + if (!D) + break; + return FullSourceLoc(D->getLocation(), const_cast(*SM)); + } + + return FullSourceLoc(L, const_cast(*SM)); +} + +PathDiagnosticRange + PathDiagnosticLocation::genRange(LocationOrAnalysisDeclContext LAC) const { + assert(isValid()); + // Note that we want a 'switch' here so that the compiler can warn us in + // case we add more cases. + switch (K) { + case SingleLocK: + return PathDiagnosticRange(SourceRange(Loc,Loc), true); + case RangeK: + break; + case StmtK: { + const Stmt *S = asStmt(); + switch (S->getStmtClass()) { + default: + break; + case Stmt::DeclStmtClass: { + const DeclStmt *DS = cast(S); + if (DS->isSingleDecl()) { + // Should always be the case, but we'll be defensive. + return SourceRange(DS->getLocStart(), + DS->getSingleDecl()->getLocation()); + } + break; + } + // FIXME: Provide better range information for different + // terminators. + case Stmt::IfStmtClass: + case Stmt::WhileStmtClass: + case Stmt::DoStmtClass: + case Stmt::ForStmtClass: + case Stmt::ChooseExprClass: + case Stmt::IndirectGotoStmtClass: + case Stmt::SwitchStmtClass: + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: + case Stmt::ObjCForCollectionStmtClass: { + SourceLocation L = getValidSourceLocation(S, LAC); + return SourceRange(L, L); + } + } + SourceRange R = S->getSourceRange(); + if (R.isValid()) + return R; + break; + } + case DeclK: + if (const ObjCMethodDecl *MD = dyn_cast(D)) + return MD->getSourceRange(); + if (const FunctionDecl *FD = dyn_cast(D)) { + if (Stmt *Body = FD->getBody()) + return Body->getSourceRange(); + } + else { + SourceLocation L = D->getLocation(); + return PathDiagnosticRange(SourceRange(L, L), true); + } + } + + return SourceRange(Loc,Loc); +} + +void PathDiagnosticLocation::flatten() { + if (K == StmtK) { + K = RangeK; + S = 0; + D = 0; + } + else if (K == DeclK) { + K = SingleLocK; + S = 0; + D = 0; + } +} + +PathDiagnosticLocation PathDiagnostic::getLocation() const { + assert(path.size() > 0 && + "getLocation() requires a non-empty PathDiagnostic."); + + PathDiagnosticPiece *p = path.rbegin()->getPtr(); + + while (true) { + if (PathDiagnosticCallPiece *cp = dyn_cast(p)) { + assert(!cp->path.empty()); + p = cp->path.rbegin()->getPtr(); + continue; + } + break; + } + + return p->getLocation(); +} + +//===----------------------------------------------------------------------===// +// Manipulation of PathDiagnosticCallPieces. +//===----------------------------------------------------------------------===// + +static PathDiagnosticLocation getLastStmtLoc(const ExplodedNode *N, + const SourceManager &SM) { + while (N) { + ProgramPoint PP = N->getLocation(); + if (const StmtPoint *SP = dyn_cast(&PP)) + return PathDiagnosticLocation(SP->getStmt(), SM, PP.getLocationContext()); + if (N->pred_empty()) + break; + N = *N->pred_begin(); + } + return PathDiagnosticLocation(); +} + +PathDiagnosticCallPiece * +PathDiagnosticCallPiece::construct(const ExplodedNode *N, + const CallExit &CE, + const SourceManager &SM) { + const Decl *caller = CE.getLocationContext()->getParent()->getDecl(); + PathDiagnosticLocation pos = getLastStmtLoc(N, SM); + return new PathDiagnosticCallPiece(caller, pos); +} + +PathDiagnosticCallPiece * +PathDiagnosticCallPiece::construct(PathPieces &path, + const Decl *caller) { + PathDiagnosticCallPiece *C = new PathDiagnosticCallPiece(path, caller); + path.clear(); + path.push_front(C); + return C; +} + +void PathDiagnosticCallPiece::setCallee(const CallEnter &CE, + const SourceManager &SM) { + const Decl *D = CE.getCalleeContext()->getDecl(); + Callee = D; + callEnter = PathDiagnosticLocation(CE.getCallExpr(), SM, + CE.getLocationContext()); + callEnterWithin = PathDiagnosticLocation::createBegin(D, SM); +} + +IntrusiveRefCntPtr +PathDiagnosticCallPiece::getCallEnterEvent() const { + if (!Callee) + return 0; + SmallString<256> buf; + llvm::raw_svector_ostream Out(buf); + if (isa(Callee)) + Out << "Calling anonymous block"; + else if (const NamedDecl *ND = dyn_cast(Callee)) + Out << "Calling '" << *ND << "'"; + StringRef msg = Out.str(); + if (msg.empty()) + return 0; + return new PathDiagnosticEventPiece(callEnter, msg); +} + +IntrusiveRefCntPtr +PathDiagnosticCallPiece::getCallEnterWithinCallerEvent() const { + SmallString<256> buf; + llvm::raw_svector_ostream Out(buf); + if (const NamedDecl *ND = dyn_cast_or_null(Caller)) + Out << "Entered call from '" << *ND << "'"; + else + Out << "Entered call"; + StringRef msg = Out.str(); + if (msg.empty()) + return 0; + return new PathDiagnosticEventPiece(callEnterWithin, msg); +} + +IntrusiveRefCntPtr +PathDiagnosticCallPiece::getCallExitEvent() const { + if (NoExit) + return 0; + SmallString<256> buf; + llvm::raw_svector_ostream Out(buf); + if (!CallStackMessage.empty()) + Out << CallStackMessage; + else if (const NamedDecl *ND = dyn_cast_or_null(Callee)) + Out << "Returning from '" << *ND << "'"; + else + Out << "Returning to caller"; + return new PathDiagnosticEventPiece(callReturn, Out.str()); +} + +static void compute_path_size(const PathPieces &pieces, unsigned &size) { + for (PathPieces::const_iterator it = pieces.begin(), + et = pieces.end(); it != et; ++it) { + const PathDiagnosticPiece *piece = it->getPtr(); + if (const PathDiagnosticCallPiece *cp = + dyn_cast(piece)) { + compute_path_size(cp->path, size); + } + else + ++size; + } +} + +unsigned PathDiagnostic::full_size() { + unsigned size = 0; + compute_path_size(path, size); + return size; +} + +//===----------------------------------------------------------------------===// +// FoldingSet profiling methods. +//===----------------------------------------------------------------------===// + +void PathDiagnosticLocation::Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(Range.getBegin().getRawEncoding()); + ID.AddInteger(Range.getEnd().getRawEncoding()); + ID.AddInteger(Loc.getRawEncoding()); + return; +} + +void PathDiagnosticPiece::Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger((unsigned) getKind()); + ID.AddString(str); + // FIXME: Add profiling support for code hints. + ID.AddInteger((unsigned) getDisplayHint()); + for (range_iterator I = ranges_begin(), E = ranges_end(); I != E; ++I) { + ID.AddInteger(I->getBegin().getRawEncoding()); + ID.AddInteger(I->getEnd().getRawEncoding()); + } +} + +void PathDiagnosticCallPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticPiece::Profile(ID); + for (PathPieces::const_iterator it = path.begin(), + et = path.end(); it != et; ++it) { + ID.Add(**it); + } +} + +void PathDiagnosticSpotPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticPiece::Profile(ID); + ID.Add(Pos); +} + +void PathDiagnosticControlFlowPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticPiece::Profile(ID); + for (const_iterator I = begin(), E = end(); I != E; ++I) + ID.Add(*I); +} + +void PathDiagnosticMacroPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticSpotPiece::Profile(ID); + for (PathPieces::const_iterator I = subPieces.begin(), E = subPieces.end(); + I != E; ++I) + ID.Add(**I); +} + +void PathDiagnostic::Profile(llvm::FoldingSetNodeID &ID) const { + if (!path.empty()) + getLocation().Profile(ID); + ID.AddString(BugType); + ID.AddString(Desc); + ID.AddString(Category); +} + +void PathDiagnostic::FullProfile(llvm::FoldingSetNodeID &ID) const { + Profile(ID); + for (PathPieces::const_iterator I = path.begin(), E = path.end(); I != E; ++I) + ID.Add(**I); + for (meta_iterator I = meta_begin(), E = meta_end(); I != E; ++I) + ID.AddString(*I); +} + +StackHintGenerator::~StackHintGenerator() {} + +std::string StackHintGeneratorForSymbol::getMessage(const ExplodedNode *N){ + ProgramPoint P = N->getLocation(); + const CallExit *CExit = dyn_cast(&P); + assert(CExit && "Stack Hints should be constructed at CallExit points."); + + const CallExpr *CE = dyn_cast_or_null(CExit->getStmt()); + if (!CE) + return ""; + + // Get the successor node to make sure the return statement is evaluated and + // CE is set to the result value. + N = *N->succ_begin(); + if (!N) + return getMessageForSymbolNotFound(); + + // Check if one of the parameters are set to the interesting symbol. + ProgramStateRef State = N->getState(); + const LocationContext *LCtx = N->getLocationContext(); + unsigned ArgIndex = 0; + for (CallExpr::const_arg_iterator I = CE->arg_begin(), + E = CE->arg_end(); I != E; ++I, ++ArgIndex){ + SVal SV = State->getSVal(*I, LCtx); + + // Check if the variable corresponding to the symbol is passed by value. + SymbolRef AS = SV.getAsLocSymbol(); + if (AS == Sym) { + return getMessageForArg(*I, ArgIndex); + } + + // Check if the parameter is a pointer to the symbol. + if (const loc::MemRegionVal *Reg = dyn_cast(&SV)) { + SVal PSV = State->getSVal(Reg->getRegion()); + SymbolRef AS = PSV.getAsLocSymbol(); + if (AS == Sym) { + return getMessageForArg(*I, ArgIndex); + } + } + } + + // Check if we are returning the interesting symbol. + SVal SV = State->getSVal(CE, LCtx); + SymbolRef RetSym = SV.getAsLocSymbol(); + if (RetSym == Sym) { + return getMessageForReturn(CE); + } + + return getMessageForSymbolNotFound(); +} + +/// TODO: This is copied from clang diagnostics. Maybe we could just move it to +/// some common place. (Same as HandleOrdinalModifier.) +void StackHintGeneratorForSymbol::printOrdinal(unsigned ValNo, + llvm::raw_svector_ostream &Out) { + assert(ValNo != 0 && "ValNo must be strictly positive!"); + + // We could use text forms for the first N ordinals, but the numeric + // forms are actually nicer in diagnostics because they stand out. + Out << ValNo; + + // It is critically important that we do this perfectly for + // user-written sequences with over 100 elements. + switch (ValNo % 100) { + case 11: + case 12: + case 13: + Out << "th"; return; + default: + switch (ValNo % 10) { + case 1: Out << "st"; return; + case 2: Out << "nd"; return; + case 3: Out << "rd"; return; + default: Out << "th"; return; + } + } +} + +std::string StackHintGeneratorForSymbol::getMessageForArg(const Expr *ArgE, + unsigned ArgIndex) { + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + + os << Msg << " via "; + // Printed parameters start at 1, not 0. + printOrdinal(++ArgIndex, os); + os << " parameter"; + + return os.str(); +} diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp new file mode 100644 index 0000000..323cede --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp @@ -0,0 +1,513 @@ +//===--- PlistDiagnostics.cpp - Plist Diagnostics for Paths -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PlistDiagnostics object. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/FileManager.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Casting.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +using namespace clang; +using namespace ento; + +typedef llvm::DenseMap FIDMap; + + +namespace { + class PlistDiagnostics : public PathDiagnosticConsumer { + const std::string OutputFile; + const LangOptions &LangOpts; + OwningPtr SubPD; + bool flushed; + const bool SupportsCrossFileDiagnostics; + public: + PlistDiagnostics(const std::string& prefix, const LangOptions &LangOpts, + bool supportsMultipleFiles, + PathDiagnosticConsumer *subPD); + + virtual ~PlistDiagnostics() {} + + void FlushDiagnosticsImpl(std::vector &Diags, + SmallVectorImpl *FilesMade); + + virtual StringRef getName() const { + return "PlistDiagnostics"; + } + + PathGenerationScheme getGenerationScheme() const; + bool supportsLogicalOpControlFlow() const { return true; } + bool supportsAllBlockEdges() const { return true; } + virtual bool useVerboseDescription() const { return false; } + virtual bool supportsCrossFileDiagnostics() const { + return SupportsCrossFileDiagnostics; + } + }; +} // end anonymous namespace + +PlistDiagnostics::PlistDiagnostics(const std::string& output, + const LangOptions &LO, + bool supportsMultipleFiles, + PathDiagnosticConsumer *subPD) + : OutputFile(output), LangOpts(LO), SubPD(subPD), flushed(false), + SupportsCrossFileDiagnostics(supportsMultipleFiles) {} + +PathDiagnosticConsumer* +ento::createPlistDiagnosticConsumer(const std::string& s, const Preprocessor &PP, + PathDiagnosticConsumer *subPD) { + return new PlistDiagnostics(s, PP.getLangOpts(), false, subPD); +} + +PathDiagnosticConsumer* +ento::createPlistMultiFileDiagnosticConsumer(const std::string &s, + const Preprocessor &PP) { + return new PlistDiagnostics(s, PP.getLangOpts(), true, 0); +} + +PathDiagnosticConsumer::PathGenerationScheme +PlistDiagnostics::getGenerationScheme() const { + if (const PathDiagnosticConsumer *PD = SubPD.get()) + return PD->getGenerationScheme(); + + return Extensive; +} + +static void AddFID(FIDMap &FIDs, SmallVectorImpl &V, + const SourceManager* SM, SourceLocation L) { + + FileID FID = SM->getFileID(SM->getExpansionLoc(L)); + FIDMap::iterator I = FIDs.find(FID); + if (I != FIDs.end()) return; + FIDs[FID] = V.size(); + V.push_back(FID); +} + +static unsigned GetFID(const FIDMap& FIDs, const SourceManager &SM, + SourceLocation L) { + FileID FID = SM.getFileID(SM.getExpansionLoc(L)); + FIDMap::const_iterator I = FIDs.find(FID); + assert(I != FIDs.end()); + return I->second; +} + +static raw_ostream &Indent(raw_ostream &o, const unsigned indent) { + for (unsigned i = 0; i < indent; ++i) o << ' '; + return o; +} + +static void EmitLocation(raw_ostream &o, const SourceManager &SM, + const LangOptions &LangOpts, + SourceLocation L, const FIDMap &FM, + unsigned indent, bool extend = false) { + + FullSourceLoc Loc(SM.getExpansionLoc(L), const_cast(SM)); + + // Add in the length of the token, so that we cover multi-char tokens. + unsigned offset = + extend ? Lexer::MeasureTokenLength(Loc, SM, LangOpts) - 1 : 0; + + Indent(o, indent) << "\n"; + Indent(o, indent) << " line" + << Loc.getExpansionLineNumber() << "\n"; + Indent(o, indent) << " col" + << Loc.getExpansionColumnNumber() + offset << "\n"; + Indent(o, indent) << " file" + << GetFID(FM, SM, Loc) << "\n"; + Indent(o, indent) << "\n"; +} + +static void EmitLocation(raw_ostream &o, const SourceManager &SM, + const LangOptions &LangOpts, + const PathDiagnosticLocation &L, const FIDMap& FM, + unsigned indent, bool extend = false) { + EmitLocation(o, SM, LangOpts, L.asLocation(), FM, indent, extend); +} + +static void EmitRange(raw_ostream &o, const SourceManager &SM, + const LangOptions &LangOpts, + PathDiagnosticRange R, const FIDMap &FM, + unsigned indent) { + Indent(o, indent) << "\n"; + EmitLocation(o, SM, LangOpts, R.getBegin(), FM, indent+1); + EmitLocation(o, SM, LangOpts, R.getEnd(), FM, indent+1, !R.isPoint); + Indent(o, indent) << "\n"; +} + +static raw_ostream &EmitString(raw_ostream &o, StringRef s) { + o << ""; + for (StringRef::const_iterator I = s.begin(), E = s.end(); I != E; ++I) { + char c = *I; + switch (c) { + default: o << c; break; + case '&': o << "&"; break; + case '<': o << "<"; break; + case '>': o << ">"; break; + case '\'': o << "'"; break; + case '\"': o << """; break; + } + } + o << ""; + return o; +} + +static void ReportControlFlow(raw_ostream &o, + const PathDiagnosticControlFlowPiece& P, + const FIDMap& FM, + const SourceManager &SM, + const LangOptions &LangOpts, + unsigned indent) { + + Indent(o, indent) << "\n"; + ++indent; + + Indent(o, indent) << "kindcontrol\n"; + + // Emit edges. + Indent(o, indent) << "edges\n"; + ++indent; + Indent(o, indent) << "\n"; + ++indent; + for (PathDiagnosticControlFlowPiece::const_iterator I=P.begin(), E=P.end(); + I!=E; ++I) { + Indent(o, indent) << "\n"; + ++indent; + Indent(o, indent) << "start\n"; + EmitRange(o, SM, LangOpts, I->getStart().asRange(), FM, indent+1); + Indent(o, indent) << "end\n"; + EmitRange(o, SM, LangOpts, I->getEnd().asRange(), FM, indent+1); + --indent; + Indent(o, indent) << "\n"; + } + --indent; + Indent(o, indent) << "\n"; + --indent; + + // Output any helper text. + const std::string& s = P.getString(); + if (!s.empty()) { + Indent(o, indent) << "alternate"; + EmitString(o, s) << '\n'; + } + + --indent; + Indent(o, indent) << "\n"; +} + +static void ReportEvent(raw_ostream &o, const PathDiagnosticPiece& P, + const FIDMap& FM, + const SourceManager &SM, + const LangOptions &LangOpts, + unsigned indent, + unsigned depth) { + + Indent(o, indent) << "\n"; + ++indent; + + Indent(o, indent) << "kindevent\n"; + + // Output the location. + FullSourceLoc L = P.getLocation().asLocation(); + + Indent(o, indent) << "location\n"; + EmitLocation(o, SM, LangOpts, L, FM, indent); + + // Output the ranges (if any). + PathDiagnosticPiece::range_iterator RI = P.ranges_begin(), + RE = P.ranges_end(); + + if (RI != RE) { + Indent(o, indent) << "ranges\n"; + Indent(o, indent) << "\n"; + ++indent; + for (; RI != RE; ++RI) + EmitRange(o, SM, LangOpts, *RI, FM, indent+1); + --indent; + Indent(o, indent) << "\n"; + } + + // Output the call depth. + Indent(o, indent) << "depth" + << "" << depth << "\n"; + + // Output the text. + assert(!P.getString().empty()); + Indent(o, indent) << "extended_message\n"; + Indent(o, indent); + EmitString(o, P.getString()) << '\n'; + + // Output the short text. + // FIXME: Really use a short string. + Indent(o, indent) << "message\n"; + EmitString(o, P.getString()) << '\n'; + + // Finish up. + --indent; + Indent(o, indent); o << "\n"; +} + +static void ReportPiece(raw_ostream &o, + const PathDiagnosticPiece &P, + const FIDMap& FM, const SourceManager &SM, + const LangOptions &LangOpts, + unsigned indent, + unsigned depth, + bool includeControlFlow); + +static void ReportCall(raw_ostream &o, + const PathDiagnosticCallPiece &P, + const FIDMap& FM, const SourceManager &SM, + const LangOptions &LangOpts, + unsigned indent, + unsigned depth) { + + IntrusiveRefCntPtr callEnter = + P.getCallEnterEvent(); + + if (callEnter) + ReportPiece(o, *callEnter, FM, SM, LangOpts, indent, depth, true); + + IntrusiveRefCntPtr callEnterWithinCaller = + P.getCallEnterWithinCallerEvent(); + + ++depth; + + if (callEnterWithinCaller) + ReportPiece(o, *callEnterWithinCaller, FM, SM, LangOpts, + indent, depth, true); + + for (PathPieces::const_iterator I = P.path.begin(), E = P.path.end();I!=E;++I) + ReportPiece(o, **I, FM, SM, LangOpts, indent, depth, true); + + IntrusiveRefCntPtr callExit = + P.getCallExitEvent(); + + if (callExit) + ReportPiece(o, *callExit, FM, SM, LangOpts, indent, depth, true); +} + +static void ReportMacro(raw_ostream &o, + const PathDiagnosticMacroPiece& P, + const FIDMap& FM, const SourceManager &SM, + const LangOptions &LangOpts, + unsigned indent, + unsigned depth) { + + for (PathPieces::const_iterator I = P.subPieces.begin(), E=P.subPieces.end(); + I!=E; ++I) { + ReportPiece(o, **I, FM, SM, LangOpts, indent, depth, false); + } +} + +static void ReportDiag(raw_ostream &o, const PathDiagnosticPiece& P, + const FIDMap& FM, const SourceManager &SM, + const LangOptions &LangOpts) { + ReportPiece(o, P, FM, SM, LangOpts, 4, 0, true); +} + +static void ReportPiece(raw_ostream &o, + const PathDiagnosticPiece &P, + const FIDMap& FM, const SourceManager &SM, + const LangOptions &LangOpts, + unsigned indent, + unsigned depth, + bool includeControlFlow) { + switch (P.getKind()) { + case PathDiagnosticPiece::ControlFlow: + if (includeControlFlow) + ReportControlFlow(o, cast(P), FM, SM, + LangOpts, indent); + break; + case PathDiagnosticPiece::Call: + ReportCall(o, cast(P), FM, SM, LangOpts, + indent, depth); + break; + case PathDiagnosticPiece::Event: + ReportEvent(o, cast(P), FM, SM, LangOpts, + indent, depth); + break; + case PathDiagnosticPiece::Macro: + ReportMacro(o, cast(P), FM, SM, LangOpts, + indent, depth); + break; + } +} + +void PlistDiagnostics::FlushDiagnosticsImpl( + std::vector &Diags, + SmallVectorImpl *FilesMade) { + // Build up a set of FIDs that we use by scanning the locations and + // ranges of the diagnostics. + FIDMap FM; + SmallVector Fids; + const SourceManager* SM = 0; + + if (!Diags.empty()) + SM = &(*(*Diags.begin())->path.begin())->getLocation().getManager(); + + + for (std::vector::iterator DI = Diags.begin(), + DE = Diags.end(); DI != DE; ++DI) { + + const PathDiagnostic *D = *DI; + + llvm::SmallVector WorkList; + WorkList.push_back(&D->path); + + while (!WorkList.empty()) { + const PathPieces &path = *WorkList.back(); + WorkList.pop_back(); + + for (PathPieces::const_iterator I = path.begin(), E = path.end(); + I!=E; ++I) { + const PathDiagnosticPiece *piece = I->getPtr(); + AddFID(FM, Fids, SM, piece->getLocation().asLocation()); + + for (PathDiagnosticPiece::range_iterator RI = piece->ranges_begin(), + RE= piece->ranges_end(); RI != RE; ++RI) { + AddFID(FM, Fids, SM, RI->getBegin()); + AddFID(FM, Fids, SM, RI->getEnd()); + } + + if (const PathDiagnosticCallPiece *call = + dyn_cast(piece)) { + WorkList.push_back(&call->path); + } + else if (const PathDiagnosticMacroPiece *macro = + dyn_cast(piece)) { + WorkList.push_back(¯o->subPieces); + } + } + } + } + + // Open the file. + std::string ErrMsg; + llvm::raw_fd_ostream o(OutputFile.c_str(), ErrMsg); + if (!ErrMsg.empty()) { + llvm::errs() << "warning: could not create file: " << OutputFile << '\n'; + return; + } + + // Write the plist header. + o << "\n" + "\n" + "\n"; + + // Write the root object: a containing... + // - "files", an mapping from FIDs to file names + // - "diagnostics", an containing the path diagnostics + o << "\n" + " files\n" + " \n"; + + for (SmallVectorImpl::iterator I=Fids.begin(), E=Fids.end(); + I!=E; ++I) { + o << " "; + EmitString(o, SM->getFileEntryForID(*I)->getName()) << '\n'; + } + + o << " \n" + " diagnostics\n" + " \n"; + + for (std::vector::iterator DI=Diags.begin(), + DE = Diags.end(); DI!=DE; ++DI) { + + o << " \n" + " path\n"; + + const PathDiagnostic *D = *DI; + + o << " \n"; + + for (PathPieces::const_iterator I = D->path.begin(), E = D->path.end(); + I != E; ++I) + ReportDiag(o, **I, FM, *SM, LangOpts); + + o << " \n"; + + // Output the bug type and bug category. + o << " description"; + EmitString(o, D->getDescription()) << '\n'; + o << " category"; + EmitString(o, D->getCategory()) << '\n'; + o << " type"; + EmitString(o, D->getBugType()) << '\n'; + + // Output information about the semantic context where + // the issue occurred. + if (const Decl *DeclWithIssue = D->getDeclWithIssue()) { + // FIXME: handle blocks, which have no name. + if (const NamedDecl *ND = dyn_cast(DeclWithIssue)) { + StringRef declKind; + switch (ND->getKind()) { + case Decl::CXXRecord: + declKind = "C++ class"; + break; + case Decl::CXXMethod: + declKind = "C++ method"; + break; + case Decl::ObjCMethod: + declKind = "Objective-C method"; + break; + case Decl::Function: + declKind = "function"; + break; + default: + break; + } + if (!declKind.empty()) { + const std::string &declName = ND->getDeclName().getAsString(); + o << " issue_context_kind"; + EmitString(o, declKind) << '\n'; + o << " issue_context"; + EmitString(o, declName) << '\n'; + } + } + } + + // Output the location of the bug. + o << " location\n"; + EmitLocation(o, *SM, LangOpts, D->getLocation(), FM, 2); + + // Output the diagnostic to the sub-diagnostic client, if any. + if (SubPD) { + std::vector SubDiags; + SubDiags.push_back(D); + SmallVector SubFilesMade; + SubPD->FlushDiagnosticsImpl(SubDiags, &SubFilesMade); + + if (!SubFilesMade.empty()) { + o << " " << SubPD->getName() << "_files\n"; + o << " \n"; + for (size_t i = 0, n = SubFilesMade.size(); i < n ; ++i) + o << " " << SubFilesMade[i] << "\n"; + o << " \n"; + } + } + + // Close up the entry. + o << " \n"; + } + + o << " \n"; + + // Finish. + o << "\n"; + + if (FilesMade) + FilesMade->push_back(OutputFile); +} diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp new file mode 100644 index 0000000..b9cfa27 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -0,0 +1,709 @@ +//= ProgramState.cpp - Path-Sensitive "State" for tracking values --*- C++ -*--= +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ProgramState and ProgramStateManager. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/CFG.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +// Give the vtable for ConstraintManager somewhere to live. +// FIXME: Move this elsewhere. +ConstraintManager::~ConstraintManager() {} + +namespace clang { namespace ento { +/// Increments the number of times this state is referenced. + +void ProgramStateRetain(const ProgramState *state) { + ++const_cast(state)->refCount; +} + +/// Decrement the number of times this state is referenced. +void ProgramStateRelease(const ProgramState *state) { + assert(state->refCount > 0); + ProgramState *s = const_cast(state); + if (--s->refCount == 0) { + ProgramStateManager &Mgr = s->getStateManager(); + Mgr.StateSet.RemoveNode(s); + s->~ProgramState(); + Mgr.freeStates.push_back(s); + } +} +}} + +ProgramState::ProgramState(ProgramStateManager *mgr, const Environment& env, + StoreRef st, GenericDataMap gdm) + : stateMgr(mgr), + Env(env), + store(st.getStore()), + GDM(gdm), + refCount(0) { + stateMgr->getStoreManager().incrementReferenceCount(store); +} + +ProgramState::ProgramState(const ProgramState &RHS) + : llvm::FoldingSetNode(), + stateMgr(RHS.stateMgr), + Env(RHS.Env), + store(RHS.store), + GDM(RHS.GDM), + refCount(0) { + stateMgr->getStoreManager().incrementReferenceCount(store); +} + +ProgramState::~ProgramState() { + if (store) + stateMgr->getStoreManager().decrementReferenceCount(store); +} + +ProgramStateManager::~ProgramStateManager() { + for (GDMContextsTy::iterator I=GDMContexts.begin(), E=GDMContexts.end(); + I!=E; ++I) + I->second.second(I->second.first); +} + +ProgramStateRef +ProgramStateManager::removeDeadBindings(ProgramStateRef state, + const StackFrameContext *LCtx, + SymbolReaper& SymReaper) { + + // This code essentially performs a "mark-and-sweep" of the VariableBindings. + // The roots are any Block-level exprs and Decls that our liveness algorithm + // tells us are live. We then see what Decls they may reference, and keep + // those around. This code more than likely can be made faster, and the + // frequency of which this method is called should be experimented with + // for optimum performance. + ProgramState NewState = *state; + + NewState.Env = EnvMgr.removeDeadBindings(NewState.Env, SymReaper, state); + + // Clean up the store. + StoreRef newStore = StoreMgr->removeDeadBindings(NewState.getStore(), LCtx, + SymReaper); + NewState.setStore(newStore); + SymReaper.setReapedStore(newStore); + + return getPersistentState(NewState); +} + +ProgramStateRef ProgramStateManager::MarshalState(ProgramStateRef state, + const StackFrameContext *InitLoc) { + // make up an empty state for now. + ProgramState State(this, + EnvMgr.getInitialEnvironment(), + StoreMgr->getInitialStore(InitLoc), + GDMFactory.getEmptyMap()); + + return getPersistentState(State); +} + +ProgramStateRef ProgramState::bindCompoundLiteral(const CompoundLiteralExpr *CL, + const LocationContext *LC, + SVal V) const { + const StoreRef &newStore = + getStateManager().StoreMgr->BindCompoundLiteral(getStore(), CL, LC, V); + return makeWithStore(newStore); +} + +ProgramStateRef ProgramState::bindDecl(const VarRegion* VR, SVal IVal) const { + const StoreRef &newStore = + getStateManager().StoreMgr->BindDecl(getStore(), VR, IVal); + return makeWithStore(newStore); +} + +ProgramStateRef ProgramState::bindDeclWithNoInit(const VarRegion* VR) const { + const StoreRef &newStore = + getStateManager().StoreMgr->BindDeclWithNoInit(getStore(), VR); + return makeWithStore(newStore); +} + +ProgramStateRef ProgramState::bindLoc(Loc LV, SVal V) const { + ProgramStateManager &Mgr = getStateManager(); + ProgramStateRef newState = makeWithStore(Mgr.StoreMgr->Bind(getStore(), + LV, V)); + const MemRegion *MR = LV.getAsRegion(); + if (MR && Mgr.getOwningEngine()) + return Mgr.getOwningEngine()->processRegionChange(newState, MR); + + return newState; +} + +ProgramStateRef ProgramState::bindDefault(SVal loc, SVal V) const { + ProgramStateManager &Mgr = getStateManager(); + const MemRegion *R = cast(loc).getRegion(); + const StoreRef &newStore = Mgr.StoreMgr->BindDefault(getStore(), R, V); + ProgramStateRef new_state = makeWithStore(newStore); + return Mgr.getOwningEngine() ? + Mgr.getOwningEngine()->processRegionChange(new_state, R) : + new_state; +} + +ProgramStateRef +ProgramState::invalidateRegions(ArrayRef Regions, + const Expr *E, unsigned Count, + const LocationContext *LCtx, + StoreManager::InvalidatedSymbols *IS, + const CallOrObjCMessage *Call) const { + if (!IS) { + StoreManager::InvalidatedSymbols invalidated; + return invalidateRegionsImpl(Regions, E, Count, LCtx, + invalidated, Call); + } + return invalidateRegionsImpl(Regions, E, Count, LCtx, *IS, Call); +} + +ProgramStateRef +ProgramState::invalidateRegionsImpl(ArrayRef Regions, + const Expr *E, unsigned Count, + const LocationContext *LCtx, + StoreManager::InvalidatedSymbols &IS, + const CallOrObjCMessage *Call) const { + ProgramStateManager &Mgr = getStateManager(); + SubEngine* Eng = Mgr.getOwningEngine(); + + if (Eng && Eng->wantsRegionChangeUpdate(this)) { + StoreManager::InvalidatedRegions Invalidated; + const StoreRef &newStore + = Mgr.StoreMgr->invalidateRegions(getStore(), Regions, E, Count, LCtx, IS, + Call, &Invalidated); + ProgramStateRef newState = makeWithStore(newStore); + return Eng->processRegionChanges(newState, &IS, Regions, Invalidated, Call); + } + + const StoreRef &newStore = + Mgr.StoreMgr->invalidateRegions(getStore(), Regions, E, Count, LCtx, IS, + Call, NULL); + return makeWithStore(newStore); +} + +ProgramStateRef ProgramState::unbindLoc(Loc LV) const { + assert(!isa(LV) && "Use invalidateRegion instead."); + + Store OldStore = getStore(); + const StoreRef &newStore = getStateManager().StoreMgr->Remove(OldStore, LV); + + if (newStore.getStore() == OldStore) + return this; + + return makeWithStore(newStore); +} + +ProgramStateRef +ProgramState::enterStackFrame(const LocationContext *callerCtx, + const StackFrameContext *calleeCtx) const { + const StoreRef &new_store = + getStateManager().StoreMgr->enterStackFrame(this, callerCtx, calleeCtx); + return makeWithStore(new_store); +} + +SVal ProgramState::getSValAsScalarOrLoc(const MemRegion *R) const { + // We only want to do fetches from regions that we can actually bind + // values. For example, SymbolicRegions of type 'id<...>' cannot + // have direct bindings (but their can be bindings on their subregions). + if (!R->isBoundable()) + return UnknownVal(); + + if (const TypedValueRegion *TR = dyn_cast(R)) { + QualType T = TR->getValueType(); + if (Loc::isLocType(T) || T->isIntegerType()) + return getSVal(R); + } + + return UnknownVal(); +} + +SVal ProgramState::getSVal(Loc location, QualType T) const { + SVal V = getRawSVal(cast(location), T); + + // If 'V' is a symbolic value that is *perfectly* constrained to + // be a constant value, use that value instead to lessen the burden + // on later analysis stages (so we have less symbolic values to reason + // about). + if (!T.isNull()) { + if (SymbolRef sym = V.getAsSymbol()) { + if (const llvm::APSInt *Int = getSymVal(sym)) { + // FIXME: Because we don't correctly model (yet) sign-extension + // and truncation of symbolic values, we need to convert + // the integer value to the correct signedness and bitwidth. + // + // This shows up in the following: + // + // char foo(); + // unsigned x = foo(); + // if (x == 54) + // ... + // + // The symbolic value stored to 'x' is actually the conjured + // symbol for the call to foo(); the type of that symbol is 'char', + // not unsigned. + const llvm::APSInt &NewV = getBasicVals().Convert(T, *Int); + + if (isa(V)) + return loc::ConcreteInt(NewV); + else + return nonloc::ConcreteInt(NewV); + } + } + } + + return V; +} + +ProgramStateRef ProgramState::BindExpr(const Stmt *S, + const LocationContext *LCtx, + SVal V, bool Invalidate) const{ + Environment NewEnv = + getStateManager().EnvMgr.bindExpr(Env, EnvironmentEntry(S, LCtx), V, + Invalidate); + if (NewEnv == Env) + return this; + + ProgramState NewSt = *this; + NewSt.Env = NewEnv; + return getStateManager().getPersistentState(NewSt); +} + +ProgramStateRef +ProgramState::bindExprAndLocation(const Stmt *S, const LocationContext *LCtx, + SVal location, + SVal V) const { + Environment NewEnv = + getStateManager().EnvMgr.bindExprAndLocation(Env, + EnvironmentEntry(S, LCtx), + location, V); + + if (NewEnv == Env) + return this; + + ProgramState NewSt = *this; + NewSt.Env = NewEnv; + return getStateManager().getPersistentState(NewSt); +} + +ProgramStateRef ProgramState::assumeInBound(DefinedOrUnknownSVal Idx, + DefinedOrUnknownSVal UpperBound, + bool Assumption, + QualType indexTy) const { + if (Idx.isUnknown() || UpperBound.isUnknown()) + return this; + + // Build an expression for 0 <= Idx < UpperBound. + // This is the same as Idx + MIN < UpperBound + MIN, if overflow is allowed. + // FIXME: This should probably be part of SValBuilder. + ProgramStateManager &SM = getStateManager(); + SValBuilder &svalBuilder = SM.getSValBuilder(); + ASTContext &Ctx = svalBuilder.getContext(); + + // Get the offset: the minimum value of the array index type. + BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); + // FIXME: This should be using ValueManager::ArrayindexTy...somehow. + if (indexTy.isNull()) + indexTy = Ctx.IntTy; + nonloc::ConcreteInt Min(BVF.getMinValue(indexTy)); + + // Adjust the index. + SVal newIdx = svalBuilder.evalBinOpNN(this, BO_Add, + cast(Idx), Min, indexTy); + if (newIdx.isUnknownOrUndef()) + return this; + + // Adjust the upper bound. + SVal newBound = + svalBuilder.evalBinOpNN(this, BO_Add, cast(UpperBound), + Min, indexTy); + + if (newBound.isUnknownOrUndef()) + return this; + + // Build the actual comparison. + SVal inBound = svalBuilder.evalBinOpNN(this, BO_LT, + cast(newIdx), cast(newBound), + Ctx.IntTy); + if (inBound.isUnknownOrUndef()) + return this; + + // Finally, let the constraint manager take care of it. + ConstraintManager &CM = SM.getConstraintManager(); + return CM.assume(this, cast(inBound), Assumption); +} + +ProgramStateRef ProgramStateManager::getInitialState(const LocationContext *InitLoc) { + ProgramState State(this, + EnvMgr.getInitialEnvironment(), + StoreMgr->getInitialStore(InitLoc), + GDMFactory.getEmptyMap()); + + return getPersistentState(State); +} + +ProgramStateRef ProgramStateManager::getPersistentStateWithGDM( + ProgramStateRef FromState, + ProgramStateRef GDMState) { + ProgramState NewState(*FromState); + NewState.GDM = GDMState->GDM; + return getPersistentState(NewState); +} + +ProgramStateRef ProgramStateManager::getPersistentState(ProgramState &State) { + + llvm::FoldingSetNodeID ID; + State.Profile(ID); + void *InsertPos; + + if (ProgramState *I = StateSet.FindNodeOrInsertPos(ID, InsertPos)) + return I; + + ProgramState *newState = 0; + if (!freeStates.empty()) { + newState = freeStates.back(); + freeStates.pop_back(); + } + else { + newState = (ProgramState*) Alloc.Allocate(); + } + new (newState) ProgramState(State); + StateSet.InsertNode(newState, InsertPos); + return newState; +} + +ProgramStateRef ProgramState::makeWithStore(const StoreRef &store) const { + ProgramState NewSt(*this); + NewSt.setStore(store); + return getStateManager().getPersistentState(NewSt); +} + +void ProgramState::setStore(const StoreRef &newStore) { + Store newStoreStore = newStore.getStore(); + if (newStoreStore) + stateMgr->getStoreManager().incrementReferenceCount(newStoreStore); + if (store) + stateMgr->getStoreManager().decrementReferenceCount(store); + store = newStoreStore; +} + +//===----------------------------------------------------------------------===// +// State pretty-printing. +//===----------------------------------------------------------------------===// + +void ProgramState::print(raw_ostream &Out, + const char *NL, const char *Sep) const { + // Print the store. + ProgramStateManager &Mgr = getStateManager(); + Mgr.getStoreManager().print(getStore(), Out, NL, Sep); + + // Print out the environment. + Env.print(Out, NL, Sep); + + // Print out the constraints. + Mgr.getConstraintManager().print(this, Out, NL, Sep); + + // Print checker-specific data. + Mgr.getOwningEngine()->printState(Out, this, NL, Sep); +} + +void ProgramState::printDOT(raw_ostream &Out) const { + print(Out, "\\l", "\\|"); +} + +void ProgramState::dump() const { + print(llvm::errs()); +} + +void ProgramState::printTaint(raw_ostream &Out, + const char *NL, const char *Sep) const { + TaintMapImpl TM = get(); + + if (!TM.isEmpty()) + Out <<"Tainted Symbols:" << NL; + + for (TaintMapImpl::iterator I = TM.begin(), E = TM.end(); I != E; ++I) { + Out << I->first << " : " << I->second << NL; + } +} + +void ProgramState::dumpTaint() const { + printTaint(llvm::errs()); +} + +//===----------------------------------------------------------------------===// +// Generic Data Map. +//===----------------------------------------------------------------------===// + +void *const* ProgramState::FindGDM(void *K) const { + return GDM.lookup(K); +} + +void* +ProgramStateManager::FindGDMContext(void *K, + void *(*CreateContext)(llvm::BumpPtrAllocator&), + void (*DeleteContext)(void*)) { + + std::pair& p = GDMContexts[K]; + if (!p.first) { + p.first = CreateContext(Alloc); + p.second = DeleteContext; + } + + return p.first; +} + +ProgramStateRef ProgramStateManager::addGDM(ProgramStateRef St, void *Key, void *Data){ + ProgramState::GenericDataMap M1 = St->getGDM(); + ProgramState::GenericDataMap M2 = GDMFactory.add(M1, Key, Data); + + if (M1 == M2) + return St; + + ProgramState NewSt = *St; + NewSt.GDM = M2; + return getPersistentState(NewSt); +} + +ProgramStateRef ProgramStateManager::removeGDM(ProgramStateRef state, void *Key) { + ProgramState::GenericDataMap OldM = state->getGDM(); + ProgramState::GenericDataMap NewM = GDMFactory.remove(OldM, Key); + + if (NewM == OldM) + return state; + + ProgramState NewState = *state; + NewState.GDM = NewM; + return getPersistentState(NewState); +} + +void ScanReachableSymbols::anchor() { } + +bool ScanReachableSymbols::scan(nonloc::CompoundVal val) { + for (nonloc::CompoundVal::iterator I=val.begin(), E=val.end(); I!=E; ++I) + if (!scan(*I)) + return false; + + return true; +} + +bool ScanReachableSymbols::scan(const SymExpr *sym) { + unsigned &isVisited = visited[sym]; + if (isVisited) + return true; + isVisited = 1; + + if (!visitor.VisitSymbol(sym)) + return false; + + // TODO: should be rewritten using SymExpr::symbol_iterator. + switch (sym->getKind()) { + case SymExpr::RegionValueKind: + case SymExpr::ConjuredKind: + case SymExpr::DerivedKind: + case SymExpr::ExtentKind: + case SymExpr::MetadataKind: + break; + case SymExpr::CastSymbolKind: + return scan(cast(sym)->getOperand()); + case SymExpr::SymIntKind: + return scan(cast(sym)->getLHS()); + case SymExpr::IntSymKind: + return scan(cast(sym)->getRHS()); + case SymExpr::SymSymKind: { + const SymSymExpr *x = cast(sym); + return scan(x->getLHS()) && scan(x->getRHS()); + } + } + return true; +} + +bool ScanReachableSymbols::scan(SVal val) { + if (loc::MemRegionVal *X = dyn_cast(&val)) + return scan(X->getRegion()); + + if (nonloc::LocAsInteger *X = dyn_cast(&val)) + return scan(X->getLoc()); + + if (SymbolRef Sym = val.getAsSymbol()) + return scan(Sym); + + if (const SymExpr *Sym = val.getAsSymbolicExpression()) + return scan(Sym); + + if (nonloc::CompoundVal *X = dyn_cast(&val)) + return scan(*X); + + return true; +} + +bool ScanReachableSymbols::scan(const MemRegion *R) { + if (isa(R)) + return true; + + unsigned &isVisited = visited[R]; + if (isVisited) + return true; + isVisited = 1; + + + if (!visitor.VisitMemRegion(R)) + return false; + + // If this is a symbolic region, visit the symbol for the region. + if (const SymbolicRegion *SR = dyn_cast(R)) + if (!visitor.VisitSymbol(SR->getSymbol())) + return false; + + // If this is a subregion, also visit the parent regions. + if (const SubRegion *SR = dyn_cast(R)) + if (!scan(SR->getSuperRegion())) + return false; + + // Now look at the binding to this region (if any). + if (!scan(state->getSValAsScalarOrLoc(R))) + return false; + + // Now look at the subregions. + if (!SRM.get()) + SRM.reset(state->getStateManager().getStoreManager(). + getSubRegionMap(state->getStore())); + + return SRM->iterSubRegions(R, *this); +} + +bool ProgramState::scanReachableSymbols(SVal val, SymbolVisitor& visitor) const { + ScanReachableSymbols S(this, visitor); + return S.scan(val); +} + +bool ProgramState::scanReachableSymbols(const SVal *I, const SVal *E, + SymbolVisitor &visitor) const { + ScanReachableSymbols S(this, visitor); + for ( ; I != E; ++I) { + if (!S.scan(*I)) + return false; + } + return true; +} + +bool ProgramState::scanReachableSymbols(const MemRegion * const *I, + const MemRegion * const *E, + SymbolVisitor &visitor) const { + ScanReachableSymbols S(this, visitor); + for ( ; I != E; ++I) { + if (!S.scan(*I)) + return false; + } + return true; +} + +ProgramStateRef ProgramState::addTaint(const Stmt *S, + const LocationContext *LCtx, + TaintTagType Kind) const { + if (const Expr *E = dyn_cast_or_null(S)) + S = E->IgnoreParens(); + + SymbolRef Sym = getSVal(S, LCtx).getAsSymbol(); + if (Sym) + return addTaint(Sym, Kind); + + const MemRegion *R = getSVal(S, LCtx).getAsRegion(); + addTaint(R, Kind); + + // Cannot add taint, so just return the state. + return this; +} + +ProgramStateRef ProgramState::addTaint(const MemRegion *R, + TaintTagType Kind) const { + if (const SymbolicRegion *SR = dyn_cast_or_null(R)) + return addTaint(SR->getSymbol(), Kind); + return this; +} + +ProgramStateRef ProgramState::addTaint(SymbolRef Sym, + TaintTagType Kind) const { + // If this is a symbol cast, remove the cast before adding the taint. Taint + // is cast agnostic. + while (const SymbolCast *SC = dyn_cast(Sym)) + Sym = SC->getOperand(); + + ProgramStateRef NewState = set(Sym, Kind); + assert(NewState); + return NewState; +} + +bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx, + TaintTagType Kind) const { + if (const Expr *E = dyn_cast_or_null(S)) + S = E->IgnoreParens(); + + SVal val = getSVal(S, LCtx); + return isTainted(val, Kind); +} + +bool ProgramState::isTainted(SVal V, TaintTagType Kind) const { + if (const SymExpr *Sym = V.getAsSymExpr()) + return isTainted(Sym, Kind); + if (const MemRegion *Reg = V.getAsRegion()) + return isTainted(Reg, Kind); + return false; +} + +bool ProgramState::isTainted(const MemRegion *Reg, TaintTagType K) const { + if (!Reg) + return false; + + // Element region (array element) is tainted if either the base or the offset + // are tainted. + if (const ElementRegion *ER = dyn_cast(Reg)) + return isTainted(ER->getSuperRegion(), K) || isTainted(ER->getIndex(), K); + + if (const SymbolicRegion *SR = dyn_cast(Reg)) + return isTainted(SR->getSymbol(), K); + + if (const SubRegion *ER = dyn_cast(Reg)) + return isTainted(ER->getSuperRegion(), K); + + return false; +} + +bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const { + if (!Sym) + return false; + + // Traverse all the symbols this symbol depends on to see if any are tainted. + bool Tainted = false; + for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end(); + SI != SE; ++SI) { + assert(isa(*SI)); + const TaintTagType *Tag = get(*SI); + Tainted = (Tag && *Tag == Kind); + + // If this is a SymbolDerived with a tainted parent, it's also tainted. + if (const SymbolDerived *SD = dyn_cast(*SI)) + Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind); + + // If memory region is tainted, data is also tainted. + if (const SymbolRegionValue *SRV = dyn_cast(*SI)) + Tainted = Tainted || isTainted(SRV->getRegion(), Kind); + + // If If this is a SymbolCast from a tainted value, it's also tainted. + if (const SymbolCast *SC = dyn_cast(*SI)) + Tainted = Tainted || isTainted(SC->getOperand(), Kind); + + if (Tainted) + return true; + } + + return Tainted; +} diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp new file mode 100644 index 0000000..98eb958 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -0,0 +1,442 @@ +//== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines RangeConstraintManager, a class that tracks simple +// equality and inequality constraints on symbolic values of ProgramState. +// +//===----------------------------------------------------------------------===// + +#include "SimpleConstraintManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableSet.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { class ConstraintRange {}; } +static int ConstraintRangeIndex = 0; + +/// A Range represents the closed range [from, to]. The caller must +/// guarantee that from <= to. Note that Range is immutable, so as not +/// to subvert RangeSet's immutability. +namespace { +class Range : public std::pair { +public: + Range(const llvm::APSInt &from, const llvm::APSInt &to) + : std::pair(&from, &to) { + assert(from <= to); + } + bool Includes(const llvm::APSInt &v) const { + return *first <= v && v <= *second; + } + const llvm::APSInt &From() const { + return *first; + } + const llvm::APSInt &To() const { + return *second; + } + const llvm::APSInt *getConcreteValue() const { + return &From() == &To() ? &From() : NULL; + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddPointer(&From()); + ID.AddPointer(&To()); + } +}; + + +class RangeTrait : public llvm::ImutContainerInfo { +public: + // When comparing if one Range is less than another, we should compare + // the actual APSInt values instead of their pointers. This keeps the order + // consistent (instead of comparing by pointer values) and can potentially + // be used to speed up some of the operations in RangeSet. + static inline bool isLess(key_type_ref lhs, key_type_ref rhs) { + return *lhs.first < *rhs.first || (!(*rhs.first < *lhs.first) && + *lhs.second < *rhs.second); + } +}; + +/// RangeSet contains a set of ranges. If the set is empty, then +/// there the value of a symbol is overly constrained and there are no +/// possible values for that symbol. +class RangeSet { + typedef llvm::ImmutableSet PrimRangeSet; + PrimRangeSet ranges; // no need to make const, since it is an + // ImmutableSet - this allows default operator= + // to work. +public: + typedef PrimRangeSet::Factory Factory; + typedef PrimRangeSet::iterator iterator; + + RangeSet(PrimRangeSet RS) : ranges(RS) {} + + iterator begin() const { return ranges.begin(); } + iterator end() const { return ranges.end(); } + + bool isEmpty() const { return ranges.isEmpty(); } + + /// Construct a new RangeSet representing '{ [from, to] }'. + RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to) + : ranges(F.add(F.getEmptySet(), Range(from, to))) {} + + /// Profile - Generates a hash profile of this RangeSet for use + /// by FoldingSet. + void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); } + + /// getConcreteValue - If a symbol is contrained to equal a specific integer + /// constant then this method returns that value. Otherwise, it returns + /// NULL. + const llvm::APSInt* getConcreteValue() const { + return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0; + } + +private: + void IntersectInRange(BasicValueFactory &BV, Factory &F, + const llvm::APSInt &Lower, + const llvm::APSInt &Upper, + PrimRangeSet &newRanges, + PrimRangeSet::iterator &i, + PrimRangeSet::iterator &e) const { + // There are six cases for each range R in the set: + // 1. R is entirely before the intersection range. + // 2. R is entirely after the intersection range. + // 3. R contains the entire intersection range. + // 4. R starts before the intersection range and ends in the middle. + // 5. R starts in the middle of the intersection range and ends after it. + // 6. R is entirely contained in the intersection range. + // These correspond to each of the conditions below. + for (/* i = begin(), e = end() */; i != e; ++i) { + if (i->To() < Lower) { + continue; + } + if (i->From() > Upper) { + break; + } + + if (i->Includes(Lower)) { + if (i->Includes(Upper)) { + newRanges = F.add(newRanges, Range(BV.getValue(Lower), + BV.getValue(Upper))); + break; + } else + newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To())); + } else { + if (i->Includes(Upper)) { + newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper))); + break; + } else + newRanges = F.add(newRanges, *i); + } + } + } + +public: + // Returns a set containing the values in the receiving set, intersected with + // the closed range [Lower, Upper]. Unlike the Range type, this range uses + // modular arithmetic, corresponding to the common treatment of C integer + // overflow. Thus, if the Lower bound is greater than the Upper bound, the + // range is taken to wrap around. This is equivalent to taking the + // intersection with the two ranges [Min, Upper] and [Lower, Max], + // or, alternatively, /removing/ all integers between Upper and Lower. + RangeSet Intersect(BasicValueFactory &BV, Factory &F, + const llvm::APSInt &Lower, + const llvm::APSInt &Upper) const { + PrimRangeSet newRanges = F.getEmptySet(); + + PrimRangeSet::iterator i = begin(), e = end(); + if (Lower <= Upper) + IntersectInRange(BV, F, Lower, Upper, newRanges, i, e); + else { + // The order of the next two statements is important! + // IntersectInRange() does not reset the iteration state for i and e. + // Therefore, the lower range most be handled first. + IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e); + IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e); + } + return newRanges; + } + + void print(raw_ostream &os) const { + bool isFirst = true; + os << "{ "; + for (iterator i = begin(), e = end(); i != e; ++i) { + if (isFirst) + isFirst = false; + else + os << ", "; + + os << '[' << i->From().toString(10) << ", " << i->To().toString(10) + << ']'; + } + os << " }"; + } + + bool operator==(const RangeSet &other) const { + return ranges == other.ranges; + } +}; +} // end anonymous namespace + +typedef llvm::ImmutableMap ConstraintRangeTy; + +namespace clang { +namespace ento { +template<> +struct ProgramStateTrait + : public ProgramStatePartialTrait { + static inline void *GDMIndex() { return &ConstraintRangeIndex; } +}; +} +} + +namespace { +class RangeConstraintManager : public SimpleConstraintManager{ + RangeSet GetRange(ProgramStateRef state, SymbolRef sym); +public: + RangeConstraintManager(SubEngine &subengine) + : SimpleConstraintManager(subengine) {} + + ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); + + ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); + + const llvm::APSInt* getSymVal(ProgramStateRef St, SymbolRef sym) const; + + // FIXME: Refactor into SimpleConstraintManager? + bool isEqual(ProgramStateRef St, SymbolRef sym, const llvm::APSInt& V) const { + const llvm::APSInt *i = getSymVal(St, sym); + return i ? *i == V : false; + } + + ProgramStateRef removeDeadBindings(ProgramStateRef St, SymbolReaper& SymReaper); + + void print(ProgramStateRef St, raw_ostream &Out, + const char* nl, const char *sep); + +private: + RangeSet::Factory F; +}; + +} // end anonymous namespace + +ConstraintManager* ento::CreateRangeConstraintManager(ProgramStateManager&, + SubEngine &subeng) { + return new RangeConstraintManager(subeng); +} + +const llvm::APSInt* RangeConstraintManager::getSymVal(ProgramStateRef St, + SymbolRef sym) const { + const ConstraintRangeTy::data_type *T = St->get(sym); + return T ? T->getConcreteValue() : NULL; +} + +/// Scan all symbols referenced by the constraints. If the symbol is not alive +/// as marked in LSymbols, mark it as dead in DSymbols. +ProgramStateRef +RangeConstraintManager::removeDeadBindings(ProgramStateRef state, + SymbolReaper& SymReaper) { + + ConstraintRangeTy CR = state->get(); + ConstraintRangeTy::Factory& CRFactory = state->get_context(); + + for (ConstraintRangeTy::iterator I = CR.begin(), E = CR.end(); I != E; ++I) { + SymbolRef sym = I.getKey(); + if (SymReaper.maybeDead(sym)) + CR = CRFactory.remove(CR, sym); + } + + return state->set(CR); +} + +RangeSet +RangeConstraintManager::GetRange(ProgramStateRef state, SymbolRef sym) { + if (ConstraintRangeTy::data_type* V = state->get(sym)) + return *V; + + // Lazily generate a new RangeSet representing all possible values for the + // given symbol type. + QualType T = state->getSymbolManager().getType(sym); + BasicValueFactory& BV = state->getBasicVals(); + return RangeSet(F, BV.getMinValue(T), BV.getMaxValue(T)); +} + +//===------------------------------------------------------------------------=== +// assumeSymX methods: public interface for RangeConstraintManager. +//===------------------------------------------------------------------------===/ + +// The syntax for ranges below is mathematical, using [x, y] for closed ranges +// and (x, y) for open ranges. These ranges are modular, corresponding with +// a common treatment of C integer overflow. This means that these methods +// do not have to worry about overflow; RangeSet::Intersect can handle such a +// "wraparound" range. +// As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1, +// UINT_MAX, 0, 1, and 2. + +ProgramStateRef +RangeConstraintManager::assumeSymNE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + llvm::APSInt Lower = Int-Adjustment; + llvm::APSInt Upper = Lower; + --Lower; + ++Upper; + + // [Int-Adjustment+1, Int-Adjustment-1] + // Notice that the lower bound is greater than the upper bound. + RangeSet New = GetRange(state, sym).Intersect(BV, F, Upper, Lower); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +ProgramStateRef +RangeConstraintManager::assumeSymEQ(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + // [Int-Adjustment, Int-Adjustment] + BasicValueFactory &BV = state->getBasicVals(); + llvm::APSInt AdjInt = Int-Adjustment; + RangeSet New = GetRange(state, sym).Intersect(BV, F, AdjInt, AdjInt); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +ProgramStateRef +RangeConstraintManager::assumeSymLT(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + QualType T = state->getSymbolManager().getType(sym); + const llvm::APSInt &Min = BV.getMinValue(T); + + // Special case for Int == Min. This is always false. + if (Int == Min) + return NULL; + + llvm::APSInt Lower = Min-Adjustment; + llvm::APSInt Upper = Int-Adjustment; + --Upper; + + RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +ProgramStateRef +RangeConstraintManager::assumeSymGT(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + QualType T = state->getSymbolManager().getType(sym); + const llvm::APSInt &Max = BV.getMaxValue(T); + + // Special case for Int == Max. This is always false. + if (Int == Max) + return NULL; + + llvm::APSInt Lower = Int-Adjustment; + llvm::APSInt Upper = Max-Adjustment; + ++Lower; + + RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +ProgramStateRef +RangeConstraintManager::assumeSymGE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + QualType T = state->getSymbolManager().getType(sym); + const llvm::APSInt &Min = BV.getMinValue(T); + + // Special case for Int == Min. This is always feasible. + if (Int == Min) + return state; + + const llvm::APSInt &Max = BV.getMaxValue(T); + + llvm::APSInt Lower = Int-Adjustment; + llvm::APSInt Upper = Max-Adjustment; + + RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +ProgramStateRef +RangeConstraintManager::assumeSymLE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + QualType T = state->getSymbolManager().getType(sym); + const llvm::APSInt &Max = BV.getMaxValue(T); + + // Special case for Int == Max. This is always feasible. + if (Int == Max) + return state; + + const llvm::APSInt &Min = BV.getMinValue(T); + + llvm::APSInt Lower = Min-Adjustment; + llvm::APSInt Upper = Int-Adjustment; + + RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +//===------------------------------------------------------------------------=== +// Pretty-printing. +//===------------------------------------------------------------------------===/ + +void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out, + const char* nl, const char *sep) { + + ConstraintRangeTy Ranges = St->get(); + + if (Ranges.isEmpty()) { + Out << nl << sep << "Ranges are empty." << nl; + return; + } + + Out << nl << sep << "Ranges of symbol values:"; + for (ConstraintRangeTy::iterator I=Ranges.begin(), E=Ranges.end(); I!=E; ++I){ + Out << nl << ' ' << I.getKey() << " : "; + I.getData().print(Out); + } + Out << nl; +} diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp new file mode 100644 index 0000000..cc3ea8c --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -0,0 +1,2009 @@ +//== RegionStore.cpp - Field-sensitive store model --------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a basic region store model. In this model, we do have field +// sensitivity. But we assume nothing about the heap shape. So recursive data +// structures are largely ignored. Basically we do 1-limiting analysis. +// Parameter pointers are assumed with no aliasing. Pointee objects of +// parameters are created lazily. +// +//===----------------------------------------------------------------------===// +#include "clang/AST/CharUnits.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/ExprCXX.h" +#include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/Analysis/AnalysisContext.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "llvm/ADT/ImmutableList.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; +using llvm::Optional; + +//===----------------------------------------------------------------------===// +// Representation of binding keys. +//===----------------------------------------------------------------------===// + +namespace { +class BindingKey { +public: + enum Kind { Direct = 0x0, Default = 0x1 }; +private: + llvm ::PointerIntPair P; + uint64_t Offset; + + explicit BindingKey(const MemRegion *r, uint64_t offset, Kind k) + : P(r, (unsigned) k), Offset(offset) {} +public: + + bool isDirect() const { return P.getInt() == Direct; } + + const MemRegion *getRegion() const { return P.getPointer(); } + uint64_t getOffset() const { return Offset; } + + void Profile(llvm::FoldingSetNodeID& ID) const { + ID.AddPointer(P.getOpaqueValue()); + ID.AddInteger(Offset); + } + + static BindingKey Make(const MemRegion *R, Kind k); + + bool operator<(const BindingKey &X) const { + if (P.getOpaqueValue() < X.P.getOpaqueValue()) + return true; + if (P.getOpaqueValue() > X.P.getOpaqueValue()) + return false; + return Offset < X.Offset; + } + + bool operator==(const BindingKey &X) const { + return P.getOpaqueValue() == X.P.getOpaqueValue() && + Offset == X.Offset; + } + + bool isValid() const { + return getRegion() != NULL; + } +}; +} // end anonymous namespace + +BindingKey BindingKey::Make(const MemRegion *R, Kind k) { + if (const ElementRegion *ER = dyn_cast(R)) { + const RegionRawOffset &O = ER->getAsArrayOffset(); + + // FIXME: There are some ElementRegions for which we cannot compute + // raw offsets yet, including regions with symbolic offsets. These will be + // ignored by the store. + return BindingKey(O.getRegion(), O.getOffset().getQuantity(), k); + } + + return BindingKey(R, 0, k); +} + +namespace llvm { + static inline + raw_ostream &operator<<(raw_ostream &os, BindingKey K) { + os << '(' << K.getRegion() << ',' << K.getOffset() + << ',' << (K.isDirect() ? "direct" : "default") + << ')'; + return os; + } +} // end llvm namespace + +//===----------------------------------------------------------------------===// +// Actual Store type. +//===----------------------------------------------------------------------===// + +typedef llvm::ImmutableMap RegionBindings; + +//===----------------------------------------------------------------------===// +// Fine-grained control of RegionStoreManager. +//===----------------------------------------------------------------------===// + +namespace { +struct minimal_features_tag {}; +struct maximal_features_tag {}; + +class RegionStoreFeatures { + bool SupportsFields; +public: + RegionStoreFeatures(minimal_features_tag) : + SupportsFields(false) {} + + RegionStoreFeatures(maximal_features_tag) : + SupportsFields(true) {} + + void enableFields(bool t) { SupportsFields = t; } + + bool supportsFields() const { return SupportsFields; } +}; +} + +//===----------------------------------------------------------------------===// +// Main RegionStore logic. +//===----------------------------------------------------------------------===// + +namespace { + +class RegionStoreSubRegionMap : public SubRegionMap { +public: + typedef llvm::ImmutableSet Set; + typedef llvm::DenseMap Map; +private: + Set::Factory F; + Map M; +public: + bool add(const MemRegion* Parent, const MemRegion* SubRegion) { + Map::iterator I = M.find(Parent); + + if (I == M.end()) { + M.insert(std::make_pair(Parent, F.add(F.getEmptySet(), SubRegion))); + return true; + } + + I->second = F.add(I->second, SubRegion); + return false; + } + + void process(SmallVectorImpl &WL, const SubRegion *R); + + ~RegionStoreSubRegionMap() {} + + const Set *getSubRegions(const MemRegion *Parent) const { + Map::const_iterator I = M.find(Parent); + return I == M.end() ? NULL : &I->second; + } + + bool iterSubRegions(const MemRegion* Parent, Visitor& V) const { + Map::const_iterator I = M.find(Parent); + + if (I == M.end()) + return true; + + Set S = I->second; + for (Set::iterator SI=S.begin(),SE=S.end(); SI != SE; ++SI) { + if (!V.Visit(Parent, *SI)) + return false; + } + + return true; + } +}; + +void +RegionStoreSubRegionMap::process(SmallVectorImpl &WL, + const SubRegion *R) { + const MemRegion *superR = R->getSuperRegion(); + if (add(superR, R)) + if (const SubRegion *sr = dyn_cast(superR)) + WL.push_back(sr); +} + +class RegionStoreManager : public StoreManager { + const RegionStoreFeatures Features; + RegionBindings::Factory RBFactory; + +public: + RegionStoreManager(ProgramStateManager& mgr, const RegionStoreFeatures &f) + : StoreManager(mgr), + Features(f), + RBFactory(mgr.getAllocator()) {} + + SubRegionMap *getSubRegionMap(Store store) { + return getRegionStoreSubRegionMap(store); + } + + RegionStoreSubRegionMap *getRegionStoreSubRegionMap(Store store); + + Optional getDirectBinding(RegionBindings B, const MemRegion *R); + /// getDefaultBinding - Returns an SVal* representing an optional default + /// binding associated with a region and its subregions. + Optional getDefaultBinding(RegionBindings B, const MemRegion *R); + + /// setImplicitDefaultValue - Set the default binding for the provided + /// MemRegion to the value implicitly defined for compound literals when + /// the value is not specified. + StoreRef setImplicitDefaultValue(Store store, const MemRegion *R, QualType T); + + /// ArrayToPointer - Emulates the "decay" of an array to a pointer + /// type. 'Array' represents the lvalue of the array being decayed + /// to a pointer, and the returned SVal represents the decayed + /// version of that lvalue (i.e., a pointer to the first element of + /// the array). This is called by ExprEngine when evaluating + /// casts from arrays to pointers. + SVal ArrayToPointer(Loc Array); + + /// For DerivedToBase casts, create a CXXBaseObjectRegion and return it. + virtual SVal evalDerivedToBase(SVal derived, QualType basePtrType); + + /// \brief Evaluates C++ dynamic_cast cast. + /// The callback may result in the following 3 scenarios: + /// - Successful cast (ex: derived is subclass of base). + /// - Failed cast (ex: derived is definitely not a subclass of base). + /// - We don't know (base is a symbolic region and we don't have + /// enough info to determine if the cast will succeed at run time). + /// The function returns an SVal representing the derived class; it's + /// valid only if Failed flag is set to false. + virtual SVal evalDynamicCast(SVal base, QualType derivedPtrType,bool &Failed); + + StoreRef getInitialStore(const LocationContext *InitLoc) { + return StoreRef(RBFactory.getEmptyMap().getRootWithoutRetain(), *this); + } + + //===-------------------------------------------------------------------===// + // Binding values to regions. + //===-------------------------------------------------------------------===// + RegionBindings invalidateGlobalRegion(MemRegion::Kind K, + const Expr *Ex, + unsigned Count, + const LocationContext *LCtx, + RegionBindings B, + InvalidatedRegions *Invalidated); + + StoreRef invalidateRegions(Store store, ArrayRef Regions, + const Expr *E, unsigned Count, + const LocationContext *LCtx, + InvalidatedSymbols &IS, + const CallOrObjCMessage *Call, + InvalidatedRegions *Invalidated); + +public: // Made public for helper classes. + + void RemoveSubRegionBindings(RegionBindings &B, const MemRegion *R, + RegionStoreSubRegionMap &M); + + RegionBindings addBinding(RegionBindings B, BindingKey K, SVal V); + + RegionBindings addBinding(RegionBindings B, const MemRegion *R, + BindingKey::Kind k, SVal V); + + const SVal *lookup(RegionBindings B, BindingKey K); + const SVal *lookup(RegionBindings B, const MemRegion *R, BindingKey::Kind k); + + RegionBindings removeBinding(RegionBindings B, BindingKey K); + RegionBindings removeBinding(RegionBindings B, const MemRegion *R, + BindingKey::Kind k); + + RegionBindings removeBinding(RegionBindings B, const MemRegion *R) { + return removeBinding(removeBinding(B, R, BindingKey::Direct), R, + BindingKey::Default); + } + +public: // Part of public interface to class. + + StoreRef Bind(Store store, Loc LV, SVal V); + + // BindDefault is only used to initialize a region with a default value. + StoreRef BindDefault(Store store, const MemRegion *R, SVal V) { + RegionBindings B = GetRegionBindings(store); + assert(!lookup(B, R, BindingKey::Default)); + assert(!lookup(B, R, BindingKey::Direct)); + return StoreRef(addBinding(B, R, BindingKey::Default, V) + .getRootWithoutRetain(), *this); + } + + StoreRef BindCompoundLiteral(Store store, const CompoundLiteralExpr *CL, + const LocationContext *LC, SVal V); + + StoreRef BindDecl(Store store, const VarRegion *VR, SVal InitVal); + + StoreRef BindDeclWithNoInit(Store store, const VarRegion *) { + return StoreRef(store, *this); + } + + /// BindStruct - Bind a compound value to a structure. + StoreRef BindStruct(Store store, const TypedValueRegion* R, SVal V); + + StoreRef BindArray(Store store, const TypedValueRegion* R, SVal V); + + /// KillStruct - Set the entire struct to unknown. + StoreRef KillStruct(Store store, const TypedRegion* R, SVal DefaultVal); + + StoreRef Remove(Store store, Loc LV); + + void incrementReferenceCount(Store store) { + GetRegionBindings(store).manualRetain(); + } + + /// If the StoreManager supports it, decrement the reference count of + /// the specified Store object. If the reference count hits 0, the memory + /// associated with the object is recycled. + void decrementReferenceCount(Store store) { + GetRegionBindings(store).manualRelease(); + } + + bool includedInBindings(Store store, const MemRegion *region) const; + + /// \brief Return the value bound to specified location in a given state. + /// + /// The high level logic for this method is this: + /// getBinding (L) + /// if L has binding + /// return L's binding + /// else if L is in killset + /// return unknown + /// else + /// if L is on stack or heap + /// return undefined + /// else + /// return symbolic + SVal getBinding(Store store, Loc L, QualType T = QualType()); + + SVal getBindingForElement(Store store, const ElementRegion *R); + + SVal getBindingForField(Store store, const FieldRegion *R); + + SVal getBindingForObjCIvar(Store store, const ObjCIvarRegion *R); + + SVal getBindingForVar(Store store, const VarRegion *R); + + SVal getBindingForLazySymbol(const TypedValueRegion *R); + + SVal getBindingForFieldOrElementCommon(Store store, const TypedValueRegion *R, + QualType Ty, const MemRegion *superR); + + SVal getLazyBinding(const MemRegion *lazyBindingRegion, + Store lazyBindingStore); + + /// Get bindings for the values in a struct and return a CompoundVal, used + /// when doing struct copy: + /// struct s x, y; + /// x = y; + /// y's value is retrieved by this method. + SVal getBindingForStruct(Store store, const TypedValueRegion* R); + + SVal getBindingForArray(Store store, const TypedValueRegion* R); + + /// Used to lazily generate derived symbols for bindings that are defined + /// implicitly by default bindings in a super region. + Optional getBindingForDerivedDefaultValue(RegionBindings B, + const MemRegion *superR, + const TypedValueRegion *R, + QualType Ty); + + /// Get the state and region whose binding this region R corresponds to. + std::pair + GetLazyBinding(RegionBindings B, const MemRegion *R, + const MemRegion *originalRegion); + + StoreRef CopyLazyBindings(nonloc::LazyCompoundVal V, Store store, + const TypedRegion *R); + + //===------------------------------------------------------------------===// + // State pruning. + //===------------------------------------------------------------------===// + + /// removeDeadBindings - Scans the RegionStore of 'state' for dead values. + /// It returns a new Store with these values removed. + StoreRef removeDeadBindings(Store store, const StackFrameContext *LCtx, + SymbolReaper& SymReaper); + + StoreRef enterStackFrame(ProgramStateRef state, + const LocationContext *callerCtx, + const StackFrameContext *calleeCtx); + + //===------------------------------------------------------------------===// + // Region "extents". + //===------------------------------------------------------------------===// + + // FIXME: This method will soon be eliminated; see the note in Store.h. + DefinedOrUnknownSVal getSizeInElements(ProgramStateRef state, + const MemRegion* R, QualType EleTy); + + //===------------------------------------------------------------------===// + // Utility methods. + //===------------------------------------------------------------------===// + + static inline RegionBindings GetRegionBindings(Store store) { + return RegionBindings(static_cast(store)); + } + + void print(Store store, raw_ostream &Out, const char* nl, + const char *sep); + + void iterBindings(Store store, BindingsHandler& f) { + RegionBindings B = GetRegionBindings(store); + for (RegionBindings::iterator I=B.begin(), E=B.end(); I!=E; ++I) { + const BindingKey &K = I.getKey(); + if (!K.isDirect()) + continue; + if (const SubRegion *R = dyn_cast(I.getKey().getRegion())) { + // FIXME: Possibly incorporate the offset? + if (!f.HandleBinding(*this, store, R, I.getData())) + return; + } + } + } +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// RegionStore creation. +//===----------------------------------------------------------------------===// + +StoreManager *ento::CreateRegionStoreManager(ProgramStateManager& StMgr) { + RegionStoreFeatures F = maximal_features_tag(); + return new RegionStoreManager(StMgr, F); +} + +StoreManager * +ento::CreateFieldsOnlyRegionStoreManager(ProgramStateManager &StMgr) { + RegionStoreFeatures F = minimal_features_tag(); + F.enableFields(true); + return new RegionStoreManager(StMgr, F); +} + + +RegionStoreSubRegionMap* +RegionStoreManager::getRegionStoreSubRegionMap(Store store) { + RegionBindings B = GetRegionBindings(store); + RegionStoreSubRegionMap *M = new RegionStoreSubRegionMap(); + + SmallVector WL; + + for (RegionBindings::iterator I=B.begin(), E=B.end(); I!=E; ++I) + if (const SubRegion *R = dyn_cast(I.getKey().getRegion())) + M->process(WL, R); + + // We also need to record in the subregion map "intermediate" regions that + // don't have direct bindings but are super regions of those that do. + while (!WL.empty()) { + const SubRegion *R = WL.back(); + WL.pop_back(); + M->process(WL, R); + } + + return M; +} + +//===----------------------------------------------------------------------===// +// Region Cluster analysis. +//===----------------------------------------------------------------------===// + +namespace { +template +class ClusterAnalysis { +protected: + typedef BumpVector RegionCluster; + typedef llvm::DenseMap ClusterMap; + llvm::DenseMap Visited; + typedef SmallVector, 10> + WorkList; + + BumpVectorContext BVC; + ClusterMap ClusterM; + WorkList WL; + + RegionStoreManager &RM; + ASTContext &Ctx; + SValBuilder &svalBuilder; + + RegionBindings B; + + const bool includeGlobals; + +public: + ClusterAnalysis(RegionStoreManager &rm, ProgramStateManager &StateMgr, + RegionBindings b, const bool includeGlobals) + : RM(rm), Ctx(StateMgr.getContext()), + svalBuilder(StateMgr.getSValBuilder()), + B(b), includeGlobals(includeGlobals) {} + + RegionBindings getRegionBindings() const { return B; } + + RegionCluster &AddToCluster(BindingKey K) { + const MemRegion *R = K.getRegion(); + const MemRegion *baseR = R->getBaseRegion(); + RegionCluster &C = getCluster(baseR); + C.push_back(K, BVC); + static_cast(this)->VisitAddedToCluster(baseR, C); + return C; + } + + bool isVisited(const MemRegion *R) { + return (bool) Visited[&getCluster(R->getBaseRegion())]; + } + + RegionCluster& getCluster(const MemRegion *R) { + RegionCluster *&CRef = ClusterM[R]; + if (!CRef) { + void *Mem = BVC.getAllocator().template Allocate(); + CRef = new (Mem) RegionCluster(BVC, 10); + } + return *CRef; + } + + void GenerateClusters() { + // Scan the entire set of bindings and make the region clusters. + for (RegionBindings::iterator RI = B.begin(), RE = B.end(); RI != RE; ++RI){ + RegionCluster &C = AddToCluster(RI.getKey()); + if (const MemRegion *R = RI.getData().getAsRegion()) { + // Generate a cluster, but don't add the region to the cluster + // if there aren't any bindings. + getCluster(R->getBaseRegion()); + } + if (includeGlobals) { + const MemRegion *R = RI.getKey().getRegion(); + if (isa(R->getMemorySpace())) + AddToWorkList(R, C); + } + } + } + + bool AddToWorkList(const MemRegion *R, RegionCluster &C) { + if (unsigned &visited = Visited[&C]) + return false; + else + visited = 1; + + WL.push_back(std::make_pair(R, &C)); + return true; + } + + bool AddToWorkList(BindingKey K) { + return AddToWorkList(K.getRegion()); + } + + bool AddToWorkList(const MemRegion *R) { + const MemRegion *baseR = R->getBaseRegion(); + return AddToWorkList(baseR, getCluster(baseR)); + } + + void RunWorkList() { + while (!WL.empty()) { + const MemRegion *baseR; + RegionCluster *C; + llvm::tie(baseR, C) = WL.back(); + WL.pop_back(); + + // First visit the cluster. + static_cast(this)->VisitCluster(baseR, C->begin(), C->end()); + + // Next, visit the base region. + static_cast(this)->VisitBaseRegion(baseR); + } + } + +public: + void VisitAddedToCluster(const MemRegion *baseR, RegionCluster &C) {} + void VisitCluster(const MemRegion *baseR, BindingKey *I, BindingKey *E) {} + void VisitBaseRegion(const MemRegion *baseR) {} +}; +} + +//===----------------------------------------------------------------------===// +// Binding invalidation. +//===----------------------------------------------------------------------===// + +void RegionStoreManager::RemoveSubRegionBindings(RegionBindings &B, + const MemRegion *R, + RegionStoreSubRegionMap &M) { + + if (const RegionStoreSubRegionMap::Set *S = M.getSubRegions(R)) + for (RegionStoreSubRegionMap::Set::iterator I = S->begin(), E = S->end(); + I != E; ++I) + RemoveSubRegionBindings(B, *I, M); + + B = removeBinding(B, R); +} + +namespace { +class invalidateRegionsWorker : public ClusterAnalysis +{ + const Expr *Ex; + unsigned Count; + const LocationContext *LCtx; + StoreManager::InvalidatedSymbols &IS; + StoreManager::InvalidatedRegions *Regions; +public: + invalidateRegionsWorker(RegionStoreManager &rm, + ProgramStateManager &stateMgr, + RegionBindings b, + const Expr *ex, unsigned count, + const LocationContext *lctx, + StoreManager::InvalidatedSymbols &is, + StoreManager::InvalidatedRegions *r, + bool includeGlobals) + : ClusterAnalysis(rm, stateMgr, b, includeGlobals), + Ex(ex), Count(count), LCtx(lctx), IS(is), Regions(r) {} + + void VisitCluster(const MemRegion *baseR, BindingKey *I, BindingKey *E); + void VisitBaseRegion(const MemRegion *baseR); + +private: + void VisitBinding(SVal V); +}; +} + +void invalidateRegionsWorker::VisitBinding(SVal V) { + // A symbol? Mark it touched by the invalidation. + if (SymbolRef Sym = V.getAsSymbol()) + IS.insert(Sym); + + if (const MemRegion *R = V.getAsRegion()) { + AddToWorkList(R); + return; + } + + // Is it a LazyCompoundVal? All references get invalidated as well. + if (const nonloc::LazyCompoundVal *LCS = + dyn_cast(&V)) { + + const MemRegion *LazyR = LCS->getRegion(); + RegionBindings B = RegionStoreManager::GetRegionBindings(LCS->getStore()); + + for (RegionBindings::iterator RI = B.begin(), RE = B.end(); RI != RE; ++RI){ + const SubRegion *baseR = dyn_cast(RI.getKey().getRegion()); + if (baseR && baseR->isSubRegionOf(LazyR)) + VisitBinding(RI.getData()); + } + + return; + } +} + +void invalidateRegionsWorker::VisitCluster(const MemRegion *baseR, + BindingKey *I, BindingKey *E) { + for ( ; I != E; ++I) { + // Get the old binding. Is it a region? If so, add it to the worklist. + const BindingKey &K = *I; + if (const SVal *V = RM.lookup(B, K)) + VisitBinding(*V); + + B = RM.removeBinding(B, K); + } +} + +void invalidateRegionsWorker::VisitBaseRegion(const MemRegion *baseR) { + // Symbolic region? Mark that symbol touched by the invalidation. + if (const SymbolicRegion *SR = dyn_cast(baseR)) + IS.insert(SR->getSymbol()); + + // BlockDataRegion? If so, invalidate captured variables that are passed + // by reference. + if (const BlockDataRegion *BR = dyn_cast(baseR)) { + for (BlockDataRegion::referenced_vars_iterator + BI = BR->referenced_vars_begin(), BE = BR->referenced_vars_end() ; + BI != BE; ++BI) { + const VarRegion *VR = *BI; + const VarDecl *VD = VR->getDecl(); + if (VD->getAttr() || !VD->hasLocalStorage()) + AddToWorkList(VR); + } + return; + } + + // Otherwise, we have a normal data region. Record that we touched the region. + if (Regions) + Regions->push_back(baseR); + + if (isa(baseR) || isa(baseR)) { + // Invalidate the region by setting its default value to + // conjured symbol. The type of the symbol is irrelavant. + DefinedOrUnknownSVal V = + svalBuilder.getConjuredSymbolVal(baseR, Ex, LCtx, Ctx.IntTy, Count); + B = RM.addBinding(B, baseR, BindingKey::Default, V); + return; + } + + if (!baseR->isBoundable()) + return; + + const TypedValueRegion *TR = cast(baseR); + QualType T = TR->getValueType(); + + // Invalidate the binding. + if (T->isStructureOrClassType()) { + // Invalidate the region by setting its default value to + // conjured symbol. The type of the symbol is irrelavant. + DefinedOrUnknownSVal V = + svalBuilder.getConjuredSymbolVal(baseR, Ex, LCtx, Ctx.IntTy, Count); + B = RM.addBinding(B, baseR, BindingKey::Default, V); + return; + } + + if (const ArrayType *AT = Ctx.getAsArrayType(T)) { + // Set the default value of the array to conjured symbol. + DefinedOrUnknownSVal V = + svalBuilder.getConjuredSymbolVal(baseR, Ex, LCtx, + AT->getElementType(), Count); + B = RM.addBinding(B, baseR, BindingKey::Default, V); + return; + } + + if (includeGlobals && + isa(baseR->getMemorySpace())) { + // If the region is a global and we are invalidating all globals, + // just erase the entry. This causes all globals to be lazily + // symbolicated from the same base symbol. + B = RM.removeBinding(B, baseR); + return; + } + + + DefinedOrUnknownSVal V = svalBuilder.getConjuredSymbolVal(baseR, Ex, LCtx, + T,Count); + assert(SymbolManager::canSymbolicate(T) || V.isUnknown()); + B = RM.addBinding(B, baseR, BindingKey::Direct, V); +} + +RegionBindings RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K, + const Expr *Ex, + unsigned Count, + const LocationContext *LCtx, + RegionBindings B, + InvalidatedRegions *Invalidated) { + // Bind the globals memory space to a new symbol that we will use to derive + // the bindings for all globals. + const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K); + SVal V = + svalBuilder.getConjuredSymbolVal(/* SymbolTag = */ (void*) GS, Ex, LCtx, + /* symbol type, doesn't matter */ Ctx.IntTy, + Count); + + B = removeBinding(B, GS); + B = addBinding(B, BindingKey::Make(GS, BindingKey::Default), V); + + // Even if there are no bindings in the global scope, we still need to + // record that we touched it. + if (Invalidated) + Invalidated->push_back(GS); + + return B; +} + +StoreRef RegionStoreManager::invalidateRegions(Store store, + ArrayRef Regions, + const Expr *Ex, unsigned Count, + const LocationContext *LCtx, + InvalidatedSymbols &IS, + const CallOrObjCMessage *Call, + InvalidatedRegions *Invalidated) { + invalidateRegionsWorker W(*this, StateMgr, + RegionStoreManager::GetRegionBindings(store), + Ex, Count, LCtx, IS, Invalidated, false); + + // Scan the bindings and generate the clusters. + W.GenerateClusters(); + + // Add the regions to the worklist. + for (ArrayRef::iterator + I = Regions.begin(), E = Regions.end(); I != E; ++I) + W.AddToWorkList(*I); + + W.RunWorkList(); + + // Return the new bindings. + RegionBindings B = W.getRegionBindings(); + + // For all globals which are not static nor immutable: determine which global + // regions should be invalidated and invalidate them. + // TODO: This could possibly be more precise with modules. + // + // System calls invalidate only system globals. + if (Call && Call->isInSystemHeader()) { + B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind, + Ex, Count, LCtx, B, Invalidated); + // Internal calls might invalidate both system and internal globals. + } else { + B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind, + Ex, Count, LCtx, B, Invalidated); + B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind, + Ex, Count, LCtx, B, Invalidated); + } + + return StoreRef(B.getRootWithoutRetain(), *this); +} + +//===----------------------------------------------------------------------===// +// Extents for regions. +//===----------------------------------------------------------------------===// + +DefinedOrUnknownSVal +RegionStoreManager::getSizeInElements(ProgramStateRef state, + const MemRegion *R, + QualType EleTy) { + SVal Size = cast(R)->getExtent(svalBuilder); + const llvm::APSInt *SizeInt = svalBuilder.getKnownValue(state, Size); + if (!SizeInt) + return UnknownVal(); + + CharUnits RegionSize = CharUnits::fromQuantity(SizeInt->getSExtValue()); + + if (Ctx.getAsVariableArrayType(EleTy)) { + // FIXME: We need to track extra state to properly record the size + // of VLAs. Returning UnknownVal here, however, is a stop-gap so that + // we don't have a divide-by-zero below. + return UnknownVal(); + } + + CharUnits EleSize = Ctx.getTypeSizeInChars(EleTy); + + // If a variable is reinterpreted as a type that doesn't fit into a larger + // type evenly, round it down. + // This is a signed value, since it's used in arithmetic with signed indices. + return svalBuilder.makeIntVal(RegionSize / EleSize, false); +} + +//===----------------------------------------------------------------------===// +// Location and region casting. +//===----------------------------------------------------------------------===// + +/// ArrayToPointer - Emulates the "decay" of an array to a pointer +/// type. 'Array' represents the lvalue of the array being decayed +/// to a pointer, and the returned SVal represents the decayed +/// version of that lvalue (i.e., a pointer to the first element of +/// the array). This is called by ExprEngine when evaluating casts +/// from arrays to pointers. +SVal RegionStoreManager::ArrayToPointer(Loc Array) { + if (!isa(Array)) + return UnknownVal(); + + const MemRegion* R = cast(&Array)->getRegion(); + const TypedValueRegion* ArrayR = dyn_cast(R); + + if (!ArrayR) + return UnknownVal(); + + // Strip off typedefs from the ArrayRegion's ValueType. + QualType T = ArrayR->getValueType().getDesugaredType(Ctx); + const ArrayType *AT = cast(T); + T = AT->getElementType(); + + NonLoc ZeroIdx = svalBuilder.makeZeroArrayIndex(); + return loc::MemRegionVal(MRMgr.getElementRegion(T, ZeroIdx, ArrayR, Ctx)); +} + +SVal RegionStoreManager::evalDerivedToBase(SVal derived, QualType baseType) { + const CXXRecordDecl *baseDecl; + if (baseType->isPointerType()) + baseDecl = baseType->getCXXRecordDeclForPointerType(); + else + baseDecl = baseType->getAsCXXRecordDecl(); + + assert(baseDecl && "not a CXXRecordDecl?"); + + loc::MemRegionVal *derivedRegVal = dyn_cast(&derived); + if (!derivedRegVal) + return derived; + + const MemRegion *baseReg = + MRMgr.getCXXBaseObjectRegion(baseDecl, derivedRegVal->getRegion()); + + return loc::MemRegionVal(baseReg); +} + +SVal RegionStoreManager::evalDynamicCast(SVal base, QualType derivedType, + bool &Failed) { + Failed = false; + + loc::MemRegionVal *baseRegVal = dyn_cast(&base); + if (!baseRegVal) + return UnknownVal(); + const MemRegion *BaseRegion = baseRegVal->stripCasts(); + + // Assume the derived class is a pointer or a reference to a CXX record. + derivedType = derivedType->getPointeeType(); + assert(!derivedType.isNull()); + const CXXRecordDecl *DerivedDecl = derivedType->getAsCXXRecordDecl(); + if (!DerivedDecl && !derivedType->isVoidType()) + return UnknownVal(); + + // Drill down the CXXBaseObject chains, which represent upcasts (casts from + // derived to base). + const MemRegion *SR = BaseRegion; + while (const TypedRegion *TSR = dyn_cast_or_null(SR)) { + QualType BaseType = TSR->getLocationType()->getPointeeType(); + assert(!BaseType.isNull()); + const CXXRecordDecl *SRDecl = BaseType->getAsCXXRecordDecl(); + if (!SRDecl) + return UnknownVal(); + + // If found the derived class, the cast succeeds. + if (SRDecl == DerivedDecl) + return loc::MemRegionVal(TSR); + + // If the region type is a subclass of the derived type. + if (!derivedType->isVoidType() && SRDecl->isDerivedFrom(DerivedDecl)) { + // This occurs in two cases. + // 1) We are processing an upcast. + // 2) We are processing a downcast but we jumped directly from the + // ancestor to a child of the cast value, so conjure the + // appropriate region to represent value (the intermediate node). + return loc::MemRegionVal(MRMgr.getCXXBaseObjectRegion(DerivedDecl, + BaseRegion)); + } + + // If super region is not a parent of derived class, the cast definitely + // fails. + if (!derivedType->isVoidType() && + DerivedDecl->isProvablyNotDerivedFrom(SRDecl)) { + Failed = true; + return UnknownVal(); + } + + if (const CXXBaseObjectRegion *R = dyn_cast(TSR)) + // Drill down the chain to get the derived classes. + SR = R->getSuperRegion(); + else { + // We reached the bottom of the hierarchy. + + // If this is a cast to void*, return the region. + if (derivedType->isVoidType()) + return loc::MemRegionVal(TSR); + + // We did not find the derived class. We we must be casting the base to + // derived, so the cast should fail. + Failed = true; + return UnknownVal(); + } + } + + return UnknownVal(); +} + +//===----------------------------------------------------------------------===// +// Loading values from regions. +//===----------------------------------------------------------------------===// + +Optional RegionStoreManager::getDirectBinding(RegionBindings B, + const MemRegion *R) { + + if (const SVal *V = lookup(B, R, BindingKey::Direct)) + return *V; + + return Optional(); +} + +Optional RegionStoreManager::getDefaultBinding(RegionBindings B, + const MemRegion *R) { + if (R->isBoundable()) + if (const TypedValueRegion *TR = dyn_cast(R)) + if (TR->getValueType()->isUnionType()) + return UnknownVal(); + + if (const SVal *V = lookup(B, R, BindingKey::Default)) + return *V; + + return Optional(); +} + +SVal RegionStoreManager::getBinding(Store store, Loc L, QualType T) { + assert(!isa(L) && "location unknown"); + assert(!isa(L) && "location undefined"); + + // For access to concrete addresses, return UnknownVal. Checks + // for null dereferences (and similar errors) are done by checkers, not + // the Store. + // FIXME: We can consider lazily symbolicating such memory, but we really + // should defer this when we can reason easily about symbolicating arrays + // of bytes. + if (isa(L)) { + return UnknownVal(); + } + if (!isa(L)) { + return UnknownVal(); + } + + const MemRegion *MR = cast(L).getRegion(); + + if (isa(MR) || + isa(MR) || + isa(MR)) { + if (T.isNull()) { + if (const TypedRegion *TR = dyn_cast(MR)) + T = TR->getLocationType(); + else { + const SymbolicRegion *SR = cast(MR); + T = SR->getSymbol()->getType(Ctx); + } + } + MR = GetElementZeroRegion(MR, T); + } + + // FIXME: Perhaps this method should just take a 'const MemRegion*' argument + // instead of 'Loc', and have the other Loc cases handled at a higher level. + const TypedValueRegion *R = cast(MR); + QualType RTy = R->getValueType(); + + // FIXME: We should eventually handle funny addressing. e.g.: + // + // int x = ...; + // int *p = &x; + // char *q = (char*) p; + // char c = *q; // returns the first byte of 'x'. + // + // Such funny addressing will occur due to layering of regions. + + if (RTy->isStructureOrClassType()) + return getBindingForStruct(store, R); + + // FIXME: Handle unions. + if (RTy->isUnionType()) + return UnknownVal(); + + if (RTy->isArrayType()) + return getBindingForArray(store, R); + + // FIXME: handle Vector types. + if (RTy->isVectorType()) + return UnknownVal(); + + if (const FieldRegion* FR = dyn_cast(R)) + return CastRetrievedVal(getBindingForField(store, FR), FR, T, false); + + if (const ElementRegion* ER = dyn_cast(R)) { + // FIXME: Here we actually perform an implicit conversion from the loaded + // value to the element type. Eventually we want to compose these values + // more intelligently. For example, an 'element' can encompass multiple + // bound regions (e.g., several bound bytes), or could be a subset of + // a larger value. + return CastRetrievedVal(getBindingForElement(store, ER), ER, T, false); + } + + if (const ObjCIvarRegion *IVR = dyn_cast(R)) { + // FIXME: Here we actually perform an implicit conversion from the loaded + // value to the ivar type. What we should model is stores to ivars + // that blow past the extent of the ivar. If the address of the ivar is + // reinterpretted, it is possible we stored a different value that could + // fit within the ivar. Either we need to cast these when storing them + // or reinterpret them lazily (as we do here). + return CastRetrievedVal(getBindingForObjCIvar(store, IVR), IVR, T, false); + } + + if (const VarRegion *VR = dyn_cast(R)) { + // FIXME: Here we actually perform an implicit conversion from the loaded + // value to the variable type. What we should model is stores to variables + // that blow past the extent of the variable. If the address of the + // variable is reinterpretted, it is possible we stored a different value + // that could fit within the variable. Either we need to cast these when + // storing them or reinterpret them lazily (as we do here). + return CastRetrievedVal(getBindingForVar(store, VR), VR, T, false); + } + + RegionBindings B = GetRegionBindings(store); + const SVal *V = lookup(B, R, BindingKey::Direct); + + // Check if the region has a binding. + if (V) + return *V; + + // The location does not have a bound value. This means that it has + // the value it had upon its creation and/or entry to the analyzed + // function/method. These are either symbolic values or 'undefined'. + if (R->hasStackNonParametersStorage()) { + // All stack variables are considered to have undefined values + // upon creation. All heap allocated blocks are considered to + // have undefined values as well unless they are explicitly bound + // to specific values. + return UndefinedVal(); + } + + // All other values are symbolic. + return svalBuilder.getRegionValueSymbolVal(R); +} + +std::pair +RegionStoreManager::GetLazyBinding(RegionBindings B, const MemRegion *R, + const MemRegion *originalRegion) { + + if (originalRegion != R) { + if (Optional OV = getDefaultBinding(B, R)) { + if (const nonloc::LazyCompoundVal *V = + dyn_cast(OV.getPointer())) + return std::make_pair(V->getStore(), V->getRegion()); + } + } + + if (const ElementRegion *ER = dyn_cast(R)) { + const std::pair &X = + GetLazyBinding(B, ER->getSuperRegion(), originalRegion); + + if (X.second) + return std::make_pair(X.first, + MRMgr.getElementRegionWithSuper(ER, X.second)); + } + else if (const FieldRegion *FR = dyn_cast(R)) { + const std::pair &X = + GetLazyBinding(B, FR->getSuperRegion(), originalRegion); + + if (X.second) + return std::make_pair(X.first, + MRMgr.getFieldRegionWithSuper(FR, X.second)); + } + // C++ base object region is another kind of region that we should blast + // through to look for lazy compound value. It is like a field region. + else if (const CXXBaseObjectRegion *baseReg = + dyn_cast(R)) { + const std::pair &X = + GetLazyBinding(B, baseReg->getSuperRegion(), originalRegion); + + if (X.second) + return std::make_pair(X.first, + MRMgr.getCXXBaseObjectRegionWithSuper(baseReg, X.second)); + } + + // The NULL MemRegion indicates an non-existent lazy binding. A NULL Store is + // possible for a valid lazy binding. + return std::make_pair((Store) 0, (const MemRegion *) 0); +} + +SVal RegionStoreManager::getBindingForElement(Store store, + const ElementRegion* R) { + // Check if the region has a binding. + RegionBindings B = GetRegionBindings(store); + if (const Optional &V = getDirectBinding(B, R)) + return *V; + + const MemRegion* superR = R->getSuperRegion(); + + // Check if the region is an element region of a string literal. + if (const StringRegion *StrR=dyn_cast(superR)) { + // FIXME: Handle loads from strings where the literal is treated as + // an integer, e.g., *((unsigned int*)"hello") + QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType(); + if (T != Ctx.getCanonicalType(R->getElementType())) + return UnknownVal(); + + const StringLiteral *Str = StrR->getStringLiteral(); + SVal Idx = R->getIndex(); + if (nonloc::ConcreteInt *CI = dyn_cast(&Idx)) { + int64_t i = CI->getValue().getSExtValue(); + // Abort on string underrun. This can be possible by arbitrary + // clients of getBindingForElement(). + if (i < 0) + return UndefinedVal(); + int64_t length = Str->getLength(); + // Technically, only i == length is guaranteed to be null. + // However, such overflows should be caught before reaching this point; + // the only time such an access would be made is if a string literal was + // used to initialize a larger array. + char c = (i >= length) ? '\0' : Str->getCodeUnit(i); + return svalBuilder.makeIntVal(c, T); + } + } + + // Check for loads from a code text region. For such loads, just give up. + if (isa(superR)) + return UnknownVal(); + + // Handle the case where we are indexing into a larger scalar object. + // For example, this handles: + // int x = ... + // char *y = &x; + // return *y; + // FIXME: This is a hack, and doesn't do anything really intelligent yet. + const RegionRawOffset &O = R->getAsArrayOffset(); + + // If we cannot reason about the offset, return an unknown value. + if (!O.getRegion()) + return UnknownVal(); + + if (const TypedValueRegion *baseR = + dyn_cast_or_null(O.getRegion())) { + QualType baseT = baseR->getValueType(); + if (baseT->isScalarType()) { + QualType elemT = R->getElementType(); + if (elemT->isScalarType()) { + if (Ctx.getTypeSizeInChars(baseT) >= Ctx.getTypeSizeInChars(elemT)) { + if (const Optional &V = getDirectBinding(B, superR)) { + if (SymbolRef parentSym = V->getAsSymbol()) + return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R); + + if (V->isUnknownOrUndef()) + return *V; + // Other cases: give up. We are indexing into a larger object + // that has some value, but we don't know how to handle that yet. + return UnknownVal(); + } + } + } + } + } + return getBindingForFieldOrElementCommon(store, R, R->getElementType(), + superR); +} + +SVal RegionStoreManager::getBindingForField(Store store, + const FieldRegion* R) { + + // Check if the region has a binding. + RegionBindings B = GetRegionBindings(store); + if (const Optional &V = getDirectBinding(B, R)) + return *V; + + QualType Ty = R->getValueType(); + return getBindingForFieldOrElementCommon(store, R, Ty, R->getSuperRegion()); +} + +Optional +RegionStoreManager::getBindingForDerivedDefaultValue(RegionBindings B, + const MemRegion *superR, + const TypedValueRegion *R, + QualType Ty) { + + if (const Optional &D = getDefaultBinding(B, superR)) { + const SVal &val = D.getValue(); + if (SymbolRef parentSym = val.getAsSymbol()) + return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R); + + if (val.isZeroConstant()) + return svalBuilder.makeZeroVal(Ty); + + if (val.isUnknownOrUndef()) + return val; + + // Lazy bindings are handled later. + if (isa(val)) + return Optional(); + + llvm_unreachable("Unknown default value"); + } + + return Optional(); +} + +SVal RegionStoreManager::getLazyBinding(const MemRegion *lazyBindingRegion, + Store lazyBindingStore) { + if (const ElementRegion *ER = dyn_cast(lazyBindingRegion)) + return getBindingForElement(lazyBindingStore, ER); + + return getBindingForField(lazyBindingStore, + cast(lazyBindingRegion)); +} + +SVal RegionStoreManager::getBindingForFieldOrElementCommon(Store store, + const TypedValueRegion *R, + QualType Ty, + const MemRegion *superR) { + + // At this point we have already checked in either getBindingForElement or + // getBindingForField if 'R' has a direct binding. + RegionBindings B = GetRegionBindings(store); + + // Record whether or not we see a symbolic index. That can completely + // be out of scope of our lookup. + bool hasSymbolicIndex = false; + + while (superR) { + if (const Optional &D = + getBindingForDerivedDefaultValue(B, superR, R, Ty)) + return *D; + + if (const ElementRegion *ER = dyn_cast(superR)) { + NonLoc index = ER->getIndex(); + if (!index.isConstant()) + hasSymbolicIndex = true; + } + + // If our super region is a field or element itself, walk up the region + // hierarchy to see if there is a default value installed in an ancestor. + if (const SubRegion *SR = dyn_cast(superR)) { + superR = SR->getSuperRegion(); + continue; + } + break; + } + + // Lazy binding? + Store lazyBindingStore = NULL; + const MemRegion *lazyBindingRegion = NULL; + llvm::tie(lazyBindingStore, lazyBindingRegion) = GetLazyBinding(B, R, R); + + if (lazyBindingRegion) + return getLazyBinding(lazyBindingRegion, lazyBindingStore); + + if (R->hasStackNonParametersStorage()) { + if (isa(R)) { + // Currently we don't reason specially about Clang-style vectors. Check + // if superR is a vector and if so return Unknown. + if (const TypedValueRegion *typedSuperR = + dyn_cast(superR)) { + if (typedSuperR->getValueType()->isVectorType()) + return UnknownVal(); + } + } + + // FIXME: We also need to take ElementRegions with symbolic indexes into + // account. This case handles both directly accessing an ElementRegion + // with a symbolic offset, but also fields within an element with + // a symbolic offset. + if (hasSymbolicIndex) + return UnknownVal(); + + return UndefinedVal(); + } + + // All other values are symbolic. + return svalBuilder.getRegionValueSymbolVal(R); +} + +SVal RegionStoreManager::getBindingForObjCIvar(Store store, + const ObjCIvarRegion* R) { + + // Check if the region has a binding. + RegionBindings B = GetRegionBindings(store); + + if (const Optional &V = getDirectBinding(B, R)) + return *V; + + const MemRegion *superR = R->getSuperRegion(); + + // Check if the super region has a default binding. + if (const Optional &V = getDefaultBinding(B, superR)) { + if (SymbolRef parentSym = V->getAsSymbol()) + return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R); + + // Other cases: give up. + return UnknownVal(); + } + + return getBindingForLazySymbol(R); +} + +SVal RegionStoreManager::getBindingForVar(Store store, const VarRegion *R) { + + // Check if the region has a binding. + RegionBindings B = GetRegionBindings(store); + + if (const Optional &V = getDirectBinding(B, R)) + return *V; + + // Lazily derive a value for the VarRegion. + const VarDecl *VD = R->getDecl(); + QualType T = VD->getType(); + const MemSpaceRegion *MS = R->getMemorySpace(); + + if (isa(MS) || + isa(MS)) + return svalBuilder.getRegionValueSymbolVal(R); + + if (isa(MS)) { + if (isa(MS)) { + // Is 'VD' declared constant? If so, retrieve the constant value. + QualType CT = Ctx.getCanonicalType(T); + if (CT.isConstQualified()) { + const Expr *Init = VD->getInit(); + // Do the null check first, as we want to call 'IgnoreParenCasts'. + if (Init) + if (const IntegerLiteral *IL = + dyn_cast(Init->IgnoreParenCasts())) { + const nonloc::ConcreteInt &V = svalBuilder.makeIntVal(IL); + return svalBuilder.evalCast(V, Init->getType(), IL->getType()); + } + } + + if (const Optional &V + = getBindingForDerivedDefaultValue(B, MS, R, CT)) + return V.getValue(); + + return svalBuilder.getRegionValueSymbolVal(R); + } + + if (T->isIntegerType()) + return svalBuilder.makeIntVal(0, T); + if (T->isPointerType()) + return svalBuilder.makeNull(); + + return UnknownVal(); + } + + return UndefinedVal(); +} + +SVal RegionStoreManager::getBindingForLazySymbol(const TypedValueRegion *R) { + // All other values are symbolic. + return svalBuilder.getRegionValueSymbolVal(R); +} + +SVal RegionStoreManager::getBindingForStruct(Store store, + const TypedValueRegion* R) { + assert(R->getValueType()->isStructureOrClassType()); + return svalBuilder.makeLazyCompoundVal(StoreRef(store, *this), R); +} + +SVal RegionStoreManager::getBindingForArray(Store store, + const TypedValueRegion * R) { + assert(Ctx.getAsConstantArrayType(R->getValueType())); + return svalBuilder.makeLazyCompoundVal(StoreRef(store, *this), R); +} + +bool RegionStoreManager::includedInBindings(Store store, + const MemRegion *region) const { + RegionBindings B = GetRegionBindings(store); + region = region->getBaseRegion(); + + for (RegionBindings::iterator it = B.begin(), ei = B.end(); it != ei; ++it) { + const BindingKey &K = it.getKey(); + if (region == K.getRegion()) + return true; + const SVal &D = it.getData(); + if (const MemRegion *r = D.getAsRegion()) + if (r == region) + return true; + } + return false; +} + +//===----------------------------------------------------------------------===// +// Binding values to regions. +//===----------------------------------------------------------------------===// + +StoreRef RegionStoreManager::Remove(Store store, Loc L) { + if (isa(L)) + if (const MemRegion* R = cast(L).getRegion()) + return StoreRef(removeBinding(GetRegionBindings(store), + R).getRootWithoutRetain(), + *this); + + return StoreRef(store, *this); +} + +StoreRef RegionStoreManager::Bind(Store store, Loc L, SVal V) { + if (isa(L)) + return StoreRef(store, *this); + + // If we get here, the location should be a region. + const MemRegion *R = cast(L).getRegion(); + + // Check if the region is a struct region. + if (const TypedValueRegion* TR = dyn_cast(R)) + if (TR->getValueType()->isStructureOrClassType()) + return BindStruct(store, TR, V); + + if (const ElementRegion *ER = dyn_cast(R)) { + if (ER->getIndex().isZeroConstant()) { + if (const TypedValueRegion *superR = + dyn_cast(ER->getSuperRegion())) { + QualType superTy = superR->getValueType(); + // For now, just invalidate the fields of the struct/union/class. + // This is for test rdar_test_7185607 in misc-ps-region-store.m. + // FIXME: Precisely handle the fields of the record. + if (superTy->isStructureOrClassType()) + return KillStruct(store, superR, UnknownVal()); + } + } + } + else if (const SymbolicRegion *SR = dyn_cast(R)) { + // Binding directly to a symbolic region should be treated as binding + // to element 0. + QualType T = SR->getSymbol()->getType(Ctx); + + // FIXME: Is this the right way to handle symbols that are references? + if (const PointerType *PT = T->getAs()) + T = PT->getPointeeType(); + else + T = T->getAs()->getPointeeType(); + + R = GetElementZeroRegion(SR, T); + } + + // Perform the binding. + RegionBindings B = GetRegionBindings(store); + return StoreRef(addBinding(B, R, BindingKey::Direct, + V).getRootWithoutRetain(), *this); +} + +StoreRef RegionStoreManager::BindDecl(Store store, const VarRegion *VR, + SVal InitVal) { + + QualType T = VR->getDecl()->getType(); + + if (T->isArrayType()) + return BindArray(store, VR, InitVal); + if (T->isStructureOrClassType()) + return BindStruct(store, VR, InitVal); + + return Bind(store, svalBuilder.makeLoc(VR), InitVal); +} + +// FIXME: this method should be merged into Bind(). +StoreRef RegionStoreManager::BindCompoundLiteral(Store store, + const CompoundLiteralExpr *CL, + const LocationContext *LC, + SVal V) { + return Bind(store, loc::MemRegionVal(MRMgr.getCompoundLiteralRegion(CL, LC)), + V); +} + +StoreRef RegionStoreManager::setImplicitDefaultValue(Store store, + const MemRegion *R, + QualType T) { + RegionBindings B = GetRegionBindings(store); + SVal V; + + if (Loc::isLocType(T)) + V = svalBuilder.makeNull(); + else if (T->isIntegerType()) + V = svalBuilder.makeZeroVal(T); + else if (T->isStructureOrClassType() || T->isArrayType()) { + // Set the default value to a zero constant when it is a structure + // or array. The type doesn't really matter. + V = svalBuilder.makeZeroVal(Ctx.IntTy); + } + else { + // We can't represent values of this type, but we still need to set a value + // to record that the region has been initialized. + // If this assertion ever fires, a new case should be added above -- we + // should know how to default-initialize any value we can symbolicate. + assert(!SymbolManager::canSymbolicate(T) && "This type is representable"); + V = UnknownVal(); + } + + return StoreRef(addBinding(B, R, BindingKey::Default, + V).getRootWithoutRetain(), *this); +} + +StoreRef RegionStoreManager::BindArray(Store store, const TypedValueRegion* R, + SVal Init) { + + const ArrayType *AT =cast(Ctx.getCanonicalType(R->getValueType())); + QualType ElementTy = AT->getElementType(); + Optional Size; + + if (const ConstantArrayType* CAT = dyn_cast(AT)) + Size = CAT->getSize().getZExtValue(); + + // Check if the init expr is a string literal. + if (loc::MemRegionVal *MRV = dyn_cast(&Init)) { + const StringRegion *S = cast(MRV->getRegion()); + + // Treat the string as a lazy compound value. + nonloc::LazyCompoundVal LCV = + cast(svalBuilder. + makeLazyCompoundVal(StoreRef(store, *this), S)); + return CopyLazyBindings(LCV, store, R); + } + + // Handle lazy compound values. + if (nonloc::LazyCompoundVal *LCV = dyn_cast(&Init)) + return CopyLazyBindings(*LCV, store, R); + + // Remaining case: explicit compound values. + + if (Init.isUnknown()) + return setImplicitDefaultValue(store, R, ElementTy); + + nonloc::CompoundVal& CV = cast(Init); + nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); + uint64_t i = 0; + + StoreRef newStore(store, *this); + for (; Size.hasValue() ? i < Size.getValue() : true ; ++i, ++VI) { + // The init list might be shorter than the array length. + if (VI == VE) + break; + + const NonLoc &Idx = svalBuilder.makeArrayIndex(i); + const ElementRegion *ER = MRMgr.getElementRegion(ElementTy, Idx, R, Ctx); + + if (ElementTy->isStructureOrClassType()) + newStore = BindStruct(newStore.getStore(), ER, *VI); + else if (ElementTy->isArrayType()) + newStore = BindArray(newStore.getStore(), ER, *VI); + else + newStore = Bind(newStore.getStore(), svalBuilder.makeLoc(ER), *VI); + } + + // If the init list is shorter than the array length, set the + // array default value. + if (Size.hasValue() && i < Size.getValue()) + newStore = setImplicitDefaultValue(newStore.getStore(), R, ElementTy); + + return newStore; +} + +StoreRef RegionStoreManager::BindStruct(Store store, const TypedValueRegion* R, + SVal V) { + + if (!Features.supportsFields()) + return StoreRef(store, *this); + + QualType T = R->getValueType(); + assert(T->isStructureOrClassType()); + + const RecordType* RT = T->getAs(); + RecordDecl *RD = RT->getDecl(); + + if (!RD->isCompleteDefinition()) + return StoreRef(store, *this); + + // Handle lazy compound values. + if (const nonloc::LazyCompoundVal *LCV=dyn_cast(&V)) + return CopyLazyBindings(*LCV, store, R); + + // We may get non-CompoundVal accidentally due to imprecise cast logic or + // that we are binding symbolic struct value. Kill the field values, and if + // the value is symbolic go and bind it as a "default" binding. + if (V.isUnknown() || !isa(V)) { + SVal SV = isa(V) ? V : UnknownVal(); + return KillStruct(store, R, SV); + } + + nonloc::CompoundVal& CV = cast(V); + nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); + + RecordDecl::field_iterator FI, FE; + StoreRef newStore(store, *this); + + for (FI = RD->field_begin(), FE = RD->field_end(); FI != FE; ++FI) { + + if (VI == VE) + break; + + // Skip any unnamed bitfields to stay in sync with the initializers. + if ((*FI)->isUnnamedBitfield()) + continue; + + QualType FTy = (*FI)->getType(); + const FieldRegion* FR = MRMgr.getFieldRegion(*FI, R); + + if (FTy->isArrayType()) + newStore = BindArray(newStore.getStore(), FR, *VI); + else if (FTy->isStructureOrClassType()) + newStore = BindStruct(newStore.getStore(), FR, *VI); + else + newStore = Bind(newStore.getStore(), svalBuilder.makeLoc(FR), *VI); + ++VI; + } + + // There may be fewer values in the initialize list than the fields of struct. + if (FI != FE) { + RegionBindings B = GetRegionBindings(newStore.getStore()); + B = addBinding(B, R, BindingKey::Default, svalBuilder.makeIntVal(0, false)); + newStore = StoreRef(B.getRootWithoutRetain(), *this); + } + + return newStore; +} + +StoreRef RegionStoreManager::KillStruct(Store store, const TypedRegion* R, + SVal DefaultVal) { + BindingKey key = BindingKey::Make(R, BindingKey::Default); + + // The BindingKey may be "invalid" if we cannot handle the region binding + // explicitly. One example is something like array[index], where index + // is a symbolic value. In such cases, we want to invalidate the entire + // array, as the index assignment could have been to any element. In + // the case of nested symbolic indices, we need to march up the region + // hierarchy untile we reach a region whose binding we can reason about. + const SubRegion *subReg = R; + + while (!key.isValid()) { + if (const SubRegion *tmp = dyn_cast(subReg->getSuperRegion())) { + subReg = tmp; + key = BindingKey::Make(tmp, BindingKey::Default); + } + else + break; + } + + // Remove the old bindings, using 'subReg' as the root of all regions + // we will invalidate. + RegionBindings B = GetRegionBindings(store); + OwningPtr + SubRegions(getRegionStoreSubRegionMap(store)); + RemoveSubRegionBindings(B, subReg, *SubRegions); + + // Set the default value of the struct region to "unknown". + if (!key.isValid()) + return StoreRef(B.getRootWithoutRetain(), *this); + + return StoreRef(addBinding(B, key, DefaultVal).getRootWithoutRetain(), *this); +} + +StoreRef RegionStoreManager::CopyLazyBindings(nonloc::LazyCompoundVal V, + Store store, + const TypedRegion *R) { + + // Nuke the old bindings stemming from R. + RegionBindings B = GetRegionBindings(store); + + OwningPtr + SubRegions(getRegionStoreSubRegionMap(store)); + + // B and DVM are updated after the call to RemoveSubRegionBindings. + RemoveSubRegionBindings(B, R, *SubRegions.get()); + + // Now copy the bindings. This amounts to just binding 'V' to 'R'. This + // results in a zero-copy algorithm. + return StoreRef(addBinding(B, R, BindingKey::Default, + V).getRootWithoutRetain(), *this); +} + +//===----------------------------------------------------------------------===// +// "Raw" retrievals and bindings. +//===----------------------------------------------------------------------===// + + +RegionBindings RegionStoreManager::addBinding(RegionBindings B, BindingKey K, + SVal V) { + if (!K.isValid()) + return B; + return RBFactory.add(B, K, V); +} + +RegionBindings RegionStoreManager::addBinding(RegionBindings B, + const MemRegion *R, + BindingKey::Kind k, SVal V) { + return addBinding(B, BindingKey::Make(R, k), V); +} + +const SVal *RegionStoreManager::lookup(RegionBindings B, BindingKey K) { + if (!K.isValid()) + return NULL; + return B.lookup(K); +} + +const SVal *RegionStoreManager::lookup(RegionBindings B, + const MemRegion *R, + BindingKey::Kind k) { + return lookup(B, BindingKey::Make(R, k)); +} + +RegionBindings RegionStoreManager::removeBinding(RegionBindings B, + BindingKey K) { + if (!K.isValid()) + return B; + return RBFactory.remove(B, K); +} + +RegionBindings RegionStoreManager::removeBinding(RegionBindings B, + const MemRegion *R, + BindingKey::Kind k){ + return removeBinding(B, BindingKey::Make(R, k)); +} + +//===----------------------------------------------------------------------===// +// State pruning. +//===----------------------------------------------------------------------===// + +namespace { +class removeDeadBindingsWorker : + public ClusterAnalysis { + SmallVector Postponed; + SymbolReaper &SymReaper; + const StackFrameContext *CurrentLCtx; + +public: + removeDeadBindingsWorker(RegionStoreManager &rm, + ProgramStateManager &stateMgr, + RegionBindings b, SymbolReaper &symReaper, + const StackFrameContext *LCtx) + : ClusterAnalysis(rm, stateMgr, b, + /* includeGlobals = */ false), + SymReaper(symReaper), CurrentLCtx(LCtx) {} + + // Called by ClusterAnalysis. + void VisitAddedToCluster(const MemRegion *baseR, RegionCluster &C); + void VisitCluster(const MemRegion *baseR, BindingKey *I, BindingKey *E); + + void VisitBindingKey(BindingKey K); + bool UpdatePostponed(); + void VisitBinding(SVal V); +}; +} + +void removeDeadBindingsWorker::VisitAddedToCluster(const MemRegion *baseR, + RegionCluster &C) { + + if (const VarRegion *VR = dyn_cast(baseR)) { + if (SymReaper.isLive(VR)) + AddToWorkList(baseR, C); + + return; + } + + if (const SymbolicRegion *SR = dyn_cast(baseR)) { + if (SymReaper.isLive(SR->getSymbol())) + AddToWorkList(SR, C); + else + Postponed.push_back(SR); + + return; + } + + if (isa(baseR)) { + AddToWorkList(baseR, C); + return; + } + + // CXXThisRegion in the current or parent location context is live. + if (const CXXThisRegion *TR = dyn_cast(baseR)) { + const StackArgumentsSpaceRegion *StackReg = + cast(TR->getSuperRegion()); + const StackFrameContext *RegCtx = StackReg->getStackFrame(); + if (RegCtx == CurrentLCtx || RegCtx->isParentOf(CurrentLCtx)) + AddToWorkList(TR, C); + } +} + +void removeDeadBindingsWorker::VisitCluster(const MemRegion *baseR, + BindingKey *I, BindingKey *E) { + for ( ; I != E; ++I) + VisitBindingKey(*I); +} + +void removeDeadBindingsWorker::VisitBinding(SVal V) { + // Is it a LazyCompoundVal? All referenced regions are live as well. + if (const nonloc::LazyCompoundVal *LCS = + dyn_cast(&V)) { + + const MemRegion *LazyR = LCS->getRegion(); + RegionBindings B = RegionStoreManager::GetRegionBindings(LCS->getStore()); + for (RegionBindings::iterator RI = B.begin(), RE = B.end(); RI != RE; ++RI){ + const SubRegion *baseR = dyn_cast(RI.getKey().getRegion()); + if (baseR && baseR->isSubRegionOf(LazyR)) + VisitBinding(RI.getData()); + } + return; + } + + // If V is a region, then add it to the worklist. + if (const MemRegion *R = V.getAsRegion()) + AddToWorkList(R); + + // Update the set of live symbols. + for (SymExpr::symbol_iterator SI = V.symbol_begin(), SE = V.symbol_end(); + SI!=SE; ++SI) + SymReaper.markLive(*SI); +} + +void removeDeadBindingsWorker::VisitBindingKey(BindingKey K) { + const MemRegion *R = K.getRegion(); + + // Mark this region "live" by adding it to the worklist. This will cause + // use to visit all regions in the cluster (if we haven't visited them + // already). + if (AddToWorkList(R)) { + // Mark the symbol for any live SymbolicRegion as "live". This means we + // should continue to track that symbol. + if (const SymbolicRegion *SymR = dyn_cast(R)) + SymReaper.markLive(SymR->getSymbol()); + + // For BlockDataRegions, enqueue the VarRegions for variables marked + // with __block (passed-by-reference). + // via BlockDeclRefExprs. + if (const BlockDataRegion *BD = dyn_cast(R)) { + for (BlockDataRegion::referenced_vars_iterator + RI = BD->referenced_vars_begin(), RE = BD->referenced_vars_end(); + RI != RE; ++RI) { + if ((*RI)->getDecl()->getAttr()) + AddToWorkList(*RI); + } + + // No possible data bindings on a BlockDataRegion. + return; + } + } + + // Visit the data binding for K. + if (const SVal *V = RM.lookup(B, K)) + VisitBinding(*V); +} + +bool removeDeadBindingsWorker::UpdatePostponed() { + // See if any postponed SymbolicRegions are actually live now, after + // having done a scan. + bool changed = false; + + for (SmallVectorImpl::iterator + I = Postponed.begin(), E = Postponed.end() ; I != E ; ++I) { + if (const SymbolicRegion *SR = cast_or_null(*I)) { + if (SymReaper.isLive(SR->getSymbol())) { + changed |= AddToWorkList(SR); + *I = NULL; + } + } + } + + return changed; +} + +StoreRef RegionStoreManager::removeDeadBindings(Store store, + const StackFrameContext *LCtx, + SymbolReaper& SymReaper) { + RegionBindings B = GetRegionBindings(store); + removeDeadBindingsWorker W(*this, StateMgr, B, SymReaper, LCtx); + W.GenerateClusters(); + + // Enqueue the region roots onto the worklist. + for (SymbolReaper::region_iterator I = SymReaper.region_begin(), + E = SymReaper.region_end(); I != E; ++I) { + W.AddToWorkList(*I); + } + + do W.RunWorkList(); while (W.UpdatePostponed()); + + // We have now scanned the store, marking reachable regions and symbols + // as live. We now remove all the regions that are dead from the store + // as well as update DSymbols with the set symbols that are now dead. + for (RegionBindings::iterator I = B.begin(), E = B.end(); I != E; ++I) { + const BindingKey &K = I.getKey(); + + // If the cluster has been visited, we know the region has been marked. + if (W.isVisited(K.getRegion())) + continue; + + // Remove the dead entry. + B = removeBinding(B, K); + + // Mark all non-live symbols that this binding references as dead. + if (const SymbolicRegion* SymR = dyn_cast(K.getRegion())) + SymReaper.maybeDead(SymR->getSymbol()); + + SVal X = I.getData(); + SymExpr::symbol_iterator SI = X.symbol_begin(), SE = X.symbol_end(); + for (; SI != SE; ++SI) + SymReaper.maybeDead(*SI); + } + + return StoreRef(B.getRootWithoutRetain(), *this); +} + + +StoreRef RegionStoreManager::enterStackFrame(ProgramStateRef state, + const LocationContext *callerCtx, + const StackFrameContext *calleeCtx) +{ + FunctionDecl const *FD = cast(calleeCtx->getDecl()); + FunctionDecl::param_const_iterator PI = FD->param_begin(), + PE = FD->param_end(); + StoreRef store = StoreRef(state->getStore(), *this); + + if (CallExpr const *CE = dyn_cast(calleeCtx->getCallSite())) { + CallExpr::const_arg_iterator AI = CE->arg_begin(), AE = CE->arg_end(); + + // Copy the arg expression value to the arg variables. We check that + // PI != PE because the actual number of arguments may be different than + // the function declaration. + for (; AI != AE && PI != PE; ++AI, ++PI) { + SVal ArgVal = state->getSVal(*AI, callerCtx); + store = Bind(store.getStore(), + svalBuilder.makeLoc(MRMgr.getVarRegion(*PI, calleeCtx)), + ArgVal); + } + } else if (const CXXConstructExpr *CE = + dyn_cast(calleeCtx->getCallSite())) { + CXXConstructExpr::const_arg_iterator AI = CE->arg_begin(), + AE = CE->arg_end(); + + // Copy the arg expression value to the arg variables. + for (; AI != AE; ++AI, ++PI) { + SVal ArgVal = state->getSVal(*AI, callerCtx); + store = Bind(store.getStore(), + svalBuilder.makeLoc(MRMgr.getVarRegion(*PI, calleeCtx)), + ArgVal); + } + } else + assert(isa(calleeCtx->getDecl())); + + return store; +} + +//===----------------------------------------------------------------------===// +// Utility methods. +//===----------------------------------------------------------------------===// + +void RegionStoreManager::print(Store store, raw_ostream &OS, + const char* nl, const char *sep) { + RegionBindings B = GetRegionBindings(store); + OS << "Store (direct and default bindings):" << nl; + + for (RegionBindings::iterator I = B.begin(), E = B.end(); I != E; ++I) + OS << ' ' << I.getKey() << " : " << I.getData() << nl; +} diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp new file mode 100644 index 0000000..9e97f5e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -0,0 +1,386 @@ +// SValBuilder.cpp - Basic class for all SValBuilder implementations -*- C++ -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines SValBuilder, the base class for all (complete) SValBuilder +// implementations. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ExprCXX.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h" + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// Basic SVal creation. +//===----------------------------------------------------------------------===// + +void SValBuilder::anchor() { } + +DefinedOrUnknownSVal SValBuilder::makeZeroVal(QualType type) { + if (Loc::isLocType(type)) + return makeNull(); + + if (type->isIntegerType()) + return makeIntVal(0, type); + + // FIXME: Handle floats. + // FIXME: Handle structs. + return UnknownVal(); +} + +NonLoc SValBuilder::makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, + const llvm::APSInt& rhs, QualType type) { + // The Environment ensures we always get a persistent APSInt in + // BasicValueFactory, so we don't need to get the APSInt from + // BasicValueFactory again. + assert(lhs); + assert(!Loc::isLocType(type)); + return nonloc::SymbolVal(SymMgr.getSymIntExpr(lhs, op, rhs, type)); +} + +NonLoc SValBuilder::makeNonLoc(const llvm::APSInt& lhs, + BinaryOperator::Opcode op, const SymExpr *rhs, + QualType type) { + assert(rhs); + assert(!Loc::isLocType(type)); + return nonloc::SymbolVal(SymMgr.getIntSymExpr(lhs, op, rhs, type)); +} + +NonLoc SValBuilder::makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, + const SymExpr *rhs, QualType type) { + assert(lhs && rhs); + assert(haveSameType(lhs->getType(Context), rhs->getType(Context)) == true); + assert(!Loc::isLocType(type)); + return nonloc::SymbolVal(SymMgr.getSymSymExpr(lhs, op, rhs, type)); +} + +NonLoc SValBuilder::makeNonLoc(const SymExpr *operand, + QualType fromTy, QualType toTy) { + assert(operand); + assert(!Loc::isLocType(toTy)); + return nonloc::SymbolVal(SymMgr.getCastSymbol(operand, fromTy, toTy)); +} + +SVal SValBuilder::convertToArrayIndex(SVal val) { + if (val.isUnknownOrUndef()) + return val; + + // Common case: we have an appropriately sized integer. + if (nonloc::ConcreteInt* CI = dyn_cast(&val)) { + const llvm::APSInt& I = CI->getValue(); + if (I.getBitWidth() == ArrayIndexWidth && I.isSigned()) + return val; + } + + return evalCastFromNonLoc(cast(val), ArrayIndexTy); +} + +nonloc::ConcreteInt SValBuilder::makeBoolVal(const CXXBoolLiteralExpr *boolean){ + return makeTruthVal(boolean->getValue()); +} + +DefinedOrUnknownSVal +SValBuilder::getRegionValueSymbolVal(const TypedValueRegion* region) { + QualType T = region->getValueType(); + + if (!SymbolManager::canSymbolicate(T)) + return UnknownVal(); + + SymbolRef sym = SymMgr.getRegionValueSymbol(region); + + if (Loc::isLocType(T)) + return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym)); + + return nonloc::SymbolVal(sym); +} + +DefinedOrUnknownSVal +SValBuilder::getConjuredSymbolVal(const void *symbolTag, + const Expr *expr, + const LocationContext *LCtx, + unsigned count) { + QualType T = expr->getType(); + return getConjuredSymbolVal(symbolTag, expr, LCtx, T, count); +} + +DefinedOrUnknownSVal +SValBuilder::getConjuredSymbolVal(const void *symbolTag, + const Expr *expr, + const LocationContext *LCtx, + QualType type, + unsigned count) { + if (!SymbolManager::canSymbolicate(type)) + return UnknownVal(); + + SymbolRef sym = SymMgr.getConjuredSymbol(expr, LCtx, type, count, symbolTag); + + if (Loc::isLocType(type)) + return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym)); + + return nonloc::SymbolVal(sym); +} + + +DefinedOrUnknownSVal +SValBuilder::getConjuredSymbolVal(const Stmt *stmt, + const LocationContext *LCtx, + QualType type, + unsigned visitCount) { + if (!SymbolManager::canSymbolicate(type)) + return UnknownVal(); + + SymbolRef sym = SymMgr.getConjuredSymbol(stmt, LCtx, type, visitCount); + + if (Loc::isLocType(type)) + return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym)); + + return nonloc::SymbolVal(sym); +} + +DefinedSVal SValBuilder::getMetadataSymbolVal(const void *symbolTag, + const MemRegion *region, + const Expr *expr, QualType type, + unsigned count) { + assert(SymbolManager::canSymbolicate(type) && "Invalid metadata symbol type"); + + SymbolRef sym = + SymMgr.getMetadataSymbol(region, expr, type, count, symbolTag); + + if (Loc::isLocType(type)) + return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym)); + + return nonloc::SymbolVal(sym); +} + +DefinedOrUnknownSVal +SValBuilder::getDerivedRegionValueSymbolVal(SymbolRef parentSymbol, + const TypedValueRegion *region) { + QualType T = region->getValueType(); + + if (!SymbolManager::canSymbolicate(T)) + return UnknownVal(); + + SymbolRef sym = SymMgr.getDerivedSymbol(parentSymbol, region); + + if (Loc::isLocType(T)) + return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym)); + + return nonloc::SymbolVal(sym); +} + +DefinedSVal SValBuilder::getFunctionPointer(const FunctionDecl *func) { + return loc::MemRegionVal(MemMgr.getFunctionTextRegion(func)); +} + +DefinedSVal SValBuilder::getBlockPointer(const BlockDecl *block, + CanQualType locTy, + const LocationContext *locContext) { + const BlockTextRegion *BC = + MemMgr.getBlockTextRegion(block, locTy, locContext->getAnalysisDeclContext()); + const BlockDataRegion *BD = MemMgr.getBlockDataRegion(BC, locContext); + return loc::MemRegionVal(BD); +} + +//===----------------------------------------------------------------------===// + +SVal SValBuilder::makeGenericVal(ProgramStateRef State, + BinaryOperator::Opcode Op, + NonLoc LHS, NonLoc RHS, + QualType ResultTy) { + // If operands are tainted, create a symbol to ensure that we propagate taint. + if (State->isTainted(RHS) || State->isTainted(LHS)) { + const SymExpr *symLHS; + const SymExpr *symRHS; + + if (const nonloc::ConcreteInt *rInt = dyn_cast(&RHS)) { + symLHS = LHS.getAsSymExpr(); + return makeNonLoc(symLHS, Op, rInt->getValue(), ResultTy); + } + + if (const nonloc::ConcreteInt *lInt = dyn_cast(&LHS)) { + symRHS = RHS.getAsSymExpr(); + return makeNonLoc(lInt->getValue(), Op, symRHS, ResultTy); + } + + symLHS = LHS.getAsSymExpr(); + symRHS = RHS.getAsSymExpr(); + return makeNonLoc(symLHS, Op, symRHS, ResultTy); + } + return UnknownVal(); +} + + +SVal SValBuilder::evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, + SVal lhs, SVal rhs, QualType type) { + + if (lhs.isUndef() || rhs.isUndef()) + return UndefinedVal(); + + if (lhs.isUnknown() || rhs.isUnknown()) + return UnknownVal(); + + if (isa(lhs)) { + if (isa(rhs)) + return evalBinOpLL(state, op, cast(lhs), cast(rhs), type); + + return evalBinOpLN(state, op, cast(lhs), cast(rhs), type); + } + + if (isa(rhs)) { + // Support pointer arithmetic where the addend is on the left + // and the pointer on the right. + assert(op == BO_Add); + + // Commute the operands. + return evalBinOpLN(state, op, cast(rhs), cast(lhs), type); + } + + return evalBinOpNN(state, op, cast(lhs), cast(rhs), type); +} + +DefinedOrUnknownSVal SValBuilder::evalEQ(ProgramStateRef state, + DefinedOrUnknownSVal lhs, + DefinedOrUnknownSVal rhs) { + return cast(evalBinOp(state, BO_EQ, lhs, rhs, + Context.IntTy)); +} + +/// Recursively check if the pointer types are equal modulo const, volatile, +/// and restrict qualifiers. Assumes the input types are canonical. +/// TODO: This is based off of code in SemaCast; can we reuse it. +static bool haveSimilarTypes(ASTContext &Context, QualType T1, + QualType T2) { + while (Context.UnwrapSimilarPointerTypes(T1, T2)) { + Qualifiers Quals1, Quals2; + T1 = Context.getUnqualifiedArrayType(T1, Quals1); + T2 = Context.getUnqualifiedArrayType(T2, Quals2); + + // Make sure that non cvr-qualifiers the other qualifiers (e.g., address + // spaces) are identical. + Quals1.removeCVRQualifiers(); + Quals2.removeCVRQualifiers(); + if (Quals1 != Quals2) + return false; + } + + if (T1 != T2) + return false; + + return true; +} + +// FIXME: should rewrite according to the cast kind. +SVal SValBuilder::evalCast(SVal val, QualType castTy, QualType originalTy) { + castTy = Context.getCanonicalType(castTy); + originalTy = Context.getCanonicalType(originalTy); + if (val.isUnknownOrUndef() || castTy == originalTy) + return val; + + // For const casts, just propagate the value. + if (!castTy->isVariableArrayType() && !originalTy->isVariableArrayType()) + if (haveSimilarTypes(Context, Context.getPointerType(castTy), + Context.getPointerType(originalTy))) + return val; + + // Check for casts from pointers to integers. + if (castTy->isIntegerType() && Loc::isLocType(originalTy)) + return evalCastFromLoc(cast(val), castTy); + + // Check for casts from integers to pointers. + if (Loc::isLocType(castTy) && originalTy->isIntegerType()) { + if (nonloc::LocAsInteger *LV = dyn_cast(&val)) { + if (const MemRegion *R = LV->getLoc().getAsRegion()) { + StoreManager &storeMgr = StateMgr.getStoreManager(); + R = storeMgr.castRegion(R, castTy); + return R ? SVal(loc::MemRegionVal(R)) : UnknownVal(); + } + return LV->getLoc(); + } + return dispatchCast(val, castTy); + } + + // Just pass through function and block pointers. + if (originalTy->isBlockPointerType() || originalTy->isFunctionPointerType()) { + assert(Loc::isLocType(castTy)); + return val; + } + + // Check for casts from array type to another type. + if (originalTy->isArrayType()) { + // We will always decay to a pointer. + val = StateMgr.ArrayToPointer(cast(val)); + + // Are we casting from an array to a pointer? If so just pass on + // the decayed value. + if (castTy->isPointerType()) + return val; + + // Are we casting from an array to an integer? If so, cast the decayed + // pointer value to an integer. + assert(castTy->isIntegerType()); + + // FIXME: Keep these here for now in case we decide soon that we + // need the original decayed type. + // QualType elemTy = cast(originalTy)->getElementType(); + // QualType pointerTy = C.getPointerType(elemTy); + return evalCastFromLoc(cast(val), castTy); + } + + // Check for casts from a region to a specific type. + if (const MemRegion *R = val.getAsRegion()) { + // FIXME: We should handle the case where we strip off view layers to get + // to a desugared type. + + if (!Loc::isLocType(castTy)) { + // FIXME: There can be gross cases where one casts the result of a function + // (that returns a pointer) to some other value that happens to fit + // within that pointer value. We currently have no good way to + // model such operations. When this happens, the underlying operation + // is that the caller is reasoning about bits. Conceptually we are + // layering a "view" of a location on top of those bits. Perhaps + // we need to be more lazy about mutual possible views, even on an + // SVal? This may be necessary for bit-level reasoning as well. + return UnknownVal(); + } + + // We get a symbolic function pointer for a dereference of a function + // pointer, but it is of function type. Example: + + // struct FPRec { + // void (*my_func)(int * x); + // }; + // + // int bar(int x); + // + // int f1_a(struct FPRec* foo) { + // int x; + // (*foo->my_func)(&x); + // return bar(x)+1; // no-warning + // } + + assert(Loc::isLocType(originalTy) || originalTy->isFunctionType() || + originalTy->isBlockPointerType() || castTy->isReferenceType()); + + StoreManager &storeMgr = StateMgr.getStoreManager(); + + // Delegate to store manager to get the result of casting a region to a + // different type. If the MemRegion* returned is NULL, this expression + // Evaluates to UnknownVal. + R = storeMgr.castRegion(R, castTy); + return R ? SVal(loc::MemRegionVal(R)) : UnknownVal(); + } + + return dispatchCast(val, castTy); +} diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp new file mode 100644 index 0000000..b94aff4 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -0,0 +1,331 @@ +//= RValues.cpp - Abstract RValues for Path-Sens. Value Tracking -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines SVal, Loc, and NonLoc, classes that represent +// abstract r-values for use with path-sensitive value tracking. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/AST/ExprObjC.h" +#include "clang/Basic/IdentifierTable.h" +using namespace clang; +using namespace ento; +using llvm::APSInt; + +//===----------------------------------------------------------------------===// +// Symbol iteration within an SVal. +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// Utility methods. +//===----------------------------------------------------------------------===// + +bool SVal::hasConjuredSymbol() const { + if (const nonloc::SymbolVal* SV = dyn_cast(this)) { + SymbolRef sym = SV->getSymbol(); + if (isa(sym)) + return true; + } + + if (const loc::MemRegionVal *RV = dyn_cast(this)) { + const MemRegion *R = RV->getRegion(); + if (const SymbolicRegion *SR = dyn_cast(R)) { + SymbolRef sym = SR->getSymbol(); + if (isa(sym)) + return true; + } + } + + return false; +} + +const FunctionDecl *SVal::getAsFunctionDecl() const { + if (const loc::MemRegionVal* X = dyn_cast(this)) { + const MemRegion* R = X->getRegion(); + if (const FunctionTextRegion *CTR = R->getAs()) + return CTR->getDecl(); + } + + return 0; +} + +/// \brief If this SVal is a location (subclasses Loc) and wraps a symbol, +/// return that SymbolRef. Otherwise return 0. +/// +/// Implicit casts (ex: void* -> char*) can turn Symbolic region into Element +/// region. If that is the case, gets the underlining region. +SymbolRef SVal::getAsLocSymbol() const { + // FIXME: should we consider SymbolRef wrapped in CodeTextRegion? + if (const nonloc::LocAsInteger *X = dyn_cast(this)) + return X->getLoc().getAsLocSymbol(); + + if (const loc::MemRegionVal *X = dyn_cast(this)) { + const MemRegion *R = X->stripCasts(); + if (const SymbolicRegion *SymR = dyn_cast(R)) + return SymR->getSymbol(); + } + return 0; +} + +/// Get the symbol in the SVal or its base region. +SymbolRef SVal::getLocSymbolInBase() const { + const loc::MemRegionVal *X = dyn_cast(this); + + if (!X) + return 0; + + const MemRegion *R = X->getRegion(); + + while (const SubRegion *SR = dyn_cast(R)) { + if (const SymbolicRegion *SymR = dyn_cast(SR)) + return SymR->getSymbol(); + else + R = SR->getSuperRegion(); + } + + return 0; +} + +// TODO: The next 3 functions have to be simplified. + +/// \brief If this SVal wraps a symbol return that SymbolRef. +/// Otherwise return 0. +SymbolRef SVal::getAsSymbol() const { + // FIXME: should we consider SymbolRef wrapped in CodeTextRegion? + if (const nonloc::SymbolVal *X = dyn_cast(this)) + return X->getSymbol(); + + return getAsLocSymbol(); +} + +/// getAsSymbolicExpression - If this Sval wraps a symbolic expression then +/// return that expression. Otherwise return NULL. +const SymExpr *SVal::getAsSymbolicExpression() const { + if (const nonloc::SymbolVal *X = dyn_cast(this)) + return X->getSymbol(); + + return getAsSymbol(); +} + +const SymExpr* SVal::getAsSymExpr() const { + const SymExpr* Sym = getAsSymbol(); + if (!Sym) + Sym = getAsSymbolicExpression(); + return Sym; +} + +const MemRegion *SVal::getAsRegion() const { + if (const loc::MemRegionVal *X = dyn_cast(this)) + return X->getRegion(); + + if (const nonloc::LocAsInteger *X = dyn_cast(this)) { + return X->getLoc().getAsRegion(); + } + + return 0; +} + +const MemRegion *loc::MemRegionVal::stripCasts() const { + const MemRegion *R = getRegion(); + return R ? R->StripCasts() : NULL; +} + +const void *nonloc::LazyCompoundVal::getStore() const { + return static_cast(Data)->getStore(); +} + +const TypedRegion *nonloc::LazyCompoundVal::getRegion() const { + return static_cast(Data)->getRegion(); +} + +//===----------------------------------------------------------------------===// +// Other Iterators. +//===----------------------------------------------------------------------===// + +nonloc::CompoundVal::iterator nonloc::CompoundVal::begin() const { + return getValue()->begin(); +} + +nonloc::CompoundVal::iterator nonloc::CompoundVal::end() const { + return getValue()->end(); +} + +//===----------------------------------------------------------------------===// +// Useful predicates. +//===----------------------------------------------------------------------===// + +bool SVal::isConstant() const { + return isa(this) || isa(this); +} + +bool SVal::isConstant(int I) const { + if (isa(*this)) + return cast(*this).getValue() == I; + else if (isa(*this)) + return cast(*this).getValue() == I; + else + return false; +} + +bool SVal::isZeroConstant() const { + return isConstant(0); +} + + +//===----------------------------------------------------------------------===// +// Transfer function dispatch for Non-Locs. +//===----------------------------------------------------------------------===// + +SVal nonloc::ConcreteInt::evalBinOp(SValBuilder &svalBuilder, + BinaryOperator::Opcode Op, + const nonloc::ConcreteInt& R) const { + const llvm::APSInt* X = + svalBuilder.getBasicValueFactory().evalAPSInt(Op, getValue(), R.getValue()); + + if (X) + return nonloc::ConcreteInt(*X); + else + return UndefinedVal(); +} + +nonloc::ConcreteInt +nonloc::ConcreteInt::evalComplement(SValBuilder &svalBuilder) const { + return svalBuilder.makeIntVal(~getValue()); +} + +nonloc::ConcreteInt +nonloc::ConcreteInt::evalMinus(SValBuilder &svalBuilder) const { + return svalBuilder.makeIntVal(-getValue()); +} + +//===----------------------------------------------------------------------===// +// Transfer function dispatch for Locs. +//===----------------------------------------------------------------------===// + +SVal loc::ConcreteInt::evalBinOp(BasicValueFactory& BasicVals, + BinaryOperator::Opcode Op, + const loc::ConcreteInt& R) const { + + assert (Op == BO_Add || Op == BO_Sub || + (Op >= BO_LT && Op <= BO_NE)); + + const llvm::APSInt* X = BasicVals.evalAPSInt(Op, getValue(), R.getValue()); + + if (X) + return loc::ConcreteInt(*X); + else + return UndefinedVal(); +} + +//===----------------------------------------------------------------------===// +// Pretty-Printing. +//===----------------------------------------------------------------------===// + +void SVal::dump() const { dumpToStream(llvm::errs()); } + +void SVal::dumpToStream(raw_ostream &os) const { + switch (getBaseKind()) { + case UnknownKind: + os << "Unknown"; + break; + case NonLocKind: + cast(this)->dumpToStream(os); + break; + case LocKind: + cast(this)->dumpToStream(os); + break; + case UndefinedKind: + os << "Undefined"; + break; + } +} + +void NonLoc::dumpToStream(raw_ostream &os) const { + switch (getSubKind()) { + case nonloc::ConcreteIntKind: { + const nonloc::ConcreteInt& C = *cast(this); + if (C.getValue().isUnsigned()) + os << C.getValue().getZExtValue(); + else + os << C.getValue().getSExtValue(); + os << ' ' << (C.getValue().isUnsigned() ? 'U' : 'S') + << C.getValue().getBitWidth() << 'b'; + break; + } + case nonloc::SymbolValKind: { + os << cast(this)->getSymbol(); + break; + } + case nonloc::LocAsIntegerKind: { + const nonloc::LocAsInteger& C = *cast(this); + os << C.getLoc() << " [as " << C.getNumBits() << " bit integer]"; + break; + } + case nonloc::CompoundValKind: { + const nonloc::CompoundVal& C = *cast(this); + os << "compoundVal{"; + bool first = true; + for (nonloc::CompoundVal::iterator I=C.begin(), E=C.end(); I!=E; ++I) { + if (first) { + os << ' '; first = false; + } + else + os << ", "; + + (*I).dumpToStream(os); + } + os << "}"; + break; + } + case nonloc::LazyCompoundValKind: { + const nonloc::LazyCompoundVal &C = *cast(this); + os << "lazyCompoundVal{" << const_cast(C.getStore()) + << ',' << C.getRegion() + << '}'; + break; + } + default: + assert (false && "Pretty-printed not implemented for this NonLoc."); + break; + } +} + +void Loc::dumpToStream(raw_ostream &os) const { + switch (getSubKind()) { + case loc::ConcreteIntKind: + os << cast(this)->getValue().getZExtValue() << " (Loc)"; + break; + case loc::GotoLabelKind: + os << "&&" << cast(this)->getLabel()->getName(); + break; + case loc::MemRegionKind: + os << '&' << cast(this)->getRegion()->getString(); + break; + case loc::ObjCPropRefKind: { + const ObjCPropertyRefExpr *E = cast(this)->getPropRefExpr(); + os << "objc-prop{"; + if (E->isSuperReceiver()) + os << "super."; + else if (E->getBase()) + os << "."; + + if (E->isImplicitProperty()) + os << E->getImplicitPropertyGetter()->getSelector().getAsString(); + else + os << E->getExplicitProperty()->getName(); + + os << "}"; + break; + } + default: + llvm_unreachable("Pretty-printing not implemented for this Loc."); + } +} diff --git a/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp new file mode 100644 index 0000000..a76a2da --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp @@ -0,0 +1,307 @@ +//== SimpleConstraintManager.cpp --------------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines SimpleConstraintManager, a class that holds code shared +// between BasicConstraintManager and RangeConstraintManager. +// +//===----------------------------------------------------------------------===// + +#include "SimpleConstraintManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" + +namespace clang { + +namespace ento { + +SimpleConstraintManager::~SimpleConstraintManager() {} + +bool SimpleConstraintManager::canReasonAbout(SVal X) const { + nonloc::SymbolVal *SymVal = dyn_cast(&X); + if (SymVal && SymVal->isExpression()) { + const SymExpr *SE = SymVal->getSymbol(); + + if (const SymIntExpr *SIE = dyn_cast(SE)) { + switch (SIE->getOpcode()) { + // We don't reason yet about bitwise-constraints on symbolic values. + case BO_And: + case BO_Or: + case BO_Xor: + return false; + // We don't reason yet about these arithmetic constraints on + // symbolic values. + case BO_Mul: + case BO_Div: + case BO_Rem: + case BO_Shl: + case BO_Shr: + return false; + // All other cases. + default: + return true; + } + } + + return false; + } + + return true; +} + +ProgramStateRef SimpleConstraintManager::assume(ProgramStateRef state, + DefinedSVal Cond, + bool Assumption) { + if (isa(Cond)) + return assume(state, cast(Cond), Assumption); + else + return assume(state, cast(Cond), Assumption); +} + +ProgramStateRef SimpleConstraintManager::assume(ProgramStateRef state, Loc cond, + bool assumption) { + state = assumeAux(state, cond, assumption); + return SU.processAssume(state, cond, assumption); +} + +ProgramStateRef SimpleConstraintManager::assumeAux(ProgramStateRef state, + Loc Cond, bool Assumption) { + + BasicValueFactory &BasicVals = state->getBasicVals(); + + switch (Cond.getSubKind()) { + default: + assert (false && "'Assume' not implemented for this Loc."); + return state; + + case loc::MemRegionKind: { + // FIXME: Should this go into the storemanager? + + const MemRegion *R = cast(Cond).getRegion(); + const SubRegion *SubR = dyn_cast(R); + + while (SubR) { + // FIXME: now we only find the first symbolic region. + if (const SymbolicRegion *SymR = dyn_cast(SubR)) { + const llvm::APSInt &zero = BasicVals.getZeroWithPtrWidth(); + if (Assumption) + return assumeSymNE(state, SymR->getSymbol(), zero, zero); + else + return assumeSymEQ(state, SymR->getSymbol(), zero, zero); + } + SubR = dyn_cast(SubR->getSuperRegion()); + } + + // FALL-THROUGH. + } + + case loc::GotoLabelKind: + return Assumption ? state : NULL; + + case loc::ConcreteIntKind: { + bool b = cast(Cond).getValue() != 0; + bool isFeasible = b ? Assumption : !Assumption; + return isFeasible ? state : NULL; + } + } // end switch +} + +ProgramStateRef SimpleConstraintManager::assume(ProgramStateRef state, + NonLoc cond, + bool assumption) { + state = assumeAux(state, cond, assumption); + return SU.processAssume(state, cond, assumption); +} + +static BinaryOperator::Opcode NegateComparison(BinaryOperator::Opcode op) { + // FIXME: This should probably be part of BinaryOperator, since this isn't + // the only place it's used. (This code was copied from SimpleSValBuilder.cpp.) + switch (op) { + default: + llvm_unreachable("Invalid opcode."); + case BO_LT: return BO_GE; + case BO_GT: return BO_LE; + case BO_LE: return BO_GT; + case BO_GE: return BO_LT; + case BO_EQ: return BO_NE; + case BO_NE: return BO_EQ; + } +} + + +ProgramStateRef SimpleConstraintManager::assumeAuxForSymbol( + ProgramStateRef State, + SymbolRef Sym, + bool Assumption) { + QualType T = State->getSymbolManager().getType(Sym); + const llvm::APSInt &zero = State->getBasicVals().getValue(0, T); + if (Assumption) + return assumeSymNE(State, Sym, zero, zero); + else + return assumeSymEQ(State, Sym, zero, zero); +} + +ProgramStateRef SimpleConstraintManager::assumeAux(ProgramStateRef state, + NonLoc Cond, + bool Assumption) { + + // We cannot reason about SymSymExprs, and can only reason about some + // SymIntExprs. + if (!canReasonAbout(Cond)) { + // Just add the constraint to the expression without trying to simplify. + SymbolRef sym = Cond.getAsSymExpr(); + return assumeAuxForSymbol(state, sym, Assumption); + } + + BasicValueFactory &BasicVals = state->getBasicVals(); + SymbolManager &SymMgr = state->getSymbolManager(); + + switch (Cond.getSubKind()) { + default: + llvm_unreachable("'Assume' not implemented for this NonLoc"); + + case nonloc::SymbolValKind: { + nonloc::SymbolVal& SV = cast(Cond); + SymbolRef sym = SV.getSymbol(); + assert(sym); + + // Handle SymbolData. + if (!SV.isExpression()) { + return assumeAuxForSymbol(state, sym, Assumption); + + // Handle symbolic expression. + } else { + // We can only simplify expressions whose RHS is an integer. + const SymIntExpr *SE = dyn_cast(sym); + if (!SE) + return assumeAuxForSymbol(state, sym, Assumption); + + BinaryOperator::Opcode op = SE->getOpcode(); + // Implicitly compare non-comparison expressions to 0. + if (!BinaryOperator::isComparisonOp(op)) { + QualType T = SymMgr.getType(SE); + const llvm::APSInt &zero = BasicVals.getValue(0, T); + op = (Assumption ? BO_NE : BO_EQ); + return assumeSymRel(state, SE, op, zero); + } + // From here on out, op is the real comparison we'll be testing. + if (!Assumption) + op = NegateComparison(op); + + return assumeSymRel(state, SE->getLHS(), op, SE->getRHS()); + } + } + + case nonloc::ConcreteIntKind: { + bool b = cast(Cond).getValue() != 0; + bool isFeasible = b ? Assumption : !Assumption; + return isFeasible ? state : NULL; + } + + case nonloc::LocAsIntegerKind: + return assumeAux(state, cast(Cond).getLoc(), + Assumption); + } // end switch +} + +static llvm::APSInt computeAdjustment(const SymExpr *LHS, + SymbolRef &Sym) { + llvm::APSInt DefaultAdjustment; + DefaultAdjustment = 0; + + // First check if the LHS is a simple symbol reference. + if (isa(LHS)) + return DefaultAdjustment; + + // Next, see if it's a "($sym+constant1)" expression. + const SymIntExpr *SE = dyn_cast(LHS); + + // We cannot simplify "($sym1+$sym2)". + if (!SE) + return DefaultAdjustment; + + // Get the constant out of the expression "($sym+constant1)" or + // "+constant1". + Sym = SE->getLHS(); + switch (SE->getOpcode()) { + case BO_Add: + return SE->getRHS(); + case BO_Sub: + return -SE->getRHS(); + default: + // We cannot simplify non-additive operators. + return DefaultAdjustment; + } +} + +ProgramStateRef SimpleConstraintManager::assumeSymRel(ProgramStateRef state, + const SymExpr *LHS, + BinaryOperator::Opcode op, + const llvm::APSInt& Int) { + assert(BinaryOperator::isComparisonOp(op) && + "Non-comparison ops should be rewritten as comparisons to zero."); + + // We only handle simple comparisons of the form "$sym == constant" + // or "($sym+constant1) == constant2". + // The adjustment is "constant1" in the above expression. It's used to + // "slide" the solution range around for modular arithmetic. For example, + // x < 4 has the solution [0, 3]. x+2 < 4 has the solution [0-2, 3-2], which + // in modular arithmetic is [0, 1] U [UINT_MAX-1, UINT_MAX]. It's up to + // the subclasses of SimpleConstraintManager to handle the adjustment. + SymbolRef Sym = LHS; + llvm::APSInt Adjustment = computeAdjustment(LHS, Sym); + + // FIXME: This next section is a hack. It silently converts the integers to + // be of the same type as the symbol, which is not always correct. Really the + // comparisons should be performed using the Int's type, then mapped back to + // the symbol's range of values. + ProgramStateManager &StateMgr = state->getStateManager(); + ASTContext &Ctx = StateMgr.getContext(); + + QualType T = Sym->getType(Ctx); + assert(T->isIntegerType() || Loc::isLocType(T)); + unsigned bitwidth = Ctx.getTypeSize(T); + bool isSymUnsigned + = T->isUnsignedIntegerOrEnumerationType() || Loc::isLocType(T); + + // Convert the adjustment. + Adjustment.setIsUnsigned(isSymUnsigned); + Adjustment = Adjustment.extOrTrunc(bitwidth); + + // Convert the right-hand side integer. + llvm::APSInt ConvertedInt(Int, isSymUnsigned); + ConvertedInt = ConvertedInt.extOrTrunc(bitwidth); + + switch (op) { + default: + // No logic yet for other operators. assume the constraint is feasible. + return state; + + case BO_EQ: + return assumeSymEQ(state, Sym, ConvertedInt, Adjustment); + + case BO_NE: + return assumeSymNE(state, Sym, ConvertedInt, Adjustment); + + case BO_GT: + return assumeSymGT(state, Sym, ConvertedInt, Adjustment); + + case BO_GE: + return assumeSymGE(state, Sym, ConvertedInt, Adjustment); + + case BO_LT: + return assumeSymLT(state, Sym, ConvertedInt, Adjustment); + + case BO_LE: + return assumeSymLE(state, Sym, ConvertedInt, Adjustment); + } // end switch +} + +} // end of namespace ento + +} // end of namespace clang diff --git a/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.h b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.h new file mode 100644 index 0000000..e082d9d --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.h @@ -0,0 +1,101 @@ +//== SimpleConstraintManager.h ----------------------------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Code shared between BasicConstraintManager and RangeConstraintManager. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_GR_SIMPLE_CONSTRAINT_MANAGER_H +#define LLVM_CLANG_GR_SIMPLE_CONSTRAINT_MANAGER_H + +#include "clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" + +namespace clang { + +namespace ento { + +class SimpleConstraintManager : public ConstraintManager { + SubEngine &SU; +public: + SimpleConstraintManager(SubEngine &subengine) : SU(subengine) {} + virtual ~SimpleConstraintManager(); + + //===------------------------------------------------------------------===// + // Common implementation for the interface provided by ConstraintManager. + //===------------------------------------------------------------------===// + + ProgramStateRef assume(ProgramStateRef state, DefinedSVal Cond, + bool Assumption); + + ProgramStateRef assume(ProgramStateRef state, Loc Cond, bool Assumption); + + ProgramStateRef assume(ProgramStateRef state, NonLoc Cond, bool Assumption); + + ProgramStateRef assumeSymRel(ProgramStateRef state, + const SymExpr *LHS, + BinaryOperator::Opcode op, + const llvm::APSInt& Int); + +protected: + + //===------------------------------------------------------------------===// + // Interface that subclasses must implement. + //===------------------------------------------------------------------===// + + // Each of these is of the form "$sym+Adj <> V", where "<>" is the comparison + // operation for the method being invoked. + virtual ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; + + virtual ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; + + virtual ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; + + virtual ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; + + virtual ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; + + virtual ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym, + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; + + //===------------------------------------------------------------------===// + // Internal implementation. + //===------------------------------------------------------------------===// + + bool canReasonAbout(SVal X) const; + + ProgramStateRef assumeAux(ProgramStateRef state, + Loc Cond, + bool Assumption); + + ProgramStateRef assumeAux(ProgramStateRef state, + NonLoc Cond, + bool Assumption); + + ProgramStateRef assumeAuxForSymbol(ProgramStateRef State, + SymbolRef Sym, + bool Assumption); +}; + +} // end GR namespace + +} // end clang namespace + +#endif diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp new file mode 100644 index 0000000..d0558f1 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -0,0 +1,973 @@ +// SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines SimpleSValBuilder, a basic implementation of SValBuilder. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" + +using namespace clang; +using namespace ento; + +namespace { +class SimpleSValBuilder : public SValBuilder { +protected: + virtual SVal dispatchCast(SVal val, QualType castTy); + virtual SVal evalCastFromNonLoc(NonLoc val, QualType castTy); + virtual SVal evalCastFromLoc(Loc val, QualType castTy); + +public: + SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, + ProgramStateManager &stateMgr) + : SValBuilder(alloc, context, stateMgr) {} + virtual ~SimpleSValBuilder() {} + + virtual SVal evalMinus(NonLoc val); + virtual SVal evalComplement(NonLoc val); + virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, + NonLoc lhs, NonLoc rhs, QualType resultTy); + virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, + Loc lhs, Loc rhs, QualType resultTy); + virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, + Loc lhs, NonLoc rhs, QualType resultTy); + + /// getKnownValue - evaluates a given SVal. If the SVal has only one possible + /// (integer) value, that value is returned. Otherwise, returns NULL. + virtual const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V); + + SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op, + const llvm::APSInt &RHS, QualType resultTy); +}; +} // end anonymous namespace + +SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, + ASTContext &context, + ProgramStateManager &stateMgr) { + return new SimpleSValBuilder(alloc, context, stateMgr); +} + +//===----------------------------------------------------------------------===// +// Transfer function for Casts. +//===----------------------------------------------------------------------===// + +SVal SimpleSValBuilder::dispatchCast(SVal Val, QualType CastTy) { + assert(isa(&Val) || isa(&Val)); + return isa(Val) ? evalCastFromLoc(cast(Val), CastTy) + : evalCastFromNonLoc(cast(Val), CastTy); +} + +SVal SimpleSValBuilder::evalCastFromNonLoc(NonLoc val, QualType castTy) { + + bool isLocType = Loc::isLocType(castTy); + + if (nonloc::LocAsInteger *LI = dyn_cast(&val)) { + if (isLocType) + return LI->getLoc(); + + // FIXME: Correctly support promotions/truncations. + unsigned castSize = Context.getTypeSize(castTy); + if (castSize == LI->getNumBits()) + return val; + return makeLocAsInteger(LI->getLoc(), castSize); + } + + if (const SymExpr *se = val.getAsSymbolicExpression()) { + QualType T = Context.getCanonicalType(se->getType(Context)); + // If types are the same or both are integers, ignore the cast. + // FIXME: Remove this hack when we support symbolic truncation/extension. + // HACK: If both castTy and T are integers, ignore the cast. This is + // not a permanent solution. Eventually we want to precisely handle + // extension/truncation of symbolic integers. This prevents us from losing + // precision when we assign 'x = y' and 'y' is symbolic and x and y are + // different integer types. + if (haveSameType(T, castTy)) + return val; + + if (!isLocType) + return makeNonLoc(se, T, castTy); + return UnknownVal(); + } + + // If value is a non integer constant, produce unknown. + if (!isa(val)) + return UnknownVal(); + + // Only handle casts from integers to integers - if val is an integer constant + // being cast to a non integer type, produce unknown. + if (!isLocType && !castTy->isIntegerType()) + return UnknownVal(); + + llvm::APSInt i = cast(val).getValue(); + i.setIsUnsigned(castTy->isUnsignedIntegerOrEnumerationType() || + Loc::isLocType(castTy)); + i = i.extOrTrunc(Context.getTypeSize(castTy)); + + if (isLocType) + return makeIntLocVal(i); + else + return makeIntVal(i); +} + +SVal SimpleSValBuilder::evalCastFromLoc(Loc val, QualType castTy) { + + // Casts from pointers -> pointers, just return the lval. + // + // Casts from pointers -> references, just return the lval. These + // can be introduced by the frontend for corner cases, e.g + // casting from va_list* to __builtin_va_list&. + // + if (Loc::isLocType(castTy) || castTy->isReferenceType()) + return val; + + // FIXME: Handle transparent unions where a value can be "transparently" + // lifted into a union type. + if (castTy->isUnionType()) + return UnknownVal(); + + if (castTy->isIntegerType()) { + unsigned BitWidth = Context.getTypeSize(castTy); + + if (!isa(val)) + return makeLocAsInteger(val, BitWidth); + + llvm::APSInt i = cast(val).getValue(); + i.setIsUnsigned(castTy->isUnsignedIntegerOrEnumerationType() || + Loc::isLocType(castTy)); + i = i.extOrTrunc(BitWidth); + return makeIntVal(i); + } + + // All other cases: return 'UnknownVal'. This includes casting pointers + // to floats, which is probably badness it itself, but this is a good + // intermediate solution until we do something better. + return UnknownVal(); +} + +//===----------------------------------------------------------------------===// +// Transfer function for unary operators. +//===----------------------------------------------------------------------===// + +SVal SimpleSValBuilder::evalMinus(NonLoc val) { + switch (val.getSubKind()) { + case nonloc::ConcreteIntKind: + return cast(val).evalMinus(*this); + default: + return UnknownVal(); + } +} + +SVal SimpleSValBuilder::evalComplement(NonLoc X) { + switch (X.getSubKind()) { + case nonloc::ConcreteIntKind: + return cast(X).evalComplement(*this); + default: + return UnknownVal(); + } +} + +//===----------------------------------------------------------------------===// +// Transfer function for binary operators. +//===----------------------------------------------------------------------===// + +static BinaryOperator::Opcode NegateComparison(BinaryOperator::Opcode op) { + switch (op) { + default: + llvm_unreachable("Invalid opcode."); + case BO_LT: return BO_GE; + case BO_GT: return BO_LE; + case BO_LE: return BO_GT; + case BO_GE: return BO_LT; + case BO_EQ: return BO_NE; + case BO_NE: return BO_EQ; + } +} + +static BinaryOperator::Opcode ReverseComparison(BinaryOperator::Opcode op) { + switch (op) { + default: + llvm_unreachable("Invalid opcode."); + case BO_LT: return BO_GT; + case BO_GT: return BO_LT; + case BO_LE: return BO_GE; + case BO_GE: return BO_LE; + case BO_EQ: + case BO_NE: + return op; + } +} + +SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS, + BinaryOperator::Opcode op, + const llvm::APSInt &RHS, + QualType resultTy) { + bool isIdempotent = false; + + // Check for a few special cases with known reductions first. + switch (op) { + default: + // We can't reduce this case; just treat it normally. + break; + case BO_Mul: + // a*0 and a*1 + if (RHS == 0) + return makeIntVal(0, resultTy); + else if (RHS == 1) + isIdempotent = true; + break; + case BO_Div: + // a/0 and a/1 + if (RHS == 0) + // This is also handled elsewhere. + return UndefinedVal(); + else if (RHS == 1) + isIdempotent = true; + break; + case BO_Rem: + // a%0 and a%1 + if (RHS == 0) + // This is also handled elsewhere. + return UndefinedVal(); + else if (RHS == 1) + return makeIntVal(0, resultTy); + break; + case BO_Add: + case BO_Sub: + case BO_Shl: + case BO_Shr: + case BO_Xor: + // a+0, a-0, a<<0, a>>0, a^0 + if (RHS == 0) + isIdempotent = true; + break; + case BO_And: + // a&0 and a&(~0) + if (RHS == 0) + return makeIntVal(0, resultTy); + else if (RHS.isAllOnesValue()) + isIdempotent = true; + break; + case BO_Or: + // a|0 and a|(~0) + if (RHS == 0) + isIdempotent = true; + else if (RHS.isAllOnesValue()) { + const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS); + return nonloc::ConcreteInt(Result); + } + break; + } + + // Idempotent ops (like a*1) can still change the type of an expression. + // Wrap the LHS up in a NonLoc again and let evalCastFromNonLoc do the + // dirty work. + if (isIdempotent) + return evalCastFromNonLoc(nonloc::SymbolVal(LHS), resultTy); + + // If we reach this point, the expression cannot be simplified. + // Make a SymbolVal for the entire expression. + return makeNonLoc(LHS, op, RHS, resultTy); +} + +SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, + BinaryOperator::Opcode op, + NonLoc lhs, NonLoc rhs, + QualType resultTy) { + // Handle trivial case where left-side and right-side are the same. + if (lhs == rhs) + switch (op) { + default: + break; + case BO_EQ: + case BO_LE: + case BO_GE: + return makeTruthVal(true, resultTy); + case BO_LT: + case BO_GT: + case BO_NE: + return makeTruthVal(false, resultTy); + case BO_Xor: + case BO_Sub: + return makeIntVal(0, resultTy); + case BO_Or: + case BO_And: + return evalCastFromNonLoc(lhs, resultTy); + } + + while (1) { + switch (lhs.getSubKind()) { + default: + return makeGenericVal(state, op, lhs, rhs, resultTy); + case nonloc::LocAsIntegerKind: { + Loc lhsL = cast(lhs).getLoc(); + switch (rhs.getSubKind()) { + case nonloc::LocAsIntegerKind: + return evalBinOpLL(state, op, lhsL, + cast(rhs).getLoc(), + resultTy); + case nonloc::ConcreteIntKind: { + // Transform the integer into a location and compare. + llvm::APSInt i = cast(rhs).getValue(); + i.setIsUnsigned(true); + i = i.extOrTrunc(Context.getTypeSize(Context.VoidPtrTy)); + return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy); + } + default: + switch (op) { + case BO_EQ: + return makeTruthVal(false, resultTy); + case BO_NE: + return makeTruthVal(true, resultTy); + default: + // This case also handles pointer arithmetic. + return makeGenericVal(state, op, lhs, rhs, resultTy); + } + } + } + case nonloc::ConcreteIntKind: { + const nonloc::ConcreteInt& lhsInt = cast(lhs); + + // Is the RHS a symbol we can simplify? + // FIXME: This was mostly copy/pasted from the LHS-is-a-symbol case. + if (const nonloc::SymbolVal *srhs = dyn_cast(&rhs)) { + SymbolRef RSym = srhs->getSymbol(); + if (RSym->getType(Context)->isIntegerType()) { + if (const llvm::APSInt *Constant = state->getSymVal(RSym)) { + // The symbol evaluates to a constant. + const llvm::APSInt *rhs_I; + if (BinaryOperator::isRelationalOp(op)) + rhs_I = &BasicVals.Convert(lhsInt.getValue(), *Constant); + else + rhs_I = &BasicVals.Convert(resultTy, *Constant); + + rhs = nonloc::ConcreteInt(*rhs_I); + } + } + } + + if (isa(rhs)) { + return lhsInt.evalBinOp(*this, op, cast(rhs)); + } else { + const llvm::APSInt& lhsValue = lhsInt.getValue(); + + // Swap the left and right sides and flip the operator if doing so + // allows us to better reason about the expression (this is a form + // of expression canonicalization). + // While we're at it, catch some special cases for non-commutative ops. + NonLoc tmp = rhs; + rhs = lhs; + lhs = tmp; + + switch (op) { + case BO_LT: + case BO_GT: + case BO_LE: + case BO_GE: + op = ReverseComparison(op); + continue; + case BO_EQ: + case BO_NE: + case BO_Add: + case BO_Mul: + case BO_And: + case BO_Xor: + case BO_Or: + continue; + case BO_Shr: + if (lhsValue.isAllOnesValue() && lhsValue.isSigned()) + // At this point lhs and rhs have been swapped. + return rhs; + // FALL-THROUGH + case BO_Shl: + if (lhsValue == 0) + // At this point lhs and rhs have been swapped. + return rhs; + return makeGenericVal(state, op, rhs, lhs, resultTy); + default: + return makeGenericVal(state, op, rhs, lhs, resultTy); + } + } + } + case nonloc::SymbolValKind: { + nonloc::SymbolVal *selhs = cast(&lhs); + + // LHS is a symbolic expression. + if (selhs->isExpression()) { + + // Only handle LHS of the form "$sym op constant", at least for now. + const SymIntExpr *symIntExpr = + dyn_cast(selhs->getSymbol()); + + if (!symIntExpr) + return makeGenericVal(state, op, lhs, rhs, resultTy); + + // Is this a logical not? (!x is represented as x == 0.) + if (op == BO_EQ && rhs.isZeroConstant()) { + // We know how to negate certain expressions. Simplify them here. + + BinaryOperator::Opcode opc = symIntExpr->getOpcode(); + switch (opc) { + default: + // We don't know how to negate this operation. + // Just handle it as if it were a normal comparison to 0. + break; + case BO_LAnd: + case BO_LOr: + llvm_unreachable("Logical operators handled by branching logic."); + case BO_Assign: + case BO_MulAssign: + case BO_DivAssign: + case BO_RemAssign: + case BO_AddAssign: + case BO_SubAssign: + case BO_ShlAssign: + case BO_ShrAssign: + case BO_AndAssign: + case BO_XorAssign: + case BO_OrAssign: + case BO_Comma: + llvm_unreachable("'=' and ',' operators handled by ExprEngine."); + case BO_PtrMemD: + case BO_PtrMemI: + llvm_unreachable("Pointer arithmetic not handled here."); + case BO_LT: + case BO_GT: + case BO_LE: + case BO_GE: + case BO_EQ: + case BO_NE: + // Negate the comparison and make a value. + opc = NegateComparison(opc); + assert(symIntExpr->getType(Context) == resultTy); + return makeNonLoc(symIntExpr->getLHS(), opc, + symIntExpr->getRHS(), resultTy); + } + } + + // For now, only handle expressions whose RHS is a constant. + const nonloc::ConcreteInt *rhsInt = dyn_cast(&rhs); + if (!rhsInt) + return makeGenericVal(state, op, lhs, rhs, resultTy); + + // If both the LHS and the current expression are additive, + // fold their constants. + if (BinaryOperator::isAdditiveOp(op)) { + BinaryOperator::Opcode lop = symIntExpr->getOpcode(); + if (BinaryOperator::isAdditiveOp(lop)) { + // resultTy may not be the best type to convert to, but it's + // probably the best choice in expressions with mixed type + // (such as x+1U+2LL). The rules for implicit conversions should + // choose a reasonable type to preserve the expression, and will + // at least match how the value is going to be used. + const llvm::APSInt &first = + BasicVals.Convert(resultTy, symIntExpr->getRHS()); + const llvm::APSInt &second = + BasicVals.Convert(resultTy, rhsInt->getValue()); + const llvm::APSInt *newRHS; + if (lop == op) + newRHS = BasicVals.evalAPSInt(BO_Add, first, second); + else + newRHS = BasicVals.evalAPSInt(BO_Sub, first, second); + return MakeSymIntVal(symIntExpr->getLHS(), lop, *newRHS, resultTy); + } + } + + // Otherwise, make a SymbolVal out of the expression. + return MakeSymIntVal(symIntExpr, op, rhsInt->getValue(), resultTy); + + // LHS is a simple symbol (not a symbolic expression). + } else { + nonloc::SymbolVal *slhs = cast(&lhs); + SymbolRef Sym = slhs->getSymbol(); + QualType lhsType = Sym->getType(Context); + + // The conversion type is usually the result type, but not in the case + // of relational expressions. + QualType conversionType = resultTy; + if (BinaryOperator::isRelationalOp(op)) + conversionType = lhsType; + + // Does the symbol simplify to a constant? If so, "fold" the constant + // by setting 'lhs' to a ConcreteInt and try again. + if (lhsType->isIntegerType()) + if (const llvm::APSInt *Constant = state->getSymVal(Sym)) { + // The symbol evaluates to a constant. If necessary, promote the + // folded constant (LHS) to the result type. + const llvm::APSInt &lhs_I = BasicVals.Convert(conversionType, + *Constant); + lhs = nonloc::ConcreteInt(lhs_I); + + // Also promote the RHS (if necessary). + + // For shifts, it is not necessary to promote the RHS. + if (BinaryOperator::isShiftOp(op)) + continue; + + // Other operators: do an implicit conversion. This shouldn't be + // necessary once we support truncation/extension of symbolic values. + if (nonloc::ConcreteInt *rhs_I = dyn_cast(&rhs)){ + rhs = nonloc::ConcreteInt(BasicVals.Convert(conversionType, + rhs_I->getValue())); + } + + continue; + } + + // Is the RHS a symbol we can simplify? + if (const nonloc::SymbolVal *srhs = dyn_cast(&rhs)) { + SymbolRef RSym = srhs->getSymbol(); + if (RSym->getType(Context)->isIntegerType()) { + if (const llvm::APSInt *Constant = state->getSymVal(RSym)) { + // The symbol evaluates to a constant. + const llvm::APSInt &rhs_I = BasicVals.Convert(conversionType, + *Constant); + rhs = nonloc::ConcreteInt(rhs_I); + } + } + } + + if (isa(rhs)) { + return MakeSymIntVal(slhs->getSymbol(), op, + cast(rhs).getValue(), + resultTy); + } + + return makeGenericVal(state, op, lhs, rhs, resultTy); + } + } + } + } +} + +// FIXME: all this logic will change if/when we have MemRegion::getLocation(). +SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, + BinaryOperator::Opcode op, + Loc lhs, Loc rhs, + QualType resultTy) { + // Only comparisons and subtractions are valid operations on two pointers. + // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15]. + // However, if a pointer is casted to an integer, evalBinOpNN may end up + // calling this function with another operation (PR7527). We don't attempt to + // model this for now, but it could be useful, particularly when the + // "location" is actually an integer value that's been passed through a void*. + if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub)) + return UnknownVal(); + + // Special cases for when both sides are identical. + if (lhs == rhs) { + switch (op) { + default: + llvm_unreachable("Unimplemented operation for two identical values"); + case BO_Sub: + return makeZeroVal(resultTy); + case BO_EQ: + case BO_LE: + case BO_GE: + return makeTruthVal(true, resultTy); + case BO_NE: + case BO_LT: + case BO_GT: + return makeTruthVal(false, resultTy); + } + } + + switch (lhs.getSubKind()) { + default: + llvm_unreachable("Ordering not implemented for this Loc."); + + case loc::GotoLabelKind: + // The only thing we know about labels is that they're non-null. + if (rhs.isZeroConstant()) { + switch (op) { + default: + break; + case BO_Sub: + return evalCastFromLoc(lhs, resultTy); + case BO_EQ: + case BO_LE: + case BO_LT: + return makeTruthVal(false, resultTy); + case BO_NE: + case BO_GT: + case BO_GE: + return makeTruthVal(true, resultTy); + } + } + // There may be two labels for the same location, and a function region may + // have the same address as a label at the start of the function (depending + // on the ABI). + // FIXME: we can probably do a comparison against other MemRegions, though. + // FIXME: is there a way to tell if two labels refer to the same location? + return UnknownVal(); + + case loc::ConcreteIntKind: { + // If one of the operands is a symbol and the other is a constant, + // build an expression for use by the constraint manager. + if (SymbolRef rSym = rhs.getAsLocSymbol()) { + // We can only build expressions with symbols on the left, + // so we need a reversible operator. + if (!BinaryOperator::isComparisonOp(op)) + return UnknownVal(); + + const llvm::APSInt &lVal = cast(lhs).getValue(); + return makeNonLoc(rSym, ReverseComparison(op), lVal, resultTy); + } + + // If both operands are constants, just perform the operation. + if (loc::ConcreteInt *rInt = dyn_cast(&rhs)) { + SVal ResultVal = cast(lhs).evalBinOp(BasicVals, op, + *rInt); + if (Loc *Result = dyn_cast(&ResultVal)) + return evalCastFromLoc(*Result, resultTy); + else + return UnknownVal(); + } + + // Special case comparisons against NULL. + // This must come after the test if the RHS is a symbol, which is used to + // build constraints. The address of any non-symbolic region is guaranteed + // to be non-NULL, as is any label. + assert(isa(rhs) || isa(rhs)); + if (lhs.isZeroConstant()) { + switch (op) { + default: + break; + case BO_EQ: + case BO_GT: + case BO_GE: + return makeTruthVal(false, resultTy); + case BO_NE: + case BO_LT: + case BO_LE: + return makeTruthVal(true, resultTy); + } + } + + // Comparing an arbitrary integer to a region or label address is + // completely unknowable. + return UnknownVal(); + } + case loc::MemRegionKind: { + if (loc::ConcreteInt *rInt = dyn_cast(&rhs)) { + // If one of the operands is a symbol and the other is a constant, + // build an expression for use by the constraint manager. + if (SymbolRef lSym = lhs.getAsLocSymbol()) + return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy); + + // Special case comparisons to NULL. + // This must come after the test if the LHS is a symbol, which is used to + // build constraints. The address of any non-symbolic region is guaranteed + // to be non-NULL. + if (rInt->isZeroConstant()) { + switch (op) { + default: + break; + case BO_Sub: + return evalCastFromLoc(lhs, resultTy); + case BO_EQ: + case BO_LT: + case BO_LE: + return makeTruthVal(false, resultTy); + case BO_NE: + case BO_GT: + case BO_GE: + return makeTruthVal(true, resultTy); + } + } + + // Comparing a region to an arbitrary integer is completely unknowable. + return UnknownVal(); + } + + // Get both values as regions, if possible. + const MemRegion *LeftMR = lhs.getAsRegion(); + assert(LeftMR && "MemRegionKind SVal doesn't have a region!"); + + const MemRegion *RightMR = rhs.getAsRegion(); + if (!RightMR) + // The RHS is probably a label, which in theory could address a region. + // FIXME: we can probably make a more useful statement about non-code + // regions, though. + return UnknownVal(); + + // If both values wrap regions, see if they're from different base regions. + const MemRegion *LeftBase = LeftMR->getBaseRegion(); + const MemRegion *RightBase = RightMR->getBaseRegion(); + if (LeftBase != RightBase && + !isa(LeftBase) && !isa(RightBase)) { + switch (op) { + default: + return UnknownVal(); + case BO_EQ: + return makeTruthVal(false, resultTy); + case BO_NE: + return makeTruthVal(true, resultTy); + } + } + + // The two regions are from the same base region. See if they're both a + // type of region we know how to compare. + const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace(); + const MemSpaceRegion *RightMS = RightBase->getMemorySpace(); + + // Heuristic: assume that no symbolic region (whose memory space is + // unknown) is on the stack. + // FIXME: we should be able to be more precise once we can do better + // aliasing constraints for symbolic regions, but this is a reasonable, + // albeit unsound, assumption that holds most of the time. + if (isa(LeftMS) ^ isa(RightMS)) { + switch (op) { + default: + break; + case BO_EQ: + return makeTruthVal(false, resultTy); + case BO_NE: + return makeTruthVal(true, resultTy); + } + } + + // FIXME: If/when there is a getAsRawOffset() for FieldRegions, this + // ElementRegion path and the FieldRegion path below should be unified. + if (const ElementRegion *LeftER = dyn_cast(LeftMR)) { + // First see if the right region is also an ElementRegion. + const ElementRegion *RightER = dyn_cast(RightMR); + if (!RightER) + return UnknownVal(); + + // Next, see if the two ERs have the same super-region and matching types. + // FIXME: This should do something useful even if the types don't match, + // though if both indexes are constant the RegionRawOffset path will + // give the correct answer. + if (LeftER->getSuperRegion() == RightER->getSuperRegion() && + LeftER->getElementType() == RightER->getElementType()) { + // Get the left index and cast it to the correct type. + // If the index is unknown or undefined, bail out here. + SVal LeftIndexVal = LeftER->getIndex(); + NonLoc *LeftIndex = dyn_cast(&LeftIndexVal); + if (!LeftIndex) + return UnknownVal(); + LeftIndexVal = evalCastFromNonLoc(*LeftIndex, resultTy); + LeftIndex = dyn_cast(&LeftIndexVal); + if (!LeftIndex) + return UnknownVal(); + + // Do the same for the right index. + SVal RightIndexVal = RightER->getIndex(); + NonLoc *RightIndex = dyn_cast(&RightIndexVal); + if (!RightIndex) + return UnknownVal(); + RightIndexVal = evalCastFromNonLoc(*RightIndex, resultTy); + RightIndex = dyn_cast(&RightIndexVal); + if (!RightIndex) + return UnknownVal(); + + // Actually perform the operation. + // evalBinOpNN expects the two indexes to already be the right type. + return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy); + } + + // If the element indexes aren't comparable, see if the raw offsets are. + RegionRawOffset LeftOffset = LeftER->getAsArrayOffset(); + RegionRawOffset RightOffset = RightER->getAsArrayOffset(); + + if (LeftOffset.getRegion() != NULL && + LeftOffset.getRegion() == RightOffset.getRegion()) { + CharUnits left = LeftOffset.getOffset(); + CharUnits right = RightOffset.getOffset(); + + switch (op) { + default: + return UnknownVal(); + case BO_LT: + return makeTruthVal(left < right, resultTy); + case BO_GT: + return makeTruthVal(left > right, resultTy); + case BO_LE: + return makeTruthVal(left <= right, resultTy); + case BO_GE: + return makeTruthVal(left >= right, resultTy); + case BO_EQ: + return makeTruthVal(left == right, resultTy); + case BO_NE: + return makeTruthVal(left != right, resultTy); + } + } + + // If we get here, we have no way of comparing the ElementRegions. + return UnknownVal(); + } + + // See if both regions are fields of the same structure. + // FIXME: This doesn't handle nesting, inheritance, or Objective-C ivars. + if (const FieldRegion *LeftFR = dyn_cast(LeftMR)) { + // Only comparisons are meaningful here! + if (!BinaryOperator::isComparisonOp(op)) + return UnknownVal(); + + // First see if the right region is also a FieldRegion. + const FieldRegion *RightFR = dyn_cast(RightMR); + if (!RightFR) + return UnknownVal(); + + // Next, see if the two FRs have the same super-region. + // FIXME: This doesn't handle casts yet, and simply stripping the casts + // doesn't help. + if (LeftFR->getSuperRegion() != RightFR->getSuperRegion()) + return UnknownVal(); + + const FieldDecl *LeftFD = LeftFR->getDecl(); + const FieldDecl *RightFD = RightFR->getDecl(); + const RecordDecl *RD = LeftFD->getParent(); + + // Make sure the two FRs are from the same kind of record. Just in case! + // FIXME: This is probably where inheritance would be a problem. + if (RD != RightFD->getParent()) + return UnknownVal(); + + // We know for sure that the two fields are not the same, since that + // would have given us the same SVal. + if (op == BO_EQ) + return makeTruthVal(false, resultTy); + if (op == BO_NE) + return makeTruthVal(true, resultTy); + + // Iterate through the fields and see which one comes first. + // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field + // members and the units in which bit-fields reside have addresses that + // increase in the order in which they are declared." + bool leftFirst = (op == BO_LT || op == BO_LE); + for (RecordDecl::field_iterator I = RD->field_begin(), + E = RD->field_end(); I!=E; ++I) { + if (*I == LeftFD) + return makeTruthVal(leftFirst, resultTy); + if (*I == RightFD) + return makeTruthVal(!leftFirst, resultTy); + } + + llvm_unreachable("Fields not found in parent record's definition"); + } + + // If we get here, we have no way of comparing the regions. + return UnknownVal(); + } + } +} + +SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state, + BinaryOperator::Opcode op, + Loc lhs, NonLoc rhs, QualType resultTy) { + + // Special case: rhs is a zero constant. + if (rhs.isZeroConstant()) + return lhs; + + // Special case: 'rhs' is an integer that has the same width as a pointer and + // we are using the integer location in a comparison. Normally this cannot be + // triggered, but transfer functions like those for OSCommpareAndSwapBarrier32 + // can generate comparisons that trigger this code. + // FIXME: Are all locations guaranteed to have pointer width? + if (BinaryOperator::isComparisonOp(op)) { + if (nonloc::ConcreteInt *rhsInt = dyn_cast(&rhs)) { + const llvm::APSInt *x = &rhsInt->getValue(); + ASTContext &ctx = Context; + if (ctx.getTypeSize(ctx.VoidPtrTy) == x->getBitWidth()) { + // Convert the signedness of the integer (if necessary). + if (x->isSigned()) + x = &getBasicValueFactory().getValue(*x, true); + + return evalBinOpLL(state, op, lhs, loc::ConcreteInt(*x), resultTy); + } + } + } + + // We are dealing with pointer arithmetic. + + // Handle pointer arithmetic on constant values. + if (nonloc::ConcreteInt *rhsInt = dyn_cast(&rhs)) { + if (loc::ConcreteInt *lhsInt = dyn_cast(&lhs)) { + const llvm::APSInt &leftI = lhsInt->getValue(); + assert(leftI.isUnsigned()); + llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true); + + // Convert the bitwidth of rightI. This should deal with overflow + // since we are dealing with concrete values. + rightI = rightI.extOrTrunc(leftI.getBitWidth()); + + // Offset the increment by the pointer size. + llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true); + rightI *= Multiplicand; + + // Compute the adjusted pointer. + switch (op) { + case BO_Add: + rightI = leftI + rightI; + break; + case BO_Sub: + rightI = leftI - rightI; + break; + default: + llvm_unreachable("Invalid pointer arithmetic operation"); + } + return loc::ConcreteInt(getBasicValueFactory().getValue(rightI)); + } + } + + // Handle cases where 'lhs' is a region. + if (const MemRegion *region = lhs.getAsRegion()) { + rhs = cast(convertToArrayIndex(rhs)); + SVal index = UnknownVal(); + const MemRegion *superR = 0; + QualType elementType; + + if (const ElementRegion *elemReg = dyn_cast(region)) { + assert(op == BO_Add || op == BO_Sub); + index = evalBinOpNN(state, op, elemReg->getIndex(), rhs, + getArrayIndexType()); + superR = elemReg->getSuperRegion(); + elementType = elemReg->getElementType(); + } + else if (isa(region)) { + superR = region; + index = rhs; + if (const PointerType *PT = resultTy->getAs()) { + elementType = PT->getPointeeType(); + } + else { + const ObjCObjectPointerType *OT = + resultTy->getAs(); + elementType = OT->getPointeeType(); + } + } + + if (NonLoc *indexV = dyn_cast(&index)) { + return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV, + superR, getContext())); + } + } + return UnknownVal(); +} + +const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, + SVal V) { + if (V.isUnknownOrUndef()) + return NULL; + + if (loc::ConcreteInt* X = dyn_cast(&V)) + return &X->getValue(); + + if (nonloc::ConcreteInt* X = dyn_cast(&V)) + return &X->getValue(); + + if (SymbolRef Sym = V.getAsSymbol()) + return state->getSymVal(Sym); + + // FIXME: Add support for SymExprs. + return NULL; +} diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp new file mode 100644 index 0000000..11748ae --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -0,0 +1,362 @@ +//== Store.cpp - Interface for maps from Locations to Values ----*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defined the types Store and StoreManager. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/DeclObjC.h" + +using namespace clang; +using namespace ento; + +StoreManager::StoreManager(ProgramStateManager &stateMgr) + : svalBuilder(stateMgr.getSValBuilder()), StateMgr(stateMgr), + MRMgr(svalBuilder.getRegionManager()), Ctx(stateMgr.getContext()) {} + +StoreRef StoreManager::enterStackFrame(ProgramStateRef state, + const LocationContext *callerCtx, + const StackFrameContext *calleeCtx) { + return StoreRef(state->getStore(), *this); +} + +const MemRegion *StoreManager::MakeElementRegion(const MemRegion *Base, + QualType EleTy, uint64_t index) { + NonLoc idx = svalBuilder.makeArrayIndex(index); + return MRMgr.getElementRegion(EleTy, idx, Base, svalBuilder.getContext()); +} + +// FIXME: Merge with the implementation of the same method in MemRegion.cpp +static bool IsCompleteType(ASTContext &Ctx, QualType Ty) { + if (const RecordType *RT = Ty->getAs()) { + const RecordDecl *D = RT->getDecl(); + if (!D->getDefinition()) + return false; + } + + return true; +} + +StoreRef StoreManager::BindDefault(Store store, const MemRegion *R, SVal V) { + return StoreRef(store, *this); +} + +const ElementRegion *StoreManager::GetElementZeroRegion(const MemRegion *R, + QualType T) { + NonLoc idx = svalBuilder.makeZeroArrayIndex(); + assert(!T.isNull()); + return MRMgr.getElementRegion(T, idx, R, Ctx); +} + +const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) { + + ASTContext &Ctx = StateMgr.getContext(); + + // Handle casts to Objective-C objects. + if (CastToTy->isObjCObjectPointerType()) + return R->StripCasts(); + + if (CastToTy->isBlockPointerType()) { + // FIXME: We may need different solutions, depending on the symbol + // involved. Blocks can be casted to/from 'id', as they can be treated + // as Objective-C objects. This could possibly be handled by enhancing + // our reasoning of downcasts of symbolic objects. + if (isa(R) || isa(R)) + return R; + + // We don't know what to make of it. Return a NULL region, which + // will be interpretted as UnknownVal. + return NULL; + } + + // Now assume we are casting from pointer to pointer. Other cases should + // already be handled. + QualType PointeeTy = CastToTy->getPointeeType(); + QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy); + + // Handle casts to void*. We just pass the region through. + if (CanonPointeeTy.getLocalUnqualifiedType() == Ctx.VoidTy) + return R; + + // Handle casts from compatible types. + if (R->isBoundable()) + if (const TypedValueRegion *TR = dyn_cast(R)) { + QualType ObjTy = Ctx.getCanonicalType(TR->getValueType()); + if (CanonPointeeTy == ObjTy) + return R; + } + + // Process region cast according to the kind of the region being cast. + switch (R->getKind()) { + case MemRegion::CXXThisRegionKind: + case MemRegion::GenericMemSpaceRegionKind: + case MemRegion::StackLocalsSpaceRegionKind: + case MemRegion::StackArgumentsSpaceRegionKind: + case MemRegion::HeapSpaceRegionKind: + case MemRegion::UnknownSpaceRegionKind: + case MemRegion::StaticGlobalSpaceRegionKind: + case MemRegion::GlobalInternalSpaceRegionKind: + case MemRegion::GlobalSystemSpaceRegionKind: + case MemRegion::GlobalImmutableSpaceRegionKind: { + llvm_unreachable("Invalid region cast"); + } + + case MemRegion::FunctionTextRegionKind: + case MemRegion::BlockTextRegionKind: + case MemRegion::BlockDataRegionKind: + case MemRegion::StringRegionKind: + // FIXME: Need to handle arbitrary downcasts. + case MemRegion::SymbolicRegionKind: + case MemRegion::AllocaRegionKind: + case MemRegion::CompoundLiteralRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + case MemRegion::ObjCStringRegionKind: + case MemRegion::VarRegionKind: + case MemRegion::CXXTempObjectRegionKind: + case MemRegion::CXXBaseObjectRegionKind: + return MakeElementRegion(R, PointeeTy); + + case MemRegion::ElementRegionKind: { + // If we are casting from an ElementRegion to another type, the + // algorithm is as follows: + // + // (1) Compute the "raw offset" of the ElementRegion from the + // base region. This is done by calling 'getAsRawOffset()'. + // + // (2a) If we get a 'RegionRawOffset' after calling + // 'getAsRawOffset()', determine if the absolute offset + // can be exactly divided into chunks of the size of the + // casted-pointee type. If so, create a new ElementRegion with + // the pointee-cast type as the new ElementType and the index + // being the offset divded by the chunk size. If not, create + // a new ElementRegion at offset 0 off the raw offset region. + // + // (2b) If we don't a get a 'RegionRawOffset' after calling + // 'getAsRawOffset()', it means that we are at offset 0. + // + // FIXME: Handle symbolic raw offsets. + + const ElementRegion *elementR = cast(R); + const RegionRawOffset &rawOff = elementR->getAsArrayOffset(); + const MemRegion *baseR = rawOff.getRegion(); + + // If we cannot compute a raw offset, throw up our hands and return + // a NULL MemRegion*. + if (!baseR) + return NULL; + + CharUnits off = rawOff.getOffset(); + + if (off.isZero()) { + // Edge case: we are at 0 bytes off the beginning of baseR. We + // check to see if type we are casting to is the same as the base + // region. If so, just return the base region. + if (const TypedValueRegion *TR = dyn_cast(baseR)) { + QualType ObjTy = Ctx.getCanonicalType(TR->getValueType()); + QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy); + if (CanonPointeeTy == ObjTy) + return baseR; + } + + // Otherwise, create a new ElementRegion at offset 0. + return MakeElementRegion(baseR, PointeeTy); + } + + // We have a non-zero offset from the base region. We want to determine + // if the offset can be evenly divided by sizeof(PointeeTy). If so, + // we create an ElementRegion whose index is that value. Otherwise, we + // create two ElementRegions, one that reflects a raw offset and the other + // that reflects the cast. + + // Compute the index for the new ElementRegion. + int64_t newIndex = 0; + const MemRegion *newSuperR = 0; + + // We can only compute sizeof(PointeeTy) if it is a complete type. + if (IsCompleteType(Ctx, PointeeTy)) { + // Compute the size in **bytes**. + CharUnits pointeeTySize = Ctx.getTypeSizeInChars(PointeeTy); + if (!pointeeTySize.isZero()) { + // Is the offset a multiple of the size? If so, we can layer the + // ElementRegion (with elementType == PointeeTy) directly on top of + // the base region. + if (off % pointeeTySize == 0) { + newIndex = off / pointeeTySize; + newSuperR = baseR; + } + } + } + + if (!newSuperR) { + // Create an intermediate ElementRegion to represent the raw byte. + // This will be the super region of the final ElementRegion. + newSuperR = MakeElementRegion(baseR, Ctx.CharTy, off.getQuantity()); + } + + return MakeElementRegion(newSuperR, PointeeTy, newIndex); + } + } + + llvm_unreachable("unreachable"); +} + + +/// CastRetrievedVal - Used by subclasses of StoreManager to implement +/// implicit casts that arise from loads from regions that are reinterpreted +/// as another region. +SVal StoreManager::CastRetrievedVal(SVal V, const TypedValueRegion *R, + QualType castTy, bool performTestOnly) { + + if (castTy.isNull() || V.isUnknownOrUndef()) + return V; + + ASTContext &Ctx = svalBuilder.getContext(); + + if (performTestOnly) { + // Automatically translate references to pointers. + QualType T = R->getValueType(); + if (const ReferenceType *RT = T->getAs()) + T = Ctx.getPointerType(RT->getPointeeType()); + + assert(svalBuilder.getContext().hasSameUnqualifiedType(castTy, T)); + return V; + } + + return svalBuilder.dispatchCast(V, castTy); +} + +SVal StoreManager::getLValueFieldOrIvar(const Decl *D, SVal Base) { + if (Base.isUnknownOrUndef()) + return Base; + + Loc BaseL = cast(Base); + const MemRegion* BaseR = 0; + + switch (BaseL.getSubKind()) { + case loc::MemRegionKind: + BaseR = cast(BaseL).getRegion(); + break; + + case loc::GotoLabelKind: + // These are anormal cases. Flag an undefined value. + return UndefinedVal(); + + case loc::ConcreteIntKind: + // While these seem funny, this can happen through casts. + // FIXME: What we should return is the field offset. For example, + // add the field offset to the integer value. That way funny things + // like this work properly: &(((struct foo *) 0xa)->f) + return Base; + + default: + llvm_unreachable("Unhandled Base."); + } + + // NOTE: We must have this check first because ObjCIvarDecl is a subclass + // of FieldDecl. + if (const ObjCIvarDecl *ID = dyn_cast(D)) + return loc::MemRegionVal(MRMgr.getObjCIvarRegion(ID, BaseR)); + + return loc::MemRegionVal(MRMgr.getFieldRegion(cast(D), BaseR)); +} + +SVal StoreManager::getLValueIvar(const ObjCIvarDecl *decl, SVal base) { + return getLValueFieldOrIvar(decl, base); +} + +SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset, + SVal Base) { + + // If the base is an unknown or undefined value, just return it back. + // FIXME: For absolute pointer addresses, we just return that value back as + // well, although in reality we should return the offset added to that + // value. + if (Base.isUnknownOrUndef() || isa(Base)) + return Base; + + const MemRegion* BaseRegion = cast(Base).getRegion(); + + // Pointer of any type can be cast and used as array base. + const ElementRegion *ElemR = dyn_cast(BaseRegion); + + // Convert the offset to the appropriate size and signedness. + Offset = cast(svalBuilder.convertToArrayIndex(Offset)); + + if (!ElemR) { + // + // If the base region is not an ElementRegion, create one. + // This can happen in the following example: + // + // char *p = __builtin_alloc(10); + // p[1] = 8; + // + // Observe that 'p' binds to an AllocaRegion. + // + return loc::MemRegionVal(MRMgr.getElementRegion(elementType, Offset, + BaseRegion, Ctx)); + } + + SVal BaseIdx = ElemR->getIndex(); + + if (!isa(BaseIdx)) + return UnknownVal(); + + const llvm::APSInt& BaseIdxI = cast(BaseIdx).getValue(); + + // Only allow non-integer offsets if the base region has no offset itself. + // FIXME: This is a somewhat arbitrary restriction. We should be using + // SValBuilder here to add the two offsets without checking their types. + if (!isa(Offset)) { + if (isa(BaseRegion->StripCasts())) + return UnknownVal(); + + return loc::MemRegionVal(MRMgr.getElementRegion(elementType, Offset, + ElemR->getSuperRegion(), + Ctx)); + } + + const llvm::APSInt& OffI = cast(Offset).getValue(); + assert(BaseIdxI.isSigned()); + + // Compute the new index. + nonloc::ConcreteInt NewIdx(svalBuilder.getBasicValueFactory().getValue(BaseIdxI + + OffI)); + + // Construct the new ElementRegion. + const MemRegion *ArrayR = ElemR->getSuperRegion(); + return loc::MemRegionVal(MRMgr.getElementRegion(elementType, NewIdx, ArrayR, + Ctx)); +} + +StoreManager::BindingsHandler::~BindingsHandler() {} + +bool StoreManager::FindUniqueBinding::HandleBinding(StoreManager& SMgr, + Store store, + const MemRegion* R, + SVal val) { + SymbolRef SymV = val.getAsLocSymbol(); + if (!SymV || SymV != Sym) + return true; + + if (Binding) { + First = false; + return false; + } + else + Binding = R; + + return true; +} + +void SubRegionMap::anchor() { } +void SubRegionMap::Visitor::anchor() { } diff --git a/clang/lib/StaticAnalyzer/Core/SubEngine.cpp b/clang/lib/StaticAnalyzer/Core/SubEngine.cpp new file mode 100644 index 0000000..350f4b8 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/SubEngine.cpp @@ -0,0 +1,14 @@ +//== SubEngine.cpp - Interface of the subengine of CoreEngine ------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" + +using namespace clang::ento; + +void SubEngine::anchor() { } diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp new file mode 100644 index 0000000..adefb58 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -0,0 +1,540 @@ +//== SymbolManager.h - Management of Symbolic Values ------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines SymbolManager, a class that manages symbolic values +// created for use by ExprEngine and related classes. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +void SymExpr::anchor() { } + +void SymExpr::dump() const { + dumpToStream(llvm::errs()); +} + +static void print(raw_ostream &os, BinaryOperator::Opcode Op) { + switch (Op) { + default: + llvm_unreachable("operator printing not implemented"); + case BO_Mul: os << '*' ; break; + case BO_Div: os << '/' ; break; + case BO_Rem: os << '%' ; break; + case BO_Add: os << '+' ; break; + case BO_Sub: os << '-' ; break; + case BO_Shl: os << "<<" ; break; + case BO_Shr: os << ">>" ; break; + case BO_LT: os << "<" ; break; + case BO_GT: os << '>' ; break; + case BO_LE: os << "<=" ; break; + case BO_GE: os << ">=" ; break; + case BO_EQ: os << "==" ; break; + case BO_NE: os << "!=" ; break; + case BO_And: os << '&' ; break; + case BO_Xor: os << '^' ; break; + case BO_Or: os << '|' ; break; + } +} + +void SymIntExpr::dumpToStream(raw_ostream &os) const { + os << '('; + getLHS()->dumpToStream(os); + os << ") "; + print(os, getOpcode()); + os << ' ' << getRHS().getZExtValue(); + if (getRHS().isUnsigned()) os << 'U'; +} + +void IntSymExpr::dumpToStream(raw_ostream &os) const { + os << ' ' << getLHS().getZExtValue(); + if (getLHS().isUnsigned()) os << 'U'; + print(os, getOpcode()); + os << '('; + getRHS()->dumpToStream(os); + os << ") "; +} + +void SymSymExpr::dumpToStream(raw_ostream &os) const { + os << '('; + getLHS()->dumpToStream(os); + os << ") "; + os << '('; + getRHS()->dumpToStream(os); + os << ')'; +} + +void SymbolCast::dumpToStream(raw_ostream &os) const { + os << '(' << ToTy.getAsString() << ") ("; + Operand->dumpToStream(os); + os << ')'; +} + +void SymbolConjured::dumpToStream(raw_ostream &os) const { + os << "conj_$" << getSymbolID() << '{' << T.getAsString() << '}'; +} + +void SymbolDerived::dumpToStream(raw_ostream &os) const { + os << "derived_$" << getSymbolID() << '{' + << getParentSymbol() << ',' << getRegion() << '}'; +} + +void SymbolExtent::dumpToStream(raw_ostream &os) const { + os << "extent_$" << getSymbolID() << '{' << getRegion() << '}'; +} + +void SymbolMetadata::dumpToStream(raw_ostream &os) const { + os << "meta_$" << getSymbolID() << '{' + << getRegion() << ',' << T.getAsString() << '}'; +} + +void SymbolData::anchor() { } + +void SymbolRegionValue::dumpToStream(raw_ostream &os) const { + os << "reg_$" << getSymbolID() << "<" << R << ">"; +} + +bool SymExpr::symbol_iterator::operator==(const symbol_iterator &X) const { + return itr == X.itr; +} + +bool SymExpr::symbol_iterator::operator!=(const symbol_iterator &X) const { + return itr != X.itr; +} + +SymExpr::symbol_iterator::symbol_iterator(const SymExpr *SE) { + itr.push_back(SE); + while (!isa(itr.back())) expand(); +} + +SymExpr::symbol_iterator &SymExpr::symbol_iterator::operator++() { + assert(!itr.empty() && "attempting to iterate on an 'end' iterator"); + assert(isa(itr.back())); + itr.pop_back(); + if (!itr.empty()) + while (!isa(itr.back())) expand(); + return *this; +} + +SymbolRef SymExpr::symbol_iterator::operator*() { + assert(!itr.empty() && "attempting to dereference an 'end' iterator"); + return cast(itr.back()); +} + +void SymExpr::symbol_iterator::expand() { + const SymExpr *SE = itr.back(); + itr.pop_back(); + + switch (SE->getKind()) { + case SymExpr::RegionValueKind: + case SymExpr::ConjuredKind: + case SymExpr::DerivedKind: + case SymExpr::ExtentKind: + case SymExpr::MetadataKind: + return; + case SymExpr::CastSymbolKind: + itr.push_back(cast(SE)->getOperand()); + return; + case SymExpr::SymIntKind: + itr.push_back(cast(SE)->getLHS()); + return; + case SymExpr::IntSymKind: + itr.push_back(cast(SE)->getRHS()); + return; + case SymExpr::SymSymKind: { + const SymSymExpr *x = cast(SE); + itr.push_back(x->getLHS()); + itr.push_back(x->getRHS()); + return; + } + } + llvm_unreachable("unhandled expansion case"); +} + +const SymbolRegionValue* +SymbolManager::getRegionValueSymbol(const TypedValueRegion* R) { + llvm::FoldingSetNodeID profile; + SymbolRegionValue::Profile(profile, R); + void *InsertPos; + SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); + if (!SD) { + SD = (SymExpr*) BPAlloc.Allocate(); + new (SD) SymbolRegionValue(SymbolCounter, R); + DataSet.InsertNode(SD, InsertPos); + ++SymbolCounter; + } + + return cast(SD); +} + +const SymbolConjured* +SymbolManager::getConjuredSymbol(const Stmt *E, const LocationContext *LCtx, + QualType T, unsigned Count, + const void *SymbolTag) { + + llvm::FoldingSetNodeID profile; + SymbolConjured::Profile(profile, E, T, Count, LCtx, SymbolTag); + void *InsertPos; + SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); + if (!SD) { + SD = (SymExpr*) BPAlloc.Allocate(); + new (SD) SymbolConjured(SymbolCounter, E, LCtx, T, Count, SymbolTag); + DataSet.InsertNode(SD, InsertPos); + ++SymbolCounter; + } + + return cast(SD); +} + +const SymbolDerived* +SymbolManager::getDerivedSymbol(SymbolRef parentSymbol, + const TypedValueRegion *R) { + + llvm::FoldingSetNodeID profile; + SymbolDerived::Profile(profile, parentSymbol, R); + void *InsertPos; + SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); + if (!SD) { + SD = (SymExpr*) BPAlloc.Allocate(); + new (SD) SymbolDerived(SymbolCounter, parentSymbol, R); + DataSet.InsertNode(SD, InsertPos); + ++SymbolCounter; + } + + return cast(SD); +} + +const SymbolExtent* +SymbolManager::getExtentSymbol(const SubRegion *R) { + llvm::FoldingSetNodeID profile; + SymbolExtent::Profile(profile, R); + void *InsertPos; + SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); + if (!SD) { + SD = (SymExpr*) BPAlloc.Allocate(); + new (SD) SymbolExtent(SymbolCounter, R); + DataSet.InsertNode(SD, InsertPos); + ++SymbolCounter; + } + + return cast(SD); +} + +const SymbolMetadata* +SymbolManager::getMetadataSymbol(const MemRegion* R, const Stmt *S, QualType T, + unsigned Count, const void *SymbolTag) { + + llvm::FoldingSetNodeID profile; + SymbolMetadata::Profile(profile, R, S, T, Count, SymbolTag); + void *InsertPos; + SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); + if (!SD) { + SD = (SymExpr*) BPAlloc.Allocate(); + new (SD) SymbolMetadata(SymbolCounter, R, S, T, Count, SymbolTag); + DataSet.InsertNode(SD, InsertPos); + ++SymbolCounter; + } + + return cast(SD); +} + +const SymbolCast* +SymbolManager::getCastSymbol(const SymExpr *Op, + QualType From, QualType To) { + llvm::FoldingSetNodeID ID; + SymbolCast::Profile(ID, Op, From, To); + void *InsertPos; + SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); + if (!data) { + data = (SymbolCast*) BPAlloc.Allocate(); + new (data) SymbolCast(Op, From, To); + DataSet.InsertNode(data, InsertPos); + } + + return cast(data); +} + +const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs, + BinaryOperator::Opcode op, + const llvm::APSInt& v, + QualType t) { + llvm::FoldingSetNodeID ID; + SymIntExpr::Profile(ID, lhs, op, v, t); + void *InsertPos; + SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); + + if (!data) { + data = (SymIntExpr*) BPAlloc.Allocate(); + new (data) SymIntExpr(lhs, op, v, t); + DataSet.InsertNode(data, InsertPos); + } + + return cast(data); +} + +const IntSymExpr *SymbolManager::getIntSymExpr(const llvm::APSInt& lhs, + BinaryOperator::Opcode op, + const SymExpr *rhs, + QualType t) { + llvm::FoldingSetNodeID ID; + IntSymExpr::Profile(ID, lhs, op, rhs, t); + void *InsertPos; + SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); + + if (!data) { + data = (IntSymExpr*) BPAlloc.Allocate(); + new (data) IntSymExpr(lhs, op, rhs, t); + DataSet.InsertNode(data, InsertPos); + } + + return cast(data); +} + +const SymSymExpr *SymbolManager::getSymSymExpr(const SymExpr *lhs, + BinaryOperator::Opcode op, + const SymExpr *rhs, + QualType t) { + llvm::FoldingSetNodeID ID; + SymSymExpr::Profile(ID, lhs, op, rhs, t); + void *InsertPos; + SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); + + if (!data) { + data = (SymSymExpr*) BPAlloc.Allocate(); + new (data) SymSymExpr(lhs, op, rhs, t); + DataSet.InsertNode(data, InsertPos); + } + + return cast(data); +} + +QualType SymbolConjured::getType(ASTContext&) const { + return T; +} + +QualType SymbolDerived::getType(ASTContext &Ctx) const { + return R->getValueType(); +} + +QualType SymbolExtent::getType(ASTContext &Ctx) const { + return Ctx.getSizeType(); +} + +QualType SymbolMetadata::getType(ASTContext&) const { + return T; +} + +QualType SymbolRegionValue::getType(ASTContext &C) const { + return R->getValueType(); +} + +SymbolManager::~SymbolManager() { + for (SymbolDependTy::const_iterator I = SymbolDependencies.begin(), + E = SymbolDependencies.end(); I != E; ++I) { + delete I->second; + } + +} + +bool SymbolManager::canSymbolicate(QualType T) { + T = T.getCanonicalType(); + + if (Loc::isLocType(T)) + return true; + + if (T->isIntegerType()) + return T->isScalarType(); + + if (T->isRecordType() && !T->isUnionType()) + return true; + + return false; +} + +void SymbolManager::addSymbolDependency(const SymbolRef Primary, + const SymbolRef Dependent) { + SymbolDependTy::iterator I = SymbolDependencies.find(Primary); + SymbolRefSmallVectorTy *dependencies = 0; + if (I == SymbolDependencies.end()) { + dependencies = new SymbolRefSmallVectorTy(); + SymbolDependencies[Primary] = dependencies; + } else { + dependencies = I->second; + } + dependencies->push_back(Dependent); +} + +const SymbolRefSmallVectorTy *SymbolManager::getDependentSymbols( + const SymbolRef Primary) { + SymbolDependTy::const_iterator I = SymbolDependencies.find(Primary); + if (I == SymbolDependencies.end()) + return 0; + return I->second; +} + +void SymbolReaper::markDependentsLive(SymbolRef sym) { + // Do not mark dependents more then once. + SymbolMapTy::iterator LI = TheLiving.find(sym); + assert(LI != TheLiving.end() && "The primary symbol is not live."); + if (LI->second == HaveMarkedDependents) + return; + LI->second = HaveMarkedDependents; + + if (const SymbolRefSmallVectorTy *Deps = SymMgr.getDependentSymbols(sym)) { + for (SymbolRefSmallVectorTy::const_iterator I = Deps->begin(), + E = Deps->end(); I != E; ++I) { + if (TheLiving.find(*I) != TheLiving.end()) + continue; + markLive(*I); + } + } +} + +void SymbolReaper::markLive(SymbolRef sym) { + TheLiving[sym] = NotProcessed; + TheDead.erase(sym); + markDependentsLive(sym); +} + +void SymbolReaper::markLive(const MemRegion *region) { + RegionRoots.insert(region); +} + +void SymbolReaper::markInUse(SymbolRef sym) { + if (isa(sym)) + MetadataInUse.insert(sym); +} + +bool SymbolReaper::maybeDead(SymbolRef sym) { + if (isLive(sym)) + return false; + + TheDead.insert(sym); + return true; +} + +bool SymbolReaper::isLiveRegion(const MemRegion *MR) { + if (RegionRoots.count(MR)) + return true; + + MR = MR->getBaseRegion(); + + if (const SymbolicRegion *SR = dyn_cast(MR)) + return isLive(SR->getSymbol()); + + if (const VarRegion *VR = dyn_cast(MR)) + return isLive(VR, true); + + // FIXME: This is a gross over-approximation. What we really need is a way to + // tell if anything still refers to this region. Unlike SymbolicRegions, + // AllocaRegions don't have associated symbols, though, so we don't actually + // have a way to track their liveness. + if (isa(MR)) + return true; + + if (isa(MR)) + return true; + + if (isa(MR)) + return true; + + return false; +} + +bool SymbolReaper::isLive(SymbolRef sym) { + if (TheLiving.count(sym)) { + markDependentsLive(sym); + return true; + } + + if (const SymbolDerived *derived = dyn_cast(sym)) { + if (isLive(derived->getParentSymbol())) { + markLive(sym); + return true; + } + return false; + } + + if (const SymbolExtent *extent = dyn_cast(sym)) { + if (isLiveRegion(extent->getRegion())) { + markLive(sym); + return true; + } + return false; + } + + if (const SymbolMetadata *metadata = dyn_cast(sym)) { + if (MetadataInUse.count(sym)) { + if (isLiveRegion(metadata->getRegion())) { + markLive(sym); + MetadataInUse.erase(sym); + return true; + } + } + return false; + } + + // Interogate the symbol. It may derive from an input value to + // the analyzed function/method. + return isa(sym); +} + +bool +SymbolReaper::isLive(const Stmt *ExprVal, const LocationContext *ELCtx) const { + if (LCtx != ELCtx) { + // If the reaper's location context is a parent of the expression's + // location context, then the expression value is now "out of scope". + if (LCtx->isParentOf(ELCtx)) + return false; + return true; + } + + return LCtx->getAnalysis()->isLive(Loc, ExprVal); +} + +bool SymbolReaper::isLive(const VarRegion *VR, bool includeStoreBindings) const{ + const StackFrameContext *VarContext = VR->getStackFrame(); + const StackFrameContext *CurrentContext = LCtx->getCurrentStackFrame(); + + if (VarContext == CurrentContext) { + if (LCtx->getAnalysis()->isLive(Loc, VR->getDecl())) + return true; + + if (!includeStoreBindings) + return false; + + unsigned &cachedQuery = + const_cast(this)->includedRegionCache[VR]; + + if (cachedQuery) { + return cachedQuery == 1; + } + + // Query the store to see if the region occurs in any live bindings. + if (Store store = reapedStore.getStore()) { + bool hasRegion = + reapedStore.getStoreManager().includedInBindings(store, VR); + cachedQuery = hasRegion ? 1 : 2; + return hasRegion; + } + + return false; + } + + return VarContext->isParentOf(CurrentContext); +} + +SymbolVisitor::~SymbolVisitor() {} diff --git a/clang/lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp new file mode 100644 index 0000000..fe912df --- /dev/null +++ b/clang/lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp @@ -0,0 +1,69 @@ +//===--- TextPathDiagnostics.cpp - Text Diagnostics for Paths ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TextPathDiagnostics object. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Support/raw_ostream.h" +using namespace clang; +using namespace ento; +using namespace llvm; + +namespace { + +/// \brief Simple path diagnostic client used for outputting as diagnostic notes +/// the sequence of events. +class TextPathDiagnostics : public PathDiagnosticConsumer { + const std::string OutputFile; + DiagnosticsEngine &Diag; + +public: + TextPathDiagnostics(const std::string& output, DiagnosticsEngine &diag) + : OutputFile(output), Diag(diag) {} + + void FlushDiagnosticsImpl(std::vector &Diags, + SmallVectorImpl *FilesMade); + + virtual StringRef getName() const { + return "TextPathDiagnostics"; + } + + PathGenerationScheme getGenerationScheme() const { return Minimal; } + bool supportsLogicalOpControlFlow() const { return true; } + bool supportsAllBlockEdges() const { return true; } + virtual bool useVerboseDescription() const { return true; } +}; + +} // end anonymous namespace + +PathDiagnosticConsumer* +ento::createTextPathDiagnosticConsumer(const std::string& out, + const Preprocessor &PP) { + return new TextPathDiagnostics(out, PP.getDiagnostics()); +} + +void TextPathDiagnostics::FlushDiagnosticsImpl( + std::vector &Diags, + SmallVectorImpl *FilesMade) { + for (std::vector::iterator it = Diags.begin(), + et = Diags.end(); it != et; ++it) { + const PathDiagnostic *D = *it; + for (PathPieces::const_iterator I = D->path.begin(), E = D->path.end(); + I != E; ++I) { + unsigned diagID = + Diag.getDiagnosticIDs()->getCustomDiagID(DiagnosticIDs::Note, + (*I)->getString()); + Diag.Report((*I)->getLocation().asLocation(), diagID); + } + } +} diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp new file mode 100644 index 0000000..008f744 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -0,0 +1,680 @@ +//===--- AnalysisConsumer.cpp - ASTConsumer for running Analyses ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// "Meta" ASTConsumer for running different source analyses. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "AnalysisConsumer" + +#include "AnalysisConsumer.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/CallGraph.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Checkers/LocalCheckers.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" + +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/AnalyzerOptions.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Timer.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" + +#include + +using namespace clang; +using namespace ento; +using llvm::SmallPtrSet; + +static ExplodedNode::Auditor* CreateUbiViz(); + +STATISTIC(NumFunctionTopLevel, "The # of functions at top level."); +STATISTIC(NumFunctionsAnalyzed, "The # of functions analysed (as top level)."); +STATISTIC(NumBlocksInAnalyzedFunctions, + "The # of basic blocks in the analyzed functions."); +STATISTIC(PercentReachableBlocks, "The % of reachable basic blocks."); + +//===----------------------------------------------------------------------===// +// Special PathDiagnosticConsumers. +//===----------------------------------------------------------------------===// + +static PathDiagnosticConsumer* +createPlistHTMLDiagnosticConsumer(const std::string& prefix, + const Preprocessor &PP) { + PathDiagnosticConsumer *PD = + createHTMLDiagnosticConsumer(llvm::sys::path::parent_path(prefix), PP); + return createPlistDiagnosticConsumer(prefix, PP, PD); +} + +//===----------------------------------------------------------------------===// +// AnalysisConsumer declaration. +//===----------------------------------------------------------------------===// + +namespace { + +class AnalysisConsumer : public ASTConsumer, + public RecursiveASTVisitor { + enum AnalysisMode { + ANALYSIS_SYNTAX, + ANALYSIS_PATH, + ANALYSIS_ALL + }; + + /// Mode of the analyzes while recursively visiting Decls. + AnalysisMode RecVisitorMode; + /// Bug Reporter to use while recursively visiting Decls. + BugReporter *RecVisitorBR; + +public: + ASTContext *Ctx; + const Preprocessor &PP; + const std::string OutDir; + AnalyzerOptions Opts; + ArrayRef Plugins; + + /// \brief Stores the declarations from the local translation unit. + /// Note, we pre-compute the local declarations at parse time as an + /// optimization to make sure we do not deserialize everything from disk. + /// The local declaration to all declarations ratio might be very small when + /// working with a PCH file. + SetOfDecls LocalTUDecls; + + // PD is owned by AnalysisManager. + PathDiagnosticConsumer *PD; + + StoreManagerCreator CreateStoreMgr; + ConstraintManagerCreator CreateConstraintMgr; + + OwningPtr checkerMgr; + OwningPtr Mgr; + + /// Time the analyzes time of each translation unit. + static llvm::Timer* TUTotalTimer; + + /// The information about analyzed functions shared throughout the + /// translation unit. + FunctionSummariesTy FunctionSummaries; + + AnalysisConsumer(const Preprocessor& pp, + const std::string& outdir, + const AnalyzerOptions& opts, + ArrayRef plugins) + : RecVisitorMode(ANALYSIS_ALL), RecVisitorBR(0), + Ctx(0), PP(pp), OutDir(outdir), Opts(opts), Plugins(plugins), PD(0) { + DigestAnalyzerOptions(); + if (Opts.PrintStats) { + llvm::EnableStatistics(); + TUTotalTimer = new llvm::Timer("Analyzer Total Time"); + } + } + + ~AnalysisConsumer() { + if (Opts.PrintStats) + delete TUTotalTimer; + } + + void DigestAnalyzerOptions() { + // Create the PathDiagnosticConsumer. + if (!OutDir.empty()) { + switch (Opts.AnalysisDiagOpt) { + default: +#define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATEFN, AUTOCREATE) \ + case PD_##NAME: PD = CREATEFN(OutDir, PP); break; +#include "clang/Frontend/Analyses.def" + } + } else if (Opts.AnalysisDiagOpt == PD_TEXT) { + // Create the text client even without a specified output file since + // it just uses diagnostic notes. + PD = createTextPathDiagnosticConsumer("", PP); + } + + // Create the analyzer component creators. + switch (Opts.AnalysisStoreOpt) { + default: + llvm_unreachable("Unknown store manager."); +#define ANALYSIS_STORE(NAME, CMDFLAG, DESC, CREATEFN) \ + case NAME##Model: CreateStoreMgr = CREATEFN; break; +#include "clang/Frontend/Analyses.def" + } + + switch (Opts.AnalysisConstraintsOpt) { + default: + llvm_unreachable("Unknown store manager."); +#define ANALYSIS_CONSTRAINTS(NAME, CMDFLAG, DESC, CREATEFN) \ + case NAME##Model: CreateConstraintMgr = CREATEFN; break; +#include "clang/Frontend/Analyses.def" + } + } + + void DisplayFunction(const Decl *D, AnalysisMode Mode) { + if (!Opts.AnalyzerDisplayProgress) + return; + + SourceManager &SM = Mgr->getASTContext().getSourceManager(); + PresumedLoc Loc = SM.getPresumedLoc(D->getLocation()); + if (Loc.isValid()) { + llvm::errs() << "ANALYZE"; + switch (Mode) { + case ANALYSIS_SYNTAX: llvm::errs() << "(Syntax)"; break; + case ANALYSIS_PATH: llvm::errs() << "(Path Sensitive)"; break; + case ANALYSIS_ALL: break; + }; + llvm::errs() << ": " << Loc.getFilename(); + if (isa(D) || isa(D)) { + const NamedDecl *ND = cast(D); + llvm::errs() << ' ' << *ND << '\n'; + } + else if (isa(D)) { + llvm::errs() << ' ' << "block(line:" << Loc.getLine() << ",col:" + << Loc.getColumn() << '\n'; + } + else if (const ObjCMethodDecl *MD = dyn_cast(D)) { + Selector S = MD->getSelector(); + llvm::errs() << ' ' << S.getAsString(); + } + } + } + + virtual void Initialize(ASTContext &Context) { + Ctx = &Context; + checkerMgr.reset(createCheckerManager(Opts, PP.getLangOpts(), Plugins, + PP.getDiagnostics())); + Mgr.reset(new AnalysisManager(*Ctx, PP.getDiagnostics(), + PP.getLangOpts(), PD, + CreateStoreMgr, CreateConstraintMgr, + checkerMgr.get(), + Opts.MaxNodes, Opts.MaxLoop, + Opts.VisualizeEGDot, Opts.VisualizeEGUbi, + Opts.AnalysisPurgeOpt, Opts.EagerlyAssume, + Opts.TrimGraph, + Opts.UnoptimizedCFG, Opts.CFGAddImplicitDtors, + Opts.CFGAddInitializers, + Opts.EagerlyTrimEGraph, + Opts.IPAMode, + Opts.InlineMaxStackDepth, + Opts.InlineMaxFunctionSize, + Opts.InliningMode, + Opts.NoRetryExhausted)); + } + + /// \brief Store the top level decls in the set to be processed later on. + /// (Doing this pre-processing avoids deserialization of data from PCH.) + virtual bool HandleTopLevelDecl(DeclGroupRef D); + virtual void HandleTopLevelDeclInObjCContainer(DeclGroupRef D); + + virtual void HandleTranslationUnit(ASTContext &C); + + /// \brief Build the call graph for all the top level decls of this TU and + /// use it to define the order in which the functions should be visited. + void HandleDeclsGallGraph(); + + /// \brief Run analyzes(syntax or path sensitive) on the given function. + /// \param Mode - determines if we are requesting syntax only or path + /// sensitive only analysis. + /// \param VisitedCallees - The output parameter, which is populated with the + /// set of functions which should be considered analyzed after analyzing the + /// given root function. + void HandleCode(Decl *D, AnalysisMode Mode, + SetOfConstDecls *VisitedCallees = 0); + + void RunPathSensitiveChecks(Decl *D, SetOfConstDecls *VisitedCallees); + void ActionExprEngine(Decl *D, bool ObjCGCEnabled, + SetOfConstDecls *VisitedCallees); + + /// Visitors for the RecursiveASTVisitor. + + /// Handle callbacks for arbitrary Decls. + bool VisitDecl(Decl *D) { + checkerMgr->runCheckersOnASTDecl(D, *Mgr, *RecVisitorBR); + return true; + } + + bool VisitFunctionDecl(FunctionDecl *FD) { + IdentifierInfo *II = FD->getIdentifier(); + if (II && II->getName().startswith("__inline")) + return true; + + // We skip function template definitions, as their semantics is + // only determined when they are instantiated. + if (FD->isThisDeclarationADefinition() && + !FD->isDependentContext()) { + HandleCode(FD, RecVisitorMode); + } + return true; + } + + bool VisitObjCMethodDecl(ObjCMethodDecl *MD) { + checkerMgr->runCheckersOnASTDecl(MD, *Mgr, *RecVisitorBR); + if (MD->isThisDeclarationADefinition()) + HandleCode(MD, RecVisitorMode); + return true; + } + +private: + void storeTopLevelDecls(DeclGroupRef DG); + + /// \brief Check if we should skip (not analyze) the given function. + bool skipFunction(Decl *D); + +}; +} // end anonymous namespace + + +//===----------------------------------------------------------------------===// +// AnalysisConsumer implementation. +//===----------------------------------------------------------------------===// +llvm::Timer* AnalysisConsumer::TUTotalTimer = 0; + +bool AnalysisConsumer::HandleTopLevelDecl(DeclGroupRef DG) { + storeTopLevelDecls(DG); + return true; +} + +void AnalysisConsumer::HandleTopLevelDeclInObjCContainer(DeclGroupRef DG) { + storeTopLevelDecls(DG); +} + +void AnalysisConsumer::storeTopLevelDecls(DeclGroupRef DG) { + for (DeclGroupRef::iterator I = DG.begin(), E = DG.end(); I != E; ++I) { + + // Skip ObjCMethodDecl, wait for the objc container to avoid + // analyzing twice. + if (isa(*I)) + continue; + + LocalTUDecls.insert(*I); + } +} + +void AnalysisConsumer::HandleDeclsGallGraph() { + // Otherwise, use the Callgraph to derive the order. + // Build the Call Graph. + CallGraph CG; + // Add all the top level declarations to the graph. + for (SetOfDecls::iterator I = LocalTUDecls.begin(), + E = LocalTUDecls.end(); I != E; ++I) + CG.addToCallGraph(*I); + + // Find the top level nodes - children of root + the unreachable (parentless) + // nodes. + llvm::SmallVector TopLevelFunctions; + for (CallGraph::nodes_iterator TI = CG.parentless_begin(), + TE = CG.parentless_end(); TI != TE; ++TI) { + TopLevelFunctions.push_back(*TI); + NumFunctionTopLevel++; + } + CallGraphNode *Entry = CG.getRoot(); + for (CallGraphNode::iterator I = Entry->begin(), + E = Entry->end(); I != E; ++I) { + TopLevelFunctions.push_back(*I); + NumFunctionTopLevel++; + } + + // Make sure the nodes are sorted in order reverse of their definition in the + // translation unit. This step is very important for performance. It ensures + // that we analyze the root functions before the externally available + // subroutines. + std::queue BFSQueue; + for (llvm::SmallVector::reverse_iterator + TI = TopLevelFunctions.rbegin(), TE = TopLevelFunctions.rend(); + TI != TE; ++TI) + BFSQueue.push(*TI); + + // BFS over all of the functions, while skipping the ones inlined into + // the previously processed functions. Use external Visited set, which is + // also modified when we inline a function. + SmallPtrSet Visited; + while(!BFSQueue.empty()) { + CallGraphNode *N = BFSQueue.front(); + BFSQueue.pop(); + + // Skip the functions which have been processed already or previously + // inlined. + if (Visited.count(N)) + continue; + + // Analyze the function. + SetOfConstDecls VisitedCallees; + Decl *D = N->getDecl(); + assert(D); + HandleCode(D, ANALYSIS_PATH, + (Mgr->InliningMode == All ? 0 : &VisitedCallees)); + + // Add the visited callees to the global visited set. + for (SetOfConstDecls::const_iterator I = VisitedCallees.begin(), + E = VisitedCallees.end(); I != E; ++I){ + CallGraphNode *VN = CG.getNode(*I); + if (VN) + Visited.insert(VN); + } + Visited.insert(N); + + // Push the children into the queue. + for (CallGraphNode::const_iterator CI = N->begin(), + CE = N->end(); CI != CE; ++CI) { + BFSQueue.push(*CI); + } + } +} + +void AnalysisConsumer::HandleTranslationUnit(ASTContext &C) { + // Don't run the actions if an error has occurred with parsing the file. + DiagnosticsEngine &Diags = PP.getDiagnostics(); + if (Diags.hasErrorOccurred() || Diags.hasFatalErrorOccurred()) + return; + + { + if (TUTotalTimer) TUTotalTimer->startTimer(); + + // Introduce a scope to destroy BR before Mgr. + BugReporter BR(*Mgr); + TranslationUnitDecl *TU = C.getTranslationUnitDecl(); + checkerMgr->runCheckersOnASTDecl(TU, *Mgr, BR); + + // Run the AST-only checks using the order in which functions are defined. + // If inlining is not turned on, use the simplest function order for path + // sensitive analyzes as well. + RecVisitorMode = (Mgr->shouldInlineCall() ? ANALYSIS_SYNTAX : ANALYSIS_ALL); + RecVisitorBR = &BR; + + // Process all the top level declarations. + for (SetOfDecls::iterator I = LocalTUDecls.begin(), + E = LocalTUDecls.end(); I != E; ++I) + TraverseDecl(*I); + + if (Mgr->shouldInlineCall()) + HandleDeclsGallGraph(); + + // After all decls handled, run checkers on the entire TranslationUnit. + checkerMgr->runCheckersOnEndOfTranslationUnit(TU, *Mgr, BR); + + RecVisitorBR = 0; + } + + // Explicitly destroy the PathDiagnosticConsumer. This will flush its output. + // FIXME: This should be replaced with something that doesn't rely on + // side-effects in PathDiagnosticConsumer's destructor. This is required when + // used with option -disable-free. + Mgr.reset(NULL); + + if (TUTotalTimer) TUTotalTimer->stopTimer(); + + // Count how many basic blocks we have not covered. + NumBlocksInAnalyzedFunctions = FunctionSummaries.getTotalNumBasicBlocks(); + if (NumBlocksInAnalyzedFunctions > 0) + PercentReachableBlocks = + (FunctionSummaries.getTotalNumVisitedBasicBlocks() * 100) / + NumBlocksInAnalyzedFunctions; + +} + +static void FindBlocks(DeclContext *D, SmallVectorImpl &WL) { + if (BlockDecl *BD = dyn_cast(D)) + WL.push_back(BD); + + for (DeclContext::decl_iterator I = D->decls_begin(), E = D->decls_end(); + I!=E; ++I) + if (DeclContext *DC = dyn_cast(*I)) + FindBlocks(DC, WL); +} + +static std::string getFunctionName(const Decl *D) { + if (const ObjCMethodDecl *ID = dyn_cast(D)) { + return ID->getSelector().getAsString(); + } + if (const FunctionDecl *ND = dyn_cast(D)) { + IdentifierInfo *II = ND->getIdentifier(); + if (II) + return II->getName(); + } + return ""; +} + +bool AnalysisConsumer::skipFunction(Decl *D) { + if (!Opts.AnalyzeSpecificFunction.empty() && + getFunctionName(D) != Opts.AnalyzeSpecificFunction) + return true; + + // Don't run the actions on declarations in header files unless + // otherwise specified. + SourceManager &SM = Ctx->getSourceManager(); + SourceLocation SL = SM.getExpansionLoc(D->getLocation()); + if (!Opts.AnalyzeAll && !SM.isFromMainFile(SL)) + return true; + + return false; +} + +void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode, + SetOfConstDecls *VisitedCallees) { + if (skipFunction(D)) + return; + + DisplayFunction(D, Mode); + + // Clear the AnalysisManager of old AnalysisDeclContexts. + Mgr->ClearContexts(); + + // Dispatch on the actions. + SmallVector WL; + WL.push_back(D); + + if (D->hasBody() && Opts.AnalyzeNestedBlocks) + FindBlocks(cast(D), WL); + + BugReporter BR(*Mgr); + for (SmallVectorImpl::iterator WI=WL.begin(), WE=WL.end(); + WI != WE; ++WI) + if ((*WI)->hasBody()) { + if (Mode != ANALYSIS_PATH) + checkerMgr->runCheckersOnASTBody(*WI, *Mgr, BR); + if (Mode != ANALYSIS_SYNTAX && checkerMgr->hasPathSensitiveCheckers()) { + RunPathSensitiveChecks(*WI, VisitedCallees); + NumFunctionsAnalyzed++; + } + } +} + +//===----------------------------------------------------------------------===// +// Path-sensitive checking. +//===----------------------------------------------------------------------===// + +void AnalysisConsumer::ActionExprEngine(Decl *D, bool ObjCGCEnabled, + SetOfConstDecls *VisitedCallees) { + // Construct the analysis engine. First check if the CFG is valid. + // FIXME: Inter-procedural analysis will need to handle invalid CFGs. + if (!Mgr->getCFG(D)) + return; + + ExprEngine Eng(*Mgr, ObjCGCEnabled, VisitedCallees, &FunctionSummaries); + + // Set the graph auditor. + OwningPtr Auditor; + if (Mgr->shouldVisualizeUbigraph()) { + Auditor.reset(CreateUbiViz()); + ExplodedNode::SetAuditor(Auditor.get()); + } + + // Execute the worklist algorithm. + Eng.ExecuteWorkList(Mgr->getAnalysisDeclContextManager().getStackFrame(D, 0), + Mgr->getMaxNodes()); + + // Release the auditor (if any) so that it doesn't monitor the graph + // created BugReporter. + ExplodedNode::SetAuditor(0); + + // Visualize the exploded graph. + if (Mgr->shouldVisualizeGraphviz()) + Eng.ViewGraph(Mgr->shouldTrimGraph()); + + // Display warnings. + Eng.getBugReporter().FlushReports(); +} + +void AnalysisConsumer::RunPathSensitiveChecks(Decl *D, + SetOfConstDecls *Visited) { + + switch (Mgr->getLangOpts().getGC()) { + case LangOptions::NonGC: + ActionExprEngine(D, false, Visited); + break; + + case LangOptions::GCOnly: + ActionExprEngine(D, true, Visited); + break; + + case LangOptions::HybridGC: + ActionExprEngine(D, false, Visited); + ActionExprEngine(D, true, Visited); + break; + } +} + +//===----------------------------------------------------------------------===// +// AnalysisConsumer creation. +//===----------------------------------------------------------------------===// + +ASTConsumer* ento::CreateAnalysisConsumer(const Preprocessor& pp, + const std::string& outDir, + const AnalyzerOptions& opts, + ArrayRef plugins) { + // Disable the effects of '-Werror' when using the AnalysisConsumer. + pp.getDiagnostics().setWarningsAsErrors(false); + + return new AnalysisConsumer(pp, outDir, opts, plugins); +} + +//===----------------------------------------------------------------------===// +// Ubigraph Visualization. FIXME: Move to separate file. +//===----------------------------------------------------------------------===// + +namespace { + +class UbigraphViz : public ExplodedNode::Auditor { + OwningPtr Out; + llvm::sys::Path Dir, Filename; + unsigned Cntr; + + typedef llvm::DenseMap VMap; + VMap M; + +public: + UbigraphViz(raw_ostream *out, llvm::sys::Path& dir, + llvm::sys::Path& filename); + + ~UbigraphViz(); + + virtual void AddEdge(ExplodedNode *Src, ExplodedNode *Dst); +}; + +} // end anonymous namespace + +static ExplodedNode::Auditor* CreateUbiViz() { + std::string ErrMsg; + + llvm::sys::Path Dir = llvm::sys::Path::GetTemporaryDirectory(&ErrMsg); + if (!ErrMsg.empty()) + return 0; + + llvm::sys::Path Filename = Dir; + Filename.appendComponent("llvm_ubi"); + Filename.makeUnique(true,&ErrMsg); + + if (!ErrMsg.empty()) + return 0; + + llvm::errs() << "Writing '" << Filename.str() << "'.\n"; + + OwningPtr Stream; + Stream.reset(new llvm::raw_fd_ostream(Filename.c_str(), ErrMsg)); + + if (!ErrMsg.empty()) + return 0; + + return new UbigraphViz(Stream.take(), Dir, Filename); +} + +void UbigraphViz::AddEdge(ExplodedNode *Src, ExplodedNode *Dst) { + + assert (Src != Dst && "Self-edges are not allowed."); + + // Lookup the Src. If it is a new node, it's a root. + VMap::iterator SrcI= M.find(Src); + unsigned SrcID; + + if (SrcI == M.end()) { + M[Src] = SrcID = Cntr++; + *Out << "('vertex', " << SrcID << ", ('color','#00ff00'))\n"; + } + else + SrcID = SrcI->second; + + // Lookup the Dst. + VMap::iterator DstI= M.find(Dst); + unsigned DstID; + + if (DstI == M.end()) { + M[Dst] = DstID = Cntr++; + *Out << "('vertex', " << DstID << ")\n"; + } + else { + // We have hit DstID before. Change its style to reflect a cache hit. + DstID = DstI->second; + *Out << "('change_vertex_style', " << DstID << ", 1)\n"; + } + + // Add the edge. + *Out << "('edge', " << SrcID << ", " << DstID + << ", ('arrow','true'), ('oriented', 'true'))\n"; +} + +UbigraphViz::UbigraphViz(raw_ostream *out, llvm::sys::Path& dir, + llvm::sys::Path& filename) + : Out(out), Dir(dir), Filename(filename), Cntr(0) { + + *Out << "('vertex_style_attribute', 0, ('shape', 'icosahedron'))\n"; + *Out << "('vertex_style', 1, 0, ('shape', 'sphere'), ('color', '#ffcc66')," + " ('size', '1.5'))\n"; +} + +UbigraphViz::~UbigraphViz() { + Out.reset(0); + llvm::errs() << "Running 'ubiviz' program... "; + std::string ErrMsg; + llvm::sys::Path Ubiviz = llvm::sys::Program::FindProgramByName("ubiviz"); + std::vector args; + args.push_back(Ubiviz.c_str()); + args.push_back(Filename.c_str()); + args.push_back(0); + + if (llvm::sys::Program::ExecuteAndWait(Ubiviz, &args[0],0,0,0,0,&ErrMsg)) { + llvm::errs() << "Error viewing graph: " << ErrMsg << "\n"; + } + + // Delete the directory. + Dir.eraseFromDisk(true); +} diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.h b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.h new file mode 100644 index 0000000..5a16bff --- /dev/null +++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.h @@ -0,0 +1,43 @@ +//===--- AnalysisConsumer.h - Front-end Analysis Engine Hooks ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header contains the functions necessary for a front-end to run various +// analyses. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_GR_ANALYSISCONSUMER_H +#define LLVM_CLANG_GR_ANALYSISCONSUMER_H + +#include "clang/Basic/LLVM.h" +#include + +namespace clang { + +class AnalyzerOptions; +class ASTConsumer; +class Preprocessor; +class DiagnosticsEngine; + +namespace ento { +class CheckerManager; + +/// CreateAnalysisConsumer - Creates an ASTConsumer to run various code +/// analysis passes. (The set of analyses run is controlled by command-line +/// options.) +ASTConsumer* CreateAnalysisConsumer(const Preprocessor &pp, + const std::string &output, + const AnalyzerOptions& opts, + ArrayRef plugins); + +} // end GR namespace + +} // end clang namespace + +#endif diff --git a/clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt b/clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt new file mode 100644 index 0000000..bbcb085 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt @@ -0,0 +1,21 @@ +set(LLVM_NO_RTTI 1) + +set(LLVM_USED_LIBS clangBasic clangLex clangAST clangFrontend clangRewrite + clangStaticAnalyzerCheckers) + +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/../Checkers ) + +add_clang_library(clangStaticAnalyzerFrontend + AnalysisConsumer.cpp + CheckerRegistration.cpp + FrontendActions.cpp + ) + +add_dependencies(clangStaticAnalyzerFrontend + clangStaticAnalyzerCheckers + clangStaticAnalyzerCore + ClangAttrClasses + ClangAttrList + ClangDeclNodes + ClangStmtNodes + ) diff --git a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp new file mode 100644 index 0000000..c06da0d --- /dev/null +++ b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp @@ -0,0 +1,133 @@ +//===--- CheckerRegistration.cpp - Registration for the Analyzer Checkers -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines the registration function for the analyzer checkers. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Frontend/CheckerRegistration.h" +#include "clang/StaticAnalyzer/Frontend/FrontendActions.h" +#include "clang/StaticAnalyzer/Checkers/ClangCheckers.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/CheckerOptInfo.h" +#include "clang/StaticAnalyzer/Core/CheckerRegistry.h" +#include "clang/Frontend/AnalyzerOptions.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" + +using namespace clang; +using namespace ento; +using llvm::sys::DynamicLibrary; + +namespace { +class ClangCheckerRegistry : public CheckerRegistry { + typedef void (*RegisterCheckersFn)(CheckerRegistry &); + + static bool isCompatibleAPIVersion(const char *versionString); + static void warnIncompatible(DiagnosticsEngine *diags, StringRef pluginPath, + const char *pluginAPIVersion); + +public: + ClangCheckerRegistry(ArrayRef plugins, + DiagnosticsEngine *diags = 0); +}; + +} // end anonymous namespace + +ClangCheckerRegistry::ClangCheckerRegistry(ArrayRef plugins, + DiagnosticsEngine *diags) { + registerBuiltinCheckers(*this); + + for (ArrayRef::iterator i = plugins.begin(), e = plugins.end(); + i != e; ++i) { + // Get access to the plugin. + DynamicLibrary lib = DynamicLibrary::getPermanentLibrary(i->c_str()); + + // See if it's compatible with this build of clang. + const char *pluginAPIVersion = + (const char *) lib.getAddressOfSymbol("clang_analyzerAPIVersionString"); + if (!isCompatibleAPIVersion(pluginAPIVersion)) { + warnIncompatible(diags, *i, pluginAPIVersion); + continue; + } + + // Register its checkers. + RegisterCheckersFn registerPluginCheckers = + (RegisterCheckersFn) (intptr_t) lib.getAddressOfSymbol( + "clang_registerCheckers"); + if (registerPluginCheckers) + registerPluginCheckers(*this); + } +} + +bool ClangCheckerRegistry::isCompatibleAPIVersion(const char *versionString) { + // If the version string is null, it's not an analyzer plugin. + if (versionString == 0) + return false; + + // For now, none of the static analyzer API is considered stable. + // Versions must match exactly. + if (strcmp(versionString, CLANG_ANALYZER_API_VERSION_STRING) == 0) + return true; + + return false; +} + +void ClangCheckerRegistry::warnIncompatible(DiagnosticsEngine *diags, + StringRef pluginPath, + const char *pluginAPIVersion) { + if (!diags) + return; + if (!pluginAPIVersion) + return; + + diags->Report(diag::warn_incompatible_analyzer_plugin_api) + << llvm::sys::path::filename(pluginPath); + diags->Report(diag::note_incompatible_analyzer_plugin_api) + << CLANG_ANALYZER_API_VERSION_STRING + << pluginAPIVersion; +} + + +CheckerManager *ento::createCheckerManager(const AnalyzerOptions &opts, + const LangOptions &langOpts, + ArrayRef plugins, + DiagnosticsEngine &diags) { + OwningPtr checkerMgr(new CheckerManager(langOpts)); + + SmallVector checkerOpts; + for (unsigned i = 0, e = opts.CheckersControlList.size(); i != e; ++i) { + const std::pair &opt = opts.CheckersControlList[i]; + checkerOpts.push_back(CheckerOptInfo(opt.first.c_str(), opt.second)); + } + + ClangCheckerRegistry allCheckers(plugins, &diags); + allCheckers.initializeManager(*checkerMgr, checkerOpts); + checkerMgr->finishedCheckerRegistration(); + + for (unsigned i = 0, e = checkerOpts.size(); i != e; ++i) { + if (checkerOpts[i].isUnclaimed()) + diags.Report(diag::warn_unknown_analyzer_checker) + << checkerOpts[i].getName(); + } + + return checkerMgr.take(); +} + +void ento::printCheckerHelp(raw_ostream &out, ArrayRef plugins) { + out << "OVERVIEW: Clang Static Analyzer Checkers List\n\n"; + out << "USAGE: -analyzer-checker \n\n"; + + ClangCheckerRegistry(plugins).printHelp(out); +} diff --git a/clang/lib/StaticAnalyzer/Frontend/FrontendActions.cpp b/clang/lib/StaticAnalyzer/Frontend/FrontendActions.cpp new file mode 100644 index 0000000..85a18ec --- /dev/null +++ b/clang/lib/StaticAnalyzer/Frontend/FrontendActions.cpp @@ -0,0 +1,23 @@ +//===--- FrontendActions.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Frontend/FrontendActions.h" +#include "clang/Frontend/CompilerInstance.h" +#include "AnalysisConsumer.h" +using namespace clang; +using namespace ento; + +ASTConsumer *AnalysisAction::CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) { + return CreateAnalysisConsumer(CI.getPreprocessor(), + CI.getFrontendOpts().OutputFile, + CI.getAnalyzerOpts(), + CI.getFrontendOpts().Plugins); +} + diff --git a/clang/lib/StaticAnalyzer/Frontend/Makefile b/clang/lib/StaticAnalyzer/Frontend/Makefile new file mode 100644 index 0000000..2698120 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Frontend/Makefile @@ -0,0 +1,19 @@ +##===- clang/lib/StaticAnalyzer/Frontend/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# Starting point into the static analyzer land for the driver. +# +##===----------------------------------------------------------------------===## + +CLANG_LEVEL := ../../.. +LIBRARYNAME := clangStaticAnalyzerFrontend + +CPP.Flags += -I${PROJ_OBJ_DIR}/../Checkers + +include $(CLANG_LEVEL)/Makefile diff --git a/clang/lib/StaticAnalyzer/Makefile b/clang/lib/StaticAnalyzer/Makefile new file mode 100644 index 0000000..c166f06 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Makefile @@ -0,0 +1,18 @@ +##===- clang/lib/StaticAnalyzer/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements analyses built on top of source-level CFGs. +# +##===----------------------------------------------------------------------===## + +CLANG_LEVEL := ../.. +DIRS := Checkers Frontend +PARALLEL_DIRS := Core + +include $(CLANG_LEVEL)/Makefile diff --git a/clang/lib/StaticAnalyzer/README.txt b/clang/lib/StaticAnalyzer/README.txt new file mode 100644 index 0000000..d4310c5 --- /dev/null +++ b/clang/lib/StaticAnalyzer/README.txt @@ -0,0 +1,139 @@ +//===----------------------------------------------------------------------===// +// Clang Static Analyzer +//===----------------------------------------------------------------------===// + += Library Structure = + +The analyzer library has two layers: a (low-level) static analysis +engine (GRExprEngine.cpp and friends), and some static checkers +(*Checker.cpp). The latter are built on top of the former via the +Checker and CheckerVisitor interfaces (Checker.h and +CheckerVisitor.h). The Checker interface is designed to be minimal +and simple for checker writers, and attempts to isolate them from much +of the gore of the internal analysis engine. + += How It Works = + +The analyzer is inspired by several foundational research papers ([1], +[2]). (FIXME: kremenek to add more links) + +In a nutshell, the analyzer is basically a source code simulator that +traces out possible paths of execution. The state of the program +(values of variables and expressions) is encapsulated by the state +(ProgramState). A location in the program is called a program point +(ProgramPoint), and the combination of state and program point is a +node in an exploded graph (ExplodedGraph). The term "exploded" comes +from exploding the control-flow edges in the control-flow graph (CFG). + +Conceptually the analyzer does a reachability analysis through the +ExplodedGraph. We start at a root node, which has the entry program +point and initial state, and then simulate transitions by analyzing +individual expressions. The analysis of an expression can cause the +state to change, resulting in a new node in the ExplodedGraph with an +updated program point and an updated state. A bug is found by hitting +a node that satisfies some "bug condition" (basically a violation of a +checking invariant). + +The analyzer traces out multiple paths by reasoning about branches and +then bifurcating the state: on the true branch the conditions of the +branch are assumed to be true and on the false branch the conditions +of the branch are assumed to be false. Such "assumptions" create +constraints on the values of the program, and those constraints are +recorded in the ProgramState object (and are manipulated by the +ConstraintManager). If assuming the conditions of a branch would +cause the constraints to be unsatisfiable, the branch is considered +infeasible and that path is not taken. This is how we get +path-sensitivity. We reduce exponential blow-up by caching nodes. If +a new node with the same state and program point as an existing node +would get generated, the path "caches out" and we simply reuse the +existing node. Thus the ExplodedGraph is not a DAG; it can contain +cycles as paths loop back onto each other and cache out. + +ProgramState and ExplodedNodes are basically immutable once created. Once +one creates a ProgramState, you need to create a new one to get a new +ProgramState. This immutability is key since the ExplodedGraph represents +the behavior of the analyzed program from the entry point. To +represent these efficiently, we use functional data structures (e.g., +ImmutableMaps) which share data between instances. + +Finally, individual Checkers work by also manipulating the analysis +state. The analyzer engine talks to them via a visitor interface. +For example, the PreVisitCallExpr() method is called by GRExprEngine +to tell the Checker that we are about to analyze a CallExpr, and the +checker is asked to check for any preconditions that might not be +satisfied. The checker can do nothing, or it can generate a new +ProgramState and ExplodedNode which contains updated checker state. If it +finds a bug, it can tell the BugReporter object about the bug, +providing it an ExplodedNode which is the last node in the path that +triggered the problem. + += Notes about C++ = + +Since now constructors are seen before the variable that is constructed +in the CFG, we create a temporary object as the destination region that +is constructed into. See ExprEngine::VisitCXXConstructExpr(). + +In ExprEngine::processCallExit(), we always bind the object region to the +evaluated CXXConstructExpr. Then in VisitDeclStmt(), we compute the +corresponding lazy compound value if the variable is not a reference, and +bind the variable region to the lazy compound value. If the variable +is a reference, just use the object region as the initilizer value. + +Before entering a C++ method (or ctor/dtor), the 'this' region is bound +to the object region. In ctors, we synthesize 'this' region with +CXXRecordDecl*, which means we do not use type qualifiers. In methods, we +synthesize 'this' region with CXXMethodDecl*, which has getThisType() +taking type qualifiers into account. It does not matter we use qualified +'this' region in one method and unqualified 'this' region in another +method, because we only need to ensure the 'this' region is consistent +when we synthesize it and create it directly from CXXThisExpr in a single +method call. + += Working on the Analyzer = + +If you are interested in bringing up support for C++ expressions, the +best place to look is the visitation logic in GRExprEngine, which +handles the simulation of individual expressions. There are plenty of +examples there of how other expressions are handled. + +If you are interested in writing checkers, look at the Checker and +CheckerVisitor interfaces (Checker.h and CheckerVisitor.h). Also look +at the files named *Checker.cpp for examples on how you can implement +these interfaces. + += Debugging the Analyzer = + +There are some useful command-line options for debugging. For example: + +$ clang -cc1 -help | grep analyze + -analyze-function + -analyzer-display-progress + -analyzer-viz-egraph-graphviz + ... + +The first allows you to specify only analyzing a specific function. +The second prints to the console what function is being analyzed. The +third generates a graphviz dot file of the ExplodedGraph. This is +extremely useful when debugging the analyzer and viewing the +simulation results. + +Of course, viewing the CFG (Control-Flow Graph) is also useful: + +$ clang -cc1 -help | grep cfg + -cfg-add-implicit-dtors Add C++ implicit destructors to CFGs for all analyses + -cfg-add-initializers Add C++ initializers to CFGs for all analyses + -cfg-dump Display Control-Flow Graphs + -cfg-view View Control-Flow Graphs using GraphViz + -unoptimized-cfg Generate unoptimized CFGs for all analyses + +-cfg-dump dumps a textual representation of the CFG to the console, +and -cfg-view creates a GraphViz representation. + += References = + +[1] Precise interprocedural dataflow analysis via graph reachability, + T Reps, S Horwitz, and M Sagiv, POPL '95, + http://portal.acm.org/citation.cfm?id=199462 + +[2] A memory model for static analysis of C programs, Z Xu, T + Kremenek, and J Zhang, http://lcs.ios.ac.cn/~xzx/memmodel.pdf -- cgit v1.2.3