diff options
Diffstat (limited to 'clang/lib/Analysis/FormatString.cpp')
-rw-r--r-- | clang/lib/Analysis/FormatString.cpp | 678 |
1 files changed, 678 insertions, 0 deletions
diff --git a/clang/lib/Analysis/FormatString.cpp b/clang/lib/Analysis/FormatString.cpp new file mode 100644 index 0000000..ba45865 --- /dev/null +++ b/clang/lib/Analysis/FormatString.cpp @@ -0,0 +1,678 @@
// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Shared details for processing format strings of printf and scanf
// (and friends).
//
//===----------------------------------------------------------------------===//

#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"

using clang::analyze_format_string::ArgTypeResult;
using clang::analyze_format_string::FormatStringHandler;
using clang::analyze_format_string::FormatSpecifier;
using clang::analyze_format_string::LengthModifier;
using clang::analyze_format_string::OptionalAmount;
using clang::analyze_format_string::PositionContext;
using clang::analyze_format_string::ConversionSpecifier;
using namespace clang;

// Key function to FormatStringHandler.
//
// Out-of-line, empty destructor definition.
// NOTE(review): presumably defined here so the class's vtable is emitted in
// this translation unit only -- confirm against the class declaration, which
// is not visible from this file.
FormatStringHandler::~FormatStringHandler() {}

//===----------------------------------------------------------------------===//
// Functions for parsing format strings components in both printf and
// scanf format strings.
+//===----------------------------------------------------------------------===// + +OptionalAmount +clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { + const char *I = Beg; + UpdateOnReturn <const char*> UpdateBeg(Beg, I); + + unsigned accumulator = 0; + bool hasDigits = false; + + for ( ; I != E; ++I) { + char c = *I; + if (c >= '0' && c <= '9') { + hasDigits = true; + accumulator = (accumulator * 10) + (c - '0'); + continue; + } + + if (hasDigits) + return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, + false); + + break; + } + + return OptionalAmount(); +} + +OptionalAmount +clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, + const char *E, + unsigned &argIndex) { + if (*Beg == '*') { + ++Beg; + return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); + } + + return ParseAmount(Beg, E); +} + +OptionalAmount +clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, + const char *Start, + const char *&Beg, + const char *E, + PositionContext p) { + if (*Beg == '*') { + const char *I = Beg + 1; + const OptionalAmount &Amt = ParseAmount(I, E); + + if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { + H.HandleInvalidPosition(Beg, I - Beg, p); + return OptionalAmount(false); + } + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return OptionalAmount(false); + } + + assert(Amt.getHowSpecified() == OptionalAmount::Constant); + + if (*I == '$') { + // Handle positional arguments + + // Special case: '*0$', since this is an easy mistake. 
+ if (Amt.getConstantAmount() == 0) { + H.HandleZeroPosition(Beg, I - Beg + 1); + return OptionalAmount(false); + } + + const char *Tmp = Beg; + Beg = ++I; + + return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, + Tmp, 0, true); + } + + H.HandleInvalidPosition(Beg, I - Beg, p); + return OptionalAmount(false); + } + + return ParseAmount(Beg, E); +} + + +bool +clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, + FormatSpecifier &CS, + const char *Start, + const char *&Beg, const char *E, + unsigned *argIndex) { + // FIXME: Support negative field widths. + if (argIndex) { + CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); + } + else { + const OptionalAmount Amt = + ParsePositionAmount(H, Start, Beg, E, + analyze_format_string::FieldWidthPos); + + if (Amt.isInvalid()) + return true; + CS.setFieldWidth(Amt); + } + return false; +} + +bool +clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, + FormatSpecifier &FS, + const char *Start, + const char *&Beg, + const char *E) { + const char *I = Beg; + + const OptionalAmount &Amt = ParseAmount(I, E); + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return true; + } + + if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { + // Warn that positional arguments are non-standard. + H.HandlePosition(Start, I - Start); + + // Special case: '%0$', since this is an easy mistake. + if (Amt.getConstantAmount() == 0) { + H.HandleZeroPosition(Start, I - Start); + return true; + } + + FS.setArgIndex(Amt.getConstantAmount() - 1); + FS.setUsesPositionalArg(); + // Update the caller's pointer if we decided to consume + // these characters. 
+ Beg = I; + return false; + } + + return false; +} + +bool +clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, + const char *&I, + const char *E, + const LangOptions &LO, + bool IsScanf) { + LengthModifier::Kind lmKind = LengthModifier::None; + const char *lmPosition = I; + switch (*I) { + default: + return false; + case 'h': + ++I; + lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar) + : LengthModifier::AsShort; + break; + case 'l': + ++I; + lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong) + : LengthModifier::AsLong; + break; + case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; + case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; + case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; + case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; + case 'q': lmKind = LengthModifier::AsQuad; ++I; break; + case 'a': + if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) { + // For scanf in C90, look at the next character to see if this should + // be parsed as the GNU extension 'a' length modifier. If not, this + // will be parsed as a conversion specifier. + ++I; + if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { + lmKind = LengthModifier::AsAllocate; + break; + } + --I; + } + return false; + case 'm': + if (IsScanf) { + lmKind = LengthModifier::AsMAllocate; + ++I; + break; + } + return false; + } + LengthModifier lm(lmPosition, lmKind); + FS.setLengthModifier(lm); + return true; +} + +//===----------------------------------------------------------------------===// +// Methods on ArgTypeResult. 
+//===----------------------------------------------------------------------===// + +bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { + switch (K) { + case InvalidTy: + llvm_unreachable("ArgTypeResult must be valid"); + + case UnknownTy: + return true; + + case AnyCharTy: { + if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + default: + break; + case BuiltinType::Char_S: + case BuiltinType::SChar: + case BuiltinType::UChar: + case BuiltinType::Char_U: + return true; + } + return false; + } + + case SpecificTy: { + argTy = C.getCanonicalType(argTy).getUnqualifiedType(); + if (T == argTy) + return true; + // Check for "compatible types". + if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + default: + break; + case BuiltinType::Char_S: + case BuiltinType::SChar: + return T == C.UnsignedCharTy; + case BuiltinType::Char_U: + case BuiltinType::UChar: + return T == C.SignedCharTy; + case BuiltinType::Short: + return T == C.UnsignedShortTy; + case BuiltinType::UShort: + return T == C.ShortTy; + case BuiltinType::Int: + return T == C.UnsignedIntTy; + case BuiltinType::UInt: + return T == C.IntTy; + case BuiltinType::Long: + return T == C.UnsignedLongTy; + case BuiltinType::ULong: + return T == C.LongTy; + case BuiltinType::LongLong: + return T == C.UnsignedLongLongTy; + case BuiltinType::ULongLong: + return T == C.LongLongTy; + } + return false; + } + + case CStrTy: { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return false; + QualType pointeeTy = PT->getPointeeType(); + if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + case BuiltinType::Void: + case BuiltinType::Char_U: + case BuiltinType::UChar: + case BuiltinType::Char_S: + case BuiltinType::SChar: + return true; + default: + break; + } + + return false; + } + + case WCStrTy: { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return false; + 
QualType pointeeTy = + C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); + return pointeeTy == C.getWCharType(); + } + + case WIntTy: { + // Instead of doing a lookup for the definition of 'wint_t' (which + // is defined by the system headers) instead see if wchar_t and + // the argument type promote to the same type. + QualType PromoWChar = + C.getWCharType()->isPromotableIntegerType() + ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); + QualType PromoArg = + argTy->isPromotableIntegerType() + ? C.getPromotedIntegerType(argTy) : argTy; + + PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); + PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); + + return PromoWChar == PromoArg; + } + + case CPointerTy: + return argTy->isPointerType() || argTy->isObjCObjectPointerType() || + argTy->isBlockPointerType() || argTy->isNullPtrType(); + + case ObjCPointerTy: { + if (argTy->getAs<ObjCObjectPointerType>() || + argTy->getAs<BlockPointerType>()) + return true; + + // Handle implicit toll-free bridging. + if (const PointerType *PT = argTy->getAs<PointerType>()) { + // Things such as CFTypeRef are really just opaque pointers + // to C structs representing CF types that can often be bridged + // to Objective-C objects. Since the compiler doesn't know which + // structs can be toll-free bridged, we just accept them all. 
+ QualType pointee = PT->getPointeeType(); + if (pointee->getAsStructureType() || pointee->isVoidType()) + return true; + } + return false; + } + } + + llvm_unreachable("Invalid ArgTypeResult Kind!"); +} + +QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { + switch (K) { + case InvalidTy: + llvm_unreachable("No representative type for Invalid ArgTypeResult"); + case UnknownTy: + return QualType(); + case AnyCharTy: + return C.CharTy; + case SpecificTy: + return T; + case CStrTy: + return C.getPointerType(C.CharTy); + case WCStrTy: + return C.getPointerType(C.getWCharType()); + case ObjCPointerTy: + return C.ObjCBuiltinIdTy; + case CPointerTy: + return C.VoidPtrTy; + case WIntTy: { + QualType WC = C.getWCharType(); + return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; + } + } + + llvm_unreachable("Invalid ArgTypeResult Kind!"); +} + +std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const { + std::string S = getRepresentativeType(C).getAsString(); + if (Name && S != Name) + return std::string("'") + Name + "' (aka '" + S + "')"; + return std::string("'") + S + "'"; +} + + +//===----------------------------------------------------------------------===// +// Methods on OptionalAmount. +//===----------------------------------------------------------------------===// + +ArgTypeResult +analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { + return Ctx.IntTy; +} + +//===----------------------------------------------------------------------===// +// Methods on LengthModifier. 
+//===----------------------------------------------------------------------===// + +const char * +analyze_format_string::LengthModifier::toString() const { + switch (kind) { + case AsChar: + return "hh"; + case AsShort: + return "h"; + case AsLong: // or AsWideChar + return "l"; + case AsLongLong: + return "ll"; + case AsQuad: + return "q"; + case AsIntMax: + return "j"; + case AsSizeT: + return "z"; + case AsPtrDiff: + return "t"; + case AsLongDouble: + return "L"; + case AsAllocate: + return "a"; + case AsMAllocate: + return "m"; + case None: + return ""; + } + return NULL; +} + +//===----------------------------------------------------------------------===// +// Methods on ConversionSpecifier. +//===----------------------------------------------------------------------===// + +const char *ConversionSpecifier::toString() const { + switch (kind) { + case dArg: return "d"; + case iArg: return "i"; + case oArg: return "o"; + case uArg: return "u"; + case xArg: return "x"; + case XArg: return "X"; + case fArg: return "f"; + case FArg: return "F"; + case eArg: return "e"; + case EArg: return "E"; + case gArg: return "g"; + case GArg: return "G"; + case aArg: return "a"; + case AArg: return "A"; + case cArg: return "c"; + case sArg: return "s"; + case pArg: return "p"; + case nArg: return "n"; + case PercentArg: return "%"; + case ScanListArg: return "["; + case InvalidSpecifier: return NULL; + + // MacOS X unicode extensions. + case CArg: return "C"; + case SArg: return "S"; + + // Objective-C specific specifiers. + case ObjCObjArg: return "@"; + + // GlibC specific specifiers. + case PrintErrno: return "m"; + } + return NULL; +} + +//===----------------------------------------------------------------------===// +// Methods on OptionalAmount. 
+//===----------------------------------------------------------------------===// + +void OptionalAmount::toString(raw_ostream &os) const { + switch (hs) { + case Invalid: + case NotSpecified: + return; + case Arg: + if (UsesDotPrefix) + os << "."; + if (usesPositionalArg()) + os << "*" << getPositionalArgIndex() << "$"; + else + os << "*"; + break; + case Constant: + if (UsesDotPrefix) + os << "."; + os << amt; + break; + } +} + +bool FormatSpecifier::hasValidLengthModifier() const { + switch (LM.getKind()) { + case LengthModifier::None: + return true; + + // Handle most integer flags + case LengthModifier::AsChar: + case LengthModifier::AsShort: + case LengthModifier::AsLongLong: + case LengthModifier::AsQuad: + case LengthModifier::AsIntMax: + case LengthModifier::AsSizeT: + case LengthModifier::AsPtrDiff: + switch (CS.getKind()) { + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case ConversionSpecifier::XArg: + case ConversionSpecifier::nArg: + return true; + default: + return false; + } + + // Handle 'l' flag + case LengthModifier::AsLong: + switch (CS.getKind()) { + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case ConversionSpecifier::XArg: + case ConversionSpecifier::aArg: + case ConversionSpecifier::AArg: + case ConversionSpecifier::fArg: + case ConversionSpecifier::FArg: + case ConversionSpecifier::eArg: + case ConversionSpecifier::EArg: + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: + case ConversionSpecifier::nArg: + case ConversionSpecifier::cArg: + case ConversionSpecifier::sArg: + case ConversionSpecifier::ScanListArg: + return true; + default: + return false; + } + + case LengthModifier::AsLongDouble: + switch (CS.getKind()) { + case ConversionSpecifier::aArg: + case 
ConversionSpecifier::AArg: + case ConversionSpecifier::fArg: + case ConversionSpecifier::FArg: + case ConversionSpecifier::eArg: + case ConversionSpecifier::EArg: + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: + return true; + // GNU extension. + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case ConversionSpecifier::XArg: + return true; + default: + return false; + } + + case LengthModifier::AsAllocate: + switch (CS.getKind()) { + case ConversionSpecifier::sArg: + case ConversionSpecifier::SArg: + case ConversionSpecifier::ScanListArg: + return true; + default: + return false; + } + + case LengthModifier::AsMAllocate: + switch (CS.getKind()) { + case ConversionSpecifier::cArg: + case ConversionSpecifier::CArg: + case ConversionSpecifier::sArg: + case ConversionSpecifier::SArg: + case ConversionSpecifier::ScanListArg: + return true; + default: + return false; + } + } + llvm_unreachable("Invalid LengthModifier Kind!"); +} + +bool FormatSpecifier::hasStandardLengthModifier() const { + switch (LM.getKind()) { + case LengthModifier::None: + case LengthModifier::AsChar: + case LengthModifier::AsShort: + case LengthModifier::AsLong: + case LengthModifier::AsLongLong: + case LengthModifier::AsIntMax: + case LengthModifier::AsSizeT: + case LengthModifier::AsPtrDiff: + case LengthModifier::AsLongDouble: + return true; + case LengthModifier::AsAllocate: + case LengthModifier::AsMAllocate: + case LengthModifier::AsQuad: + return false; + } + llvm_unreachable("Invalid LengthModifier Kind!"); +} + +bool FormatSpecifier::hasStandardConversionSpecifier(const LangOptions &LangOpt) const { + switch (CS.getKind()) { + case ConversionSpecifier::cArg: + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case 
ConversionSpecifier::XArg: + case ConversionSpecifier::fArg: + case ConversionSpecifier::FArg: + case ConversionSpecifier::eArg: + case ConversionSpecifier::EArg: + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: + case ConversionSpecifier::aArg: + case ConversionSpecifier::AArg: + case ConversionSpecifier::sArg: + case ConversionSpecifier::pArg: + case ConversionSpecifier::nArg: + case ConversionSpecifier::ObjCObjArg: + case ConversionSpecifier::ScanListArg: + case ConversionSpecifier::PercentArg: + return true; + case ConversionSpecifier::CArg: + case ConversionSpecifier::SArg: + return LangOpt.ObjC1 || LangOpt.ObjC2; + case ConversionSpecifier::InvalidSpecifier: + case ConversionSpecifier::PrintErrno: + return false; + } + llvm_unreachable("Invalid ConversionSpecifier Kind!"); +} + +bool FormatSpecifier::hasStandardLengthConversionCombination() const { + if (LM.getKind() == LengthModifier::AsLongDouble) { + switch(CS.getKind()) { + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case ConversionSpecifier::XArg: + return false; + default: + return true; + } + } + return true; +} |