From f88d4f700606eb15d83795b188810e0451b2e157 Mon Sep 17 00:00:00 2001 From: Carlo Zancanaro Date: Fri, 18 May 2012 13:22:09 +1000 Subject: Add the outline stuff, too. A bit more lit review work. --- tex/lit-review/main.tex | 64 ++++++++++++++++++++++++-------- tex/lit-review/references.bib | 36 +++++++++++++++++- tex/outline/Makefile | 8 ++++ tex/outline/main.tex | 68 ++++++++++++++++++++++++++++++++++ tex/outline/references.bib | 86 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 245 insertions(+), 17 deletions(-) create mode 100644 tex/outline/Makefile create mode 100644 tex/outline/main.tex create mode 100644 tex/outline/references.bib (limited to 'tex') diff --git a/tex/lit-review/main.tex b/tex/lit-review/main.tex index 8f5c5ce..28a9495 100644 --- a/tex/lit-review/main.tex +++ b/tex/lit-review/main.tex @@ -48,7 +48,8 @@ abstraction we can still define some semantics which reflect the program itself a negative number is a negative number). Each abstract domain must also contain a "top" $(\top)$ and a "bottom" $(\bot)$ element. The existence of these two elements turn our abstract domain into a complete lattice, which ensures some -helpful properties of our abstract domain. +helpful properties of our abstract domain, including the existence of solutions +to fixpoint-equations (\cite{tarski}). By constructing appropriate abstract domains we can infer properties of the program's concrete semantics which are relevant to our application. The above @@ -71,7 +72,7 @@ positive/negative/zero. The abstract domain of "zones", are handled in \cite{mine:padoII}. These zones provide a way to perform analysis on the bounds of variable differences (ie. to place bounds as $a - b \leq c$) and are also referred to as Difference Bound -Matrices (which are used in other model-checking applications +Matrices (which are used in other model-checking applications, such as \cite{Yovine98modelchecking}). The domain of zones is then extended by the same author into the "Octagon @@ -88,6 +89,14 @@ one can define a polyhedra $1 \times x_1 + (-1) \times x_2 + c \ge 0$ which is equivalent. The octagon domain is $(\pm 1) \times x_1 + (\pm 1) \times x_2 + c \ge 0$. +Non-integral abstract domains have also been investigated in papers such as +\cite{springerlink}, which considers the abstract domain +of strings with abstract values being regular expressions, but it seems the +majority of the literature is concerned with providing bounds for integral, or +at the very least numerical, expressions. The reasons for this are fairly +obvious when one considers the ubiquity of integers and the relative simplicity +of the numerical domain compared to many others. + \section{Fixpoints} When performing static analysis there is a great need to perform some sort of @@ -96,9 +105,10 @@ When looping, for instance, it is necessary to "iterate" (in some sense) to find what values each variable may take during a loop iteration. The most basic method for doing so is simply to perform a complete iteration in -the abstract domain, but doing so has no guarantee of termination and would not -be expected to be performant. In \cite{CousotCousot77-1} an approach of -"widening" and "narrowing" is introduced. +the abstract domain, but doing so within the integers has no guarantee of +termination (due to the integers being an unbounded set) and would not be +expected to be performant. In \cite{CousotCousot77-1} an approach of "widening" +and "narrowing" is introduced. This widening/narrowing method performs some fixed number of iteration steps. If, after this fixed number, the iteration has not stabilised then it assumes @@ -108,20 +118,42 @@ wherein variables are brought back into the finite domain by some terminating condition on the loop. This approach is quite general, and so is applicable to many instances of real -programs, but is a crude approximation of the reality of the program. In -specific instances it is possible to perform a better approximation. In -particular the case of monotonic, expansive operators is considered in \cite{}. % something from thomas here -With this approach code points are converted into fixpoint equations with -$minimum$ and $maximum$ expressions. These fixpoint equations can then be solved -in order to give information about the program's abstract semantics. These -equations are solved using the technique of strategy iteration \cite{} % something about strategy iteration here -combined with a fast fixpoint-iteration algorithm (such as the one presented in -\cite{DBLP:tr/trier/MI96-11}). - -%Other test citation: \cite{DBLP:tr/trier/MI96-11} +programs, but is a crude approximation of the reality of the program (albeit, +with surprisingly good results). In specific instances it is possible to perform +a better approximation. In particular the case of addition, multiplication (by +positive constants), minimum and maximum is considered in +\cite{Gawlitza:2007:PFC:1762174.1762203}. With this approach code points are +converted into fixpoint equations with $minimum$ and $maximum$ expressions. +These fixpoint equations can then be solved in order to give information about +the program's abstract semantics. These equations are solved using a form of +policy iteration (referred to as strategy iteration) over the $maximum$ +expressions present in each term. A second step consisting of ordinary +fixpoint-iteration over the resulting $minimum$-expressions is then performed. + +When fixpoint-iteration is known to terminate then it is possible to get +significant speed gains by processing each expression according to their +dependencies. The approach presented in \cite{DBLP:tr/trier/MI96-11}, in +particular, improves the real-world performance of their fixpoint-equation +solver. It should be noted, however, that such evaluation strategies can do +nothing to affect the worst-case complexity as it is always possible to +construct a system in which evaluation of each variable is necessary at every +stage until the system stabilises. \section{Conclusion} +Static analysis is of great practical importance to real world developers. It +gives us the ability to create tools which automatically identify common errors +in programs due to developer errors and so allows us to provide greater +guarantees about the behaviour of our software. + +Unfortunately complete static analysis of a program is an undecidable problem, +and so we must perform abstract interpretation of a program's semantics in order +to provide useful results of analysis. Many different abstract domains exist in +which to consider programs, with varying precision and computational complexity. +In particular the abstract domain of polyhedra (and the related domains of +zones and ranges) are of great use when performing analysis of a program's +integral semantics. + \bibliographystyle{abbrv} \bibliography{references.bib} \end{document} diff --git a/tex/lit-review/references.bib b/tex/lit-review/references.bib index d6db721..d746821 100644 --- a/tex/lit-review/references.bib +++ b/tex/lit-review/references.bib @@ -80,4 +80,38 @@ pages = {114--152}, publisher = {Springer-Verlag} } - +@inproceedings{Gawlitza:2007:PFC:1762174.1762203, + author = {Gawlitza, Thomas and Seidl, Helmut}, + title = {Precise fixpoint computation through strategy iteration}, + booktitle = {Proceedings of the 16th European conference on Programming}, + series = {ESOP'07}, + year = {2007}, + isbn = {978-3-540-71314-2}, + location = {Braga, Portugal}, + pages = {300--315}, + numpages = {16}, + url = {http://dl.acm.org/citation.cfm?id=1762174.1762203}, + acmid = {1762203}, + publisher = {Springer-Verlag}, + address = {Berlin, Heidelberg}, +} +@article{tarski, + author = {Tarski, Alfred}, + title = {A lattice-theoretical fixpoint theorem and its + applications.}, + year = {1955} +} +@incollection {springerlink, + author = {Christensen, Aske and Mø, Anders and Schwartzbach, Michael}, + affiliation = {BRICS Denmark}, + title = {Precise Analysis of String Expressions}, + booktitle = {Static Analysis}, + series = {Lecture Notes in Computer Science}, + editor = {Cousot, Radhia}, + publisher = {Springer Berlin / Heidelberg}, + isbn = {978-3-540-40325-8}, + keyword = {Computer Science}, + pages = {1076-1076}, + volume = {2694}, + year = {2003} +} diff --git a/tex/outline/Makefile b/tex/outline/Makefile new file mode 100644 index 0000000..66eb399 --- /dev/null +++ b/tex/outline/Makefile @@ -0,0 +1,8 @@ +all: + pdflatex main.tex + bibtex main + pdflatex main.tex + pdflatex main.tex + +clean: + rm -f *.bbl *.aux *.blg *.log *.pdf *.out diff --git a/tex/outline/main.tex b/tex/outline/main.tex new file mode 100644 index 0000000..8c5cf98 --- /dev/null +++ b/tex/outline/main.tex @@ -0,0 +1,68 @@ +\documentclass{article} + +\usepackage[a4paper]{geometry} +\usepackage{hyperref} +\usepackage{amsmath} + +\title{INFO5993 \\ Research outline} +\author{Carlo Zancanaro} + +\begin{document} +\maketitle + +\section{Introduction} + +As my research contribution I intend to implement an algorithm presented in +\cite{gawlitza} for solving systems of fixpoint-equations for their least +solutions. I will then utilise this algorithm for the implementation of a static +analysis pass, interpreting program semantics over zones, within the LLVM +compiler framework. Finally I will be evaluating this new technique to determine +whether it is of practical use in real-world static analysis. + +\section{Technologies} + +My implementation will be provided in the {\tt C++} programming language, as it +is the language in which LLVM has been written. It will primarily be designed to +analyse programs written in the {\tt C} language. + +At the conclusion of my thesis I will have an implementation of the above +algorithm implemented as an LLVM optimisation/analysis pass. This will plug in +to the LLVM/Clang static-analysis framework in order to leverage the significant +work that has been done on the LLVM project. This will also mean that I do not +have to write a complete parser for the {\tt C} programming language (which +would no doubt be a long and error-prone operation). + +\section{Evaluation} + +To evaluate the algorithm I will be investigating two factors: execution time +and analysis range. + +\subsection{Execution time} + +Given the increased precision of results provided by the above algorithm I will +be interested in determining the runtime performance of the algorithm. Static +analysis is traditionally a slow process and so any speed-gains which can be +made will be especially beneficial to real world applications. This will also +mean that my optimisations must be especially aggressive to ensure that there is +no missed opportunity for speed gains to be made. + +\subsection{Analysis range} + +In the above algorithm there is a trade-off made wherein we sacrifice the +generality of the algorithm to provide more precise results. An important factor +in the utility of this algorithm will be whether it is applicable to the +majority of real-world code. The restrictions in the paper seem to be general +enough to encompass a large amount of existing code, but it remains to be shown +empirically whether this is the case. + +\section{Conclusion} + +My thesis will consist of implementing and evaluating a new approach to +fixpoint-iteration in order to perform abstract interpretation over zones for +the verification of program invariants. This implementation will be done within +the LLVM-compiler framework. The evaluation will particularly consider factors +of execution time and analysis range of the algorithm. + +\bibliographystyle{abbrv} +\bibliography{references.bib} +\end{document} diff --git a/tex/outline/references.bib b/tex/outline/references.bib new file mode 100644 index 0000000..9735e26 --- /dev/null +++ b/tex/outline/references.bib @@ -0,0 +1,86 @@ +@article{DBLP:tr/trier/MI96-11, + author = {Christian Fecht and + Helmut Seidl}, + title = {An Even Faster Solver for General Systems of Equations}, + journal = {Universit{\"a}t Trier, Mathematik/Informatik, Forschungsbericht}, + volume = {96-11}, + year = {1996}, + bibsource = {DBLP, http://dblp.uni-trier.de} +} +@inproceedings{CousotCousot77-1, + author = {Cousot, P{.} and Cousot, R{.}}, + title = {Abstract interpretation: a unified lattice model for static + analysis of programs by construction or approximation of + fixpoints}, + pages = {238--252}, + booktitle = {Conference Record of the Fourth Annual ACM SIGPLAN-SIGACT + Symposium on Principles of Programming Languages}, + address = {Los Angeles, California}, + publisher = {ACM Press, New York, NY}, + year = 1977, +} +@article{mine:hosc06, + author = {Min\'e, A{.}}, + title = {The Octagon Abstract Domain}, + journal = {Higher-Order and Symbolic Computation}, + editor = {O. Danvy}, + publisher = {Springer}, + year = {2006}, + volume = {19}, + number = {1}, + pages = {31--100}, + note = {\url{http://www.di.ens.fr/~mine/publi/article-mine-HOSC06.pdf}} +} +@inproceedings{mine:padoII, + author = {Min\'e, A{.}}, + title = {A New Numerical Abstract Domain Based on Difference-Bound Matrices}, + booktitle = {Proc. of the 2d Symp. on Programs as Data Objects (PADO II)}, + series = {Lecture Notes in Computer Science}, + year = {2001}, + month = {May}, + pages = {155--172}, + volume = {2053}, + publisher = {Springer}, + location = {Aarhus, Danemark}, + note = {\url{http://www.di.ens.fr/~mine/publi/article-mine-padoII.pdf}} +} +@inproceedings{DBLP:conf/vmcai/SankaranarayananSM05, + author = {Sriram Sankaranarayanan and + Henny B. Sipma and + Zohar Manna}, + title = {Scalable Analysis of Linear Systems Using Mathematical Programming}, + booktitle = {VMCAI}, + year = {2005}, + pages = {25-41}, + ee = {http://dx.doi.org/10.1007/978-3-540-30579-8_2}, + bibsource = {DBLP, http://dblp.uni-trier.de} +} +@inproceedings{Dor:2001:CCS:647170.718291, + author = {Dor, Nurit and Rodeh, Michael and Sagiv, Shmuel}, + title = {Cleanness Checking of String Manipulations in C Programs via + Integer Analysis}, + booktitle = {Proceedings of the 8th International Symposium on Static + Analysis}, + series = {SAS '01}, + year = {2001}, + isbn = {3-540-42314-1}, + pages = {194--212}, + numpages = {19}, + url = {http://dl.acm.org/citation.cfm?id=647170.718291}, + acmid = {718291}, + publisher = {Springer-Verlag}, + address = {London, UK, UK}, +} +@INPROCEEDINGS{Yovine98modelchecking, + author = {Sergio Yovine}, + title = {Model checking timed automata}, + booktitle = {In European Educational Forum: School on Embedded + Systems}, + year = {1998}, + pages = {114--152}, + publisher = {Springer-Verlag} +} +@article{gawlitza, + author = {Thomas Martin Gawlitza and Hemut Seidl}, + title = {Abstract Interpretation over Zones without Widening} +} -- cgit v1.2.3