From f823ccc928ae09a63ffef5b6cfe47966e2da1890 Mon Sep 17 00:00:00 2001 From: Carlo Zancanaro Date: Mon, 26 Nov 2012 13:59:29 +1100 Subject: A bunch more writing. Write write write. --- tex/thesis/abstract/abstract.tex | 11 +++- tex/thesis/contribution/contribution.tex | 102 ++++++++++++++++--------------- tex/thesis/introduction/introduction.tex | 45 ++++++++++++-- tex/thesis/litreview/litreview.tex | 81 ++++++++++++------------ tex/thesis/references.bib | 52 ++++++++++++++++ 5 files changed, 197 insertions(+), 94 deletions(-) (limited to 'tex') diff --git a/tex/thesis/abstract/abstract.tex b/tex/thesis/abstract/abstract.tex index 4333635..2326f61 100644 --- a/tex/thesis/abstract/abstract.tex +++ b/tex/thesis/abstract/abstract.tex @@ -1,5 +1,12 @@ \chapter*{Abstract} -The abstract goes in here. - +In today's increasingly technological world, software bugs can have +significant consequences. In order to limit the effect of these bugs +software engineers employ a wide variety of tests, but these are +insufficient to guarantee that software is bug free. +In this thesis we present an implementation of a game-theoretic static +analyser described in \cite{EasyChair:117} as a method of finding bugs +and guaranteeing correctness. We present our own enhancements to +improve the algorithm's running time and evaluate our improvements on +a number of benchmarks. diff --git a/tex/thesis/contribution/contribution.tex b/tex/thesis/contribution/contribution.tex index 48a5e73..fac83c9 100644 --- a/tex/thesis/contribution/contribution.tex +++ b/tex/thesis/contribution/contribution.tex @@ -50,8 +50,8 @@ In this chapter the main theoretical contribution of this thesis is presented: an extension on a $\max$-strategy iteration algorithm for solving fixpoint equations over the integers with monotonic operators\cite{Gawlitza:2007:PFC:1762174.1762203} (see Section -\ref{sec:gawlitza-algorithm}). We devise an algorithm which runs in -linear time in the best case. +\ref{sec:gawlitza-algorithm}). We devise an algorithm which performs +considerably better for practical equation systems. In this chapter we will begin by presenting the Work-list Depth First Search (W-DFS) fixpoint algorithm developed by Seidl et @@ -135,7 +135,7 @@ variables. We will also define $\CZ = \Z \cup \{-\infty, \infty\}$. \begin{definition} \textbf{Equation System:} An equation system is a mapping from variables to expressions. $\system : X \to E$. We define - $E_\system \subset E$ to be the preimage of system, that is + $E_\system \subset E$ to be the preimage of $\system$, that is $E_\system = \{ e \in E \mid system(x) = e, \forall x \in X \}$. As each element of $E$ is also a function $(X \to \CZ) \to \CZ$, so an equation system is considered as a function $\system : X \to @@ -170,16 +170,6 @@ variables. We will also define $\CZ = \Z \cup \{-\infty, \infty\}$. \end{align*} \end{definition} -\begin{definition} - A variable or expression $x$ is said to \textbf{depend} on $y$ if - $x(\rho) \ne x(\varrho)$ and - \begin{align*} - \rho(z) &= \varrho(z) & \forall z \in X \backslash \{y\} \\ - \rho(y) &\ne \varrho(y) - \end{align*} - - If $x$ depends on $y$ then $y$ is said to \emph{influence} $x$. -\end{definition} \section{Fixpoint Iteration} \subsection{Kleene Iteration} @@ -191,7 +181,7 @@ reach the least/greatest solution if there is one to be found, but it will often perform many more evaluations than are necessary. This algorithm is presented in Listing \ref{algo:kleene}. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Function {solvefixpoint} {$\system \in \Systems$} \State $k = 0$ @@ -223,7 +213,7 @@ iteration will continue without bound, tending towards $\{ x \mapsto -\infty, y \mapsto 0 \}$ rather than the greatest fixpoint of $\{ x \mapsto -\infty, y \mapsto -\infty \}$. -\begin{figure}[H] +\begin{figure}[tbphH] \begin{align*} x &= \min(0, x - 1) \\ y &= \min(0, \infty \oplus x) & @@ -240,15 +230,41 @@ at most restricted to $|X|$ iterations in the particular case of the $\max$-strategy iteration algorithm considered in this thesis. Kleene iteration is therefore restricted to $\Theta(|X|^3)$ right hand side evaluations at most in our application. We seek to improve on this -performance in the next section. +performance in the next section by taking into account dependencies +between expressions. + +\begin{definition} + An expression $x$ is said to \textbf{depend} on $y$ if and only if + $x(\rho) \ne x(\varrho)$ and + \begin{align*} + \rho(z) &= \varrho(z) & \forall z \in X \backslash \{y\} \\ + \rho(y) &\ne \varrho(y) + \end{align*} + + Intuitively, $x$ depends on $y$ if and only if a change in the value + of $y$ induces a change in the value of $x$. + + If $x$ depends on $y$ then $y$ is said to \emph{influence} $x$. +\end{definition} + \subsection{W-DFS algorithm} \label{sec:wdfs} -The W-DFS algorithm of Seidl et al.~takes into account dependencies -between variables as it solves the system. By taking into account -dependencies we can leave portions of the system unevaluated when we -are certain that those values have not changed. The algorithm is -presented in Listing \ref{algo:wdfs}. +Several algorithms have been presented in literature for finding +solutions to general fixpoint equation systems. In particular has been +the \emph{topdown solver} of Charlier and Van +Hentenryck\cite{TD-fixpoint} and the \emph{worklist} solver of +Kildall\cite{Kildall:1973:UAG:512927.512945} which has been further +extended by others\cite{Jorgensen94findingfixpoints,FP-efficient}. The +W-DFS algorithm of Seidl et al.\cite{DBLP:tr/trier/MI96-11} is an +attempt to combine the best aspects of these general fixpoint +algorithms. + +The W-DFS algorithm takes into account dependencies between variables +as it solves the system. By taking into account dependencies we can +leave portions of the system unevaluated when we are certain that +those values have not changed. The algorithm is presented in Listing +\ref{algo:wdfs}. \begin{algorithm} \begin{algorithmic} @@ -343,7 +359,7 @@ The W-DFS algorithm provides us with a \emph{local}, \emph{demand-driven} solver for greatest fixpoints of monotonic fixpoint problems. -\begin{figure}[H] +\begin{figure}[tbphH] \begin{tikzpicture} \node[draw] (full) at (0,-2) { \begin{minipage}{0.3\textwidth} @@ -399,14 +415,10 @@ other subsystem. To compute the value of $x$ in the greatest fixpoint it is unnecessary to compute the value of the variable $d$, but it is necessary to compute the value of $y$. The W-DFS algorithm, will only evaluate the necessary subsystem for a requested variable, leaving the -other system unevaluated. In order to capture this property we will -define the notion of a \emph{local} solver. +other subsystem unevaluated. We call this algorithm \emph{local}, +because it considers only as many variables as are necessary to +compute the requested values. -\begin{definition} - \textbf{Local:} A solver is said be \emph{local} if, for some - equation system, $\system$, and some $e \in E_\system$, it will - solve for $e_x$ without evaluating every element of $E_\system$. -\end{definition} \section{$\max$-strategy Iteration} @@ -475,7 +487,7 @@ approach is to perform a loop, improving the strategy at each step, until an equilibrium point is reached. The approach is presented in Listing \ref{algo:naive-strategy}. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Assumptions \begin{tabularx}{0.9\textwidth}{rX} @@ -526,7 +538,7 @@ the W-DFS algorithm presented above in Section \ref{sec:wdfs}. Listing \ref{algo:adapted-wdfs} presents one variation on W-DFS to solve $\max$-strategy iteration problems. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Globals \begin{tabularx}{0.9\textwidth}{rX} @@ -665,7 +677,8 @@ stabilises (at which point the fixpoint will also stabilise). Both portions of the LDSI algorithm are \emph{demand driven}, and so any necessary evaluation of strategies or fixpoint values is delayed until the point when it is required. This means that the solver will -be \emph{local} (see Section \ref{sec:wdfs}). +be \emph{local}, considering only the subset of variables necessary to +calculate the values we are interested in.. @@ -701,7 +714,7 @@ This algorithm is presented in three parts. \subsection{Top level} \label{sec:ldsi:top-level} -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Globals \begin{tabularx}{0.9\textwidth}{rX} @@ -749,7 +762,7 @@ in Sections \ref{sec:wdfs} and \ref{sec:adapted-wdfs}. The only exception is $\touchedFP$, which is a newly introduced variable to track variables which are touched by the fixpoint iteration steps. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Function {init} {$s \in \Systems$} \State $D = \underline{-\infty}$ @@ -803,7 +816,7 @@ have already been computed to avoid performing additional work. The last stable value is stored in this procedure to identify \emph{changed} values, rather than only \emph{unstable} ones. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Function {evalfixpoint} {$x \in X$, $y \in X$} \State $\solve \fixpoint(y)$ @@ -822,7 +835,7 @@ algorithm, except for the fact that $\solve$ has been renamed to $\solve \fixpoint$. $\eval \fixpoint$ performs the same function as the $\eval$ function in Listing \ref{algo:wdfs}. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Procedure {invalidatefixpoint} {$x \in X$} \Comment{Invalidate a fixpoint variable} @@ -863,7 +876,7 @@ set of changed variables for the $\max$-strategy iteration phase. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Procedure {solvefixpoint} {$x \in X$} \If {$x \not \in \stableFP$} @@ -909,7 +922,7 @@ being stabilised. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Function {evalstrategy} {$x \in E_{\max}$, $y \in E_{\max}$} \State $\solve \strategy(y)$ @@ -957,7 +970,7 @@ directly, but if it is dependent on its own value (as in $\{ x = x + 1 \}$, for example), then it will be destabilised by the transitivity of $\invalidate \strategy$. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Procedure {invalidatestrategy} {$x \in E_{\max}$} \If {$x \in \stableSI$} @@ -982,7 +995,7 @@ by a $\solve \strategy$ call. All $\max$-expressions which (transitively) depend on $x$ are destabilised recursively. -\begin{algorithm}[H] +\begin{algorithm}[tbphH] \begin{algorithmic} \Procedure {solvestrategy} {$x \in E_{\max}$} \If {$x \not \in \stableSI$} @@ -1019,13 +1032,6 @@ Section \ref{sec:adapted-wdfs}. \subsection{Correctness} \label{sec:ldsi:correctness} -To argue the correctness of our LDSI algorithm we will first argue the -correctness of the fixpoint iteration portion of the algorithm. We -will then argue the correctness of the $\max$-strategy iteration -portion, including the communication between the fixpoint iteration -and $\max$-strategy iteration. - - The fixpoint iteration portion of the algorithm is concerned with finding the greatest fixpoint, $\solnFP(s) : X \to D$ for the system $s = \sigma(\system)$. If we consider all valid fixpoints, $\fix(s) = \{ @@ -1187,6 +1193,4 @@ correctly solve their respective portions of the problem, and the communication between them is correct, then we know that the overall algorithm is correct. - \subsection{Implementation} \label{sec:ldsi:implementation} - diff --git a/tex/thesis/introduction/introduction.tex b/tex/thesis/introduction/introduction.tex index f59f30f..ffc6cd2 100644 --- a/tex/thesis/introduction/introduction.tex +++ b/tex/thesis/introduction/introduction.tex @@ -2,19 +2,54 @@ \section{Motivation} -Software bugs are bad. Bugs have significant costs in terms of time, -money, security and lives. Hailpern and Santhanam state that -debugging, verification and testing can easily range from 50\% to 75\% -of the total development cost of a +%Main points: +% - software is taking on more important roles +% - software bugs are dangerous +% - software bugs are expensive +% - we have to minimise the number of bugs we have + +In today's increasingly technological world, software bugs can have +significant consequences, ranging from the relatively minor +frustration caused to average users to causing deaths. There have been +several incidents in recent years in which a bug in a software system +has led directly to injury or death. X-ray machines which provide too +high a dose of radiation, cars which continue to accelerate against +the driver's wishes and other dangerous situations have all come about +as a direct result of software bugs. + +Software bugs also have significant financial costs, with Hailpern and +Santhanam static that debugging, verification and testing can easily +comprise 50\% to 75\% of the total development cost of a system\cite{Hailpern01softwaredebugging}. +In order to limit the number of bugs we have it has become commonplace +to employ sophisticated approaches to testing. Many different types of +testing are done to attempt to ensure that software is bug-free. These +tests, although extremely useful, are inherently incapable of +\emph{guaranteeing} that software is free of bugs. This is especially +a problem in critical software systems, such as those found in +aeroplanes or large industrial machinery, where software failure can +have catastrophic consequences. + +In order to provide a guarantee that software is free of bugs we must +in some way \emph{verify} the software before running it. While it is +possible to write programs in a way that is easier to verify, it is +still a difficult and expensive process to provide verification. + +Recently there has been work done in automatically analysing programs +in order to identify bugs. This work is broadly classed \emph{static + analysis} and this thesis contributes to this work. + + + + \section{Contribution} In this thesis we present an implementation of the strategy-iteration based static analyser presented by Gawlitza et al.\cite{EasyChair:117}. Our implementation has several enhancements which significantly improve the practical performance of the analyser -on real-world data. +on real-world programs. Theoretical contribution: \begin{enumerate} diff --git a/tex/thesis/litreview/litreview.tex b/tex/thesis/litreview/litreview.tex index 1e0ef90..109864e 100644 --- a/tex/thesis/litreview/litreview.tex +++ b/tex/thesis/litreview/litreview.tex @@ -10,19 +10,19 @@ \chapter{Preliminaries} \label{chap:litreview} -In this chapter we will review the relevant portion of literature -pertaining to static analysis. We will briefly cover the semantics of -programs generally before looking more closely at the framework of -abstract interpretation for program analysis. The chapter will -conclude with an explanation of one particular method of abstract -interpretation over zones. +In this chapter we review the relevant literature pertaining to static +analysis. We briefly cover the semantics of programs generally before +looking more closely at the framework of abstract interpretation for +program analysis. The chapter concludes with an explanation of the +particular method of abstract interpretation over intervals and zones +which is implemented in this thesis. \section{Program Semantics} In order to perform any static analysis of a program it is necessary to have some notion of the \emph{semantics}, or meaning, of a program. The semantics of a program can be captured in several ways, -but for our purposes we will consider \emph{denotational}, or +but for our purposes we consider \emph{denotational}, or \emph{mathematical}, \emph{semantics}\cite{SCOTT70B}. Denotational semantics views a program as a (partial) function built @@ -77,7 +77,7 @@ following example: One possible abstraction would be to instead consider a variable's \emph{sign}. That is, to consider $x$ as satisfying one of the - following (the symbols in parentheses will be used to refer to these + following (the symbols in parentheses are used to refer to these values from now): \begin{align*} x &\in \Z & (\pm) \\ @@ -180,14 +180,14 @@ to perform abstract interpretation over zones. \subsection{Partial-ordering Formulation} -To begin our construction of the equation systems we first need to -formalise one of a CFG. For our purposes we will consider a CFG to be -a tuple $G = (N, E, s, st)$. A finite set of \emph{control-points} -$N$, a finite set $E \subseteq N \times N$ of \emph{edges} between -these control points (ie. if $(u,v) \in E$ then an edge exists in the -CFG from $u$ to $v$). A mapping $s: N \times N \to \Stmt$ taking each -edge in the CFG to a \emph{statement} $\in \Stmt$. $st \in N$ denotes -the special \emph{start control-point}. +To begin our construction of the equation systems we need to formalise +one of a CFG. We consider a CFG to be a tuple $G = (N, E, s, st)$. A +finite set of \emph{control-points} $N$, a finite set $E \subseteq N +\times N$ of \emph{edges} between these control points (ie. if $(u,v) +\in E$ then an edge exists in the CFG from $u$ to $v$). A mapping $s: +N \times N \to \Stmt$ taking each edge in the CFG to a +\emph{statement} $\in \Stmt$. $st \in N$ denotes the special +\emph{start control-point}. We can now begin our construction of our abstract semantics $V^\#[v], \forall v \in N$. @@ -280,15 +280,15 @@ least as big as the complete set of abstract states (as in \label{fig:partial-ordered-construction} \end{figure} - We will now attempt to derive a system of inequalities from this for - the \emph{least upper bound} + We now attempt to derive a system of inequalities from this for the + \emph{least upper bound} \begin{align*} ub: {} & ~\Vars \to \CZ \\ ub(x) &= \min\{v \in \CZ ~|~ x \le v\}. \end{align*} We wish to find a value for $ub(x)$ at each program point in our - original program. We will denote these values by $ub_A(x)$, - $ub_B(x)$ and $ub_C(x)$. + original program. We denote these values by $ub_A(x)$, $ub_B(x)$ and + $ub_C(x)$. For $A$ the value of $x$ is entirely unknown, so no useful bound can be given. The possible values of $x$ are bounded only by the top @@ -307,10 +307,10 @@ least as big as the complete set of abstract states (as in The edge from $B$ to itself is somewhat more complex. To begin with it changes the value of $x$ depending on the previous value of - $x$. It also is executed conditionally (when $x \le 99$), which will - affect the bound on $x$. Because the condition limits the growth of - $x$ the corresponding equation for this edge is $ub(x_B) \ge - \min(x, 99) + 1$. + $x$. It also is executed conditionally (when $x \le 99$), which + affects the bound on $x$. Because the condition limits the growth of + $x$ the corresponding equation for this edge is $ub(x_B) \ge \min(x, + 99) + 1$. This leaves us with two simultaneous constraints on the value of $ub(x_B)$. @@ -348,7 +348,7 @@ least as big as the complete set of abstract states (as in The partial-ordering formulation considered above is a helpful presentation of the problem in terms of constraints, but an equivalent system can be constructed to aid us in locating the \emph{least} -solution to the system. +solution to the constraint system. For every partial-ordering on a set there exists an equivalent lattice, defined in terms of a meet operator ($\vee$) and a join @@ -358,10 +358,11 @@ operator ($\land$), which are defined by the following relationships: x \le y ~&\iff~ x \land y = x \end{align*} -In the case that we are considering, with $x, y \in \CZ$, the meet and -join operators correspond with $\max$ and $\min$, respectively. This -allows us to simplify the system of inequalities in Example -\ref{example:partial-ordered-construction} to the following: +In the case of Example \ref{example:partial-ordered-construction}, +with $x, y \in \CZ$, the meet and join operators correspond with +$\max$ and $\min$, respectively. This allows us to simplify the system +of inequalities in Example \ref{example:partial-ordered-construction} +to the following: \begin{align*} ub(x_A) &= \infty \\ ub(x_B) &= \max(1, \min(ub(x_B), 99) + 1) \\ @@ -370,7 +371,11 @@ allows us to simplify the system of inequalities in Example The two constraints on the upper bound of $x_B$ can be combined into one constraint by taking the maximum of them. This is because $x \ge -\max(a, b) ~\implies~ x \ge a \text{ and } x \ge b$ +\max(a, b) ~\implies~ x \ge a \text{ and } x \ge b$. The +Knaster-Tarski Fixpoint +%We change from using $\ge$ in our constraints to using $=$ in our +%equation system as a consequence of the Knaster-Tarski Fixpoint +%Theorem\cite{Knaster-Tarski}. @@ -378,19 +383,19 @@ one constraint by taking the maximum of them. This is because $x \ge \label{example:both-bounds} In order to determine a lower bound for $x$ at each program point it - is sufficient to determine an \emph{upper} bound for $-x$. We will - again consider the program fragment in Figure + is sufficient to determine an \emph{upper} bound for $-x$. We again + consider the program fragment in Figure \ref{fig:partial-ordered-construction}. - We will now try to derive an equation system for the \emph{upper} - and \emph{lower} bounds of $x$. + We now try to derive an equation system for the \emph{upper} and + \emph{lower} bounds of $x$. In order to capture the state of the variables at each program point - we will consider the abstract state as the two-dimensional vector + we consider the abstract state as the two-dimensional vector $(ub(x), -lb(x))$, that is the upper bound of $x$, $ub(x)$, and the \emph{negated} lower bound of $x$, $-lb(x)$. The least fixpoint of - our system of equations over $\CZ^2$ will then give us our bounds - for $ub(x)$ and $lb(x)$. + our system of equations over $\CZ^2$ then gives us bounds for + $ub(x)$ and $lb(x)$. To begin with, let us once again consider the node $A$. At the point of $A$ there is no information as to the bounds of $x$, so our @@ -580,7 +585,7 @@ one constraint by taking the maximum of them. This is because $x \ge \label{sec-1-3-5} -Reconsidering our earlier equation system, we will now attempt to find +Reconsidering our earlier equation system, we now attempt to find the minimal fixpoint by means of \emph{policy iteration} and \emph{fixpoint iteration}. diff --git a/tex/thesis/references.bib b/tex/thesis/references.bib index 3993b5e..9b0eb6b 100644 --- a/tex/thesis/references.bib +++ b/tex/thesis/references.bib @@ -207,3 +207,55 @@ Entscheidungsproblem}, volume = 42, year = 1936 } +@article{Knaster-Tarski, + author = {Tarski, Alfred}, + journal = {Pacific Journal of Mathematics}, + number = {2}, + pages = {285-309}, + title = {A Lattice-Theoretical Fixpoint Theorem and its Applications}, + volume = {5}, + year = {1955}, +} +@TECHREPORT{TD-fixpoint, + author = {Baudouin Le Charlier and Pascal Van Hentenryck}, + title = {A Universal Top-Down Fixpoint Algorithm}, + institution = {}, + year = {1992} +} +@INPROCEEDINGS{Jorgensen94findingfixpoints, + author = {Niels Jørgensen}, + title = {Finding Fixpoints in Finite Function Spaces Using Neededness Analysis and Chaotic Iteration}, + booktitle = {In SAS'94}, + year = {1994}, + pages = {329--345}, + publisher = {Springer, LNCS} +} +@inproceedings{Kildall:1973:UAG:512927.512945, + author = {Kildall, Gary A.}, + title = {A unified approach to global program optimization}, + booktitle = {Proceedings of the 1st annual ACM SIGACT-SIGPLAN symposium on Principles of programming languages}, + series = {POPL '73}, + year = {1973}, + location = {Boston, Massachusetts}, + pages = {194--206}, + numpages = {13}, + url = {http://doi.acm.org/10.1145/512927.512945}, + doi = {10.1145/512927.512945}, + acmid = {512945}, + publisher = {ACM}, + address = {New York, NY, USA}, +} +@incollection{FP-efficient, +year={1994}, +isbn={978-3-540-58485-8}, +booktitle={Static Analysis}, +volume={864}, +series={Lecture Notes in Computer Science}, +editor={Charlier, Baudouin}, +doi={10.1007/3-540-58485-4_49}, +title={Efficient fixpoint computation}, +url={http://dx.doi.org/10.1007/3-540-58485-4_49}, +publisher={Springer Berlin Heidelberg}, +author={Vergauwen, B. and Wauman, J. and Lewi, J.}, +pages={314-328} +} -- cgit v1.2.3