From 62d8d7011c339f9b192099b2fc6a6d2f567f0a4d Mon Sep 17 00:00:00 2001 From: Carlo Zancanaro Date: Wed, 28 Nov 2012 11:09:10 +1100 Subject: Ahhh! Lots of things. --- tex/thesis/implementation/implementation.tex | 355 +++++++++++++++++++++++++-- 1 file changed, 335 insertions(+), 20 deletions(-) (limited to 'tex/thesis/implementation/implementation.tex') diff --git a/tex/thesis/implementation/implementation.tex b/tex/thesis/implementation/implementation.tex index 02f2180..287cf8d 100644 --- a/tex/thesis/implementation/implementation.tex +++ b/tex/thesis/implementation/implementation.tex @@ -21,22 +21,198 @@ presented in Chapter \ref{chap:contribution}. The primary difference is the use of objects instead of higher-order functions, as C++'s support for higher-order functions was insufficient for our purposes. -The solver was developed using only the STL for the core of the -solver. A front-end was developed using the ANTLR parser generator -\cite{antlr} to test the solver separately to the LLVM/Clang -integration. +The solver was developed using only the Standard Template +Library~\cite{stl} for the core of the solver. A front-end was +developed using the ANTLR parser generator \cite{antlr} to test the +solver without the equation system generation of the clang framework. +A high level overview of the implementation is presented in Figure +\ref{fig:solver-arch}. The strategy iteration and fixpoint iteration +loops provide feedback to each other about which values have changed +at each iteration, continuing until the strategy stabilises. + +\begin{figure} + \begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node + distance=4cm,main node/.style={rectangle,draw,minimum + height=2cm,minimum width=3cm}] + + \node[main node] (maximiser) {Strategy iteration}; + \node[main node] (minimiser) [below of=maximiser] {Fixpoint iteration}; + \node (input) [node distance=5cm,left of=maximiser] {\emph{input}}; + \node (output) [node distance=5cm,right of=minimiser] {\emph{output}}; + + \path [] + (input) edge node {Equation system} (maximiser) + (maximiser) edge [bend left] node {Improved strategy} (minimiser) + (minimiser) edge [bend left] node [align=center]{Greatest fixpoint \\ of strategy} (maximiser) + (minimiser) edge node [align=center] {Least fixpoint \\ of system} (output) + ; + + \end{tikzpicture} + \caption{A very high level overview of the solver implementation.} + \label{fig:solver-arch} +\end{figure} + +In the following subsections we provide some timings for two general +classes of equation systems. Each of the equation systems consists +solely of inexpensive operations, either a variable lookup or a +constant term, so we are comparing only the effect of the algorithms +without consideration of the expense of calculating subexpressions. + +All these measurements were performed on a Core 2 Duo E8400 with 4GB +of memory running Debian sid. Binaries were compiled with GCC 4.7.2 +with the -O3 flag. The source code for the solver is available +online~\cite{bitbucket-repo}. \subsection{Chain} +Our first class of example equation systems is a chain. These equation +systems have the following form: +\begin{align*} + x_0 &= \max(-inf, 0) \\ + x_1 &= \max(-inf, x_0) \\ + ... &~ ... \\ + x_n &= \max(-inf, x_{n-1}) +\end{align*} + +This particular class of equation system is the simplest of the class +of equations with acyclic dependency graphs. In this particular case +our solver significantly outperforms the naive algorithm for most +problem sizes. For equation systems of less than four variables the +naive approach is slightly faster, but this is of no real practical +concern as in both cases it takes less than a millisecond to provide a +result. + +\begin{figure} + \begin{tikzpicture} + \begin{loglogaxis}[% + scale only axis, + width=0.7\textwidth, + height=10cm, + xmin=1, xmax=65536, + xlabel=size of equation system, + ymin=0, ymax=250, + ylabel=time (in seconds), + axis on top, + legend entries={DSI, Naive}] + \addplot coordinates{ + (1,0.000018267) + (2,0.000042359) + (4,0.000077031) + (8,0.000155439) + (16,0.000308687) + (32,0.000570927) + (64,0.001190886) + (128,0.002441834) + (256,0.004609652) + (512,0.009588408) + (1024,0.018226228) + (2048,0.036292838) + (4096,0.076125736) + (8192,0.152150204) + (16384,0.307867610) + (32768,0.654639048) + (65536,1.190741627) + }; + + \addplot coordinates{ + (1,0.000012504 ) + (2,0.000021558 ) + (4,0.000060316 ) + (8,0.000276887 ) + (16,0.001414449 ) + (32,0.009278044 ) + (64,0.067620090 ) + (128,0.508776074 ) + (256,3.939142736 ) + (512,31.373059038 ) + (1024,246.603110160 ) + }; + + \end{loglogaxis} + \end{tikzpicture} + \caption{Chart comparing the performance of our DSI algorithm + compared to a naive approach on simple chain examples. Note that + both axes are logarithmic.} + \label{fig:solver-chain} +\end{figure} \subsection{Cycle} +The next class of equation system we will consider will be long +\emph{cycles}. These will +\begin{align*} + x_0 &= \max(-inf, 0, x_n) \\ + x_1 &= \max(-inf, x_0) \\ + ... &~ ... \\ + x_n &= \max(-inf, x_{n-1}) +\end{align*} +Our dependency graph is no longer acyclic, so finding the value of any +one variable requires the evaluation, and potential strategy +improvement, of all other values. The chart in Figure +\ref{fig:solver-cycle} shows the running time of the DSI algorithm +again, showing that the addition of an acyclic dependency does not +have a significant effect. + + +\begin{figure} + \begin{tikzpicture} + \begin{loglogaxis}[% + scale only axis, + width=0.7\textwidth, + height=10cm, + xmin=1, xmax=65536, + xlabel=size of equation system, + ymin=0, ymax=250, + ylabel=time (in seconds), + axis on top, + legend entries={DSI, Naive}] + + \addplot coordinates{ + (1,0.000016665) + (2,0.000044187) + (4,0.000081627) + (8,0.000159545) + (16,0.000309742) + (32,0.000627197) + (64,0.001236493) + (128,0.002338711) + (256,0.004967883) + (512,0.009164379) + (1024,0.018700068) + (2048,0.041385796) + (4096,0.073920841) + (8192,0.150712424) + (16384,0.302473783) + (32768,0.665616132) + (65536,1.220667681) + }; + + \addplot coordinates{ + (1,0.000012877 ) + (2,0.000021507 ) + (4,0.000059573 ) + (8,0.000252723 ) + (16,0.001419862 ) + (32,0.009221822 ) + (64,0.067315559 ) + (128,0.500673679 ) + (256,3.912675638 ) + (512,31.750177791 ) + (1024,249.912189424 ) + }; + + \end{loglogaxis} + \end{tikzpicture} + \caption{Chart comparing the performance of our DSI algorithm + compared to a naive approach on simple cyclical examples. Note + that both axes are logarithmic.} + \label{fig:solver-cycle} +\end{figure} -\subsection{Dense} @@ -66,11 +242,13 @@ different points in the program. \node[main node,fill=red!60] (eqns) [below of=clang-analysis] {Zone analysis}; \node[main node] (dsi) [below of=eqns] {DSI}; - \node (reality) [node distance=5cm,right of=eqns] {}; + \node (input) [node distance=7cm,left of=clang-analysis] {\emph{input}}; + \node (reality) [node distance=5cm,right of=eqns] {\emph{output}}; \path [] %(clang) edge[bend right] node[anchor=east] {CFG} (clang-analysis) - (clang-analysis) edge[bend right] node[anchor=east] {CFG} (eqns) + (input) edge node {Source code} (clang-analysis) + (clang-analysis) edge node[anchor=east] {CFG} (eqns) (eqns) edge[bend right] node[anchor=east] {Equation System} (dsi) (dsi) edge[bend right] node[anchor=west] {Solution} (eqns) (eqns) edge node[anchor=north] {Variable bounds} (reality) @@ -84,28 +262,165 @@ different points in the program. \end{figure} -\pagebreak -\subsection{Chain} -\lstinputlisting{implementation/experiments/chain.c} +In the sections which follow we present the results of evaluating our +analysis pass on several small programs. The full output from our tool +is provided in Appendix~\ref{appendix:tool-output}, but a condensed +form of the equation systems and variable bounds is presented here. In +the following equation systems we use $MCF\{G\}$ to denote the minimum +cost flow through the flow network $G$. \pagebreak \subsection{Counter} \lstinputlisting{implementation/experiments/counter.c} -\lstinputlisting{implementation/experiments/backwards_counter.c} +This program is extremely simple, consisting only of incrementing a +variable repeatedly in a loop. It is possible to gain a precise result +from a widening and narrowing on this function, so we also achieve a +precise result with this approach. -\pagebreak -\subsection{Nested Loops} -\lstinputlisting{implementation/experiments/nested.c} +Since we only have a single variable, we only wish to determine the +bounds on $i$ and $-i$. Thus, we have the following equation system: +\begin{align*} + ub(i)_A &= \infty \\ + -lb(i)_A &= \infty \\ + ub(i)_B &= \max(-\infty, 0, + MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node + distance=2cm] + \node (i)[draw,circle] {$1$}; + \node (dummy)[draw,circle,right of=i] {$-1$}; + \path [use as bounding box] (0,0) rectangle (0.5,1); + \path + (i) edge[->,bend left] node {$\min(ub(i)_B, 99)$} (dummy) + (dummy) edge[->,bend left] node {$-lb(i)_B$} (i); + \end{tikzpicture}\right\} + 1) \\ +% + -lb(i)_B &= \max(-\infty, 0, + MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node + distance=2cm] + \node (i)[draw,circle] {$-1$}; + \node (dummy)[draw,circle,right of=i] {$1$}; + \path [use as bounding box] (0,0) rectangle (0.5,1); + \path + (i) edge[->,bend left] node {$\min(ub(i)_B, 99)$} (dummy) + (dummy) edge[->,bend left] node {$-lb(i)_B$} (i); + \end{tikzpicture}\right\} - 1) \\ +% +% + ub(i)_C &= \max(-\infty, + MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node + distance=2.5cm] + \node (i)[draw,circle] {$1$}; + \node (dummy)[draw,circle,right of=i] {$-1$}; + \path [use as bounding box] (0,0) rectangle (0.75,1.25); + \path + (i) edge[->,bend left] node {$ub(i)_B$} (dummy) + (dummy) edge[->,bend left] node {$\min(-lb(i)_B, -100)$} (i); + \end{tikzpicture}\right\}) \\ +% + -lb(i)_C &= \max(-\infty, + MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node + distance=2.5cm] + \node (i)[draw,circle] {$-1$}; + \node (dummy)[draw,circle,right of=i] {$1$}; + \path [use as bounding box] (0,0) rectangle (0.75,1.25); + \path + (i) edge[->,bend left] node {$ub(i)_B$} (dummy) + (dummy) edge[->,bend left] node {$\min(-lb(i)_B, -100)$} (i); + \end{tikzpicture}\right\}) +\end{align*} + +Solving this system of equations gives us the following bounds on $i$: +\\ +\begin{center} + \begin{tabular}{c|cc} + Block & ub(i) & lb(i) \\ + \hline + A & $\infty$ & $-\infty$ \\ + B & $100$ & $0$ \\ + C & $100$ & $-100$ + \end{tabular} +\end{center} + +This is a precise analysis of the values of $i$, performed without +widening/narrowing. It captures the possible values of $i$ precisely +and identifies the value of $i$ at the exit of the loop. \pagebreak \subsection{Double counting} \lstinputlisting{implementation/experiments/example.c} -\pagebreak -\subsection{Fibonacci} -\lstinputlisting{implementation/experiments/fib.c} +In this example we are incrementing two different program variables at +the same time, based on a condition of one of the variables. In +standard interval analysis it is impossible to capture the +relationship between these variables, but by performing abstract +interpretation over zones we can perform a more precise analysis. -\pagebreak -\subsection{Unreachable} -\lstinputlisting{implementation/experiments/irreducible.c} +This example is a translation to C of the example presented by +Gawlitza et al.~\cite{EasyChair:117}. In our abstract analysis we will +consider the values $x1$, $x2$ and $x2 - x1$. + +\begin{align*} + ub(x1)_A &= \infty \\ + ub(x2)_A &= \infty \\ + ub(x2 - x1)_A &= \infty \\ +% + ub(x1)_B &= \max(-\infty, 0, + MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node + distance=2.5cm] + \node (x1)[draw,circle] {$1$}; + \node (x2)[draw,circle,below of=x1] {$0$}; + \node (dummy)[draw,circle,below right of=x1] {$-1$}; + \path + (x1) edge[->,bend left] node {$\min(8, ub(x1)_B)$} (dummy) + (x2) edge[->,bend left] node {$ub(x2)_B$} (x1) + (x2) edge[->,bend left] node {$ub(x2 - x1)_B$} (dummy); + \end{tikzpicture}\right\} + 2) \\ + ub(x2)_B &= \max(-\infty, 1, + MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node + distance=2.5cm] + \node (x1)[draw,circle] {$0$}; + \node (x2)[draw,circle,below of=x1] {$1$}; + \node (dummy)[draw,circle,below right of=x1] {$-1$}; + \path + (x1) edge[->,bend left] node {$\min(8, ub(x1)_B)$} (dummy) + (x2) edge[->,bend left] node {$ub(x2)_B$} (x1) + (x2) edge[->,bend left] node {$ub(x2 - x1)_B$} (dummy); + \end{tikzpicture}\right\} + 2) \\ + ub(x2 - x1)_B &= \max(-\infty, 1, + MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node + distance=2.5cm] + \node (x1)[draw,circle] {$-1$}; + \node (x2)[draw,circle,below of=x1] {$1$}; + \node (dummy)[draw,circle,below right of=x1] {$0$}; + \path + (x1) edge[->,bend left] node {$\min(8, ub(x1)_B)$} (dummy) + (x2) edge[->,bend left] node {$ub(x2)_B$} (x1) + (x2) edge[->,bend left] node {$ub(x2 - x1)_B$} (dummy); + \end{tikzpicture}\right\}) \\ +% +% + ub(x1)_C &= \max(-\infty, ub(x1)_B) \\ + ub(x2)_C &= \max(-\infty, ub(x2)_B) \\ + ub(x2 - x1)_C &= \max(-\infty, ub(x2 - x1)_B) +\end{align*} + + +Solving this system of equations gives us the following bounds on +$x1,x2$ and $x2-x1$: +\\ +\begin{center} + \begin{tabular}{c|ccc} + Block & ub(x1) & ub(x2) & ub(x2 - x1) \\ + \hline + A & $\infty$ & $\infty$ & $\infty$ \\ + B & $10$ & $11$ & $1$ \\ + C & $10$ & $11$ & $1$ + \end{tabular} +\end{center} + +This result is more precise than any that could have been obtained by +interval analysis. We accurately capture the relationship between $x1$ +and $x2$ by bounding $x2 - x1 \le 1$, and this allows us to correctly +determine the upper bound of $x2$ at the exit of the loop. The output +of our automatic analysis for this function can be seen in Section +\ref{sec:example-function-output}. -- cgit v1.2.3