From 62d8d7011c339f9b192099b2fc6a6d2f567f0a4d Mon Sep 17 00:00:00 2001
From: Carlo Zancanaro <carlo@carlo-laptop>
Date: Wed, 28 Nov 2012 11:09:10 +1100
Subject: Ahhh! Lots of things.

---
 tex/thesis/implementation/implementation.tex | 355 +++++++++++++++++++++++++--
 1 file changed, 335 insertions(+), 20 deletions(-)

(limited to 'tex/thesis/implementation/implementation.tex')

diff --git a/tex/thesis/implementation/implementation.tex b/tex/thesis/implementation/implementation.tex
index 02f2180..287cf8d 100644
--- a/tex/thesis/implementation/implementation.tex
+++ b/tex/thesis/implementation/implementation.tex
@@ -21,22 +21,198 @@ presented in Chapter \ref{chap:contribution}. The primary difference
 is the use of objects instead of higher-order functions, as C++'s
 support for higher-order functions was insufficient for our purposes.
 
-The solver was developed using only the STL for the core of the
-solver. A front-end was developed using the ANTLR parser generator
-\cite{antlr} to test the solver separately to the LLVM/Clang
-integration.
+The solver was developed using only the Standard Template
+Library~\cite{stl} for the core of the solver. A front-end was
+developed using the ANTLR parser generator \cite{antlr} to test the
+solver without the equation system generation of the clang framework.
 
+A high level overview of the implementation is presented in Figure
+\ref{fig:solver-arch}. The strategy iteration and fixpoint iteration
+loops provide feedback to each other about which values have changed
+at each iteration, continuing until the strategy stabilises.
+
+\begin{figure}
+  \begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node
+      distance=4cm,main node/.style={rectangle,draw,minimum
+        height=2cm,minimum width=3cm}]
+
+    \node[main node] (maximiser) {Strategy iteration};
+    \node[main node] (minimiser) [below of=maximiser] {Fixpoint iteration};
+    \node (input) [node distance=5cm,left of=maximiser] {\emph{input}};
+    \node (output) [node distance=5cm,right of=minimiser] {\emph{output}};
+
+    \path []
+    (input) edge node {Equation system} (maximiser)
+    (maximiser) edge [bend left] node {Improved strategy} (minimiser)
+    (minimiser) edge [bend left] node [align=center]{Greatest fixpoint \\ of strategy} (maximiser)
+    (minimiser) edge node [align=center] {Least fixpoint \\ of system} (output)
+    ;
+
+  \end{tikzpicture}
+  \caption{A very high level overview of the solver implementation.}
+  \label{fig:solver-arch}
+\end{figure}
+
+In the following subsections we provide some timings for two general
+classes of equation systems. Each of the equation systems consists
+solely of inexpensive operations, either a variable lookup or a
+constant term, so we are comparing only the effect of the algorithms
+without consideration of the expense of calculating subexpressions.
+
+All these measurements were performed on a Core 2 Duo E8400 with 4GB
+of memory running Debian sid. Binaries were compiled with GCC 4.7.2
+with the -O3 flag. The source code for the solver is available
+online~\cite{bitbucket-repo}.
 
 \subsection{Chain}
 
+Our first class of example equation systems is a chain. These equation
+systems have the following form:
+\begin{align*}
+  x_0 &= \max(-inf, 0) \\
+  x_1 &= \max(-inf, x_0) \\
+  ... &~ ... \\
+  x_n &= \max(-inf, x_{n-1})
+\end{align*}
+
+This particular class of equation system is the simplest of the class
+of equations with acyclic dependency graphs. In this particular case
+our solver significantly outperforms the naive algorithm for most
+problem sizes. For equation systems of less than four variables the
+naive approach is slightly faster, but this is of no real practical
+concern as in both cases it takes less than a millisecond to provide a
+result.
+
+\begin{figure}
+  \begin{tikzpicture}
+    \begin{loglogaxis}[% 
+        scale only axis,
+        width=0.7\textwidth,
+        height=10cm,
+        xmin=1, xmax=65536,
+        xlabel=size of equation system,
+        ymin=0, ymax=250,
+        ylabel=time (in seconds),
+        axis on top,
+        legend entries={DSI, Naive}]
 
+      \addplot coordinates{ 
+        (1,0.000018267)
+        (2,0.000042359)
+        (4,0.000077031)
+        (8,0.000155439)
+        (16,0.000308687)
+        (32,0.000570927)
+        (64,0.001190886)
+        (128,0.002441834)
+        (256,0.004609652)
+        (512,0.009588408)
+        (1024,0.018226228)
+        (2048,0.036292838)
+        (4096,0.076125736)
+        (8192,0.152150204)
+        (16384,0.307867610)
+        (32768,0.654639048)
+        (65536,1.190741627)
+      };
+
+      \addplot coordinates{ 
+        (1,0.000012504 )
+        (2,0.000021558 )
+        (4,0.000060316 )
+        (8,0.000276887 )
+        (16,0.001414449 )
+        (32,0.009278044 )
+        (64,0.067620090 )
+        (128,0.508776074 )
+        (256,3.939142736 )
+        (512,31.373059038 )
+        (1024,246.603110160 )
+      };
+
+    \end{loglogaxis}
+  \end{tikzpicture}
+  \caption{Chart comparing the performance of our DSI algorithm
+    compared to a naive approach on simple chain examples. Note that
+    both axes are logarithmic.}
+  \label{fig:solver-chain}
+\end{figure}
 
 
 \subsection{Cycle}
 
+The next class of equation system we will consider will be long
+\emph{cycles}. These will 
+\begin{align*}
+  x_0 &= \max(-inf, 0, x_n) \\
+  x_1 &= \max(-inf, x_0) \\
+  ... &~ ... \\
+  x_n &= \max(-inf, x_{n-1})
+\end{align*}
 
+Our dependency graph is no longer acyclic, so finding the value of any
+one variable requires the evaluation, and potential strategy
+improvement, of all other values. The chart in Figure
+\ref{fig:solver-cycle} shows the running time of the DSI algorithm
+again, showing that the addition of an acyclic dependency does not
+have a significant effect.
+
+
+\begin{figure}
+  \begin{tikzpicture}
+    \begin{loglogaxis}[% 
+        scale only axis,
+        width=0.7\textwidth,
+        height=10cm,
+        xmin=1, xmax=65536,
+        xlabel=size of equation system,
+        ymin=0, ymax=250,
+        ylabel=time (in seconds),
+        axis on top,
+        legend entries={DSI, Naive}]
+
+      \addplot coordinates{ 
+        (1,0.000016665)
+        (2,0.000044187)
+        (4,0.000081627)
+        (8,0.000159545)
+        (16,0.000309742)
+        (32,0.000627197)
+        (64,0.001236493)
+        (128,0.002338711)
+        (256,0.004967883)
+        (512,0.009164379)
+        (1024,0.018700068)
+        (2048,0.041385796)
+        (4096,0.073920841)
+        (8192,0.150712424)
+        (16384,0.302473783)
+        (32768,0.665616132)
+        (65536,1.220667681)
+      };
+
+      \addplot coordinates{ 
+        (1,0.000012877 )
+        (2,0.000021507 )
+        (4,0.000059573 )
+        (8,0.000252723 )
+        (16,0.001419862 )
+        (32,0.009221822 )
+        (64,0.067315559 )
+        (128,0.500673679 )
+        (256,3.912675638 )
+        (512,31.750177791 )
+        (1024,249.912189424 )
+      };
+
+    \end{loglogaxis}
+  \end{tikzpicture}
+  \caption{Chart comparing the performance of our DSI algorithm
+    compared to a naive approach on simple cyclical examples. Note
+    that both axes are logarithmic.}
+  \label{fig:solver-cycle}
+\end{figure}
 
-\subsection{Dense}
 
 
 
@@ -66,11 +242,13 @@ different points in the program.
     \node[main node,fill=red!60] (eqns) [below of=clang-analysis]
          {Zone analysis};
     \node[main node] (dsi) [below of=eqns] {DSI};
-    \node (reality) [node distance=5cm,right of=eqns] {};
+    \node (input) [node distance=7cm,left of=clang-analysis] {\emph{input}};
+    \node (reality) [node distance=5cm,right of=eqns] {\emph{output}};
 
     \path []
     %(clang) edge[bend right] node[anchor=east] {CFG} (clang-analysis)
-    (clang-analysis) edge[bend right] node[anchor=east] {CFG} (eqns)
+    (input) edge node {Source code} (clang-analysis)
+    (clang-analysis) edge node[anchor=east] {CFG} (eqns)
     (eqns) edge[bend right] node[anchor=east] {Equation System} (dsi)
     (dsi) edge[bend right] node[anchor=west] {Solution} (eqns)
     (eqns) edge node[anchor=north] {Variable bounds} (reality)
@@ -84,28 +262,165 @@ different points in the program.
 \end{figure}
 
 
-\pagebreak
-\subsection{Chain}
-\lstinputlisting{implementation/experiments/chain.c}
+In the sections which follow we present the results of evaluating our
+analysis pass on several small programs. The full output from our tool
+is provided in Appendix~\ref{appendix:tool-output}, but a condensed
+form of the equation systems and variable bounds is presented here. In
+the following equation systems we use $MCF\{G\}$ to denote the minimum
+cost flow through the flow network $G$.
 
 \pagebreak
 \subsection{Counter}
 \lstinputlisting{implementation/experiments/counter.c}
 
-\lstinputlisting{implementation/experiments/backwards_counter.c}
+This program is extremely simple, consisting only of incrementing a
+variable repeatedly in a loop. It is possible to gain a precise result
+from a widening and narrowing on this function, so we also achieve a
+precise result with this approach.
 
-\pagebreak
-\subsection{Nested Loops}
-\lstinputlisting{implementation/experiments/nested.c}
+Since we only have a single variable, we only wish to determine the
+bounds on $i$ and $-i$. Thus, we have the following equation system:
+\begin{align*}
+  ub(i)_A &= \infty \\
+  -lb(i)_A &= \infty \\
+  ub(i)_B &= \max(-\infty, 0,
+  MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node
+    distance=2cm]
+    \node (i)[draw,circle] {$1$};
+    \node (dummy)[draw,circle,right of=i] {$-1$};
+    \path [use as bounding box] (0,0) rectangle (0.5,1);
+    \path
+    (i) edge[->,bend left] node {$\min(ub(i)_B, 99)$} (dummy)
+    (dummy) edge[->,bend left] node {$-lb(i)_B$} (i);
+  \end{tikzpicture}\right\} + 1) \\
+%
+  -lb(i)_B &= \max(-\infty, 0,
+  MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node
+    distance=2cm]
+    \node (i)[draw,circle] {$-1$};
+    \node (dummy)[draw,circle,right of=i] {$1$};
+    \path [use as bounding box] (0,0) rectangle (0.5,1);
+    \path
+    (i) edge[->,bend left] node {$\min(ub(i)_B, 99)$} (dummy)
+    (dummy) edge[->,bend left] node {$-lb(i)_B$} (i);
+  \end{tikzpicture}\right\} - 1) \\
+%
+%
+  ub(i)_C &= \max(-\infty,
+  MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node
+    distance=2.5cm]
+    \node (i)[draw,circle] {$1$};
+    \node (dummy)[draw,circle,right of=i] {$-1$};
+    \path [use as bounding box] (0,0) rectangle (0.75,1.25);
+    \path
+    (i) edge[->,bend left] node {$ub(i)_B$} (dummy)
+    (dummy) edge[->,bend left] node {$\min(-lb(i)_B, -100)$} (i);
+  \end{tikzpicture}\right\}) \\
+%
+  -lb(i)_C &= \max(-\infty,
+  MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node
+    distance=2.5cm]
+    \node (i)[draw,circle] {$-1$};
+    \node (dummy)[draw,circle,right of=i] {$1$};
+    \path [use as bounding box] (0,0) rectangle (0.75,1.25);
+    \path
+    (i) edge[->,bend left] node {$ub(i)_B$} (dummy)
+    (dummy) edge[->,bend left] node {$\min(-lb(i)_B, -100)$} (i);
+  \end{tikzpicture}\right\})
+\end{align*}
+
+Solving this system of equations gives us the following bounds on $i$:
+\\
+\begin{center}
+  \begin{tabular}{c|cc}
+    Block & ub(i) & lb(i) \\
+    \hline 
+    A & $\infty$ & $-\infty$ \\
+    B & $100$ & $0$ \\
+    C & $100$ & $-100$
+  \end{tabular}
+\end{center}
+
+This is a precise analysis of the values of $i$, performed without
+widening/narrowing. It captures the possible values of $i$ precisely
+and identifies the value of $i$ at the exit of the loop.
 
 \pagebreak
 \subsection{Double counting}
 \lstinputlisting{implementation/experiments/example.c}
 
-\pagebreak
-\subsection{Fibonacci}
-\lstinputlisting{implementation/experiments/fib.c}
+In this example we are incrementing two different program variables at
+the same time, based on a condition of one of the variables. In
+standard interval analysis it is impossible to capture the
+relationship between these variables, but by performing abstract
+interpretation over zones we can perform a more precise analysis.
 
-\pagebreak
-\subsection{Unreachable}
-\lstinputlisting{implementation/experiments/irreducible.c}
+This example is a translation to C of the example presented by
+Gawlitza et al.~\cite{EasyChair:117}. In our abstract analysis we will
+consider the values $x1$, $x2$ and $x2 - x1$.
+
+\begin{align*}
+  ub(x1)_A &= \infty \\
+  ub(x2)_A &= \infty \\
+  ub(x2 - x1)_A &= \infty \\
+%
+  ub(x1)_B &= \max(-\infty, 0,
+  MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node
+        distance=2.5cm]
+      \node (x1)[draw,circle] {$1$};
+      \node (x2)[draw,circle,below of=x1] {$0$};
+      \node (dummy)[draw,circle,below right of=x1] {$-1$};
+      \path
+      (x1) edge[->,bend left] node {$\min(8, ub(x1)_B)$} (dummy)
+      (x2) edge[->,bend left] node {$ub(x2)_B$} (x1)
+      (x2) edge[->,bend left] node {$ub(x2 - x1)_B$} (dummy);
+    \end{tikzpicture}\right\} + 2) \\
+  ub(x2)_B &= \max(-\infty, 1,
+  MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node
+        distance=2.5cm]
+      \node (x1)[draw,circle] {$0$};
+      \node (x2)[draw,circle,below of=x1] {$1$};
+      \node (dummy)[draw,circle,below right of=x1] {$-1$};
+      \path
+      (x1) edge[->,bend left] node {$\min(8, ub(x1)_B)$} (dummy)
+      (x2) edge[->,bend left] node {$ub(x2)_B$} (x1)
+      (x2) edge[->,bend left] node {$ub(x2 - x1)_B$} (dummy);
+    \end{tikzpicture}\right\} + 2) \\
+  ub(x2 - x1)_B &= \max(-\infty, 1,
+  MCF\left\{\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node
+        distance=2.5cm]
+      \node (x1)[draw,circle] {$-1$};
+      \node (x2)[draw,circle,below of=x1] {$1$};
+      \node (dummy)[draw,circle,below right of=x1] {$0$};
+      \path
+      (x1) edge[->,bend left] node {$\min(8, ub(x1)_B)$} (dummy)
+      (x2) edge[->,bend left] node {$ub(x2)_B$} (x1)
+      (x2) edge[->,bend left] node {$ub(x2 - x1)_B$} (dummy);
+    \end{tikzpicture}\right\})  \\
+%
+%
+  ub(x1)_C &= \max(-\infty, ub(x1)_B)  \\
+  ub(x2)_C &= \max(-\infty, ub(x2)_B)  \\
+  ub(x2 - x1)_C &= \max(-\infty, ub(x2 - x1)_B)
+\end{align*}
+
+
+Solving this system of equations gives us the following bounds on
+$x1,x2$ and $x2-x1$:
+\\
+\begin{center}
+  \begin{tabular}{c|ccc}
+    Block & ub(x1) & ub(x2) & ub(x2 - x1) \\
+    \hline 
+    A & $\infty$ & $\infty$ & $\infty$ \\
+    B & $10$ & $11$ & $1$ \\
+    C & $10$ & $11$ & $1$
+  \end{tabular} 
+\end{center}
+
+This result is more precise than any that could have been obtained by
+interval analysis. We accurately capture the relationship between $x1$
+and $x2$ by bounding $x2 - x1 \le 1$, and this allows us to correctly
+determine the upper bound of $x2$ at the exit of the loop. The output
+of our automatic analysis for this function can be seen in Section
+\ref{sec:example-function-output}.
-- 
cgit v1.2.3