\documentclass{beamer} \title{Implementing and Evaluating a Strategy-Iteration Based Static Analyser within the LLVM framework} \author{Carlo Zancanaro} \def\sid{309222257} \def\supervisor{Bernhard Scholz} \usepackage[numbers]{natbib} \usepackage{mdframed} \usepackage{algorithmicx} \usepackage{tikz} \usepackage{pgfplots} \pgfplotsset{compat=1.4} \algblockx[While]{While}{EndWhile}{\textbf{while}~}{\textbf{endwhile}} \newcommand\Z{\mathbb{Z}} \newcommand\CZ{\overline{\Z}} \def\newblock{\hskip .11em plus .33em minus .07em} % important line \begin{document} \begin{frame} \maketitle \end{frame} \section{Introduction} \begin{frame}{Bugs are bad} \begin{itemize} \item<1-> Money - recently Knight Capital, US\$440 million lost in a day \item<2-> Time - 50\% of development time is spent debugging\cite{debugging-book} \item<3-> Security - buffer overflows and other security flaws \item<4-> Lives - X-ray machines, helicopters, cars \end{itemize} \end{frame} \begin{frame}{Static analysis is good} \begin{itemize} \item<1-> The more bugs we can catch at compile time, the better \item<2-> We can't catch all bugs - Rice's theorem\cite{Rice} \end{itemize} \end{frame} \section{Prior work} \begin{frame}[fragile]{Modelling programs} \begin{center} \begin{mdframed} \begin{algorithmic} \State $x = 0$ \State $y = 1$ \While {$x < 8$} \Comment{value at start of this line} \State $x = x + 2$ \State $y = y + 2$ \EndWhile \end{algorithmic} \end{mdframed} \end{center} \begin{align*} \only<1>{~ \\ ~} \only<2>{x &= \{0\} \\ y &= \{1\}} \only<3>{x &= \{0, 2\} \\ y &= \{1, 3\}} \only<4>{x &= \{0, 2, 4\} \\ y &= \{1, 3, 5\}} \only<5>{x &= \{0, 2, 4, 6\} \\ y &= \{1, 3, 5, 7\}} \only<6>{x &= \{0, 2, 4, 6, 8\} \\ y &= \{1, 3, 5, 7, 9\}} \end{align*} \end{frame} \begin{frame}{Abstract interpretation} Basic idea: simplify your domain Instead of arbitrary subsets of $\Z$, something less precise: \begin{itemize} \item signs\cite{CousotCousot77-1}: $x \in \{ \Z, \Z^+, \Z^-, 0\}$ \item ranges\cite{CousotCousot77-1}: $x \le a; -x \le b, ~a,b \in \Z$ \item zones\cite{mine:padoII}: $x - y \le c; \pm x \le c ~c \in Z$ \end{itemize} \end{frame} \begin{frame}[fragile]{Abstract interpretation} \begin{figure} \begin{tikzpicture}[scale=0.6,dot/.style={circle,fill,inner sep=1pt}] \draw (0,0) -- coordinate (x axis mid) (10,0); \draw (0,0) -- coordinate (y axis mid) (0,10); \foreach \x in {0,...,10} \draw (\x,1pt) -- (\x,-3pt) node[anchor=north] {\x}; \node[anchor=west] at (10,0) {$x$}; \foreach \y in {0,...,10} \draw (1pt,\y) -- (-3pt,\y) node[anchor=east] {\y}; \node[anchor=south] at (0,10) {$y$}; \foreach \v in {0,...,4} \node [dot,radius=0.2pt] at (0 + 2*\v,1 + 2*\v) { }; \draw [draw=black, fill=gray, fill opacity=0.2] (0,1) -- (0,9) -- (8,9) -- (8,1) -- (0,1); \end{tikzpicture} \caption{Comparison between concrete and abstract domains} \end{figure} \end{frame} \begin{frame}{$\max$-strategy improvement} \begin{itemize} \item Transform a program into equations \item Solve equations \end{itemize} \end{frame} \begin{frame}[fragile]{$\max$-strategy example} \begin{center} \begin{mdframed} \begin{algorithmic} \State $x = 0$ \Comment $A$ \While {$x \le 8$} \Comment $B$ \State $x = x + 2$ \EndWhile \State print$(x)$ \Comment $C$ \end{algorithmic} \end{mdframed} \end{center} \alt<1>{\begin{align*} ub(x)_A &\ge \infty \\ ub(x)_B &\ge 0 \\ ub(x)_B &\ge \min(ub(x)_B, 8) + 2 \\ ub(x)_C &\ge ub(x)_B \end{align*} }{\begin{align*} ub(x)_A &= \infty \\ ub(x)_B &= \max(0, \min(ub(x)_B, 8) + 2) \\ ub(x)_C &= ub(x)_B \\ ~ \end{align*}} \only<2>{} \end{frame} \begin{frame}[fragile]{$\max$-strategies} A $\max$-strategy is a decision about which argument in a $\max$-expression to choose. \begin{align*} ub(x)_A &= \infty \\ ub(x)_B &= \max(\alert<2>{0}, \alert<3>{\min(ub(x)_B, 8) + 2}) \\ ub(x)_C &= ub(x)_B \end{align*} \end{frame} \begin{frame}[fragile]{Solver} \begin{figure} \begin{tikzpicture} \node [rectangle, draw, align=center](max) at (0,0) {Maximiser \\ improve strategy}; \node [rectangle, draw, align=center](min) at (0,-3) {Minimiser \\ find assignment}; \path[] (min) edge[bend left,arrows={-latex}] node[anchor=east]{variable assignment} (max) (max) edge[bend left,arrows={-latex}] node[anchor=west]{$\max$-strategy} (min); \end{tikzpicture} \caption{The high-level structure of the solver presented in \cite{Gawlitza:2007:PFC:1762174.1762203}} \end{figure} \end{frame} \section{Contribution} \begin{frame}[fragile]{Solver - enhancements} The big idea: take into account data-dependencies \begin{align*} \alert<4>{x} &\alert<4>{= \max(0, \min(x - 1, \alert<3>{y}))} \\ \alert<2-4>{y} &= \max(0, x + 5, x) \\ z &= \max(0, z + 1, x) \end{align*} \end{frame} \begin{frame}[fragile]{Solver - enhancements} \begin{figure} \begin{tikzpicture} \node [rectangle, draw, align=center](max) at (0,0) {Maximiser \\ improve strategy}; \node [rectangle, draw, align=center](min) at (0,-3) {Minimiser \\ find assignment}; \path[] (min) edge[bend left,arrows={-latex}] node[anchor=east,align=right]{variable assignment \\ change set} (max) (max) edge[bend left,arrows={-latex}] node[anchor=west,align=left]{$\max$-strategy \\ change set} (min); \end{tikzpicture} \caption{The high-level structure of our enhanced solver} \end{figure} \end{frame} \section{Results} \begin{frame}{Implementation} \begin{itemize} \item Implemented in C++ \item Integrated into the LLVM/Clang static analysis framework \end{itemize} \end{frame} \begin{frame}{Example system} \begin{align*} x_0 &= \max(-\infty, 0) \\ x_1 &= \max(-\infty, x_0) \\ x_2 &= \max(-\infty, x_1) \\ ... &~ ... \\ x_n &= \max(-\infty, x_{n-1}) \end{align*} \end{frame} \begin{frame}{Runtime improvements} \begin{figure} \begin{tikzpicture} \begin{loglogaxis}[% scale only axis, width=9cm, height=5cm, xmin=1, xmax=65536, xlabel=size of equation system, ymin=0, ymax=40, ylabel=time (in seconds), axis on top] \addplot[ ybar, bar width=0.5cm, bar shift=0in, fill=red, draw=black] plot coordinates{ (1,0.000021982 ) (2,0.000037083 ) (4,0.000090918 ) (8,0.000331411 ) (16,0.001678790 ) (32,0.010238535 ) (64,0.069774912 ) (128,0.520062180 ) (256,3.983806321 ) (512,31.469653195 ) }; \end{loglogaxis} \end{tikzpicture} \caption{Performance of the naive algorithm} \end{figure} \end{frame} \begin{frame}{Runtime improvements} \begin{figure} \begin{tikzpicture} \begin{loglogaxis}[% scale only axis, width=9cm, height=5cm, xmin=1, xmax=65536, xlabel=size of equation system, ymin=0, ymax=40, ylabel=time (in seconds), axis on top] \addplot[ ybar, bar width=0.5cm, bar shift=0in, fill=red, draw=black] plot coordinates{ (1,0.000044508 ) (2,0.000063953 ) (4,0.000094303 ) (8,0.000176679 ) (16,0.000312958 ) (32,0.000642421 ) (64,0.001194996 ) (128,0.002451839 ) (256,0.004786286 ) (512,0.009280946 ) (1024,0.019159827 ) (2048,0.038067689 ) (4096,0.080535540 ) (8192,0.151181571 ) (16384,0.339113750 ) (32768,0.593622054 ) (65536,1.217525509 ) }; \end{loglogaxis} \end{tikzpicture} \caption{Performance of our improved algorithm} \end{figure} \end{frame} \section{Future work} \begin{frame}{Future work} \begin{itemize} \item Still slightly over-approximating dependencies \item LLVM/Clang integration is only a proof-of-concept \end{itemize} \end{frame} \begin{frame}[fragile,allowframebreaks]{References} \phantomsection \bibliographystyle{abbrvnat} \bibliography{references} \end{frame} \begin{frame}{Contributions} \begin{itemize} \item Improvement of $\max$-strategy iteration algorithm, leveraging sparsity of variable dependencies \item Implementation of a $\max$-strategy iteration based static analyser in the LLVM/Clang framework \end{itemize} \end{frame} \end{document}