\documentclass[pdf]{prosper}
\usepackage[toc,highlight,linbit,notes,hlsections]{HA-prosper}

\title{Principles of Compiler Design}
\subtitle{- The Brainf*ck Compiler -}
\author{Clifford Wolf - www.clifford.at \\
http://www.clifford.at/papers/2004/compiler/}

\DefaultTransition{Wipe}
\TitleSlideNav{FullScreen}
\NormalSlideNav{ShowBookmarks}
\LeftFoot{\href{http://www.clifford.at}{Clifford Wolf}, \today}
\RightFoot{\href{http://www.clifford.at/papers/2004/compiler/}{http://www.clifford.at/papers/2004/compiler/}}


\begin{document}

\maketitle

% ============================================================================

\tsectionandpart{Introduction}

\begin{slide}{Introduction}
\begin{itemize}

\item My presentation at 20C3 about CPU design featuring a Brainf*ck CPU was a big success

\vspace*{.5cm}
\item My original plan for 21C3 was to build a Brainf*ck CPU with tubes..

\vspace*{.5cm}
\item But:\\
\begin{center}
The only thing more dangerous than a hardware guy with a code patch is a
programmer with a soldering iron.
\end{center}

\vspace*{.5cm}
\item So this is a presentation about compiler design featuring a Brainf*ck Compiler.

\end{itemize}
\end{slide}

\begin{slide}{Overview (1/2)}

In this presentation I will discuss:
\begin{itemize}

\vspace*{.5cm}
\item A little introduction to Brainf*ck

\vspace*{.5cm}
\item Components of a compiler, overview

\vspace*{.5cm}
\item Designing and implementing lexers

\vspace*{.5cm}
\item Designing and implementing parsers

\vspace*{.5cm}
\item Designing and implementing code generators

\vspace*{.5cm}
\item Tools (flex, bison, iburg, etc.)

\end{itemize}
\end{slide}

\begin{slide}{Overview (2/2)}
\begin{itemize}

\item Overview of more complex code generators
\begin{itemize}
\item Abstract syntax trees
\item Intermediate representations
\item Basic block analysis
\item Backpatching
\item Dynamic programming
\item Optimizations
\end{itemize}

\vspace*{.5cm}
\item Design and implementation of the Brainf*ck Compiler

\vspace*{.5cm}
\item Implementation of and code generation for stack machines

\vspace*{.5cm}
\item Design and implementation of the SPL Project

\vspace*{.5cm}
\item Design and implementation of LL(regex) parsers

\end{itemize}
\end{slide}

\begin{slide}{Aim}
\begin{itemize}

\item After this presentation, the audience ..

\vspace*{.5cm}
\item .. should have a rough idea of how compilers are working.

\vspace*{.5cm}
\item .. should be able to implement parsers for complex configuration files.

\vspace*{.5cm}
\item .. should be able to implement code-generators for stack machines.

\vspace*{.5cm}
\item .. should have a rough idea of code-generation for register machines.

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{Brainf*ck}

\begin{slide}{Overview}
\begin{itemize}

\item Brainf*ck is a very simple Turing-complete programming language.

\vspace*{.3cm}
\item It has only 8 instructions and no instruction parameters.

\vspace*{.3cm}
\item Each instruction is represented by one character: \\
	{\tt < > + - . , [ ]}

\vspace*{.3cm}
\item All other characters in the input are ignored.

\vspace*{.3cm}
\item A Brainf*ck program has an implicit byte pointer which is free to move around within an array of 30000 bytes, initially all set to zero. The pointer itself is initialized to point to the beginning of this array.

\end{itemize}

\vspace*{.3cm}
\begin{center}
Some languages are designed to solve a problem. \\
Others are designed to prove a point.
\end{center}

\end{slide}

\begin{slide}{Instructions}
\begin{center}
\small
\begin{tabular}{|l|c|}
\hline
{\tt >} & Increment the pointer.
  \hspace{1cm} {\tt ++p;} \\
\hline
{\tt <} & Decrement the pointer.
  \hspace{1cm} {\tt --p;} \\
\hline
{\tt +} & Increment the byte at the pointer.
  \hspace{1cm} {\tt ++*p;} \\
\hline
{\tt -} & Decrement the byte at the pointer.
  \hspace{1cm} {\tt --*p;} \\
\hline
{\tt .} &  Output the byte at the pointer. \\
  & {\tt putchar(*p);} \\
\hline
{\tt ,} & Input a byte and store it in the byte at the pointer. \\
  & {\tt *p = getchar();} \\
\hline
{\tt [} & Jump forward past the matching {\tt ]} if the byte at the pointer is zero. \\
  & {\tt while (*p) \{} \\
\hline
{\tt ]} & Jump backward to the matching {\tt [} unless the byte at the pointer is zero. \\
  & {\tt \}} \\
\hline

\hline
\end{tabular}
\end{center}
\end{slide}

\begin{slide}{Implementing ``while''}
\begin{itemize}

\item Implementing a while statement is easy, because the Brainf*ck {\tt [ .. ]} statement is a while loop.

\vspace*{.5cm}
\item So {\tt while (x) \{ <foobar> \}} becomes:

\vspace*{.5cm}
\begin{verbatim}
<move pointer to a>
[
<foobar>
<move pointer to a>
]
\end{verbatim}

\end{itemize}
\end{slide}

\begin{slide}{Implementing ``x=y''}
\begin{itemize}

\item Implementing assignment (copy) instructions is a bit more complex.

\vspace*{.3cm}
\item The straightforward way of doing that resets y to zero:

\vspace*{.2cm}
\begin{verbatim}
<move pointer to y> [ -
<move pointer to x> +
<move pointer to y> ]
\end{verbatim}

\vspace*{.3cm}
\item So, a temporary variable t is needed:

\vspace*{.2cm}
\begin{verbatim}
<move pointer to y> [ -
<move pointer to t> +
<move pointer to y> ]

<move pointer to t> [ -
<move pointer to x> +
<move pointer to y> +
<move pointer to t> ]
\end{verbatim}

\end{itemize}
\end{slide}

\begin{slide}{Implementing ``if''}
\begin{itemize}

\item The if statement is like a while-loop, but it should run its block only
once. Again, a temporary variable is needed to implement {\tt if (x) \{ <foobar> \}}:

\vspace*{.2cm}
\begin{verbatim}
<move pointer to x> [ -
<move pointer to t> +
<move pointer to x> ]

<move pointer to t> [

    [ -
    <move pointer to x> +
    <move pointer to t> ]

    <foobar>

<move pointer to t> ]
\end{verbatim}

\end{itemize}
\end{slide}

\begin{slide}{Functions}
\begin{itemize}

\item Brainf*ck has no construct for functions.

\vspace*{.5cm}
\item The compiler has support for macros which are always inlined.

\vspace*{.5cm}
\item The generated code may become huge if macros are used intensively.

\vspace*{.5cm}
\item So recursions must be implemented using explicit stacks.

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{Lexer and Parser}

\begin{slide}{Lexer}
\begin{itemize}

\item The lexer reads the compiler input and transforms it to lexical tokens.

\vspace*{.5cm}
\item E.g. the lexer reads the input ``{\tt while}'' and returns the numerical
constant {\tt TOKEN\_WHILE}.

\vspace*{.5cm}
\item Tokens may have additional attributes. E.g. the textual input ``{\tt 123}''
may be transformed to the token {\tt TOKEN\_NUMBER} with the integer value 123
attached to it.

\vspace*{.5cm}
\item The lexer is usually implemented as a function which is called by the parser.

\end{itemize}
\end{slide}

\begin{slide}{Parser}
\begin{itemize}

\item The parser consumes the lexical tokens (terminal symbols) and reduces
sequences of terminal and non-terminal symbols to non-terminal symbols.

\vspace*{.5cm}
\item The parser creates the so-called parse tree.

\vspace*{.5cm}
\item The parse tree never actually exists as a memory structure.

\vspace*{.5cm}
\item Instead the parse-tree just defines the order in which so-called
reduction functions are called.

\vspace*{.5cm}
\item It is possible to create tree-like memory structures in these reduction
functions which look like the parse tree. These structures are called the
``Abstract Syntax Tree''.

\end{itemize}
\end{slide}

\begin{slide}{BNF}
BNF (Backus-Naur Form) is a way of writing down parser definitions. A BNF for
parsing a simple assign statement (like ``{\tt x = y + z * 3}'') could look
like (yacc style syntax):

\vspace*{.3cm}
\begin{verbatim}
assign: NAME '=' expression;

primary: NAME | NUMBER
|        '(' expression ')';

product: primary
|        product '*' primary
|        product '/' primary;

sum: product
|    sum '+' product
|    sum '-' product;

expression: sum;
\end{verbatim}
\end{slide}

\begin{slide}{Reduce Functions}
\begin{itemize}

\item Whenever a sequence of symbols is reduced to a non-terminal symbol, a
reduce function is called. E.g.:

\vspace*{.3cm}
\begin{verbatim}
%union {
        int numval;
}
%type <numval> sum product

%%

sum: product
|    sum '+' product   { $$ = $1 + $3; }
|    sum '-' product   { $$ = $1 - $3; };
\end{verbatim}

\vspace*{.5cm}
\item The attributes of the symbols on the right side of the reduction can be
accessed using \$1 .. \${\it n}. The attributes of the resulting symbol can be
accessed with \$\$.

\end{itemize}
\end{slide}

\begin{slide}{Algorithms}
\begin{itemize}

\item A huge number of different parser algorithms exists.

\vspace*{.5cm}
\item The two most important algorithms are LL(N) and LALR(N).

\vspace*{.5cm}
\item Other algorithms are LL(k), LL(regex), GLR and Ad-Hoc.

\vspace*{.5cm}
\item Most hand written parsers are LL(1) parsers.

\vspace*{.5cm}
\item Most parser generators create LALR(1) parsers.

\vspace*{.5cm}
\item A detailed discussion of various parser algorithms can be found in ``The
Dragonbook'' (see references on last slide).

\vspace*{.5cm}
\item The design and implementation of LL(1) parsers is also discussed in the
section about LL(regex) parsers.

\end{itemize}
\end{slide}

\begin{slide}{Conflicts}
\begin{itemize}

\item Sometimes a parser grammar is ambiguous.

\vspace*{.5cm}
\item In these cases, the parser has to choose one possible interpretation of
the input.

\vspace*{.5cm}
\item LALR parsers distinguish between reduce-reduce and shift-reduce conflicts.

\vspace*{.5cm}
\item Reduce-reduce conflicts should be avoided when writing the BNF.

\vspace*{.5cm}
\item Shift-reduce conflicts are always solved by shifting.

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{Code Generators}

\begin{slide}{Overview}
\begin{itemize}

\item Writing the code generator is the most complex part of a compiler project.

\vspace*{.5cm}
\item Usually the code-generation is split up in different stages, such as:

\begin{itemize}
\item Creating an Abstract-Syntax tree
\item Creating an intermediate code
\item Creating the output code
\end{itemize}

\vspace*{.5cm}
\item A code-generator which creates assembler code is usually much easier to
write than a code-generator creating binaries.

\end{itemize}
\end{slide}

\begin{slide}{Simple Code Generators}
\begin{itemize}

\item Simple code generators may generate code directly in the parser.

\vspace*{.5cm}
\item This is possible if no anonymous variables exist (BFC) or the target
machine is a stack-machine (SPL).
\end{itemize}

\vspace*{.5cm}
Example:
\begin{verbatim}
if_stmt:
  TK_IF TK_ARGS_BEGIN TK_STRING TK_ARGS_END stmt
{
  $$ = xprintf(0, 0, "%s{", debug_info());
  $$ = xprintf($$, $5, "(#tmp_if)<#tmp_if>[-]"
                       "<%s>[-<#tmp_if>+]"
                       "<#tmp_if>[[-<%s>+]\n", $3, $3);
  $$ = xprintf($$, 0, "]}");
}
\end{verbatim}

\end{slide}

% ============================================================================

\tsectionandpart{Tools}

\begin{slide}{Overview}
\begin{itemize}

\item There are tools for writing compilers.

\vspace*{.5cm}
\item Most of these tools cover the lexer/parser step only.

\vspace*{.5cm}
\item Most of these tools generate c-code from a declarative language.

\vspace*{1cm}
\item Use those tools but understand what they are doing!

\end{itemize}
\end{slide}

\begin{slide}{Flex / Lex}
\begin{itemize}

\item Flex (Fast Lex) is the GNU successor of Lex.

\vspace*{.5cm}
\item The lex input file ({\tt *.l}) is a list of regular expressions and actions.

\vspace*{.5cm}
\item The ``actions'' are c code which should be executed when the lexer finds
a match for the regular expression in the input.

\vspace*{.5cm}
\item Most actions simply return the token to the parser.

\vspace*{.5cm}
\item It is possible to skip patterns (e.g. white spaces) by not providing an
action at all.

\end{itemize}
\end{slide}

\begin{slide}{Yacc / Bison}
\begin{itemize}

\item Bison is the GNU successor of Yacc (Yet Another Compiler Compiler).

\vspace*{.5cm}
\item Bison is a parser generator.

\vspace*{.5cm}
\item The bison input ({\tt *.y}) is a BNF with reduce functions.

\vspace*{.5cm}
\item The generated parser is a LALR(1) parser.

\vspace*{.5cm}
\item Bison can also generate GLR parsers.

\end{itemize}
\end{slide}

\begin{slide}{Burg / iBurg}
\begin{itemize}

\item iBurg is the successor of Burg.

\vspace*{.5cm}
\item iBurg is a ``Code Generator Generator''.

\vspace*{.5cm}
\item The code generator generated by iBurg implements the ``dynamic
programming'' algorithm.

\vspace*{.5cm}
\item It is a bit like a parser for an abstract syntax tree with an extremely
ambiguous BNF.

\vspace*{.5cm}
\item The reductions have cost values applied and an iBurg code generator
chooses the cheapest fit.

\end{itemize}
\end{slide}

\begin{slide}{PCCTS}
\begin{itemize}

\item PCCTS is the ``Purdue Compiler-Compiler Tool Set''.

\vspace*{.5cm}
\item PCCTS is a parser generator for LL(k) parsers in C++.

\vspace*{.5cm}
\item The PCCTS toolkit was written by Terence J. Parr of the MageLang
Institute.

\vspace*{.5cm}
\item His current project is antlr 2 - a complete redesign of pccts, written in
Java, that generates Java or C++.

\vspace*{.5cm}
\item PCCTS is now maintained by Tom Moog, Polhode, Inc.

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{Complex Code Generators}

\begin{slide}{Overview}
\begin{itemize}

\item Unfortunately it's not possible to cover code generation in depth in this
presentation.

\vspace*{.5cm}
\item However, I will try to give a rough overview of the topic and explain
the most important terms.

\end{itemize}
\end{slide}

\begin{slide}{Abstract syntax trees}
\begin{itemize}

\item With some languages it is hard to create intermediate code directly from
the parser.

\vspace*{.5cm}
\item In compilers for such languages, an abstract syntax tree is created from
the parser.

\vspace*{.5cm}
\item The intermediate code generation can then be done in different phases
which may process the abstract syntax tree bottom-up and top-down.

\end{itemize}
\end{slide}

\begin{slide}{Intermediate representations}
\begin{itemize}

\item Most compilers create intermediate code from the input and generate
output code from this intermediate code.

\vspace*{.5cm}
\item Usually the intermediate code is some kind of three-address code
assembler language.

\vspace*{.5cm}
\item The GCC intermediate language is called RTL and is a wild mix of
imperative and functional programming.

\vspace*{.5cm}
\item Intermediate representations which are easily converted to trees (such as
functional approaches) are better for dynamic programming, but are usually not
optimal for ad-hoc code generators.

\end{itemize}
\end{slide}

\begin{slide}{Basic block analysis}
\begin{itemize}

\item A code block from one jump target to the next is called ``Basic Block''.

\vspace*{.5cm}
\item Optimizations in basic blocks are an entirely different class of
optimization than those which can be applied to a larger code block.

\vspace*{.5cm}
\item Many compilers create intermediate language trees for each basic block
and then create the code for it using dynamic programming.

\end{itemize}
\end{slide}

\begin{slide}{Backpatching}
\begin{itemize}

\item It is often necessary to create jump instructions without knowing the
jump target address yet.

\vspace*{.5cm}
\item This problem is solved by outputting a dummy target address and fixing it
later.

\vspace*{.5cm}
\item This procedure is called backpatching.

\vspace*{.5cm}
\item The Brainf*ck compiler doesn't need backpatching because Brainf*ck
doesn't have jump instructions and addresses.

\vspace*{.5cm}
\item However, the Brainf*ck runtime bundled with the compiler is using
backpatching to optimize the runtime speed.

\end{itemize}
\end{slide}

\begin{slide}{Dynamic programming}
\begin{itemize}

\item Dynamic programming is an algorithm for generating assembler code from
intermediate language trees.

\vspace*{.5cm}
\item Code generators such as Burg and iBurg are implementing the dynamic
programming algorithm.

\vspace*{.5cm}
\item Dynamic programming uses two different phases.

\vspace*{.5cm}
\item In the first phase, the tree is labeled to find the cheapest matches in
the rule set (bottom-up).

\vspace*{.5cm}
\item In the 2nd phase, the code for the cheapest solution is generated (top-down).

\end{itemize}
\end{slide}

\begin{slide}{Optimizations}
\begin{itemize}

\item Most optimizing compilers perform different optimizations in different
compilation phases.

\vspace*{.5cm}
\item So most compilers don't have a separate ``the optimizer'' code path.

\vspace*{.5cm}
\item Some important optimizations are:

\begin{itemize}
\item Global register allocation
\item Loop detection and unrolling
\item Common subexpression elimination
\item Peephole optimizations
\end{itemize}

\vspace*{.5cm}
\item The Brainf*ck compiler does not optimize.

\vspace*{.5cm}
\item The SPL compiler has a simple peephole optimizer.

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{The BF Compiler}

\begin{slide}{Overview}
\begin{itemize}

\item The project is split up in an assembler and a compiler.

\vspace*{.5cm}
\item The assembler handles variable names and manages the pointer position.

\vspace*{.5cm}
\item The compiler reads BFC input files and creates assembler code.

\vspace*{.5cm}
\item The assembler has an ad-hoc lexer and parser.

\vspace*{.5cm}
\item The compiler has a flex generated lexer and a bison generated parser.

\vspace*{.5cm}
\item The compiler generates the assembler code directly from the parser reduce
functions.

\end{itemize}
\end{slide}

\begin{slide}{Assembler}
\begin{itemize}

\item The operators {\tt [ + } and {\tt -} are unmodified.

\vspace*{.3cm}
\item The {\tt ]} operator sets the pointer back to the position where it was
at {\tt [}.

\vspace*{.3cm}
\item A named variable can be defined with {\tt (x)}.

\vspace*{.3cm}
\item The pointer can be set to a named variable with {\tt <x>}.

\vspace*{.3cm}
\item A name space is defined with {\tt \{ ... \}}.

\vspace*{.3cm}
\item A block in single quotes is passed through unmodified.

\vspace*{.3cm}
\item Larger spaces can be defined with {\tt (x.42)}.

\vspace*{.3cm}
\item An alias for another variable can be defined with {\tt (x:y)}.

\end{itemize}
\end{slide}

\begin{slide}{Compiler}
\begin{itemize}

\item Variables are declared with {\tt var x;}.

\vspace*{.5cm}
\item C-like expressions for {\tt =}, {\tt +=}, {\tt -=}, {\tt if} and {\tt
while} are available.

\vspace*{.5cm}
\item Macros can be defined with {\tt macro x() \{ ... \}}.

\vspace*{.5cm}
\item All variables are passed using call-by-reference.

\vspace*{.5cm}
\item The compiler can't evaluate complex expressions.

\vspace*{.5cm}
\item Higher functions (such as comparisons and multiply) are implemented
using built-in functions.

\end{itemize}
\end{slide}

\begin{slide}{Running}
\begin{itemize}

\item The compiler and the assembler are both filter programs.

\vspace*{.5cm}
\item So compilation is done by:\\
\begin{verbatim}
$ ./bfc < hanoi.bfc | ./bfa > hanoi.bf
Code: 53884 bytes, Data: 275 bytes.
\end{verbatim}

\vspace*{.5cm}
\item The {\tt bfrun} executable is a simple Brainf*ck interpreter:
\begin{verbatim}
$ ./bfrun hanoi.bf
\end{verbatim}

\end{itemize}
\end{slide}

\begin{slide}{Implementation}
\begin{center}
\begin{huge}

\vspace*{.5cm}
Code review of the assembler.

\vspace*{.7cm}
.. and the compiler.

\vspace*{.7cm}
.. and the built-ins library.

\vspace*{.7cm}
.. and the hanoi example.

\end{huge}
\end{center}
\end{slide}

% ============================================================================

\tsectionandpart{Stack Machines}

\begin{slide}{Overview}
\begin{itemize}

\item Stack machines are a computer architecture, like register machines or
accumulator machines.

\vspace*{.5cm}
\item Every instruction pops its arguments from the stack and pushes the result
back on the stack.

\vspace*{.5cm}
\item Special instructions push the content of a variable on the stack or pop a
value from the stack and write it back to a variable.

\vspace*{.5cm}
\item Stack machines are great for virtual machines in scripting languages
because code generation is very easy.

\vspace*{.5cm}
\item However, stack machines are less efficient than register machines and are
harder to implement in hardware.

\end{itemize}
\end{slide}

\begin{slide}{Example}

\vspace*{1.5cm}
\begin{large}
\begin{verbatim}
           x = 5 * ( 3 + y );

               PUSHC "5"
               PUSHC "3"
               PUSH  "y"
               IADD
               IMUL
               POP   "x"
\end{verbatim}
\end{large}

\end{slide}

% ============================================================================

\tsectionandpart{The SPL Project}

\begin{slide}{Overview}
\begin{itemize}

\item SPL is an embeddable scripting language with C-like syntax.

\vspace*{.3cm}
\item It has support for arrays, hashes, objects, perl regular expressions, etc. pp.

\vspace*{.3cm}
\item The entire state of the virtual machine can be dumped at any time and
execution of the program resumed later.

\vspace*{.3cm}
\item In SPL there is a clear separation of compiler, assembler, optimizer and
virtual machine.

\vspace*{.3cm}
\item It's possible to run pre-compiled binaries, program directly in the VM
assembly, use multi threading, step-debug programs, etc. pp.

\vspace*{.3cm}
\item SPL is a very small project, so it is a good example for implementing
high-level language compilers for stack machines.

\end{itemize}
\end{slide}

\begin{slide}{WebSPL}
\begin{itemize}

\item WebSPL is a framework for web application development.

\vspace*{.5cm}
\item It creates a state over the stateless HTTP protocol using the
dump/restore features of SPL.

\vspace*{.5cm}
\item I.e. it is possible to print out an updated HTML page and then call a
function which ``waits'' for the user to do anything and returns then.

\vspace*{.5cm}
\item WebSPL is still missing some bindings for various SQL implementations,
XML and XSLT bindings, the WSF (WebSPL Forms) library and some other stuff..

\vspace*{.5cm}
\item Right now I'm looking for people who want to participate in the project.

\end{itemize}
\end{slide}

\begin{slide}{Example}
\begin{tiny}
\begin{verbatim}
object Friend {
        var id;
<...>
        method winmain(sid) {
                title = name;
                .sid = sid;
                while (1) {
                        template = "show";
                        bother_user();
                        if ( defined cgi.param.edit ) {
                                template = "edit";
                                bother_user();
                                name = cgi.param.new_name;
                                phone = cgi.param.new_phone;
                                email = cgi.param.new_email;
                                addr = cgi.param.new_addr;
                                title = name;
                        }
                        if ( defined cgi.param.delfriend ) {
                                delete friends.[id].links.[cgi.param.delfriend];
                                delete friends.[cgi.param.delfriend].links.[id];
                        }
                        if ( defined cgi.param.delete ) {
                                delete friends.[id];
                                foreach f (friends)
                                        delete friends.[f].links.[id];
                                &windows.[winid].finish();
                        }
                }
        }
}
\end{verbatim}
\end{tiny}
\end{slide}

% ============================================================================

\tsectionandpart{LL(regex) parsers}

\begin{slide}{Overview}
\begin{itemize}

\item LL parsers (recursive descent parsers) are straightforward implementations
of a BNF.

\vspace*{.5cm}
\item Usually parsers read lexemes (tokens) from a lexer.

\vspace*{.5cm}
\item An LL(N) parser has access to N lookahead symbols to decide which
reduction should be applied.

\vspace*{.5cm}
\item Usually LL(N) parsers are LL(1) parsers.

\vspace*{.5cm}
\item LL(regex) parsers are LL parsers with no lexer but a regex engine.

\vspace*{.5cm}
\item LL(regex) parsers are very easy to implement in perl.

\end{itemize}
\end{slide}

\begin{slide}{Left recursions}
\begin{itemize}

\item Often a BNF contains left recursion:
\begin{verbatim}
<...>
product: primary
|        product '*' primary
|        product '/' primary;
<...>
\end{verbatim}

\vspace*{.3cm}
\item Left recursions cause LL parsers to run into an endless recursion.

\vspace*{.3cm}
\item There are algorithms for converting left recursions to right recursions
without affecting the organization of the parse tree.

\vspace*{.3cm}
\item But the resulting BNF is much more complex than the original one.

\vspace*{.3cm}
\item Most parser generators do that automatically (e.g. bison).

\end{itemize}
\end{slide}

\begin{slide}{Example}
\begin{center}

\begin{huge}
\vspace*{1.5cm}
Code review of llregex.pl.
\end{huge}

\vspace*{1cm}
http://www.clifford.at/papers/2004/compiler/llregex.pl

\end{center}
\end{slide}

% ============================================================================

\tsectionandpart{URLs and References}

\begin{slide}{URLs and References}
\begin{itemize}

\item My Brainf*ck Projects: \\
http://www.clifford.at/bfcpu/

\vspace*{.3cm}
\item The SPL Project: \\
http://www.clifford.at/spl/

\vspace*{.3cm}
\item Clifford Wolf: \\
http://www.clifford.at/

\vspace*{.3cm}
\item ``The Dragonbook'' \\ Compilers: Principles, Techniques and Tools \\
by Alfred V. Aho, Ravi Sethi, and Jeffrey D. Ullman \\ Addison-Wesley 1986; ISBN 0-201-10088-6

\vspace*{.3cm}
\item LINBIT Information Technologies \\
http://www.linbit.com/

\end{itemize}

\vspace*{.3cm}
\begin{center}
http://www.clifford.at/papers/2004/compiler/
\end{center}

\end{slide}

% ============================================================================

\end{document}

