
% this RequirePackage is a workaround for the bug reported in:
% http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=254807
\RequirePackage{color}

\documentclass[pdf]{prosper}
\usepackage[toc,highlight,clifford,notes,hlsections]{HA-prosper}
\usepackage{color}

\usepackage{svn}
\SVNdate $Date: 2006-06-04 15:52:53 +0200 (Sun, 04 Jun 2006) $

% Project name macro: typesets "Csync" in bold followed by a superscript 2.
% TeX control words consist of letters only, so this actually defines \csync
% with the delimiter text `2': every use must be written exactly as \csync2.
% The \sc variant doesn't look very good in the HA-prosper titles:
% \def\csync2{{\sc Csync$^{2}$}}
\def\csync2{{\bf Csync$^{2}$}}

\definecolor{darkred}{rgb}{.7,0,0}
\definecolor{darkgreen}{rgb}{0,.7,0}
\definecolor{darkblue}{rgb}{0,0,.7}

\title{Cluster synchronisation with \csync2}
\subtitle{Synchronizing configuration files and application images}
\author{Clifford Wolf\\
\institution{\csync2 - \href{http://oss.linbit.com/csync2/}{http://oss.linbit.com/csync2/}}\\
\institution{ROCK Linux - \href{http://www.rocklinux.org/}{http://www.rocklinux.org/}}\\
\institution{SPL - \href{http://www.clifford.at/}{http://www.clifford.at/}}}

\DefaultTransition{Wipe}
\TitleSlideNav{FullScreen}
\NormalSlideNav{ShowBookmarks}
\LeftFoot{\href{http://www.clifford.at}{Clifford Wolf}}
\RightFoot{\href{http://oss.linbit.com/}{\csync2 - oss.linbit.com}}


\begin{document}

\maketitle

% ============================================================================

\tsectionandpart{Introduction}


\begin{slide}{Synchronous vs. Asynchronous}
\begin{itemize}

\item Synchronous synchronization (e.g. DRBD)
\begin{itemize}
\item \textcolor{darkred}{Difficult in active-active setups}
\item \textcolor{darkred}{Even more difficult in clusters with more than two hosts}
\item \textcolor{darkred}{Needs to be implemented in kernel space}
\item \textcolor{darkgreen}{All hosts always have the same data}
\end{itemize}

\vspace*{.5cm}
\item Asynchronous synchronization (e.g. \csync2)
\begin{itemize}
\item \textcolor{darkgreen}{Can also be implemented easily for complex environments}
\item \textcolor{darkgreen}{Much simpler and thus less error-prone algorithms}
\item \textcolor{darkgreen}{It is possible to test changes before deploying them}
\item \textcolor{darkred}{Hosts can be out of sync temporarily}
\end{itemize}

\vspace*{.5cm}
\item Neither of the two is better.
\item Use the synchronization method which fits your usage scenario.

\end{itemize}
\end{slide}


\begin{slide}{\csync2 Overview}
\begin{itemize}

\item A Free Software (GPL) Tool for asynchronous synchronization.

\vspace*{.5cm}
\item csync2 is a ``single shot'' command line tool.
\item Uses its own network protocol (TCP 30865).
\item The daemon can be run via inetd or stand alone.
\item Provides a wide range of synchronization features.

\vspace*{.5cm}
\item Tested well on Linux and Cygwin.
\item Is expected to work well on non-Linux Unices too.
\item Uses libsqlite (version 2) for the backend database.
\item Might not compile out of the box with non-gcc compilers.

\end{itemize}
\end{slide}


\begin{slide}{\csync2 Features}
\begin{itemize}

\item Conflict detection
\begin{itemize}
\item When a file has been changed on more than one host, a conflict is detected.
\item Conflicts can be resolved manually or automatically.
\end{itemize}

\vspace*{.5cm}
\item Replicating file removals
\begin{itemize}
\item A file removal is detected as such and replicated correctly.
\end{itemize}

\vspace*{.5cm}
\item Complex setups
\begin{itemize}
\item More than two hosts and multiple synchronization groups
\item Different base directories on different hosts
\end{itemize}

\vspace*{.5cm}
\item Reacting to updates
\begin{itemize}
\item Letting csync2 execute  arbitrary commands in reaction to file updates.
\end{itemize}

\end{itemize}
\end{slide}


% ============================================================================

\tsectionandpart{The \csync2 Algorithm}

\begin{slide}{Scanning}
\begin{itemize}

\item All files referred to by the configuration file are {\tt stat()}ed.

\vspace*{.5cm}
\item The file metadata (mtime, size, etc.) are written to the local backend
database (sqlite).

\vspace*{.5cm}
\item All modifications (additions, removals and changes) are scheduled for
syncing.
\item This is done by adding the files in question to the {\tt dirty} table
in the backend database.

\vspace*{.5cm}
\item The scanning code is triggered by {\tt csync2 -c}, {\tt csync2 -x} and
when update requests from a peer are received.

\end{itemize}
\end{slide}


\begin{slide}{Updating (1/2)}
\begin{itemize}

\item The files listed in the {\tt dirty} table are sent to the peers.

\vspace*{.5cm}
\item The communication between the peers is handled with {\csync2}'s own
protocol (TCP port 30865).

\vspace*{.5cm}
\item Authentication is performed using pre-shared keys, the IP addresses and
the SSL certificates.

\vspace*{.5cm}
\item The rsync algorithm is used for actually updating files on the peers.

\vspace*{.5cm}
\item \csync2 detects if a file didn't change at all (e.g. the file has been
{\tt touch}ed).

\end{itemize}
\end{slide}


\begin{slide}{Updating (2/2)}
\begin{itemize}

\item The peer can refuse to update the file (e.g. if a conflict is detected).

\vspace*{.5cm}
\item The record in the {\tt dirty} table is removed if the update was
successful.

\vspace*{.5cm}
\item The peer executes the action handlers, if any are configured for the
updated files.

\vspace*{.5cm}
\item The updating code is triggered by {\tt csync2 -u} and {\tt csync2 -x}.

\end{itemize}
\end{slide}


\begin{slide}{Comparing}
\begin{itemize}

\item It is also possible to compare the csync2 databases on two hosts.

\vspace*{.5cm}
\item \csync2 can generate lists of different files and unified diffs.

\vspace*{.5cm}
\item This is e.g. useful for resolving conflicts.

\vspace*{.5cm}
\item The databases are compared, not the filesystem.
\item So it is a good idea to do a full scan on the hosts before comparing
them.

\vspace*{.5cm}
\item The compare code is triggered by {\tt csync2 -T}.

\end{itemize}
\end{slide}


\begin{slide}{Performance}
\begin{itemize}

\item We have excellent experiences with setups with a few hundred thousand
files with a few (up to four) hosts as well as a few thousand files with many
hosts.

\vspace*{.2cm}
\item The actual performance depends a lot on the hardware, filesystems and
I/O load of the hosts.

\vspace*{.2cm}
\item \csync2 implements a full mesh synchronisation network. Really huge
setups with hundreds of hosts and many changes might be better synchronized
using a star or tree synchronisation network.

\vspace*{.2cm}
\item The term ``good performance'' always depends on your requirements. Make
tests before deploying \csync2 in huge environments.

\vspace*{.2cm}
\item There is a setup with 3.6 million files on four hosts. This is pretty
much the limit of what makes sense to synchronize with \csync2.

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{Setting up \csync2}

\begin{slide}{Building and Installing}
\begin{itemize}

\item \csync2 can be built with \\ {\tt ./configure \&\& make \&\& make install}

\vspace*{.5cm}
\item Beware of distributions with horribly outdated \csync2 packages!

\vspace*{.5cm}
\item The \csync2 sources are prepared to be built with {\tt rpmbuild} and {\tt
debuild}.

\vspace*{.5cm}
\item {\tt make cert} creates a self-signed SSL certificate for \csync2.

\vspace*{.5cm}
\item Records in {\tt /etc/services} and {\tt /etc/inetd.conf} need to be
created.

\vspace*{.5cm}
\item \csync2 depends on libsqlite (version 2) and librsync.

\end{itemize}
\end{slide}


\begin{slide}{Configuration}
\begin{itemize}

\item The configuration file {\tt /etc/csync2.cfg} needs to be written.

\vspace*{.5cm}
\item This configuration file defines so-called synchronization groups.

\vspace*{.5cm}
\item The synchronization groups contain:
\begin{itemize}
\item A list of hosts
\item A pre-shared key file
\item File include/exclude patterns
\item Action handlers (optional)
\end{itemize}

\vspace*{.5cm}
\item The same configuration file can be used on all hosts.

\vspace*{.5cm}
\item Some example configurations are discussed later in the presentation.

\end{itemize}
\end{slide}


\begin{slide}{Running \csync2}
\begin{itemize}

\item Running full scan and update: {\tt csync2 -x}
\item .. usually \csync2 is executed this way

\vspace*{.5cm}
\item Just running a full scan: {\tt csync2 -cr /}
\item .. the {\tt -r} makes \csync2 operate recursively

\vspace*{.5cm}
\item Just running a full update: {\tt csync2 -u}

\vspace*{.5cm}
\item Showing a unified diff for a file: {\tt csync2 -TT /etc/hosts}

\vspace*{.5cm}
\item Printing the help message: {\tt csync2}
\item .. or {\tt man csync2} for a more verbose version.

\end{itemize}
\end{slide}


\begin{slide}{Bootstrapping}
\begin{itemize}

\item In most cases it is ok to simply run a full \csync2 scan and update cycle
on all hosts ({\tt csync2 -x}).

\vspace*{.5cm}
\item This will try to sync everything from every host to all peers.
\item For large setups this is a waste of resources.

\vspace*{.5cm}
\item It is possible to run a full scan without scheduling updates by running
{\tt csync2 -cIr /}.

\vspace*{.5cm}
\item And it is possible to schedule updates by comparing the hosts using {\tt
csync2 -TI}.

\vspace*{.5cm}
\item This is much faster.
\item But it is not applicable for daily operations because the detection
of conflicts and file removals doesn't work this way.

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{Example Configs}

\begin{slide}{Minimalistic}
\begin{verbatim}
group mygroup
{
    host host1 host2;

    key /etc/csync2.key_mygroup;

    include /var/sharedfiles;
}
\end{verbatim}
\end{slide}

\begin{slide}{Multiple Groups}
\begin{verbatim}
group biggroup {
    host host1 host2 host3 host4;
    key /etc/csync2.key_biggroup;
    include /etc/hosts;
}

group smallgroup12 {
    host host1 host2;
    key /etc/csync2.key_smallgroup12;
    include /etc/passwd /etc/shadow /etc/group;
}

group smallgroup34 {
    host host3 host4;
    key /etc/csync2.key_smallgroup34;
    include /etc/passwd /etc/shadow /etc/group;
}
\end{verbatim}
\end{slide}

\begin{slide}{Include/Exclude Patterns}
\begin{verbatim}
group mygroup
{
    host host1 host2;
    key /etc/csync2.key_mygroup;

    # pathname patterns
    # last match is decisive, exclude per default
    include /etc/apache2;
    exclude /etc/apache2/envvars;
    exclude /etc/apache2/ssl;

    # basename patterns
    # last match is decisive, include per default
    exclude *~ .*.swp;
    include apache2.conf~;
}
\end{verbatim}
\end{slide}

\begin{slide}{Prefix Sections}
\begin{verbatim}
group mygroup
{
    host host1 host2 host3;
    key /etc/csync2.key_mygroup;
    include %homedir%/bob;
}

prefix homedir
{
    on host1:
        /export/users;
    on *:
        /home;
}
\end{verbatim}
\end{slide}

\begin{slide}{Actions}
\begin{verbatim}
group mygroup
{
    host host1 host2 host3;
    key /etc/csync2.key_mygroup;
    include /etc/apache2;

    action
    {
        pattern /etc/apache2/httpd.conf;
        pattern /etc/apache2/sites-available/*;
        exec "/usr/sbin/apache2ctl graceful";
        logfile "/var/log/csync2_action.log";
        do-local;
    }
}
\end{verbatim}
\end{slide}

\begin{slide}{Backups}
\begin{verbatim}
group mygroup
{
    host host1 host2;
    key /etc/csync2.key_mygroup;
    include /var/sharedfiles;

    backup-directory /var/backups/csync2;
    backup-generations 3;
}
\end{verbatim}
\end{slide}

\begin{slide}{Sync Networks}
\begin{verbatim}
group mygroup
{
    host host1@host1-back host2@host2-back;

    key /etc/csync2.key_mygroup;
    include /var/sharedfiles;
}
\end{verbatim}
\end{slide}

\begin{slide}{More..}
A full description of the {\tt csync2.cfg} syntax can be found in the \csync2
manual.
\end{slide}

% ============================================================================

\tsectionandpart{URLs and References}


\begin{slide}{Related Projects}
\begin{itemize}

\item librsync: A library for rsync-like binary diffing \\
http://librsync.sourceforge.net/

\vspace*{.5cm}
\item sqlite: A small embeddable SQL database engine \\
http://www.sqlite.org/

\vspace*{1.5cm}
\item rsync: popular file synchronisation tool \\
http://rsync.samba.org/

\vspace*{.5cm}
\item unison: another file synchronisation tool \\
http://www.cis.upenn.edu/\textasciitilde{}bcpierce/unison/

\end{itemize}
\end{slide}


\begin{slide}{Credits}
\begin{itemize}

\item LINBIT Information Technologies GmbH: \\
http://www.linbit.com/

\vspace*{.5cm}
\item Clifford Wolf: \\
http://www.clifford.at/

\end{itemize}

\vspace*{2cm}\hspace*{3cm}
http://oss.linbit.com/csync2/

\end{slide}

% ============================================================================

\end{document}

