\documentclass[pdf]{prosper}
\usepackage[toc,highlight,linbit,notes,hlsections]{HA-prosper}

\title{Cluster synchronisation with csync2}
\subtitle{Managing distributed config files and application images}
\author{Clifford Wolf\\
\institution{ROCK Linux - \href{http://www.rocklinux.org}{http://www.rocklinux.org}}\\
\institution{CNGW - \href{http://www.cngw.org}{http://www.cngw.org}}\\
\institution{LINBIT - \href{http://www.linbit.com}{http://www.linbit.com}}}

\DefaultTransition{Wipe}
\TitleSlideNav{FullScreen}
\NormalSlideNav{ShowBookmarks}
\LeftFoot{\href{http://www.clifford.at}{Clifford Wolf}, \today}
\RightFoot{\href{http://oss.linbit.com/}{csync2 - oss.linbit.com}}


\begin{document}

\maketitle

% ============================================================================

\tsectionandpart{Introduction}


\begin{slide}{The Problem}
\begin{itemize}

\item Multiple hosts are sharing the same files
\begin{itemize}
\item Configuration files
\item Application images
\item Other data such as htdocs
\end{itemize}

\vspace*{.5cm}
\item Changes are rare but possible
\begin{itemize}
\item Changes can happen on any cluster node
\item Changes might also be file removals or moves
\item Conflicts are possible and should be detected
\end{itemize}

\vspace*{.5cm}
\item Setups can be very complex
\begin{itemize}
\item High number of nodes and some might be down
\item File-groups which are only synced between some hosts
\item Actions to be triggered after updating some files
\end{itemize}

\end{itemize}
\end{slide}

\begin{slide}{The Solution}
\begin{itemize}

\item Csync2 targets these problems

\vspace*{.5cm}
\item Csync2 has some requirements:
\begin{itemize}
\item A GNU C-Compiler (alloca, etc)
\item GNU Flex and GNU Bison (config file parser)
\item The sqlite and librsync libraries
\item Autoconf+Automake
\end{itemize}

\vspace*{.5cm}
\item Csync2 has been developed for and tested in large
cluster environments at LINBIT Information Technologies.

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{Csync2 Overview}

\begin{slide}{Building and Installing (1)}
\begin{itemize}

\item ROCK Linux
\begin{itemize}
\item {\tt rocket build csync2}
\item {\tt rocket create\_gem csync2}
\item {\tt mine -i csync2-1.13.gem}
\end{itemize}

\vspace*{.5cm}
\item Debian GNU/Linux
\begin{itemize}
\item {\tt tar xzf csync2-1.13.tar.gz}
\item {\tt cd csync2-1.13; debuild}
\item {\tt dpkg -i csync2\_1.13-1\_i386.deb}
\end{itemize}

\vspace*{.5cm}
\item Others
\begin{itemize}
\item {\tt tar xzf csync2-1.13.tar.gz}
\item {\tt cd csync2-1.13; ./configure}
\item {\tt make \&\& make install}
\end{itemize}

\end{itemize}
\end{slide}

\begin{slide}{Building and Installing (2)}
\begin{itemize}

\item /etc/services
\begin{itemize}
\item {\tt csync2 30865/tcp}
\end{itemize}

\vspace*{.5cm}
\item /etc/inetd.conf
\begin{itemize}
\item {\tt csync2 stream tcp nowait root /usr/sbin/csync2 csync2 -i}
\end{itemize}

\vspace*{.5cm}
\item /etc/csync2*.cfg
\begin{itemize}
\item Add your configs and synchronisation groups
\end{itemize}

\vspace*{.5cm}
\item /var/lib/csync2/*
\begin{itemize}
\item Bootstrap the local status databases
\end{itemize}

\end{itemize}
\end{slide}


\begin{slide}{Csync2 Files}
\begin{itemize}

\item {\tt /usr/sbin/csync2} \\
csync2 executable

\vspace*{.2cm}
\item {\tt /usr/share/csync2/csync2\_locheck.sh} \\
example csync2 logout check

\vspace*{.6cm}
\item {\tt /etc/csync2.cfg} \\
config file for default config

\vspace*{.2cm}
\item {\tt /var/lib/csync2/{\it hostname}.db} \\
database file for default config

\vspace*{.6cm}
\item {\tt /etc/csync2\_{\it foobar}.cfg} \\
config file for {\it foobar} config

\vspace*{.2cm}
\item {\tt /var/lib/csync2/{\it hostname}\_{\it foobar}.db} \\
database file for {\it foobar} config

\end{itemize}
\end{slide}


\begin{slide}{Check Mode}
\begin{itemize}

\item {\tt csync2 -crv /}

\vspace*{.5cm}
\item Compares local filesystem with local status database

\vspace*{.5cm}
\item Changed files are updated in the status database
\item .. and are added to the dirty database

\vspace*{.5cm}
\item Needs approx.\ 1.5 sec for 15.000 files (117.023 syscalls) on an AMD
Athlon(tm) XP 1800+ Server on Linux 2.4.25.

\vspace*{.5cm}
\item When no filename is given, the hint database (csync2 -h) is used.

\end{itemize}
\end{slide}


\begin{slide}{Update Mode}
\begin{itemize}

\item {\tt csync2 -uv}

\vspace*{.5cm}
\item Update remote hosts according to the dirty database

\vspace*{.5cm}
\item Conflicts are detected automatically
\item .. and can be resolved using {\tt csync2 -f}

\vspace*{.5cm}
\item Failed updates are kept in the dirty database for later re-tries.

\vspace*{.5cm}
\item A dry-run is done by {\tt csync2 -ud}

\vspace*{.5cm}
\item When no filename is given, the entire dirty-db is used.

\end{itemize}
\end{slide}


\begin{slide}{Simple Mode}
\begin{itemize}

\item {\tt csync2 -xv}

\vspace*{.5cm}
\item Check and update. This is what most people want to do.

\vspace*{.5cm}
\item As with the other modes, this automatically resolves relative paths.
So e.g. this is possible: {\tt csync2 -xr .}

\vspace*{.5cm}
\item All options for {\tt -c} and {\tt -u} are accepted.

\vspace*{.5cm}
\item When no filename is given, {\tt -r /} is implied.

\end{itemize}
\end{slide}


\begin{slide}{Example configuration}
\begin{verbatim}
group mygroup
{
        host host1 host2 host3;
        host host4@host4-eth2;

        key /etc/csync2.key_mygroup;

        include /etc/apache;
        include /home/bob;
        exclude /home/bob/temp;
        exclude *~ .*;

        action {
                pattern /etc/apache/httpd.conf;
                pattern /etc/apache/sites-available/*;
                exec "/usr/sbin/apache2ctl graceful";
                logfile "/var/log/csync2_action.log";
                do-local;
        }
}
\end{verbatim}
\end{slide}

\begin{slide}{Csync2 usage (1)}
{\scriptsize
\begin{verbatim}
csync2 v1.12 - cluster synchronisation tool, 2nd generation
LINBIT Information Technologies GmbH <http://www.linbit.com>
Copyright (C) 2004  Clifford Wolf <clifford@clifford.at>
This program is free software under the terms of the GNU GPL.

Usage: csync2 [-v..] [-C config-name] [-D database-dir] [-N hostname] ..

With file parameters:
-h [-r] file..          Add (recursive) hints for check to db
-c [-r] file..          Check files and maybe add to dirty db
-u [-d] [-r] file..     Updates files if listed in dirty db
-f file..               Force this file in sync (resolve conflict)
-m file..               Mark files in database as dirty

Simple mode:
-x [-d] [[-r] file..]   Run checks for all given files and update
                        remote hosts.
\end{verbatim}
}
\end{slide}

\begin{slide}{Csync2 usage (2)}
{\scriptsize
\begin{verbatim}
Without file parameters:
-c      Check all hints in db and eventually mark files as dirty
-u [-d] Update (transfer dirty files to peers and mark as clear)

-H      List all pending hints from status db
-L      List all file-entries from status db
-M      List all dirty files from status db

-S myname peername      List file-entries from status db for this
                        synchronisation pair.

-T                      Test if this node is in sync with all peers.

-T myname peername      Test if this synchronisation pair is in sync.

-T myname peer file     Show difference between file on peer and local.

The modes -H, -L, -M and -S return 2 if the requested db is empty.
The mode -T returns 2 if both hosts are in sync.

-i      Run in inetd server mode.
-R      Remove files from database which don't match config entries.
\end{verbatim}
}
\end{slide}

\begin{slide}{Csync2 usage (3)}
{\scriptsize
\begin{verbatim}
Modifiers:
-r      Recursive operation over subdirectories
-d      Dry-run on all remote update operations

-B      Don't block everything into big SQL transactions. This
        slows down csync2 but allows multiple csync2 processes to
        access the database at the same time. Use e.g. when slow
        lines are used or huge files are transfered.

-I      Init-run. Use with care and read the documentation first!
        You usually don't need this option unless you are
        initializing groups with really large file lists.

-G Group1,Group2,Group3,...
        Only use this groups from config-file.

-P peer1,peer1,...
        Only update this peers (still mark all as dirty).

Creating key file:
csync2 -k filename
\end{verbatim}
}
\end{slide}

% ============================================================================

\tsectionandpart{Csync2 Configuration}

\begin{slide}{Configs, Groups and Keys}
\begin{itemize}

\item Multiple configurations can exist in parallel: \\
{\tt /etc/csync2.cfg}, {\tt /etc/csync2\_{\it foobar}.cfg}, ...
\item A configuration can be selected using the {\tt -C} option.

\vspace*{.2cm}
\item A configuration consists of multiple groups.
\item A group(-list) can be selected using the {\tt -G} option.

\end{itemize}

\vspace*{.2cm}
\begin{verbatim}
group gallien {
        host asterix obelix;
        key /etc/csync2.key_gallien;
}
group entenhausen {
        host dagobert donald daisy duesentrieb;
        key /etc/csync2.key_entenhausen;
}
\end{verbatim}

\vspace*{.2cm}
\begin{itemize}
\item Every group must have a key-file (shared secret).
\item This key-file can be generated with {\tt csync2 -k {\it keyfile}}
\end{itemize}
\end{slide}

\begin{slide}{Host definitions}
\begin{itemize}

\item Every group has a host list assigned.
\item Groups which do not have the local hostname in them are ignored.

\vspace*{.5cm}
\item The hostname may be different from the interface name.
\item .. e.g. if crossover cables are used.
\vspace*{.2cm}
\item This must be written as {\tt host-name@interface-name}.

\end{itemize}

\vspace*{.5cm}
\begin{verbatim}
group gallien {
        host asterix@asterix-x obelix@obelix-x;
        key /etc/csync2.key_gallien;
}
\end{verbatim}

\end{slide}

\begin{slide}{Include/exclude pattern}
\begin{itemize}

\item Include/exclude patterns are used to specify which files should be synced.

\vspace*{.2cm}
\item Patterns can be specified for full path names and basenames.

\vspace*{.2cm}
\item The full path name pattern list and the basename pattern list must accept
(include) the file. The last matching pattern in each of the two lists is chosen.

\end{itemize}

\vspace*{.5cm}
\begin{verbatim}
group gallien {
        host asterix@asterix-x obelix@obelix-x;
        key /etc/csync2.key_gallien;

        include /home/bob;
        exclude /home/bob/temp;
        include *.txt;
        exclude ~*;
}
\end{verbatim}

\end{slide}

\begin{slide}{Action definitions (1)}
\begin{itemize}

\item It's often useful to define commands to be executed
when a group of files has been updated.

\vspace*{.5cm}
\item The output is written to {\tt /dev/null} unless a logfile
is specified.

\vspace*{.5cm}
\item It's also possible to let csync2 execute the command locally
when a change has been detected.

\vspace*{.5cm}
\item The command is only executed once - even if multiple files
have been updated.

\end{itemize}
\end{slide}

\begin{slide}{Action definitions (2)}
\begin{verbatim}
group gallien {
        host asterix@asterix-x obelix@obelix-x;
        key /etc/csync2.key_gallien;

        include /etc/apache;

        action {
                pattern /etc/apache/httpd.conf;
                pattern /etc/apache/sites-available/*;
                exec "/usr/sbin/apache2ctl graceful";
                logfile "/var/log/csync2_action.log";
                do-local;
        }
}
\end{verbatim}
\end{slide}

% ============================================================================

\tsectionandpart{Bootstrapping the Databases}

\begin{slide}{The boring default way}
\begin{itemize}

\item Csync2 and config installed on all hosts

\vspace*{.2cm}
\item {\tt csync2 -cr /} marks everything as dirty (added)

\vspace*{.2cm}
\item {\tt csync2 -u} wants to sync everything to every peer host
\item .. and then discards all dirty entries after detecting that
the files are already in sync.

\vspace*{.8cm}
\item Number of syncs for $n$ files on $m$ hosts in the worst case:
\[ n \frac{m (m-1)}{2} \]

\vspace*{.8cm}
\item This is a lot of wasted time for doing nothing if the hosts are
already in sync.

\end{itemize}
\end{slide}


\begin{slide}{Fast Status-DB Creation}
\begin{itemize}

\item {\tt csync2 -cIr /} just updates the local database without
adding anything to the list of dirty files.

\vspace*{.2cm}
\item This is great for initially creating the databases if the hosts
are already in sync.

\vspace*{.2cm}
\item But: If the hosts have different files, no one will notice.

\vspace*{.5cm}
\item So {\tt csync2 -T myname peername} can check if two hosts are in sync.

\vspace*{.2cm}
\item And {\tt csync2 -TI myname peername} marks files as dirty.

\vspace*{.2cm}
\item Note that {\tt -TI} cannot possibly detect deletions or moves.

\vspace*{.2cm}
\item It's possible to cycle through all hosts with {\tt -IT} to check
the entire cluster.

\end{itemize}
\end{slide}


\begin{slide}{Bootstrapping Procedures}
\begin{itemize}

\item For small groups, just run {\tt csync2 -cr /} and {\tt csync2 -u}
on all hosts.

\vspace*{.8cm}
\item For big groups this would take too much time, so:
\begin{itemize}
\item Manually sync all nodes (e.g. using rsync)
\item Run {\tt csync2 -cIr /} on all nodes
\item Compare the nodes with {\tt csync2 -T}
\end{itemize}

\vspace*{.8cm}
\item For big groups where hard-syncing is impossible:
\begin{itemize}
\item Run {\tt csync2 -cIr /} on all nodes
\item Find and resolve conflicts with {\tt -TI} and {\tt -m}
\end{itemize}

\end{itemize}
\end{slide}

% ============================================================================

\tsectionandpart{URLs and References}


\begin{slide}{Related Projects}
\begin{itemize}

\item librsync: A library for rsync-like binary diffing \\
http://librsync.sourceforge.net/

\vspace*{.5cm}
\item sqlite: A small embeddable SQL database engine \\
http://www.sqlite.org/

\vspace*{1.5cm}
\item rsync: popular file synchronisation tool \\
http://rsync.samba.org/

\vspace*{.5cm}
\item unison: another file synchronisation tool \\
http://www.cis.upenn.edu/\~{}bcpierce/unison/

\end{itemize}
\end{slide}


\begin{slide}{Credits}
\begin{itemize}

\item LINBIT Information Technologies GmbH: \\
http://www.linbit.com/

\vspace*{.5cm}
\item Clifford Wolf: \\
http://www.clifford.at/

\end{itemize}

\vspace*{2cm}\hspace*{4cm}
http://oss.linbit.com/

\end{slide}

% ============================================================================

\end{document}

