Commits

Bryan O'Sullivan committed 0707489 Merge

Merge.

Comments (0)

Files changed (70)

 
 beta/*.tex
 build_id.tex
+*.4[ct][ct]
 *.aux
 *.bbl
 *.bib
 *.out
 *.pdf
 *.png
+*.ps
+*.run
 *.tmp
 *.toc
 *.xref
 
 \include{preface}
 \include{intro}
+\include{tour-basic}
+\include{tour-merge}
+\include{concepts}
+\include{daily}
+\include{filenames}
+\include{undo}
 \include{hook}
+\include{template}
 \include{mq}
+\include{mq-collab}
 
 \appendix
+\include{cmdref}
+\include{mq-ref}
+\include{srcinstall}
 \include{license}
 \addcontentsline{toc}{chapter}{Bibliography}
 \bibliographystyle{alpha}
 \bibliography{99book}
 
+\addcontentsline{toc}{chapter}{Index}
 \printindex
 
 \end{document}
 @Misc{web:rej,
   author = 	 {Chris Mason},
   title = 	 {\texttt{rej}--help solve patch rejects},
-  note = 	 {\url{ftp://ftp.suse.com/pub/people/mason/rej/}},
+  note = 	 {\url{http://oss.oracle.com/mercurial}},
 }
 
 @Misc{web:wiggle,
   note = 	 {\url{http://cgi.cse.unsw.edu.au/~neilb/source/wiggle/}},
 }
 
+@Misc{web:mysql-python,
+  author =	 {Andy Dustman},
+  title =	 {MySQL for Python},
+  note =	 {\url{http://sourceforge.net/projects/mysql-python}},
+}
+
+@Misc{web:changelog,
+  author =	 {Richard Stallman and {GNU Project volunteers}},
+  title =	 {GNU Coding Standards---Change Logs},
+  note =	 {\url{http://www.gnu.org/prep/standards/html_node/Change-Logs.html}},
+}
+
+@Misc{web:macpython,
+  author =	 {Bob Ippolito and Ronald Oussoren},
+  title =	 {Universal MacPython},
+  note =	 {\url{http://bob.pythonmac.org/archives/2006/04/10/python-and-universal-binaries-on-mac-os-x/}},
+}
 % Mercurial command, with arguments.
 \newcommand{\hgcmdargs}[2]{\index{\texttt{#1} command}``\texttt{hg #1 #2}''}
 
+\newcommand{\tplkword}[1]{\index{\texttt{#1} template keyword}\index{template keywords!\texttt{#1}}\texttt{#1}}
+
+\newcommand{\tplkwfilt}[2]{\index{\texttt{#1} template keyword!\texttt{#2}
+    filter}\index{template filters!\texttt{#2}}\index{\texttt{#2}
+    template filter}\texttt{#2}}
+
+\newcommand{\tplfilter}[1]{\index{template
+    filters!\texttt{#1}}\index{\texttt{#1} template
+    filter}\texttt{#1}}
+
 % Shell/system command.
 \newcommand{\command}[1]{\index{\texttt{#1} command}\texttt{#1}}
 
 
 % Named item in a hgrc file section.
 \newcommand{\rcitem}[2]{\index{\texttt{hgrc} file!\texttt{#1}
-    section!\texttt{#2} entry}\texttt{#1.#2}}
+    section!\texttt{#2} entry}\texttt{#2}}
 
 % hgrc file.
 \newcommand{\hgrc}{\index{\texttt{hgrc} file}\texttt{hgrc}}
 \newcommand{\pymodclass}[2]{\index{\texttt{#1} module!\texttt{#2}
     class}\texttt{#1.#2}}
 
+% Python function in a module.
+\newcommand{\pymodfunc}[2]{\index{\texttt{#1} module!\texttt{#2}
+    function}\texttt{#1.#2}}
+
 % Note: blah blah.
 \newsavebox{\notebox}
 \newenvironment{note}%
   {\begin{lrbox}{\notebox}\begin{minipage}{0.7\textwidth}\textbf{Note:}\space}%
   {\end{minipage}\end{lrbox}\fbox{\usebox{\notebox}}}
+\newenvironment{caution}%
+  {\begin{lrbox}{\notebox}\begin{minipage}{0.7\textwidth}\textbf{Caution:}\space}%
+  {\end{minipage}\end{lrbox}\fbox{\usebox{\notebox}}}
 
 % Code sample, eating 4 characters of leading space.
 \DefineVerbatimEnvironment{codesample4}{Verbatim}{frame=single,gobble=4,numbers=left,commandchars=\\\{\}}
 % Interaction from the examples directory.
 \newcommand{\interaction}[1]{\VerbatimInput[frame=single,numbers=left,commandchars=\\\{\}]{examples/#1.out}}
 
+% Example code from the examples directory.
+\newcommand{\excode}[1]{\VerbatimInput[frame=single,numbers=left,commandchars=\\\{\}]{../examples/#1}}
+
 % Graphics inclusion.  Exactly one definition of \grafix must be active:
 % under \ifpdf the image is included by its base name, otherwise the
 % .png rendering is named explicitly.
 \ifpdf
   \newcommand{\grafix}[1]{\includegraphics{#1}}
 \else
   \newcommand{\grafix}[1]{\includegraphics{#1.png}}
 \fi
 
+% Reference entry for a command.
+\newcommand{\cmdref}[2]{\section{\hgcmd{#1}---#2}\label{cmdref:#1}\index{\texttt{#1} command}}
+
+% Reference entry for a command option with long and short forms.
+\newcommand{\optref}[3]{\subsubsection{\hgopt{#1}{--#3}, also \hgopt{#1}{-#2}}}
+
+% Reference entry for a command option with only long form.
+\newcommand{\loptref}[2]{\subsubsection{\hgopt{#1}{--#2} option}}
+
 %%% Local Variables: 
 %%% mode: latex
 %%% TeX-master: "00book"
 # This makefile requires GNU make.
 
-hg_id := $(shell hg parents --template '{node|short}\n' | head -1)
+hg_id := $(shell hg parents --template '{node|short}\n')
 
 sources := \
 	00book.tex \
 	99book.bib \
 	99defs.tex \
 	build_id.tex \
+	cmdref.tex \
+	concepts.tex \
+	daily.tex \
+	filenames.tex \
 	hook.tex \
 	intro.tex \
 	mq.tex \
-	preface.tex
+	mq-collab.tex \
+	mq-ref.tex \
+	preface.tex \
+	srcinstall.tex \
+	template.tex \
+	tour-basic.tex \
+	tour-merge.tex \
+	undo.tex
 
 image-sources := \
-	mq-stack.svg
+	filelog.svg \
+	kdiff3.png \
+	metadata.svg \
+	mq-stack.svg \
+	revlog.svg \
+	snapshot.svg \
+	tour-history.svg \
+	tour-merge-conflict.svg \
+	tour-merge-merge.svg \
+	tour-merge-pull.svg \
+	tour-merge-sep-repos.svg \
+	undo-manual.dot \
+	undo-manual-merge.dot \
+	undo-non-tip.dot \
+	undo-simple.dot \
+	wdir.svg \
+	wdir-after-commit.svg \
+	wdir-branch.svg \
+	wdir-merge.svg \
+	wdir-pre-branch.svg
+
+image-dot := $(filter %.dot,$(image-sources))
+image-svg := $(filter %.svg,$(image-sources))
+image-png := $(filter %.png,$(image-sources))
 
 example-sources := \
-	examples/run-example \
-	examples/hook.simple \
-	examples/mq.qinit-help \
-	examples/mq.diff \
-	examples/mq.tarball \
-	examples/mq.tools \
-	examples/mq.tutorial
+	backout \
+	bisect \
+	cmdref \
+	daily.copy \
+	daily.files \
+	daily.rename \
+	daily.revert \
+	filenames \
+	hook.msglen \
+	hook.simple \
+	hook.ws \
+	mq.guards \
+	mq.qinit-help \
+	mq.dodiff \
+	mq.id \
+	mq.tarball \
+	mq.tools \
+	mq.tutorial \
+	rollback \
+	template.simple \
+	template.svnstyle \
+	tour \
+	tour-merge-conflict
 
 latex-options = \
 	-interaction batchmode \
 	if grep 'Reference.*undefined' $(@:.pdf=.log); then exit 1; fi
 endef
 
-pdf/hgbook.pdf: $(sources) $(image-sources:%.svg=%.pdf) examples
+image-pdf := $(image-dot:%.dot=%.pdf) $(image-svg:%.svg=%.pdf) $(image-png)
+
+pdf/hgbook.pdf: $(sources) $(image-pdf) examples
 	$(call pdf)
 
 html: html/onepage/hgbook.html html/split/hgbook.html
 	perl -pi -e 's/&#x00([0-7][0-9a-f]);/chr(hex($$1))/egi' $(dir $(1))/*.html
 endef
 
-html/onepage/hgbook.html: $(sources) $(image-sources:%.svg=%.png) examples
+image-html := $(image-dot:%.dot=%.png) $(image-svg:%.svg=%.png) $(image-png)
+
+# Single-page HTML.  Copy exactly the images this target depends on:
+# $(image-html) names the PNGs generated from .svg and .dot sources plus
+# the PNGs that are shipped as-is.
+html/onepage/hgbook.html: $(sources) $(image-html) examples
 	$(call htlatex,$@,$<)
 	cp $(image-html) $(dir $@)
 
-html/split/hgbook.html: $(sources) $(image-sources:%.svg=%.png) examples
+# Multi-page HTML.  Copy exactly the images this target depends on:
+# $(image-html) names the PNGs generated from .svg and .dot sources plus
+# the PNGs that are shipped as-is.
+html/split/hgbook.html: $(sources) $(image-html) examples
 	$(call htlatex,$@,$<,2)
 	cp $(image-html) $(dir $@)
 
 %.png: %.svg
 	inkscape -D -e $@ $<
 
+%.svg: %.dot
+	dot -Tsvg -o $@ $<
+
 # Produce eps & pdf for the pdf
 
 %.pdf: %.eps
 %.eps: %.svg
 	inkscape -E $@ $<
 
+%.eps: %.dot
+	dot -Tps -o $@ $<
+
 examples: examples/.run
 
-examples/.run: $(example-sources)
-	cd examples && ./run-example
+examples/.run: $(example-sources:%=examples/%.run)
+	touch examples/.run
+
+examples/%.run: examples/% examples/run-example
+	cd examples && ./run-example $(notdir $<)
 
 # Record the changeset ID(s) of the working directory's parent(s), with
 # no trailing newline.  printf is used because "echo -n" is not portable
 # across /bin/sh implementations.
 build_id.tex: $(wildcard ../.hg/00changelog.[id])
 	printf '%s' '$(hg_id)' > build_id.tex
 
 clean:
-	rm -rf beta html pdf *.eps *.pdf *.png *.aux *.dvi *.log *.out \
-		examples/*.out examples/.run build_id.tex
+	rm -rf beta html pdf \
+		$(image-dot:%.dot=%.pdf) \
+		$(image-dot:%.dot=%.png) \
+		$(image-svg:%.svg=%.pdf) \
+		$(image-svg:%.svg=%.png) \
+		examples/*.{out,run} examples/.run build_id.tex
+#!/usr/bin/env python
+
+import getopt
+import itertools
+import os
+import re
+import sys
+
+def usage(exitcode):
+    print >> sys.stderr, ('usage: %s [-H|--hidden] hg_repo' % 
+                          os.path.basename(sys.argv[0]))
+    sys.exit(exitcode)
+
+try:
+    opts, args = getopt.getopt(sys.argv[1:], 'AHh?', ['all', 'help', 'hidden'])
+    opt_all = False
+    opt_hidden = False
+    for o, a in opts:
+        if o in ('-h', '-?', '--help'):
+            usage(0)
+        if o in ('-A', '--all'):
+            opt_all = True
+        if o in ('-H', '--hidden'):
+            opt_hidden = True
+except getopt.GetoptError, err:
+    print >> sys.stderr, 'error:', err
+    usage(1)
+
+try:
+    hg_repo, ltx_file = args
+except ValueError:
+    usage(1)
+
+if not os.path.isfile(os.path.join(hg_repo, 'mercurial', 'commands.py')):
+    print >> sys.stderr, ('error: %r does not contain mercurial code' %
+                          hg_repo)
+    sys.exit(1)
+
+sys.path.insert(0, hg_repo)
+
+from mercurial import commands
+
+def get_commands():
+    """Collect the commands and options found in mercurial's commands.table.
+
+    Returns a dict mapping each command name to a dict of
+    {long option name: short option name}.
+    """
+    seen = {}
+    for name, info in sorted(commands.table.iteritems()):
+        # Table keys may carry '|'-separated aliases and a leading '^';
+        # keep only the first name, with any '^' stripped.
+        aliases = name.split('|', 1)
+        name = aliases.pop(0).lstrip('^')
+        function, options, synopsis = info
+        seen[name] = {}
+        for shortopt, longopt, arg, desc in options:
+            seen[name][longopt] = shortopt
+    return seen
+
+def cmd_filter((name, aliases, options)):
+    """Say whether a (name, aliases, options) triple passes the -A/-H flags.
+
+    With -A every command passes; with -H only commands whose name starts
+    with 'debug' pass; otherwise only the non-'debug' commands pass.
+
+    NOTE(review): nothing in the visible part of this script calls this
+    function, so -A/-H appear to have no effect -- confirm.
+    """
+    if opt_all:
+        return True
+    if opt_hidden:
+        return name.startswith('debug')
+    return not name.startswith('debug')
+
+def scan(ltx_file):
+    cmdref_re = re.compile(r'^\\cmdref{(?P<cmd>\w+)}')
+    optref_re = re.compile(r'^\\l?optref{(?P<cmd>\w+)}'
+                           r'(?:{(?P<short>[^}])})?'
+                           r'{(?P<long>[^}]+)}')
+
+    seen = {}
+    locs = {}
+    for lnum, line in enumerate(open(ltx_file)):
+        m = cmdref_re.match(line)
+        if m:
+            d = m.groupdict()
+            cmd = d['cmd']
+            seen[cmd] = {}
+            locs[cmd] = lnum + 1
+            continue
+        m = optref_re.match(line)
+        if m:
+            d = m.groupdict()
+            seen[d['cmd']][d['long']] = d['short']
+            continue
+    return seen, locs
+    
+documented, locs = scan(ltx_file)
+known = get_commands()
+
+doc_set = set(documented)
+known_set = set(known)
+
+errors = 0
+
+for nonexistent in sorted(doc_set.difference(known_set)):
+    print >> sys.stderr, ('%s:%d: %r command does not exist' %
+                          (ltx_file, locs[nonexistent], nonexistent))
+    errors += 1
+
+def optcmp(a, b):
+    """cmp-style comparison of two (long option, short option) pairs.
+
+    Pairs are ordered by short option first; the long option only breaks
+    ties between equal short options.
+    """
+    la, sa = a
+    lb, sb = b
+    sc = cmp(sa, sb)
+    if sc:
+        return sc
+    return cmp(la, lb)
+
+for cmd in doc_set.intersection(known_set):
+    doc_opts = documented[cmd]
+    known_opts = known[cmd]
+    
+    do_set = set(doc_opts)
+    ko_set = set(known_opts)
+
+    for nonexistent in sorted(do_set.difference(ko_set)):
+        print >> sys.stderr, ('%s:%d: %r option to %r command does not exist' %
+                              (ltx_file, locs[cmd], nonexistent, cmd))
+        errors += 1
+
+    def mycmp(la, lb):
+        sa = known_opts[la]
+        sb = known_opts[lb]
+        return optcmp((la, sa), (lb, sb))
+
+    for undocumented in sorted(ko_set.difference(do_set), cmp=mycmp):
+        print >> sys.stderr, ('%s:%d: %r option to %r command not documented' %
+                              (ltx_file, locs[cmd], undocumented, cmd))
+        shortopt = known_opts[undocumented]
+        if shortopt:
+            print '\optref{%s}{%s}{%s}' % (cmd, shortopt, undocumented)
+        else:
+            print '\loptref{%s}{%s}' % (cmd, undocumented)
+        errors += 1
+    sys.stdout.flush()
+
+if errors:
+    sys.exit(1)
+
+sorted_locs = sorted(locs.iteritems(), key=lambda x:x[1])
+
+def next_loc(cmd):
+    for i, (name, loc) in enumerate(sorted_locs):
+        if name >= cmd:
+            return sorted_locs[i-1][1] + 1
+    return loc
+
+for undocumented in sorted(known_set.difference(doc_set)):
+    print >> sys.stderr, ('%s:%d: %r command not documented' %
+                          (ltx_file, next_loc(undocumented), undocumented))
+    print '\cmdref{%s}' % undocumented
+    for longopt, shortopt in sorted(known[undocumented].items(), cmp=optcmp):
+        if shortopt:
+            print '\optref{%s}{%s}{%s}' % (undocumented, shortopt, longopt)
+        else:
+            print '\loptref{%s}{%s}' % (undocumented, longopt)
+    sys.stdout.flush()
+    errors += 1
+
+sys.exit(errors and 1 or 0)
+\chapter{Command reference}
+\label{cmdref}
+
+\cmdref{add}{add files at the next commit}
+\optref{add}{I}{include}
+\optref{add}{X}{exclude}
+\optref{add}{n}{dry-run}
+
+\cmdref{diff}{print changes in history or working directory}
+
+Show differences between revisions for the specified files or
+directories, using the unified diff format.  For a description of the
+unified diff format, see section~\ref{sec:mq:patch}.
+
+By default, this command does not print diffs for files that Mercurial
+considers to contain binary data.  To control this behaviour, see the
+\hgopt{diff}{-a} and \hgopt{diff}{--git} options.
+
+\subsection{Options}
+
+\loptref{diff}{nodates}
+
+Omit date and time information when printing diff headers.
+
+\optref{diff}{B}{ignore-blank-lines}
+
+Do not print changes that only insert or delete blank lines.  A line
+that contains only whitespace is not considered blank.
+
+\optref{diff}{I}{include}
+
+Include files and directories whose names match the given patterns.
+
+\optref{diff}{X}{exclude}
+
+Exclude files and directories whose names match the given patterns.
+
+\optref{diff}{a}{text}
+
+If this option is not specified, \hgcmd{diff} will refuse to print
+diffs for files that it detects as binary. Specifying \hgopt{diff}{-a}
+forces \hgcmd{diff} to treat all files as text, and generate diffs for
+all of them.
+
+This option is useful for files that are ``mostly text'' but have a
+few embedded NUL characters.  If you use it on files that contain a
+lot of binary data, its output will be incomprehensible.
+
+\optref{diff}{b}{ignore-space-change}
+
+Do not print a line if the only change to that line is in the amount
+of white space it contains.
+
+\optref{diff}{g}{git}
+
+Print \command{git}-compatible diffs.  XXX reference a format
+description.
+
+\optref{diff}{p}{show-function}
+
+Display the name of the enclosing function in a hunk header, using a
+simple heuristic.  This functionality is enabled by default, so the
+\hgopt{diff}{-p} option has no effect unless you change the value of
+the \rcitem{diff}{showfunc} config item, as in the following example.
+\interaction{cmdref.diff-p}
+
+\optref{diff}{r}{rev}
+
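+Specify one or more revisions to compare.  The \hgcmd{diff} command
+accepts up to two \hgopt{diff}{-r} options to specify the revisions to
+compare.  What it displays depends on whether you supply none, one, or
+two of them, as listed below in that order.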
+
+\begin{enumerate}
+\setcounter{enumi}{0}
+\item Display the differences between the parent revision of the
+  working directory and the working directory.
+\item Display the differences between the specified changeset and the
+  working directory.
+\item Display the differences between the two specified changesets.
+\end{enumerate}
+
+You can specify two revisions using either two \hgopt{diff}{-r}
+options or revision range notation.  For example, the two revision
+specifications below are equivalent.
+\begin{codesample2}
+  hg diff -r 10 -r 20
+  hg diff -r10:20
+\end{codesample2}
+
+When you provide two revisions, Mercurial treats the order of those
+revisions as significant.  Thus, \hgcmdargs{diff}{-r10:20} will
+produce a diff that will transform files from their contents as of
+revision~10 to their contents as of revision~20, while
+\hgcmdargs{diff}{-r20:10} means the opposite: the diff that will
+transform files from their revision~20 contents to their revision~10
+contents.  You cannot reverse the ordering in this way if you are
+diffing against the working directory.
+
+\optref{diff}{w}{ignore-all-space}
+
+\cmdref{version}{print version and copyright information}
+
+This command displays the version of Mercurial you are running, and
+its copyright license.  There are four kinds of version string that
+you may see.
+\begin{itemize}
+\item The string ``\texttt{unknown}''. This version of Mercurial was
+  not built in a Mercurial repository, and cannot determine its own
+  version.
+\item A short numeric string, such as ``\texttt{1.1}''. This is a
+  build of a revision of Mercurial that was identified by a specific
+  tag in the repository where it was built.  (This doesn't necessarily
+  mean that you're running an official release; someone else could
+  have added that tag to any revision in the repository where they
+  built Mercurial.)
+\item A hexadecimal string, such as ``\texttt{875489e31abe}''.  This
+  is a build of the given revision of Mercurial.
+\item A hexadecimal string followed by a date, such as
+  ``\texttt{875489e31abe+20070205}''.  This is a build of the given
+  revision of Mercurial, where the build repository contained some
+  local changes that had not been committed.
+\end{itemize}
+
+\subsection{Tips and tricks}
+
+\subsubsection{Why do the results of \hgcmd{diff} and \hgcmd{status}
+  differ?}
+\label{cmdref:diff-vs-status}
+
+When you run the \hgcmd{status} command, you'll see a list of files
+that Mercurial will record changes for the next time you perform a
+commit.  If you run the \hgcmd{diff} command, you may notice that it
+prints diffs for only a \emph{subset} of the files that \hgcmd{status}
+listed.  There are two possible reasons for this.
+
+The first is that \hgcmd{status} prints some kinds of modifications
+that \hgcmd{diff} doesn't normally display.  The \hgcmd{diff} command
+normally outputs unified diffs, which don't have the ability to
+represent some changes that Mercurial can track.  Most notably,
+traditional diffs can't represent a change in whether or not a file is
+executable, but Mercurial records this information.
+
+If you use the \hgopt{diff}{--git} option to \hgcmd{diff}, it will
+display \command{git}-compatible diffs that \emph{can} display this
+extra information.
+
+The second possible reason that \hgcmd{diff} might be printing diffs
+for a subset of the files displayed by \hgcmd{status} is that if you
+invoke it without any arguments, \hgcmd{diff} prints diffs against the
+first parent of the working directory.  If you have run \hgcmd{merge}
+to merge two changesets, but you haven't yet committed the results of
+the merge, your working directory has two parents (use \hgcmd{parents}
+to see them).  While \hgcmd{status} prints modifications relative to
+\emph{both} parents after an uncommitted merge, \hgcmd{diff} still
+operates relative only to the first parent.  You can get it to print
+diffs relative to the second parent by specifying that parent with the
+\hgopt{diff}{-r} option.  There is no way to print diffs relative to
+both parents.
+
+\subsubsection{Generating safe binary diffs}
+
+If you use the \hgopt{diff}{-a} option to force Mercurial to print
+diffs of files that are either ``mostly text'' or contain lots of
+binary data, those diffs cannot subsequently be applied by either
+Mercurial's \hgcmd{import} command or the system's \command{patch}
+command.  
+
+If you want to generate a diff of a binary file that is safe to use as
+input for \hgcmd{import}, use the \hgopt{diff}{--git} option when you
+generate the patch.  The system \command{patch} command cannot handle
+binary patches at all.
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "00book"
+%%% End: 
+\chapter{Behind the scenes}
+\label{chap:concepts}
+
+Unlike many revision control systems, the concepts upon which
+Mercurial is built are simple enough that it's easy to understand how
+the software really works.  Knowing this certainly isn't necessary,
+but I find it useful to have a ``mental model'' of what's going on.
+
+This understanding gives me confidence that Mercurial has been
+carefully designed to be both \emph{safe} and \emph{efficient}.  And
+just as importantly, if it's easy for me to retain a good idea of what
+the software is doing when I perform a revision control task, I'm less
+likely to be surprised by its behaviour.
+
+In this chapter, we'll initially cover the core concepts behind
+Mercurial's design, then continue to discuss some of the interesting
+details of its implementation.
+
+\section{Mercurial's historical record}
+
+\subsection{Tracking the history of a single file}
+
+When Mercurial tracks modifications to a file, it stores the history
+of that file in a metadata object called a \emph{filelog}.  Each entry
+in the filelog contains enough information to reconstruct one revision
+of the file that is being tracked.  Filelogs are stored as files in
+the \sdirname{.hg/data} directory.  A filelog contains two kinds of
+information: revision data, and an index to help Mercurial to find a
+revision efficiently.  
+
+A file that is large, or has a lot of history, has its filelog stored
+in separate data (``\texttt{.d}'' suffix) and index (``\texttt{.i}''
+suffix) files.  For small files without much history, the revision
+data and index are combined in a single ``\texttt{.i}'' file.  The
+correspondence between a file in the working directory and the filelog
+that tracks its history in the repository is illustrated in
+figure~\ref{fig:concepts:filelog}.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{filelog}
+  \caption{Relationships between files in working directory and
+    filelogs in repository}
+  \label{fig:concepts:filelog}
+\end{figure}
+
+\subsection{Managing tracked files}
+
+Mercurial uses a structure called a \emph{manifest} to collect
+together information about the files that it tracks.  Each entry in
+the manifest contains information about the files present in a single
+changeset.  An entry records which files are present in the changeset,
+the revision of each file, and a few other pieces of file metadata.
+
+\subsection{Recording changeset information}
+
+The \emph{changelog} contains information about each changeset.  Each
+revision records who committed a change, the changeset comment, other
+pieces of changeset-related information, and the revision of the
+manifest to use.
+
+\subsection{Relationships between revisions}
+
+Within a changelog, a manifest, or a filelog, each revision stores a
+pointer to its immediate parent (or to its two parents, if it's a
+merge revision).  As I mentioned above, there are also relationships
+between revisions \emph{across} these structures, and they are
+hierarchical in nature.
+
+For every changeset in a repository, there is exactly one revision
+stored in the changelog.  Each revision of the changelog contains a
+pointer to a single revision of the manifest.  A revision of the
+manifest stores a pointer to a single revision of each filelog tracked
+when that changeset was created.  These relationships are illustrated
+in figure~\ref{fig:concepts:metadata}.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{metadata}
+  \caption{Metadata relationships}
+  \label{fig:concepts:metadata}
+\end{figure}
+
+As the illustration shows, there is \emph{not} a ``one to one''
+relationship between revisions in the changelog, manifest, or filelog.
+If the manifest hasn't changed between two changesets, the changelog
+entries for those changesets will point to the same revision of the
+manifest.  If a file that Mercurial tracks hasn't changed between two
+changesets, the entry for that file in the two revisions of the
+manifest will point to the same revision of its filelog.
+
+\section{Safe, efficient storage}
+
+The underpinnings of changelogs, manifests, and filelogs are provided
+by a single structure called the \emph{revlog}.
+
+\subsection{Efficient storage}
+
+The revlog provides efficient storage of revisions using a
+\emph{delta} mechanism.  Instead of storing a complete copy of a file
+for each revision, it stores the changes needed to transform an older
+revision into the new revision.  For many kinds of file data, these
+deltas are typically a fraction of a percent of the size of a full
+copy of a file.
+
+Some obsolete revision control systems can only work with deltas of
+text files.  They must either store binary files as complete snapshots
+or encoded into a text representation, both of which are wasteful
+approaches.  Mercurial can efficiently handle deltas of files with
+arbitrary binary contents; it doesn't need to treat text as special.
+
+\subsection{Safe operation}
+\label{sec:concepts:txn}
+
+Mercurial only ever \emph{appends} data to the end of a revlog file.
+It never modifies a section of a file after it has written it.  This
+is both more robust and efficient than schemes that need to modify or
+rewrite data.
+
+In addition, Mercurial treats every write as part of a
+\emph{transaction} that can span a number of files.  A transaction is
+\emph{atomic}: either the entire transaction succeeds and its effects
+are all visible to readers in one go, or the whole thing is undone.
+This guarantee of atomicity means that if you're running two copies of
+Mercurial, where one is reading data and one is writing it, the reader
+will never see a partially written result that might confuse it.
+
+The fact that Mercurial only appends to files makes it easier to
+provide this transactional guarantee.  The easier it is to do stuff
+like this, the more confident you should be that it's done correctly.
+
+\subsection{Fast retrieval}
+
+Mercurial cleverly avoids a pitfall common to all earlier
+revision control systems: the problem of \emph{inefficient retrieval}.
+Most revision control systems store the contents of a revision as an
+incremental series of modifications against a ``snapshot''.  To
+reconstruct a specific revision, you must first read the snapshot, and
+then every one of the revisions between the snapshot and your target
+revision.  The more history that a file accumulates, the more
+revisions you must read, hence the longer it takes to reconstruct a
+particular revision.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{snapshot}
+  \caption{Snapshot of a revlog, with incremental deltas}
+  \label{fig:concepts:snapshot}
+\end{figure}
+
+The innovation that Mercurial applies to this problem is simple but
+effective.  Once the cumulative amount of delta information stored
+since the last snapshot exceeds a fixed threshold, it stores a new
+snapshot (compressed, of course), instead of another delta.  This
+makes it possible to reconstruct \emph{any} revision of a file
+quickly.  This approach works so well that it has since been copied by
+several other revision control systems.
+
+Figure~\ref{fig:concepts:snapshot} illustrates the idea.  In an entry
+in a revlog's index file, Mercurial stores the range of entries from
+the data file that it must read to reconstruct a particular revision.
+
+\subsubsection{Aside: the influence of video compression}
+
+If you're familiar with video compression or have ever watched a TV
+feed through a digital cable or satellite service, you may know that
+most video compression schemes store each frame of video as a delta
+against its predecessor frame.  In addition, these schemes use
+``lossy'' compression techniques to increase the compression ratio, so
+visual errors accumulate over the course of a number of inter-frame
+deltas.
+
+Because it's possible for a video stream to ``drop out'' occasionally
+due to signal glitches, and to limit the accumulation of artefacts
+introduced by the lossy compression process, video encoders
+periodically insert a complete frame (called a ``key frame'') into the
+video stream; the next delta is generated against that frame.  This
+means that if the video signal gets interrupted, it will resume once
+the next key frame is received.  Also, the accumulation of encoding
+errors restarts anew with each key frame.
+
+\subsection{Identification and strong integrity}
+
+Along with delta or snapshot information, a revlog entry contains a
+cryptographic hash of the data that it represents.  This makes it
+difficult to forge the contents of a revision, and easy to detect
+accidental corruption.  
+
+Hashes provide more than a mere check against corruption; they are
+used as the identifiers for revisions.  The changeset identification
+hashes that you see as an end user are from revisions of the
+changelog.  Although filelogs and the manifest also use hashes,
+Mercurial only uses these behind the scenes.
+
+Mercurial verifies that hashes are correct when it retrieves file
+revisions and when it pulls changes from another repository.  If it
+encounters an integrity problem, it will complain and stop whatever
+it's doing.
+
+In addition to the effect it has on retrieval efficiency, Mercurial's
+use of periodic snapshots makes it more robust against partial data
+corruption.  If a revlog becomes partly corrupted due to a hardware
+error or system bug, it's often possible to reconstruct some or most
+revisions from the uncorrupted sections of the revlog, both before and
+after the corrupted section.  This would not be possible with a
+delta-only storage model.
+
+\section{Revision history, branching,
+  and merging}
+
+Every entry in a Mercurial revlog knows the identity of its immediate
+ancestor revision, usually referred to as its \emph{parent}.  In fact,
+a revision contains room for not one parent, but two.  Mercurial uses
+a special hash, called the ``null ID'', to represent the idea ``there
+is no parent here''.  This hash is simply a string of zeroes.
+
+In figure~\ref{fig:concepts:revlog}, you can see an example of the
+conceptual structure of a revlog.  Filelogs, manifests, and changelogs
+all have this same structure; they differ only in the kind of data
+stored in each delta or snapshot.
+
+The first revision in a revlog (at the bottom of the image) has the
+null ID in both of its parent slots.  For a ``normal'' revision, its
+first parent slot contains the ID of its parent revision, and its
+second contains the null ID, indicating that the revision has only one
+real parent.  Any two revisions that have the same parent ID are
+branches.  A revision that represents a merge between branches has two
+normal revision IDs in its parent slots.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{revlog}
+  \caption{The conceptual structure of a revlog}
+  \label{fig:concepts:revlog}
+\end{figure}
+
+\section{The working directory}
+
+In the working directory, Mercurial stores a snapshot of the files
+from the repository as of a particular changeset.
+
+The working directory ``knows'' which changeset it contains.  When you
+update the working directory to contain a particular changeset,
+Mercurial looks up the appropriate revision of the manifest to find
+out which files it was tracking at the time that changeset was
+committed, and which revision of each file was then current.  It then
+recreates a copy of each of those files, with the same contents it had
+when the changeset was committed.
+
+The \emph{dirstate} contains Mercurial's knowledge of the working
+directory.  This details which changeset the working directory is
+updated to, and all of the files that Mercurial is tracking in the
+working directory.
+
+Just as a revision of a revlog has room for two parents, so that it
+can represent either a normal revision (with one parent) or a merge of
+two earlier revisions, the dirstate has slots for two parents.  When
+you use the \hgcmd{update} command, the changeset that you update to
+is stored in the ``first parent'' slot, and the null ID in the second.
+When you \hgcmd{merge} with another changeset, the first parent
+remains unchanged, and the second parent is filled in with the
+changeset you're merging with.  The \hgcmd{parents} command tells you
+what the parents of the dirstate are.
+
+\subsection{What happens when you commit}
+
+The dirstate stores parent information for more than just book-keeping
+purposes.  Mercurial uses the parents of the dirstate as \emph{the
+  parents of a new changeset} when you perform a commit.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{wdir}
+  \caption{The working directory can have two parents}
+  \label{fig:concepts:wdir}
+\end{figure}
+
+Figure~\ref{fig:concepts:wdir} shows the normal state of the working
+directory, where it has a single changeset as parent.  That changeset
+is the \emph{tip}, the newest changeset in the repository that has no
+children.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{wdir-after-commit}
+  \caption{The working directory gains new parents after a commit}
+  \label{fig:concepts:wdir-after-commit}
+\end{figure}
+
+It's useful to think of the working directory as ``the changeset I'm
+about to commit''.  Any files that you tell Mercurial that you've
+added, removed, renamed, or copied will be reflected in that
+changeset, as will modifications to any files that Mercurial is
+already tracking; the new changeset will have the parents of the
+working directory as its parents.
+
+After a commit, Mercurial will update the parents of the working
+directory, so that the first parent is the ID of the new changeset,
+and the second is the null ID.  This is shown in
+figure~\ref{fig:concepts:wdir-after-commit}.  Mercurial doesn't touch
+any of the files in the working directory when you commit; it just
+modifies the dirstate to note its new parents.
+
+\subsection{Creating a new head}
+
+It's perfectly normal to update the working directory to a changeset
+other than the current tip.  For example, you might want to know what
+your project looked like last Tuesday, or you could be looking through
+changesets to see which one introduced a bug.  In cases like this, the
+natural thing to do is update the working directory to the changeset
+you're interested in, and then examine the files in the working
+directory directly to see their contents as they were when you
+committed that changeset.  The effect of this is shown in
+figure~\ref{fig:concepts:wdir-pre-branch}.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{wdir-pre-branch}
+  \caption{The working directory, updated to an older changeset}
+  \label{fig:concepts:wdir-pre-branch}
+\end{figure}
+
+Having updated the working directory to an older changeset, what
+happens if you make some changes, and then commit?  Mercurial behaves
+in the same way as I outlined above.  The parents of the working
+directory become the parents of the new changeset.  This new changeset
+has no children, so it becomes the new tip.  And the repository now
+contains two changesets that have no children; we call these
+\emph{heads}.  You can see the structure that this creates in
+figure~\ref{fig:concepts:wdir-branch}.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{wdir-branch}
+  \caption{After a commit made while synced to an older changeset}
+  \label{fig:concepts:wdir-branch}
+\end{figure}
+
+\begin{note}
+  If you're new to Mercurial, you should keep in mind a common
+  ``error'', which is to use the \hgcmd{pull} command without any
+  options.  By default, the \hgcmd{pull} command \emph{does not}
+  update the working directory, so you'll bring new changesets into
+  your repository, but the working directory will stay synced at the
+  same changeset as before the pull.  If you make some changes and
+  commit afterwards, you'll thus create a new head, because your
+  working directory isn't synced to whatever the current tip is.
+
+  I put the word ``error'' in quotes because all that you need to do
+  to rectify this situation is \hgcmd{merge}, then \hgcmd{commit}.  In
+  other words, this almost never has negative consequences; it just
+  surprises people.  I'll discuss other ways to avoid this behaviour,
+  and why Mercurial behaves in this initially surprising way, later
+  on.
+\end{note}
+
+\subsection{Merging heads}
+
+When you run the \hgcmd{merge} command, Mercurial leaves the first
+parent of the working directory unchanged, and sets the second parent
+to the changeset you're merging with, as shown in
+figure~\ref{fig:concepts:wdir-merge}.
+
+\begin{figure}[ht]
+  \centering
+  \grafix{wdir-merge}
+  \caption{Merging two heads}
+  \label{fig:concepts:wdir-merge}
+\end{figure}
+
+Mercurial also has to modify the working directory, to merge the files
+managed in the two changesets.  Simplified a little, the merging
+process goes like this, for every file in the manifests of both
+changesets.
+\begin{itemize}
+\item If neither changeset has modified a file, do nothing with that
+  file.
+\item If one changeset has modified a file, and the other hasn't,
+  create the modified copy of the file in the working directory.
+\item If one changeset has removed a file, and the other hasn't (or
+  has also deleted it), delete the file from the working directory.
+\item If one changeset has removed a file, but the other has modified
+  the file, ask the user what to do: keep the modified file, or remove
+  it?
+\item If both changesets have modified a file, invoke an external
+  merge program to choose the new contents for the merged file.  This
+  may require input from the user.
+\item If one changeset has modified a file, and the other has renamed
+  or copied the file, make sure that the changes follow the new name
+  of the file.
+\end{itemize}
+There are more details---merging has plenty of corner cases---but
+these are the most common choices that are involved in a merge.  As
+you can see, most cases are completely automatic, and indeed most
+merges finish automatically, without requiring your input to resolve
+any conflicts.
+
+When you're thinking about what happens when you commit after a merge,
+once again the working directory is ``the changeset I'm about to
+commit''.  After the \hgcmd{merge} command completes, the working
+directory has two parents; these will become the parents of the new
+changeset.
+
+Mercurial lets you perform multiple merges, but you must commit the
+results of each individual merge as you go.  This is necessary because
+Mercurial only tracks two parents for both revisions and the working
+directory.  While it would be technically possible to merge multiple
+changesets at once, the prospect of user confusion and making a
+terrible mess of a merge immediately becomes overwhelming.
+
+\section{Other interesting design features}
+
+In the sections above, I've tried to highlight some of the most
+important aspects of Mercurial's design, to illustrate that it pays
+careful attention to reliability and performance.  However, the
+attention to detail doesn't stop there.  There are a number of other
+aspects of Mercurial's construction that I personally find
+interesting.  I'll detail a few of them here, separate from the ``big
+ticket'' items above, so that if you're interested, you can gain a
+better idea of the amount of thinking that goes into a well-designed
+system.
+
+\subsection{Clever compression}
+
+When appropriate, Mercurial will store both snapshots and deltas in
+compressed form.  It does this by always \emph{trying to} compress a
+snapshot or delta, but only storing the compressed version if it's
+smaller than the uncompressed version.
+
+This means that Mercurial does ``the right thing'' when storing a file
+whose native form is compressed, such as a \texttt{zip} archive or a
+JPEG image.  When these types of files are compressed a second time,
+the resulting file is usually bigger than the once-compressed form,
+and so Mercurial will store the plain \texttt{zip} or JPEG.
+
+Deltas between revisions of a compressed file are usually larger than
+snapshots of the file, and Mercurial again does ``the right thing'' in
+these cases.  It finds that such a delta exceeds the threshold at
+which it should store a complete snapshot of the file, so it stores
+the snapshot, again saving space compared to a naive delta-only
+approach.
+
+\subsubsection{Network recompression}
+
+When storing revisions on disk, Mercurial uses the ``deflate''
+compression algorithm (the same one used by the popular \texttt{zip}
+archive format), which balances good speed with a respectable
+compression ratio.  However, when transmitting revision data over a
+network connection, Mercurial uncompresses the compressed revision
+data.
+
+If the connection is over HTTP, Mercurial recompresses the entire
+stream of data using a compression algorithm that gives a better
+compression ratio (the Burrows-Wheeler algorithm from the widely used
+\texttt{bzip2} compression package).  This combination of algorithm
+and compression of the entire stream (instead of a revision at a time)
+substantially reduces the number of bytes to be transferred, yielding
+better network performance over almost all kinds of network.
+
+(If the connection is over \command{ssh}, Mercurial \emph{doesn't}
+recompress the stream, because \command{ssh} can already do this
+itself.)
+
+\subsection{Read/write ordering and atomicity}
+
+Appending to files isn't the whole story when it comes to guaranteeing
+that a reader won't see a partial write.  If you recall
+figure~\ref{fig:concepts:metadata}, revisions in the changelog point to
+revisions in the manifest, and revisions in the manifest point to
+revisions in filelogs.  This hierarchy is deliberate.
+
+A writer starts a transaction by writing filelog and manifest data,
+and doesn't write any changelog data until those are finished.  A
+reader starts by reading changelog data, then manifest data, followed
+by filelog data.
+
+Since the writer has always finished writing filelog and manifest data
+before it writes to the changelog, a reader will never read a pointer
+to a partially written manifest revision from the changelog, and it will
+never read a pointer to a partially written filelog revision from the
+manifest.
+
+\subsection{Concurrent access}
+
+The read/write ordering and atomicity guarantees mean that Mercurial
+never needs to \emph{lock} a repository when it's reading data, even
+if the repository is being written to while the read is occurring.
+This has a big effect on scalability; you can have an arbitrary number
+of Mercurial processes safely reading data from a repository
+all at once, no matter whether it's being written to or not.
+
+The lockless nature of reading means that if you're sharing a
+repository on a multi-user system, you don't need to grant other local
+users permission to \emph{write} to your repository in order for them
+to be able to clone it or pull changes from it; they only need
+\emph{read} permission.  (This is \emph{not} a common feature among
+revision control systems, so don't take it for granted!  Most require
+readers to be able to lock a repository to access it safely, and this
+requires write permission on at least one directory, which of course
+makes for all kinds of nasty and annoying security and administrative
+problems.)
+
+Mercurial uses locks to ensure that only one process can write to a
+repository at a time (the locking mechanism is safe even over
+filesystems that are notoriously hostile to locking, such as NFS).  If
+a repository is locked, a writer will wait for a while, retrying in
+case the repository becomes unlocked, but if the repository remains
+locked for too long, the process attempting to write will time out.
+This means that your daily automated scripts won't get stuck forever
+and pile up if a system crashes unnoticed, for example.  (Yes, the
+timeout is configurable, from zero to infinity.)
+
+\subsubsection{Safe dirstate access}
+
+As with revision data, Mercurial doesn't take a lock to read the
+dirstate file; it does acquire a lock to write it.  To avoid the
+possibility of reading a partially written copy of the dirstate file,
+Mercurial writes to a file with a unique name in the same directory as
+the dirstate file, then renames the temporary file atomically to
+\filename{dirstate}.  The file named \filename{dirstate} is thus
+guaranteed to be complete, not partially written.
+
+\subsection{Avoiding seeks}
+
+Critical to Mercurial's performance is the avoidance of seeks of the
+disk head, since any seek is far more expensive than even a
+comparatively large read operation.
+
+This is why, for example, the dirstate is stored in a single file.  If
+there were a dirstate file per directory that Mercurial tracked, the
+disk would seek once per directory.  Instead, Mercurial reads the
+entire single dirstate file in one step.
+
+Mercurial also uses a ``copy on write'' scheme when cloning a
+repository on local storage.  Instead of copying every revlog file
+from the old repository into the new repository, it makes a ``hard
+link'', which is a shorthand way to say ``these two names point to the
+same file''.  When Mercurial is about to write to one of a revlog's
+files, it checks to see if the number of names pointing at the file is
+greater than one.  If it is, more than one repository is using the
+file, so Mercurial makes a new copy of the file that is private to
+this repository.
+
+A few revision control developers have pointed out that this idea of
+making a complete private copy of a file is not very efficient in its
+use of storage.  While this is true, storage is cheap, and this method
+gives the highest performance while deferring most book-keeping to the
+operating system.  An alternative scheme would most likely reduce
+performance and increase the complexity of the software, each of which
+is much more important to the ``feel'' of day-to-day use.
+
+\subsection{Other contents of the dirstate}
+
+Because Mercurial doesn't force you to tell it when you're modifying a
+file, it uses the dirstate to store some extra information so it can
+determine efficiently whether you have modified a file.  For each file
+in the working directory, it stores the time that it last modified the
+file itself, and the size of the file at that time.  
+
+When you explicitly \hgcmd{add}, \hgcmd{remove}, \hgcmd{rename} or
+\hgcmd{copy} files, Mercurial updates the dirstate so that it knows
+what to do with those files when you commit.
+
+When Mercurial is checking the states of files in the working
+directory, it first checks a file's modification time.  If that has
+not changed, the file must not have been modified.  If the file's size
+has changed, the file must have been modified.  If the modification
+time has changed, but the size has not, only then does Mercurial need
+to read the actual contents of the file to see if they've changed.
+Storing these few extra pieces of information dramatically reduces the
+amount of data that Mercurial needs to read, which yields large
+performance improvements compared to other revision control systems.
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "00book"
+%%% End:
+\chapter{Mercurial in daily use}
+\label{chap:daily}
+
+\section{Telling Mercurial which files to track}
+
+Mercurial does not work with files in your repository unless you tell
+it to manage them.  The \hgcmd{status} command will tell you which
+files Mercurial doesn't know about; it uses a ``\texttt{?}'' to
+display such files.
+
+To tell Mercurial to track a file, use the \hgcmd{add} command.  Once
+you have added a file, the entry in the output of \hgcmd{status} for
+that file changes from ``\texttt{?}'' to ``\texttt{A}''.
+\interaction{daily.files.add}
+
+After you run a \hgcmd{commit}, the files that you added before the
+commit will no longer be listed in the output of \hgcmd{status}.  The
+reason for this is that \hgcmd{status} only tells you about
+``interesting'' files---those that you have modified or told Mercurial
+to do something with---by default.  If you have a repository that
+contains thousands of files, you will rarely want to know about files
+that Mercurial is tracking, but that have not changed.  (You can still
+get this information; we'll return to this later.)
+
+Once you add a file, Mercurial doesn't do anything with it
+immediately.  Instead, it will take a snapshot of the file's state the
+next time you perform a commit.  It will then continue to track the
+changes you make to the file every time you commit, until you remove
+the file.
+
+\subsection{Explicit versus implicit file naming}
+
+A useful behaviour that Mercurial has is that if you pass the name of
+a directory to a command, every Mercurial command will treat this as
+``I want to operate on every file in this directory and its
+subdirectories''.
+\interaction{daily.files.add-dir}
+Notice in this example that Mercurial printed the names of the files
+it added, whereas it didn't do so when we added the file named
+\filename{a} in the earlier example.
+
+What's going on is that in the former case, we explicitly named the
+file to add on the command line, so the assumption that Mercurial
+makes in such cases is that we know what we are doing, and it
+doesn't print any output.
+
+However, when we \emph{imply} the names of files by giving the name of
+a directory, Mercurial takes the extra step of printing the name of
+each file that it does something with.  This makes it more clear what
+is happening, and reduces the likelihood of a silent and nasty
+surprise.  This behaviour is common to most Mercurial commands.
+
+\subsection{Aside: Mercurial tracks files, not directories}
+
+Mercurial does not track directory information.  Instead, it tracks
+the path to a file.  Before creating a file, it first creates any
+missing directory components of the path.  After it deletes a file, it
+then deletes any empty directories that were in the deleted file's
+path.  This sounds like a trivial distinction, but it has one minor
+practical consequence: it is not possible to represent a completely
+empty directory in Mercurial.
+
+Empty directories are rarely useful, and there are unintrusive
+workarounds that you can use to achieve an appropriate effect.  The
+developers of Mercurial thus felt that the complexity that would be
+required to manage empty directories was not worth the limited benefit
+this feature would bring.
+
+If you need an empty directory in your repository, there are a few
+ways to achieve this. One is to create a directory, then \hgcmd{add} a
+``hidden'' file to that directory.  On Unix-like systems, any file
+name that begins with a period (``\texttt{.}'') is treated as hidden
+by most commands and GUI tools.  This approach is illustrated in
+figure~\ref{ex:daily:hidden}.
+
+\begin{figure}[ht]
+  \interaction{daily.files.hidden}
+  \caption{Simulating an empty directory using a hidden file}
+  \label{ex:daily:hidden}
+\end{figure}
+
+Another way to tackle a need for an empty directory is to simply
+create one in your automated build scripts before they will need it.
+
+\section{How to stop tracking a file}
+
+Once you decide that a file no longer belongs in your repository, use
+the \hgcmd{remove} command; this deletes the file, and tells Mercurial
+to stop tracking it.  A removed file is represented in the output of
+\hgcmd{status} with a ``\texttt{R}''.
+\interaction{daily.files.remove}
+
+After you \hgcmd{remove} a file, Mercurial will no longer track
+changes to that file, even if you recreate a file with the same name
+in your working directory.  If you do recreate a file with the same
+name and want Mercurial to track the new file, simply \hgcmd{add} it.
+Mercurial will know that the newly added file is not related to the
+old file of the same name.
+
+\subsection{Removing a file does not affect its history}
+
+It is important to understand that removing a file has only two
+effects.
+\begin{itemize}
+\item It removes the current version of the file from the working
+  directory.
+\item It stops Mercurial from tracking changes to the file, from the
+  time of the next commit.
+\end{itemize}
+Removing a file \emph{does not} in any way alter the \emph{history} of
+the file.
+
+If you update the working directory to a changeset in which a file
+that you have removed was still tracked, it will reappear in the
+working directory, with the contents it had when you committed that
+changeset.  If you then update the working directory to a later
+changeset, in which the file had been removed, Mercurial will once
+again remove the file from the working directory.
+
+\subsection{Missing files}
+
+Mercurial considers a file that you have deleted, but not used
+\hgcmd{remove} to delete, to be \emph{missing}.  A missing file is
+represented with ``\texttt{!}'' in the output of \hgcmd{status}.
+Mercurial commands will not generally do anything with missing files.
+\interaction{daily.files.missing}
+
+If your repository contains a file that \hgcmd{status} reports as
+missing, and you want the file to stay gone, you can run
+\hgcmdargs{remove}{\hgopt{remove}{--after}} at any time later on, to
+tell Mercurial that you really did mean to remove the file.
+\interaction{daily.files.remove-after}
+
+On the other hand, if you deleted the missing file by accident, use
+\hgcmdargs{revert}{\emph{filename}} to recover the file.  It will
+reappear, in unmodified form.
+\interaction{daily.files.recover-missing}
+
+\subsection{Aside: why tell Mercurial explicitly to 
+  remove a file?}
+
+You might wonder why Mercurial requires you to explicitly tell it that
+you are deleting a file.  Early during the development of Mercurial,
+it let you delete a file however you pleased; Mercurial would notice
+the absence of the file automatically when you next ran a
+\hgcmd{commit}, and stop tracking the file.  In practice, this made it
+too easy to accidentally remove a file without noticing.
+
+\subsection{Useful shorthand---adding and removing files
+  in one step}
+
+Mercurial offers a combination command, \hgcmd{addremove}, that adds
+untracked files and marks missing files as removed.  
+\interaction{daily.files.addremove}
+The \hgcmd{commit} command also provides a \hgopt{commit}{-A} option
+that performs this same add-and-remove, immediately followed by a
+commit.
+\interaction{daily.files.commit-addremove}
+
+\section{Copying files}
+
+Mercurial provides a \hgcmd{copy} command that lets you make a new
+copy of a file.  When you copy a file using this command, Mercurial
+makes a record of the fact that the new file is a copy of the original
+file.  It treats these copied files specially when you merge your work
+with someone else's.
+
+\subsection{The results of copying during a merge}
+
+What happens during a merge is that changes ``follow'' a copy.  To
+best illustrate what this means, let's create an example.  We'll start
+with the usual tiny repository that contains a single file.
+\interaction{daily.copy.init}
+We need to do some work in parallel, so that we'll have something to
+merge.  So let's clone our repository.
+\interaction{daily.copy.clone}
+Back in our initial repository, let's use the \hgcmd{copy} command to
+make a copy of the first file we created.
+\interaction{daily.copy.copy}
+
+If we look at the output of the \hgcmd{status} command afterwards, the
+copied file looks just like a normal added file.
+\interaction{daily.copy.status}
+But if we pass the \hgopt{status}{-C} option to \hgcmd{status}, it
+prints another line of output: this is the file that our newly-added
+file was copied \emph{from}.
+\interaction{daily.copy.status-copy}
+
+Now, back in the repository we cloned, let's make a change in
+parallel.  We'll add a line of content to the original file that we
+created.
+\interaction{daily.copy.other}
+Now we have a modified \filename{file} in this repository.  When we
+pull the changes from the first repository, and merge the two heads,
+Mercurial will propagate the changes that we made locally to
+\filename{file} into its copy, \filename{new-file}.
+\interaction{daily.copy.merge}
+
+\subsection{Why should changes follow copies?}
+\label{sec:daily:why-copy}
+
+This behaviour, of changes to a file propagating out to copies of the
+file, might seem esoteric, but in most cases it's highly desirable.
+
+First of all, remember that this propagation \emph{only} happens when
+you merge.  So if you \hgcmd{copy} a file, and subsequently modify the
+original file during the normal course of your work, nothing will
+happen.
+
+The second thing to know is that modifications will only propagate
+across a copy as long as the repository that you're pulling changes
+from \emph{doesn't know} about the copy.
+
+The reason that Mercurial does this is as follows.  Let's say I make
+an important bug fix in a source file, and commit my changes.
+Meanwhile, you've decided to \hgcmd{copy} the file in your repository,
+without knowing about the bug or having seen the fix, and you have
+started hacking on your copy of the file.
+
+If you pulled and merged my changes, and Mercurial \emph{didn't}
+propagate changes across copies, your new source file would still
+contain the bug, and unless you remembered to propagate the bug fix
+by hand, the bug would \emph{remain} in your copy of the file.
+
+By automatically propagating the change that fixed the bug from the
+original file to the copy, Mercurial prevents this class of problem.
+To my knowledge, Mercurial is the \emph{only} revision control system
+that propagates changes across copies like this.
+
+Once your change history has a record that the copy and subsequent
+merge occurred, there's usually no further need to propagate changes
+from the original file to the copied file, and that's why Mercurial
+only propagates changes across copies until this point, and no
+further.
+
+\subsection{How to make changes \emph{not} follow a copy}
+
+If, for some reason, you decide that this business of automatically
+propagating changes across copies is not for you, simply use your
+system's normal file copy command (on Unix-like systems, that's
+\command{cp}) to make a copy of a file, then \hgcmd{add} the new copy
+by hand.  Before you do so, though, please do reread
+section~\ref{sec:daily:why-copy}, and make an informed decision that
+this behaviour is not appropriate to your specific case.
+
+\subsection{Behaviour of the \hgcmd{copy} command}
+
+When you use the \hgcmd{copy} command, Mercurial makes a copy of each
+source file as it currently stands in the working directory.  This
+means that if you make some modifications to a file, then copy it
+without first having committed those changes, the new copy will
+contain your modifications.
+
+The \hgcmd{copy} command acts similarly to the Unix \command{cp}
+command (you can use the \hgcmd{cp} alias if you prefer).  The last
+argument is the \emph{destination}, and all prior arguments are
+\emph{sources}.  If you pass it a single file as the source, and the
+destination does not exist, it creates a new file with that name.
+\interaction{daily.copy.simple}
+If the destination is a directory, Mercurial copies its sources into
+that directory.
+\interaction{daily.copy.dir-dest}
+Copying a directory is recursive, and preserves the directory
+structure of the source.
+\interaction{daily.copy.dir-src}
+If the source and destination are both directories, the source tree is
+recreated in the destination directory.
+\interaction{daily.copy.dir-src-dest}
+
+As with the \hgcmd{rename} command, if you copy a file manually and
+then want Mercurial to know that you've copied the file, simply use
+the \hgopt{copy}{--after} option to \hgcmd{copy}.
+\interaction{daily.copy.after}
+
+\section{Renaming files}
+
+It's rather more common to need to rename a file than to make a copy
+of it.  The reason I discussed the \hgcmd{copy} command before talking
+about renaming files is that Mercurial treats a rename in essentially
+the same way as a copy.  Therefore, knowing what Mercurial does when
+you copy a file tells you what to expect when you rename a file.
+
+When you use the \hgcmd{rename} command, Mercurial makes a copy of
+each source file, then deletes it and marks the file as removed.
+\interaction{daily.rename.rename}
+The \hgcmd{status} command shows the newly copied file as added, and
+the copied-from file as removed.
+\interaction{daily.rename.status}
+As with the results of a \hgcmd{copy}, we must use the
+\hgopt{status}{-C} option to \hgcmd{status} to see that the added file
+is really being tracked by Mercurial as a copy of the original, now
+removed, file.
+\interaction{daily.rename.status-copy}
+
+As with \hgcmd{remove} and \hgcmd{copy}, you can tell Mercurial about
+a rename after the fact using the \hgopt{rename}{--after} option.  In
+most other respects, the behaviour of the \hgcmd{rename} command, and
+the options it accepts, are similar to the \hgcmd{copy} command.
+
+\subsection{Renaming files and merging changes}
+
+Since Mercurial's rename is implemented as copy-and-remove, the same
+propagation of changes happens when you merge after a rename as after
+a copy.
+
+If I modify a file, and you rename it to a new name, then we merge our
+respective changes, my modifications to the file under its original
+name will be propagated into the file under its new name.  (This is
+something you might expect to ``simply work,'' but not all revision
+control systems actually do this.)
+
+Whereas having changes follow a copy is a feature where you can
+perhaps nod and say ``yes, that might be useful,'' it should be clear
+that having them follow a rename is definitely important.  Without
+this facility, it would simply be too easy for changes to become
+orphaned when files are renamed.
+
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "00book"
+%%% End: 

en/examples/backout

+#!/bin/bash
+
+# We have to fake the merges here, because they cause conflicts with
+# three-way command-line merge, and kdiff3 may not be available.
+
+export HGMERGE=$(mktemp)
+echo '#!/bin/sh' >> $HGMERGE
+echo 'echo first change > "$1"' >> $HGMERGE
+echo 'echo third change > "$1"' >> $HGMERGE
+chmod 700 $HGMERGE
+
+#$ name: init
+
+hg init myrepo
+cd myrepo
+echo first change >> myfile
+hg add myfile
+hg commit -m 'first change'
+echo second change >> myfile
+hg commit -m 'second change'
+
+#$ name: simple
+
+hg backout -m 'back out second change' tip
+cat myfile
+
+#$ name: simple.log
+
+hg log --style compact
+
+#$ name: non-tip.clone
+
+cd ..
+hg clone -r1 myrepo non-tip-repo
+cd non-tip-repo
+
+#$ name: non-tip.backout
+
+echo third change >> myfile
+hg commit -m 'third change'
+hg backout --merge -m 'back out second change' 1
+
+#$ name: non-tip.cat
+cat myfile
+
+#$ name: manual.clone
+
+cd ..
+hg clone -r1 myrepo newrepo
+cd newrepo
+
+#$ name: manual.backout
+
+echo third change >> myfile
+hg commit -m 'third change'
+hg backout -m 'back out second change' 1
+
+#$ name: manual.log
+
+hg log --style compact
+
+#$ name: manual.parents
+
+hg parents
+
+#$ name: manual.heads
+
+hg heads
+
+#$ name: manual.cat
+
+cat myfile
+
+#$ name: manual.merge
+
+hg merge
+hg commit -m 'merged backout with previous tip'
+cat myfile
+
+#$ name:
+
+rm $HGMERGE

en/examples/bisect

+#!/bin/bash
+
+# Turn on the hbisect extension by appending an [extensions] section
+# to the configuration file named by $HGRC.  The variable is quoted so
+# that a path containing whitespace is still treated as one file name
+# instead of making the redirection ambiguous.
+echo '[extensions]' >> "$HGRC"
+echo 'hbisect =' >> "$HGRC"
+
+# Flow of the sections below: create a repository and commit 50
+# changesets, each adding one file; the one made at loop index
+# $buggy_change adds a file containing the marker 'i have a gub'.
+# The search sections then drive `hg bisect`: a revision is judged
+# "bad" when grep finds the marker in any file of the current
+# directory, "good" otherwise.  search.mytest wraps the test shown in
+# search.step1 in a shell function so the later steps can just call
+# it.
+#
+# NOTE(review): each "#$ name: <section>" marker presumably delimits
+# a snippet that the book pulls in by name, so explanatory comments
+# are kept out of the named sections -- confirm against the example
+# runner before adding any there.
+
+#$ name: init
+
+hg init mybug
+cd mybug
+
+#$ name: commits
+
+buggy_change=37
+
+for (( i = 0; i < 50; i++ )); do
+  if [[ $i = $buggy_change ]]; then
+    echo 'i have a gub' > myfile$i
+    hg commit -q -A -m 'buggy changeset'
+  else
+    echo 'nothing to see here, move along' > myfile$i
+    hg commit -q -A -m 'normal changeset'
+  fi
+done
+
+#$ name: help
+
+hg help bisect
+hg bisect help
+
+#$ name: search.init
+
+hg bisect init
+
+#$ name: search.bad-init
+
+hg bisect bad
+
+#$ name: search.good-init
+
+hg bisect good 10
+
+#$ name: search.step1
+
+if grep -q 'i have a gub' *
+then
+  result=bad
+else
+  result=good
+fi
+
+echo this revision is $result
+hg bisect $result
+
+#$ name: search.mytest
+
+mytest() {
+  if grep -q 'i have a gub' *
+  then
+    result=bad
+  else
+    result=good
+  fi
+
+  echo this revision is $result
+  hg bisect $result
+}
+  
+#$ name: search.step2
+
+mytest
+
+#$ name: search.rest
+
+mytest
+mytest
+mytest
+
+#$ name: search.reset
+
+hg bisect reset

en/examples/cmdref

+#!/bin/bash
+
+# Fixture for the diff-p section: a repository holding one committed
+# C file, which is then modified in the working directory so that
+# `hg diff` has a change inside a function body to report.
+
+hg init diff
+cd diff
+cat > myfile.c <<EOF
+int myfunc()
+{
+    return 1;
+}
+EOF
+hg ci -Ama
+
+# Rewrite through a temporary file rather than `sed -ie`: sed reads
+# "-ie" as -i with the backup suffix "e", which edits the file but
+# also leaves a stray myfile.ce behind in the working directory.
+sed -e 's/return 1/return 10/' myfile.c > myfile.c.new
+mv myfile.c.new myfile.c
+
+#$ name: diff-p
+
+echo '[diff]' >> $HGRC
+echo 'showfunc = False' >> $HGRC
+
+hg diff
+
+hg diff -p

en/examples/daily.copy

+#!/bin/bash
+
+# Example script for "hg copy".  The first half copies a file in one
+# repository (my-copy), changes the original file in a clone made before
+# the copy (your-copy), and then pulls and merges the two.  The second
+# half shows "hg copy" with file and directory arguments.
+#
+# NOTE(review): the "#$ name: ..." markers look like section labels for
+# whatever tool captures these commands for the book -- confirm.  On
+# that assumption, explanatory comments are kept out of named sections.
+
+#$ name: init
+
+hg init my-copy
+cd my-copy
+echo line > file
+hg add file
+hg commit -m 'Added a file'
+
+#$ name: clone
+
+cd ..
+hg clone my-copy your-copy
+
+#$ name: copy
+
+cd my-copy
+hg copy file new-file
+
+#$ name: status
+
+hg status
+
+#$ name: status-copy
+
+hg status -C
+hg commit -m 'Copied file'
+
+#$ name: other
+
+cd ../your-copy
+echo 'new contents' >> file
+hg commit -m 'Changed file'
+
+#$ name: cat
+
+cat file
+cat ../my-copy/new-file
+
+#$ name: merge
+
+hg pull ../my-copy
+hg merge
+cat new-file
+
+#$ name:
+
+# Build a second repository holding two files (a, b) and a nested
+# directory (c/a) for the remaining copy examples.
+cd ..
+hg init copy-example
+cd copy-example
+echo a > a
+echo b > b
+mkdir c
+mkdir c/a
+echo c > c/a/c
+hg ci -Ama
+
+#$ name: simple
+
+hg copy a c
+
+#$ name: dir-dest
+
+mkdir d
+hg copy a b d
+ls d
+
+#$ name: dir-src
+
+hg copy c e
+
+#$ name: dir-src-dest
+
+hg copy c d
+
+#$ name: after
+
+cp a z
+hg copy --after a z

en/examples/daily.files

+#!/bin/bash
+
+# Example script for day-to-day file management: adding files and
+# directories, tracking an empty-looking directory through a hidden
+# file, removing files, handling files deleted outside Mercurial, and
+# "hg addremove".  Each group of examples creates its own repository
+# in the directory the script starts in.
+#
+# NOTE(review): the "#$ name: ..." markers look like section labels for
+# whatever tool captures these commands for the book -- confirm.  On
+# that assumption, explanatory comments are kept out of named sections.
+
+#$ name: add
+
+hg init add-example
+cd add-example
+echo a > a
+hg status
+hg add a
+hg status
+hg commit -m 'Added one file'
+hg status
+
+#$ name: add-dir
+
+mkdir b
+echo b > b/b
+echo c > b/c
+mkdir b/d
+echo d > b/d/d
+hg add b
+hg commit -m 'Added all files in subdirectory'
+
+#$ name:
+
+cd ..
+
+#$ name: hidden
+
+hg init hidden-example
+cd hidden-example
+mkdir empty
+touch empty/.hidden
+hg add empty/.hidden
+hg commit -m 'Manage an empty-looking directory'
+ls empty
+cd ..
+hg clone hidden-example tmp
+ls tmp
+ls tmp/empty
+
+#$ name:
+
+# The "hidden" section already returned to the starting directory with
+# its own "cd ..", so no further "cd .." belongs here; an extra one
+# would create the remaining repositories outside that directory.
+
+#$ name: remove
+
+hg init remove-example
+cd remove-example
+echo a > a
+mkdir b
+echo b > b/b
+hg add a b
+hg commit -m 'Small example for file removal'
+hg remove a
+hg status
+hg remove b
+
+#$ name:
+
+cd ..
+
+#$ name: missing
+hg init missing-example
+cd missing-example
+echo a > a
+hg add a
+hg commit -m'File about to be missing'
+rm a
+hg status
+
+#$ name: remove-after
+
+hg remove --after a
+hg status
+
+#$ name: recover-missing
+hg revert a
+cat a
+hg status
+
+#$ name:
+
+cd ..
+
+#$ name: addremove
+
+hg init addremove-example
+cd addremove-example
+echo a > a
+echo b > b
+hg addremove
+
+#$ name: commit-addremove
+
+echo c > c
+hg commit -A -m 'Commit with addremove'

en/examples/daily.rename

+#!/bin/bash
+
+# Example script for "hg rename": creates a repository with a single
+# committed file "a", renames it to "b", and shows "hg status" output
+# both without and with the -C option.
+
+hg init a
+cd a
+echo a > a
+hg ci -Ama
+
+#$ name: rename
+
+hg rename a b
+
+#$ name: status
+
+hg status
+
+#$ name: status-copy
+
+hg status -C

en/examples/daily.revert

+#!/bin/bash
+
+# Example script for "hg revert": starting from a repository with one
+# committed file, it reverts in turn a modification, an add, a remove,
+# a file deleted by hand, a copy and a rename, showing "hg status"
+# along the way.
+#
+# NOTE(review): the "#$ name: ..." markers look like section labels for
+# whatever tool captures these commands for the book -- confirm.  On
+# that assumption, explanatory comments are kept out of named sections.
+
+hg init a
+cd a
+echo 'original content' > file
+hg ci -Ama
+
+#$ name: modify
+
+cat file
+echo unwanted change >> file
+hg diff file
+
+#$ name: unmodify
+
+hg status
+hg revert file
+cat file
+
+#$ name: status
+
+hg status
+cat file.orig
+
+#$ name:
+
+# Clean up file.orig (presumably the backup left by the revert above)
+# so it does not show up in later status output.
+rm file.orig
+
+#$ name: add
+
+echo oops > oops
+hg add oops
+hg status oops
+hg revert oops
+hg status
+
+#$ name:
+
+# Delete the file created by the "add" example.
+rm oops
+
+#$ name: remove
+
+hg remove file
+hg status
+hg revert file
+hg status
+ls file
+
+#$ name: missing
+
+rm file
+hg status
+hg revert file
+ls file
+
+#$ name: copy
+
+hg copy file new-file
+hg revert new-file
+hg status
+
+#$ name:
+
+# Delete the copy destination before the rename example reuses the name.
+rm new-file
+
+#$ name: rename
+
+hg rename file new-file
+hg revert new-file
+hg status
+
+#$ name: rename-orig
+hg revert file
+hg status

en/examples/data/check_whitespace.py

+#!/usr/bin/python
+
+import re
+
+def trailing_whitespace(difflines):
+    """Find lines that a unified diff adds with trailing whitespace.
+
+    difflines is an iterable of diff lines, with or without their
+    trailing newline (for example the output of "hg export tip").
+
+    Returns a list of (filename, linenum) tuples, one per offending
+    added line.  linenum is the line's number in the new version of
+    the file; filename is None if the line appeared before any file
+    header was seen.
+    """
+    added, linenum, header = [], 0, False
+    # file the current diff affects; None until a file header is seen
+    filename = None
+
+    for line in difflines:
+        # drop the line terminator: otherwise the newline itself counts
+        # as trailing whitespace and every added line gets reported
+        if line.endswith('\n'):
+            line = line[:-1]
+        if header:
+            # remember the name of the file that this diff affects
+            m = re.match(r'(?:---|\+\+\+) ([^\t]+)', line)
+            if m and m.group(1) != '/dev/null':
+                # strip the leading "a/" or "b/" path component
+                filename = m.group(1).split('/', 1)[-1]
+            if line.startswith('+++ '):
+                header = False
+            continue
+        if line.startswith('diff '):
+            header = True
+            continue
+        # hunk header - save the line number; the ",count" parts are
+        # optional because unified diffs omit them for one-line ranges
+        m = re.match(r'@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@', line)
+        if m:
+            linenum = int(m.group(1))
+            continue
+        # hunk body - check for an added line with trailing whitespace
+        m = re.match(r'\+.*\s$', line)
+        if m:
+            added.append((filename, linenum))
+        # only context and added lines exist in the new file
+        if line and line[0] in ' +':
+            linenum += 1
+    return added
+
+if __name__ == '__main__':
+    import os, sys
+
+    # Scan the tip changeset; report every offending line on stderr and,
+    # if there is at least one, exit with status 1.
+    complain = sys.stderr.write
+    offenders = trailing_whitespace(os.popen('hg export tip'))
+    for path, lineno in offenders:
+        complain('%s, line %d: trailing whitespace added\n' % (path, lineno))
+    if offenders:
+        # keep a copy of the commit message so it need not be retyped
+        os.system('hg tip --template "{desc}" > .hg/commit.save')
+        complain('commit message saved to .hg/commit.save\n')
+        sys.exit(1)

en/examples/filenames

+#!/bin/bash
+
+# Example script for naming files on the hg command line: explicit file
+# names, directories, names relative to a subdirectory, "glob:"
+# patterns, and the -I / -X filter options.
+#
+# The setup below creates a repository "a" containing empty files at
+# the top level, in examples/, in src/ and in src/watcher/.
+#
+# NOTE(review): the "#$ name: ..." markers look like section labels for
+# whatever tool captures these commands for the book -- confirm.  On
+# that assumption, explanatory comments are kept out of named sections.
+
+hg init a
+cd a
+mkdir -p examples src/watcher
+touch COPYING MANIFEST.in README setup.py
+touch examples/performant.py examples/simple.py
+touch src/main.py src/watcher/_watcher.c src/watcher/watcher.py src/xyzzy.txt
+
+#$ name: files
+
+hg add COPYING README examples/simple.py
+
+#$ name: dirs
+
+hg status src
+
+#$ name: wdir-subdir
+
+cd src
+hg add -n
+hg add -n .
+
+#$ name: wdir-relname
+
+hg status
+hg status `hg root`
+
+#$ name: glob.star
+
+hg add 'glob:*.py'
+
+#$ name: glob.starstar
+
+cd ..
+hg status 'glob:**.py'
+
+#$ name: glob.star-starstar
+
+hg status 'glob:*.py'
+hg status 'glob:**.py'
+
+#$ name: glob.question
+
+hg status 'glob:**.?'
+
+#$ name: glob.range
+
+hg status 'glob:**[nr-t]'
+
+#$ name: glob.group
+
+hg status 'glob:*.{in,py}'
+
+#$ name: filter.include
+
+hg status -I '*.in'
+
+#$ name: filter.exclude
+
+hg status -X '**.py' src

en/examples/hook.msglen

+#!/bin/sh
+
+# Example script for a pretxncommit hook that checks the length of the
+# commit message.  The hook written to .hg/hgrc pipes the description
+# of the tip changeset through "wc -c" and succeeds only when the
+# count is at least 10.  The "run" section then attempts one commit
+# with a 9-character message and one with an 11-character message.
+
+hg init a
+cd a
+echo '[hooks]' > .hg/hgrc
+echo 'pretxncommit.msglen = test `hg tip --template {desc} | wc -c` -ge 10' >> .hg/hgrc
+
+#$ name: run
+
+cat .hg/hgrc
+echo a > a
+hg add a
+hg commit -A -m 'too short'
+hg commit -A -m 'long enough'

en/examples/hook.simple

+#!/bin/bash
+
 #$ name: init
 
 hg init hook-test

en/examples/hook.ws

+#!/bin/bash
+
+hg init a
+cd a
+echo '[hooks]' > .hg/hgrc
+echo "pretxncommit.whitespace = hg export tip | (! grep -qP '^\\+.*[ \\t]$')" >> .hg/hgrc
+
+#$ name: simple
+
+cat .hg/hgrc
+echo 'a ' > a
+hg commit -A -m 'test with trailing whitespace'
+echo 'a' > a
+hg commit -A -m 'drop trailing whitespace and try again'
+
+#$ name:
+
+echo '[hooks]' > .hg/hgrc