1. Bryan O'Sullivan
  2. hgbook


Bryan O'Sullivan  committed 1e013fb

Lots of filename related content. A little more command reference

Added a script to make sure commands are exhaustively documented.

Comments (0)

Files changed (8)

File en/00book.tex Modified

View file
  • Ignore whitespace
  • Hide word diff

File en/99defs.tex Modified

View file
  • Ignore whitespace
  • Hide word diff
 % Reference entry for a command.
-\newcommand{\cmdref}[1]{\section{#1}\label{cmdref:#1}\index{\texttt{#1} command}``\texttt{hg #1}''}
+\newcommand{\cmdref}[2]{\section{\hgcmd{#1}---#2}\label{cmdref:#1}\index{\texttt{#1} command}}
-% Reference entry for a command option.
-\newcommand{\optref}[2]{\index{\texttt{#1} command!\texttt{#2} option}\texttt{#2}}
+% Reference entry for a command option with long and short forms.
+\newcommand{\optref}[3]{\subsubsection{\hgopt{#1}{--#3}, also \hgopt{#1}{-#2}}}
+% Reference entry for a command option with only long form.
+\newcommand{\loptref}[2]{\subsubsection{\hgopt{#1}{--#2} option}}
 %%% Local Variables: 
 %%% mode: latex

File en/Makefile Modified

View file
  • Ignore whitespace
  • Hide word diff
 	cmdref.tex \
 	concepts.tex \
 	daily.tex \
+	filenames.tex \
 	hook.tex \
 	intro.tex \
 	mq.tex \
 example-sources := \
 	backout \
 	bisect \
+	cmdref \
 	daily.copy \
 	daily.files \
 	daily.rename \
 	daily.revert \
+	filenames \
 	hook.msglen \
 	hook.simple \
 	hook.ws \

File en/cmdref.py Added

View file
  • Ignore whitespace
  • Hide word diff
+#!/usr/bin/env python
+import getopt
+import itertools
+import os
+import re
+import sys
+def usage(exitcode):
+    print >> sys.stderr, ('usage: %s [-H|--hidden] hg_repo' % 
+                          os.path.basename(sys.argv[0]))
+    sys.exit(exitcode)
+    opts, args = getopt.getopt(sys.argv[1:], 'AHh?', ['all', 'help', 'hidden'])
+    opt_all = False
+    opt_hidden = False
+    for o, a in opts:
+        if o in ('-h', '-?', '--help'):
+            usage(0)
+        if o in ('-A', '--all'):
+            opt_all = True
+        if o in ('-H', '--hidden'):
+            opt_hidden = True
+except getopt.GetoptError, err:
+    print >> sys.stderr, 'error:', err
+    usage(1)
+    hg_repo, ltx_file = args
+except ValueError:
+    usage(1)
+if not os.path.isfile(os.path.join(hg_repo, 'mercurial', 'commands.py')):
+    print >> sys.stderr, ('error: %r does not contain mercurial code' %
+                          hg_repo)
+    sys.exit(1)
+sys.path.insert(0, hg_repo)
+from mercurial import commands
+def get_commands():
+    seen = {}
+    for name, info in sorted(commands.table.iteritems()):
+        aliases = name.split('|', 1)
+        name = aliases.pop(0).lstrip('^')
+        function, options, synopsis = info
+        seen[name] = {}
+        for shortopt, longopt, arg, desc in options:
+            seen[name][longopt] = shortopt
+    return seen
+def cmd_filter((name, aliases, options)):
+    if opt_all:
+        return True
+    if opt_hidden:
+        return name.startswith('debug')
+    return not name.startswith('debug')
+def scan(ltx_file):
+    cmdref_re = re.compile(r'^\\cmdref{(?P<cmd>\w+)}')
+    optref_re = re.compile(r'^\\l?optref{(?P<cmd>\w+)}'
+                           r'(?:{(?P<short>[^}])})?'
+                           r'{(?P<long>[^}]+)}')
+    seen = {}
+    locs = {}
+    for lnum, line in enumerate(open(ltx_file)):
+        m = cmdref_re.match(line)
+        if m:
+            d = m.groupdict()
+            cmd = d['cmd']
+            seen[cmd] = {}
+            locs[cmd] = lnum + 1
+            continue
+        m = optref_re.match(line)
+        if m:
+            d = m.groupdict()
+            seen[d['cmd']][d['long']] = d['short']
+            continue
+    return seen, locs
+documented, locs = scan(ltx_file)
+known = get_commands()
+doc_set = set(documented)
+known_set = set(known)
+errors = 0
+for nonexistent in sorted(doc_set.difference(known_set)):
+    print >> sys.stderr, ('%s:%d: %r command does not exist' %
+                          (ltx_file, locs[nonexistent], nonexistent))
+    errors += 1
+def optcmp(a, b):
+    la, sa = a
+    lb, sb = b
+    sc = cmp(sa, sb)
+    if sc:
+        return sc
+    return cmp(la, lb)
+for cmd in doc_set.intersection(known_set):
+    doc_opts = documented[cmd]
+    known_opts = known[cmd]
+    do_set = set(doc_opts)
+    ko_set = set(known_opts)
+    for nonexistent in sorted(do_set.difference(ko_set)):
+        print >> sys.stderr, ('%s:%d: %r option to %r command does not exist' %
+                              (ltx_file, locs[cmd], nonexistent, cmd))
+        errors += 1
+    def mycmp(la, lb):
+        sa = known_opts[la]
+        sb = known_opts[lb]
+        return optcmp((la, sa), (lb, sb))
+    for undocumented in sorted(ko_set.difference(do_set), cmp=mycmp):
+        print >> sys.stderr, ('%s:%d: %r option to %r command not documented' %
+                              (ltx_file, locs[cmd], undocumented, cmd))
+        shortopt = known_opts[undocumented]
+        if shortopt:
+            print '\optref{%s}{%s}{%s}' % (cmd, shortopt, undocumented)
+        else:
+            print '\loptref{%s}{%s}' % (cmd, undocumented)
+        errors += 1
+    sys.stdout.flush()
+if errors:
+    sys.exit(1)
+sorted_locs = sorted(locs.iteritems(), key=lambda x:x[1])
+def next_loc(cmd):
+    for i, (name, loc) in enumerate(sorted_locs):
+        if name >= cmd:
+            return sorted_locs[i-1][1] + 1
+    return loc
+for undocumented in sorted(known_set.difference(doc_set)):
+    print >> sys.stderr, ('%s:%d: %r command not documented' %
+                          (ltx_file, next_loc(undocumented), undocumented))
+    print '\cmdref{%s}' % undocumented
+    for longopt, shortopt in sorted(known[undocumented].items(), cmp=optcmp):
+        if shortopt:
+            print '\optref{%s}{%s}{%s}' % (undocumented, shortopt, longopt)
+        else:
+            print '\loptref{%s}{%s}' % (undocumented, longopt)
+    sys.stdout.flush()
+    errors += 1
+sys.exit(errors and 1 or 0)

File en/cmdref.tex Modified

View file
  • Ignore whitespace
  • Hide word diff
 \chapter{Command reference}
+\cmdref{add}{add files at the next commit}
+\cmdref{diff}{print changes in history or working directory}
 Show differences between revisions for the specified files or
 directories, using the unified diff format.  For a description of the
 unified diff format, see section~\ref{sec:mq:patch}.
+By default, this command does not print diffs for files that Mercurial
+considers to contain binary data.  To control this behaviour, see the
+\hgopt{diff}{-a} and \hgopt{diff}{--git} options.
+Omit date and time information when printing diff headers.
-Specify a revision to compare.
+Do not print changes that only insert or delete blank lines.  A line
+that contains only whitespace is not considered blank.
+Exclude files and directories whose names match the given patterns.
+Include files and directories whose names match the given patterns.
 If this option is not specified, \hgcmd{diff} will refuse to print
 diffs for files that it detects as binary. Specifying \hgopt{diff}{-a}
 all of them.
 This option is useful for files that are ``mostly text'' but have a
-few embedded NUL characters.  If you use it on files that are really
-binary, its output will be incomprehensible.
+few embedded NUL characters.  If you use it on files that contain a
+lot of binary data, its output will be incomprehensible.
+Do not print a line if the only change to that line is in the amount
+of white space it contains.
+Print \command{git}-compatible diffs.  XXX reference a format
-\subsection{Specifying revisions}
-The \hgcmd{diff} command accepts up to two \hgopt{diff}{-r} options to
-specify the revisions to compare.
+Display the name of the enclosing function in a hunk header, using a
+simple heuristic.  This functionality is enabled by default, so the
+\hgopt{diff}{-p} option has no effect unless you change the value of
+the \rcitem{diff}{showfunc} config item, as in the following example.
+Specify one or more revisions to compare.  The \hgcmd{diff} command
+accepts up to two \hgopt{diff}{-r} options to specify the revisions to
-\item Display the differences between the parent of the working
-  directory and the working directory.
+\item Display the differences between the parent revision of the
+  working directory and the working directory.
 \item Display the differences between the specified changeset and the
   working directory.
 \item Display the differences between the two specified changesets.
 contents.  You cannot reverse the ordering in this way if you are
 diffing against the working directory.
-\subsection{Why do the results of \hgcmd{diff} and \hgcmd{status}
+\cmdref{version}{print version and copyright information}
+This command displays the version of Mercurial you are running, and
+its copyright license.  There are four kinds of version string that
+you may see.
+\item The string ``\texttt{unknown}''. This version of Mercurial was
+  not built in a Mercurial repository, and cannot determine its own
+  version.
+\item A short numeric string, such as ``\texttt{1.1}''. This is a
+  build of a revision of Mercurial that was identified by a specific
+  tag in the repository where it was built.  (This doesn't necessarily
+  mean that you're running an official release; someone else could
+  have added that tag to any revision in the repository where they
+  built Mercurial.)
+\item A hexadecimal string, such as ``\texttt{875489e31abe}''.  This
+  is a build of the given revision of Mercurial.
+\item A hexadecimal string followed by a date, such as
+  ``\texttt{875489e31abe+20070205}''.  This is a build of the given
+  revision of Mercurial, where the build repository contained some
+  local changes that had not been committed.
+\subsection{Tips and tricks}
+\subsubsection{Why do the results of \hgcmd{diff} and \hgcmd{status}
 \hgopt{diff}{-r} option.  There is no way to print diffs relative to
 both parents.
-\subsection{Generating safe binary diffs}
+\subsubsection{Generating safe binary diffs}
 If you use the \hgopt{diff}{-a} option to force Mercurial to print
 diffs of files that are either ``mostly text'' or contain lots of

File en/examples/cmdref Added

View file
  • Ignore whitespace
  • Hide word diff
+hg init diff
+cd diff
+cat > myfile.c <<EOF
+int myfunc()
+    return 1;
+hg ci -Ama
+sed -ie 's/return 1/return 10/' myfile.c
+#$ name: diff-p
+echo '[diff]' >> $HGRC
+echo 'showfunc = False' >> $HGRC
+hg diff
+hg diff -p

File en/examples/filenames Added

View file
  • Ignore whitespace
  • Hide word diff
+hg init a
+cd a
+mkdir -p examples src/watcher
+touch COPYING MANIFEST.in README setup.py
+touch examples/performant.py examples/simple.py
+touch src/main.py src/watcher/_watcher.c src/watcher/watcher.py src/xyzzy.txt
+#$ name: files
+hg add COPYING README examples/simple.py
+#$ name: dirs
+hg status src
+#$ name: wdir-subdir
+cd src
+hg add -n
+hg add -n .
+#$ name: wdir-relname
+hg status
+hg status `hg root`
+#$ name: glob.star
+hg add 'glob:*.py'
+#$ name: glob.starstar
+cd ..
+hg status 'glob:**.py'
+#$ name: glob.star-starstar
+hg status 'glob:*.py'
+hg status 'glob:**.py'
+#$ name: glob.question
+hg status 'glob:**.?'
+#$ name: glob.range
+hg status 'glob:**[nr-t]'
+#$ name: glob.group
+hg status 'glob:*.{in,py}'
+#$ name: filter.include
+hg status -I '*.in'
+#$ name: filter.exclude
+hg status -X '**.py' src

File en/filenames.tex Added

View file
  • Ignore whitespace
  • Hide word diff
+\chapter{File names and pattern matching}
+Mercurial provides mechanisms that let you work with file names in a
+consistent and expressive way.
+\section{Simple file naming}
+Mercurial uses a unified piece of machinery ``under the hood'' to
+handle file names.  Every command behaves uniformly with respect to
+file names.  The way in which commands work with file names is as
+If you explicitly name real files on the command line, Mercurial works
+with exactly those files, as you would expect.
+When you provide a directory name, Mercurial will interpret this as
+``operate on every file in this directory and its subdirectories''.
+Mercurial traverses the files and subdirectories in a directory in
+alphabetical order.  When it encounters a subdirectory, it will
+traverse that subdirectory before continuing with the current
+\section{Running commands without any file names}
+Mercurial's commands that work with file names have useful default
+behaviours when you invoke them without providing any file names or
+patterns.  What kind of behaviour you should expect depends on what
+the command does.  Here are a few rules of thumb you can use to
+predict what a command is likely to do if you don't give it any names
+to work with.
+\item Most commands will operate on the entire working directory.
+  This is what the \hgcmd{add} command does, for example.
+\item If the command has effects that are difficult or impossible to
+  reverse, it will force you to explicitly provide at least one name
+  or pattern (see below).  This protects you from accidentally
+  deleting files by running \hgcmd{remove} with no arguments, for
+  example.
+It's easy to work around these default behaviours if they don't suit
+you.  If a command normally operates on the whole working directory,
+you can invoke it on just the current directory and its subdirectories
+by giving it the name ``\dirname{.}''.
+Along the same lines, some commands normally print file names relative
+to the root of the repository, even if you're invoking them from a
+subdirectory.  Such a command will print file names relative to your
+subdirectory if you give it explicit names.  Here, we're going to run
+\hgcmd{status} from a subdirectory, and get it to operate on the
+entire working directory while printing file names relative to our
+subdirectory, by passing it the output of the \hgcmd{root} command.
+\section{Telling you what's going on}
+The \hgcmd{add} example in the preceding section illustrates something
+else that's helpful about Mercurial commands.  If a command operates
+on a file that you didn't name explicitly on the command line, it will
+usually print the name of the file, so that you will not be surprised
+what's going on.
+The principle here is of \emph{least surprise}.  If you've exactly
+named a file on the command line, there's no point in repeating it
+back at you.  If Mercurial is acting on a file \emph{implicitly},
+because you provided no names, or a directory, or a pattern (see
+below), it's safest to tell you what it's doing.
+For commands that behave this way, you can silence them using the
+\hggopt{-q} option.  You can also get them to print the name of every
+file, even those you've named explicitly, using the \hggopt{-v}
+\section{Using patterns to identify files}
+In addition to working with file and directory names, Mercurial lets
+you use \emph{patterns} to identify files.  Mercurial's pattern
+handling is expressive.
+On Unix-like systems (Linux, MacOS, etc.), the job of matching file
+names to patterns normally falls to the shell.  On these systems, you
+must explicitly tell Mercurial that a name is a pattern.  On Windows,
+the shell does not expand patterns, so Mercurial will automatically
+identify names that are patterns, and expand them for you.
+To provide a pattern in place of a regular name on the command line,
+the mechanism is simple:
+  syntax:patternbody
+That is, a pattern is identified by a short text string that says what
+kind of pattern this is, followed by a colon, followed by the actual
+Mercurial supports two kinds of pattern syntax.  The most frequently
+used is called \texttt{glob}; this is the same kind of pattern
+matching used by the Unix shell, and should be familiar to Windows
+command prompt users, too.  
+When Mercurial does automatic pattern matching on Windows, it uses
+\texttt{glob} syntax.  You can thus omit the ``\texttt{glob:}'' prefix
+on Windows, but it's safe to use it, too.
+The \texttt{re} syntax is more powerful; it lets you specify patterns
+using regular expressions, also known as regexps.
+By the way, in the examples that follow, notice that I'm careful to
+wrap all of my patterns in quote characters, so that they won't get
+expanded by the shell before Mercurial sees them.
+\subsection{Shell-style \texttt{glob} patterns}
+This is an overview of the kinds of patterns you can use when you're
+matching on glob patterns.
+The ``\texttt{*}'' character matches any string, within a single
+The ``\texttt{**}'' pattern matches any string, and crosses directory
+boundaries.  It's not a standard Unix glob token, but it's accepted by
+several popular Unix shells, and is very useful.
+The ``\texttt{?}'' pattern matches any single character.
+The ``\texttt{[}'' character begins a \emph{character class}.  This
+matches any single character within the class.  The class ends with a
+``\texttt{]}'' character.  A class may contain multiple \emph{range}s
+of the form ``\texttt{a-f}'', which is shorthand for
+If the first character after the ``\texttt{[}'' in a character class
+is a ``\texttt{!}'', it \emph{negates} the class, making it match any
+single character not in the class.
+A ``\texttt{\{}'' begins a group of subpatterns, where the whole group
+matches if any subpattern in the group matches.  The ``\texttt{,}''
+character separates subpatterns, and ``\texttt{\}}'' ends the group.
+\subsubsection{Watch out!}
+Don't forget that if you want to match a pattern in any directory, you
+should not be using the ``\texttt{*}'' match-any token, as this will
+only match within one directory.  Instead, use the ``\texttt{**}''
+token.  This small example illustrates the difference between the two.
+\subsection{Regular expression matching with \texttt{re} patterns}
+Mercurial accepts the same regular expression syntax as the Python
+programming language (it uses Python's regexp engine internally).
+This is based on the Perl language's regexp syntax, which is the most
+popular dialect in use (it's also used in Java, for example).
+I won't discuss Mercurial's regexp dialect in any detail here, as
+regexps are not often used.  Perl-style regexps are in any case
+already exhaustively documented on a multitude of web sites, and in
+many books.  Instead, I will focus here on a few things you should
+know if you find yourself needing to use regexps with Mercurial.
+A regexp is matched against an entire file name, relative to the root
+of the repository.  In other words, even if you're already in
+subbdirectory \dirname{foo}, if you want to match files under this
+directory, your pattern must start with ``\texttt{foo/}''.
+One thing to note, if you're familiar with Perl-style regexps, is that
+Mercurial's are \emph{rooted}.  That is, a regexp starts matching
+against the beginning of a string; it doesn't look for a match
+anywhere within the string it.  To match anywhere in a string, start
+your pattern with ``\texttt{.*}''.
+\section{Filtering files}
+Not only does Mercurial give you a variety of ways to specify files;
+it lets you further winnow those files using \emph{filters}.  Commands
+that work with file names accept two filtering options.
+\item \hggopt{-I}, or \hggopt{--include}, lets you specify a pattern
+  that file names must match in order to be processed.
+\item \hggopt{-X}, or \hggopt{--exclude}, gives you a way to
+  \emph{avoid} processing files, if they match this pattern.
+You can provide multiple \hggopt{-I} and \hggopt{-X} options on the
+command line, and intermix them as you please.  Mercurial interprets
+the patterns you provide using glob syntax by default (but you can use
+regexps if you need to).
+You can read a \hggopt{-I} filter as ``process only the files that
+match this filter''.
+The \hggopt{-X} filter is best read as ``process only the files that
+don't match this pattern''.
+\section{Ignoring unwanted files and directories}
+\section{Case sensitivity}
+If you're working in a mixed development environment that contains
+both Linux (or other Unix) systems and Macs or Windows systems, you
+should keep in the back of your mind the knowledge that they treat the
+case (``N'' versus ``n'') of file names in incompatible ways.  This is
+not very likely to affect you, and it's easy to deal with if it does,
+but it could surprise you if you don't know about it.
+Operating systems and filesystems differ in the way they handle the
+\emph{case} of characters in file and directory names.  There are
+three common ways to handle case in names.
+\item Completely case insensitive.  Uppercase and lowercase versions
+  of a letter are treated as identical, both when creating a file and
+  during subsequent accesses.  This is common on older DOS-based
+  systems.
+\item Case preserving, but insensitive.  When a file or directory is
+  created, the case of its name is stored, and can be retrieved and
+  displayed by the operating system.  When an existing file is being
+  looked up, its case is ignored.  This is the standard arrangement on
+  Windows and MacOS.  The names \filename{foo} and \filename{FoO}
+  identify the same file.  This treatment of uppercase and lowercase
+  letters as interchangeable is also referred to as \emph{case
+    folding}.
+\item Case sensitive.  The case of a name is significant at all times.
+  The names \filename{foo} and {FoO} identify different files.  This
+  is the way Linux and Unix systems normally work.
+On Unix-like systems, it is possible to have any or all of the above
+ways of handling case in action at once.  For example, if you use a
+USB thumb drive formatted with a FAT32 filesystem on a Linux system,
+Linux will handle names on that filesystem in a case preserving, but
+insensitive, way.
+\subsection{Safe, portable repository storage}
+Mercurial's repository storage mechanism is \emph{case safe}.  It
+translates file names so that they can be safely stored on both case
+sensitive and case insensitive filesystems.  This means that you can
+use normal file copying tools to transfer a Mercurial repository onto,
+for example, a USB thumb drive, and safely move that drive and
+repository back and forth between a Mac, a PC running Windows, and a
+Linux box.
+\subsection{Detecting case conflicts}
+When operating in the working directory, Mercurial honours the naming
+policy of the filesystem where the working directory is located.  If
+the filesystem is case preserving, but insensitive, Mercurial will
+treat names that differ only in case as the same.
+An important aspect of this approach is that it is possible to commit
+a changeset on a case sensitive (typically Linux or Unix) filesystem
+that will cause trouble for users on case insensitive (usually Windows
+and MacOS) users.  If a Linux user commits changes to two files, one
+named \filename{myfile.c} and the other named \filename{MyFile.C},
+they will be stored correctly in the repository.  And in the working
+directories of other Linux users, they will be correctly represented
+as separate files.
+If a Windows or Mac user pulls this change, they will not initially
+have a problem, because Mercurial's repository storage mechanism is
+case safe.  However, once they try to \hgcmd{update} the working
+directory to that changeset, or \hgcmd{merge} with that changeset,
+Mercurial will spot the conflict between the two file names that the
+filesystem would treat as the same, and forbid the update or merge
+from occurring.
+\subsection{Fixing a case conflict}
+If you are using Windows or a Mac in a mixed environment where some of
+your collaborators are using Linux or Unix, and Mercurial reports a
+case folding conflict when you try to \hgcmd{update} or \hgcmd{merge},
+the procedure to fix the problem is simple.
+Just find a nearby Linux or Unix box, clone the problem repository
+onto it, and use Mercurial's \hgcmd{rename} command to change the
+names of any offending files or directories so that they will no
+longer cause case folding conflicts.  Commit this change, \hgcmd{pull}
+or \hgcmd{push} it across to your Windows or MacOS system, and
+\hgcmd{update} to the revision with the non-conflicting names.
+The changeset with case-conflicting names will remain in your
+project's history, and you still won't be able to \hgcmd{update} your
+working directory to that changeset on a Windows or MacOS system, but
+you can continue development unimpeded.
+  Prior to version~0.9.3, Mercurial did not use a case safe repository
+  storage mechanism, and did not detect case folding conflicts.  If
+  you are using an older version of Mercurial on Windows or MacOS, I
+  strongly recommend that you upgrade.
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "00book"
+%%% End: