Shlomi Fish avatar Shlomi Fish committed b88427c

Add the current sources.

Comments (0)

Files changed (44)

+use strict;
+use warnings;
+
+use Module::Build;
+
+my $build = Module::Build->new(
+   module_name => 'Parser::MGC',
+   requires => {                 # runtime prerequisites
+      'File::Slurp' => 0,
+   },
+   build_requires => {           # prerequisites for building and testing only
+      'File::Temp' => 0,
+      'Test::More' => 0,
+   },
+   auto_configure_requires => 0, # Don't add M::B to configure_requires
+   license => 'perl',
+   create_makefile_pl => 'traditional', # also produce an ExtUtils::MakeMaker-style Makefile.PL
+   create_license => 1,          # have Module::Build generate the LICENSE file
+   create_readme  => 1,          # have Module::Build generate the README file
+);
+
+$build->create_build_script;     # write out the Build script
+Revision history for Parser-MGC
+
+0.11    CHANGES:
+         * Allow different toplevel parse methods to the constructor
+         * Added another example showing parsing XML - only a minimal example;
+           do not use this as real code. :)
+
+0.10    CHANGES:
+         * Added ->maybe_expect, for higher performance parsers
+
+0.09    CHANGES:
+         * Added ->pos accessor and ->fail_from to throw failures from other
+           locations
+         * Added ->token_number as a convenience for int or float
+
+0.08    CHANGES:
+         * Give ->list_of and ->sequence_of proper failure-handling semantics
+         * Added ->generic_token
+         * Defer conversion of pos into line/col/text until string-formatting
+           a failure exception - improves performance of backtracking
+         * Make token_float tuneable
+
+0.07    CHANGES:
+         * Allow ->expect to return subgroup captures in list context
+         * Documentation improvements
+        
+        BUGFIXES:
+         * Use Data::Dumper rather than Data::Dump in examples, as the latter
+           is not core; no point pulling in non-core deps simply for examples
+
+0.06    CHANGES:
+         * Renamed ->one_of to ->any_of
+         * Added ->substring_before
+         * Allow ->scope_of to not take a start pattern
+         * Recognise the usual set of character escapes in ->token_string
+         * Added more example scripts to demonstrate:
+            + the use of ->substring_before to parse POD-like notation
+            + accumulator variables instead of structural return
+         * Unit-test the example scripts
+
+0.05    CHANGES:
+         * Added ->scope_level
+         * Added ->from_reader as a new potential source of string input
+
+0.04    CHANGES:
+         * Added ->token_float
+         * Optionally parse 0o... as octal integers
+
+        BUGFIXES:
+         * Match strings non-greedily
+         * Correct exception printing when line indent includes tabs (thanks
+           to Khisanth/#perl)
+
+0.03    CHANGES:
+         * Expanded documentation, more examples
+        
+        BUGFIXES:
+         * Regexp quoting fix for perl >= 5.13.6
+         * Declare dependency on File::Slurp
+
+0.02    CHANGES:
+         * ->expect now returns the consumed string
+         * ->token_int recognises negative integers
+         * ->token_* raises a failure at end-of-scope, rather than returning
+           undef
+
+        BUGFIXES:
+         * 'use overload fallback' to keep Test::More 0.96 happy
+
+0.01    First version, released on an unsuspecting world.
+
+This software is copyright (c) 2012 by Paul Evans <leonerd@leonerd.org.uk>.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+Terms of the Perl programming language system itself
+
+a) the GNU General Public License as published by the Free
+   Software Foundation; either version 1, or (at your option) any
+   later version, or
+b) the "Artistic License"
+
+--- The GNU General Public License, Version 1, February 1989 ---
+
+This software is Copyright (c) 2012 by Paul Evans <leonerd@leonerd.org.uk>.
+
+This is free software, licensed under:
+
+  The GNU General Public License, Version 1, February 1989
+
+                    GNU GENERAL PUBLIC LICENSE
+                     Version 1, February 1989
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ 51 Franklin St, Suite 500, Boston, MA  02110-1335  USA
+
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The license agreements of most software companies try to keep users
+at the mercy of those companies.  By contrast, our General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  The
+General Public License applies to the Free Software Foundation's
+software and to any other program whose authors commit to using it.
+You can use it for your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Specifically, the General Public License is designed to make
+sure that you have the freedom to give away or sell copies of free
+software, that you receive source code or can get it if you want it,
+that you can change the software or use pieces of it in new free
+programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of a such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must tell them their rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any program or other work which
+contains a notice placed by the copyright holder saying it may be
+distributed under the terms of this General Public License.  The
+"Program", below, refers to any such program or work, and a "work based
+on the Program" means either the Program or any work containing the
+Program or a portion of it, either verbatim or with modifications.  Each
+licensee is addressed as "you".
+
+  1. You may copy and distribute verbatim copies of the Program's source
+code as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice and
+disclaimer of warranty; keep intact all the notices that refer to this
+General Public License and to the absence of any warranty; and give any
+other recipients of the Program a copy of this General Public License
+along with the Program.  You may charge a fee for the physical act of
+transferring a copy.
+
+  2. You may modify your copy or copies of the Program or any portion of
+it, and copy and distribute such modifications under the terms of Paragraph
+1 above, provided that you also do the following:
+
+    a) cause the modified files to carry prominent notices stating that
+    you changed the files and the date of any change; and
+
+    b) cause the whole of any work that you distribute or publish, that
+    in whole or in part contains the Program or any part thereof, either
+    with or without modifications, to be licensed at no charge to all
+    third parties under the terms of this General Public License (except
+    that you may choose to grant warranty protection to some or all
+    third parties, at your option).
+
+    c) If the modified program normally reads commands interactively when
+    run, you must cause it, when started running for such interactive use
+    in the simplest and most usual way, to print or display an
+    announcement including an appropriate copyright notice and a notice
+    that there is no warranty (or else, saying that you provide a
+    warranty) and that users may redistribute the program under these
+    conditions, and telling the user how to view a copy of this General
+    Public License.
+
+    d) You may charge a fee for the physical act of transferring a
+    copy, and you may at your option offer warranty protection in
+    exchange for a fee.
+
+Mere aggregation of another independent work with the Program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other work under the scope of these terms.
+
+  3. You may copy and distribute the Program (or a portion or derivative of
+it, under Paragraph 2) in object code or executable form under the terms of
+Paragraphs 1 and 2 above provided that you also do one of the following:
+
+    a) accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of
+    Paragraphs 1 and 2 above; or,
+
+    b) accompany it with a written offer, valid for at least three
+    years, to give any third party free (except for a nominal charge
+    for the cost of distribution) a complete machine-readable copy of the
+    corresponding source code, to be distributed under the terms of
+    Paragraphs 1 and 2 above; or,
+
+    c) accompany it with the information you received as to where the
+    corresponding source code may be obtained.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form alone.)
+
+Source code for a work means the preferred form of the work for making
+modifications to it.  For an executable file, complete source code means
+all the source code for all modules it contains; but, as a special
+exception, it need not include source code for modules which are standard
+libraries that accompany the operating system on which the executable
+file runs, or for standard header files or definitions files that
+accompany that operating system.
+
+  4. You may not copy, modify, sublicense, distribute or transfer the
+Program except as expressly provided under this General Public License.
+Any attempt otherwise to copy, modify, sublicense, distribute or transfer
+the Program is void, and will automatically terminate your rights to use
+the Program under this License.  However, parties who have received
+copies, or rights to use copies, from you under this General Public
+License will not have their licenses terminated so long as such parties
+remain in full compliance.
+
+  5. By copying, distributing or modifying the Program (or any work based
+on the Program) you indicate your acceptance of this license to do so,
+and all its terms and conditions.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the original
+licensor to copy, distribute or modify the Program subject to these
+terms and conditions.  You may not impose any further restrictions on the
+recipients' exercise of the rights granted herein.
+
+  7. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of the license which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+the license, you may choose any version ever published by the Free Software
+Foundation.
+
+  8. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+        Appendix: How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to humanity, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+  To do so, attach the following notices to the program.  It is safest to
+attach them to the start of each source file to most effectively convey
+the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) 19yy  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 1, or (at your option)
+    any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) 19xx name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License.  Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items--whatever suits your
+program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  program `Gnomovision' (a program to direct compilers to make passes
+  at assemblers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+That's all there is to it!
+
+
+--- The Artistic License 1.0 ---
+
+This software is Copyright (c) 2012 by Paul Evans <leonerd@leonerd.org.uk>.
+
+This is free software, licensed under:
+
+  The Artistic License 1.0
+
+The Artistic License
+
+Preamble
+
+The intent of this document is to state the conditions under which a Package
+may be copied, such that the Copyright Holder maintains some semblance of
+artistic control over the development of the package, while giving the users of
+the package the right to use and distribute the Package in a more-or-less
+customary fashion, plus the right to make reasonable modifications.
+
+Definitions:
+
+  - "Package" refers to the collection of files distributed by the Copyright
+    Holder, and derivatives of that collection of files created through
+    textual modification. 
+  - "Standard Version" refers to such a Package if it has not been modified,
+    or has been modified in accordance with the wishes of the Copyright
+    Holder. 
+  - "Copyright Holder" is whoever is named in the copyright or copyrights for
+    the package. 
+  - "You" is you, if you're thinking about copying or distributing this Package.
+  - "Reasonable copying fee" is whatever you can justify on the basis of media
+    cost, duplication charges, time of people involved, and so on. (You will
+    not be required to justify it to the Copyright Holder, but only to the
+    computing community at large as a market that must bear the fee.) 
+  - "Freely Available" means that no fee is charged for the item itself, though
+    there may be fees involved in handling the item. It also means that
+    recipients of the item may redistribute it under the same conditions they
+    received it. 
+
+1. You may make and give away verbatim copies of the source form of the
+Standard Version of this Package without restriction, provided that you
+duplicate all of the original copyright notices and associated disclaimers.
+
+2. You may apply bug fixes, portability fixes and other modifications derived
+from the Public Domain or from the Copyright Holder. A Package modified in such
+a way shall still be considered the Standard Version.
+
+3. You may otherwise modify your copy of this Package in any way, provided that
+you insert a prominent notice in each changed file stating how and when you
+changed that file, and provided that you do at least ONE of the following:
+
+  a) place your modifications in the Public Domain or otherwise make them
+     Freely Available, such as by posting said modifications to Usenet or an
+     equivalent medium, or placing the modifications on a major archive site
+     such as ftp.uu.net, or by allowing the Copyright Holder to include your
+     modifications in the Standard Version of the Package.
+
+  b) use the modified Package only within your corporation or organization.
+
+  c) rename any non-standard executables so the names do not conflict with
+     standard executables, which must also be provided, and provide a separate
+     manual page for each non-standard executable that clearly documents how it
+     differs from the Standard Version.
+
+  d) make other distribution arrangements with the Copyright Holder.
+
+4. You may distribute the programs of this Package in object code or executable
+form, provided that you do at least ONE of the following:
+
+  a) distribute a Standard Version of the executables and library files,
+     together with instructions (in the manual page or equivalent) on where to
+     get the Standard Version.
+
+  b) accompany the distribution with the machine-readable source of the Package
+     with your modifications.
+
+  c) accompany any non-standard executables with their corresponding Standard
+     Version executables, giving the non-standard executables non-standard
+     names, and clearly documenting the differences in manual pages (or
+     equivalent), together with instructions on where to get the Standard
+     Version.
+
+  d) make other distribution arrangements with the Copyright Holder.
+
+5. You may charge a reasonable copying fee for any distribution of this
+Package.  You may charge any fee you choose for support of this Package. You
+may not charge a fee for this Package itself. However, you may distribute this
+Package in aggregate with other (possibly commercial) programs as part of a
+larger (possibly commercial) software distribution provided that you do not
+advertise this Package as a product of your own.
+
+6. The scripts and library files supplied as input to or produced as output
+from the programs of this Package do not automatically fall under the copyright
+of this Package, but belong to whomever generated them, and may be sold
+commercially, and may be aggregated with this Package.
+
+7. C or perl subroutines supplied by you and linked into this Package shall not
+be considered part of this Package.
+
+8. The name of the Copyright Holder may not be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+The End
+
+Build.PL
+Changes
+examples/eval-expr.pl
+examples/LICENSE
+examples/parse-dict.pl
+examples/parse-pod.pl
+examples/parse-xml.pl
+examples/synopsis.pl
+lib/Parser/MGC.pm
+LICENSE
+Makefile.PL
+MANIFEST			This list of files
+META.json
+META.yml
+README
+t/00use.t
+t/01base.t
+t/02expect.t
+t/03reader.t
+t/04where.t
+t/05comment.t
+t/06substring.t
+t/07generic_token.t
+t/10token_int.t
+t/11token_float.t
+t/12token_number.t
+t/13token_string.t
+t/14token_ident.t
+t/15token_kw.t
+t/20maybe.t
+t/21scope_of.t
+t/22list_of.t
+t/23sequence_of.t
+t/24any_of.t
+t/30commit.t
+t/31scope_level.t
+t/32exception.t
+t/90ex_dict.t
+t/90ex_expr.t
+t/90ex_pod.t
+t/90ex_synopsis.t
+t/90ex_xml.t
+t/98backcompat.t
+t/99pod.t
+{
+   "abstract" : "build simple recursive-descent parsers",
+   "author" : [
+      "Paul Evans <leonerd@leonerd.org.uk>"
+   ],
+   "dynamic_config" : 1,
+   "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.120630",
+   "license" : [
+      "perl_5"
+   ],
+   "meta-spec" : {
+      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+      "version" : "2"
+   },
+   "name" : "Parser-MGC",
+   "prereqs" : {
+      "build" : {
+         "requires" : {
+            "File::Temp" : "0",
+            "Test::More" : "0"
+         }
+      },
+      "runtime" : {
+         "requires" : {
+            "File::Slurp" : "0"
+         }
+      }
+   },
+   "provides" : {
+      "Parser::MGC" : {
+         "file" : "lib/Parser/MGC.pm",
+         "version" : "0.11"
+      }
+   },
+   "release_status" : "stable",
+   "resources" : {
+      "license" : [
+         "http://dev.perl.org/licenses/"
+      ]
+   },
+   "version" : "0.11"
+}
+---
+abstract: 'build simple recursive-descent parsers'
+author:
+  - 'Paul Evans <leonerd@leonerd.org.uk>'
+build_requires:
+  File::Temp: 0
+  Test::More: 0
+dynamic_config: 1
+generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.120630'
+license: perl
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: 1.4
+name: Parser-MGC
+provides:
+  Parser::MGC:
+    file: lib/Parser/MGC.pm
+    version: 0.11
+requires:
+  File::Slurp: 0
+resources:
+  license: http://dev.perl.org/licenses/
+version: 0.11
+# Note: this file was auto-generated by Module::Build::Compat version 0.3800
+use ExtUtils::MakeMaker;
+WriteMakefile
+(
+  'NAME' => 'Parser::MGC',
+  'VERSION_FROM' => 'lib/Parser/MGC.pm', # distribution version is taken from this module file
+  'PREREQ_PM' => { # one combined list: Build.PL's requires plus its build_requires
+                   'File::Slurp' => 0,
+                   'File::Temp' => 0,
+                   'Test::More' => 0
+                 },
+  'INSTALLDIRS' => 'site',
+  'EXE_FILES' => [], # no scripts are installed
+  'PL_FILES' => {}
+)
+;
+NAME
+    `Parser::MGC' - build simple recursive-descent parsers
+
+SYNOPSIS
+     package My::Grammar::Parser;
+     use base qw( Parser::MGC );
+
+     sub parse
+     {
+        my $self = shift;
+
+        $self->sequence_of( sub {
+           $self->any_of(
+              sub { $self->token_int },
+              sub { $self->token_string },
+              sub { \$self->token_ident },
+              sub { $self->scope_of( "(", \&parse, ")" ) }
+           );
+        } );
+     }
+
+     my $parser = My::Grammar::Parser->new;
+
+     my $tree = $parser->from_file( $ARGV[0] );
+
+     ...
+
+DESCRIPTION
+    This base class provides a low-level framework for building
+    recursive-descent parsers that consume a given input string from left to
+    right, returning a parse structure. It takes its name from the `m//gc'
+    regexps used to implement the token parsing behaviour.
+
+    It provides a number of token-parsing methods, which each extract a
+    grammatical token from the string. It also provides wrapping methods
+    that can be used to build up a possibly-recursive grammar structure, by
+    applying a structure around other parts of parsing code. Each method,
+    both token and structural, atomically either consumes a prefix of the
+    string and returns its result, or fails and consumes nothing. This makes
+    it simple to implement grammars that require backtracking.
+
+CONSTRUCTOR
+  $parser = Parser::MGC->new( %args )
+    Returns a new instance of a `Parser::MGC' object. This must be called on
+    a subclass that provides a method of the name provided as `toplevel', by
+    default called `parse'.
+
+    Takes the following named arguments
+
+    toplevel => STRING
+            Name of the toplevel method to use to start the parse from. If
+            not supplied, will try to use a method called `parse'.
+
+    patterns => HASH
+            Keys in this hash should map to quoted regexp (`qr//')
+            references, to override the default patterns used to match
+            tokens. See `PATTERNS' below
+
+    accept_0o_oct => BOOL
+            If true, the `token_int' method will also accept integers with a
+            `0o' prefix as octal.
+
+PATTERNS
+    The following pattern names are recognised. They may be passed to the
+    constructor in the `patterns' hash, or provided as a class method under
+    the name `pattern_*name*'.
+
+    * ws
+        Pattern used to skip whitespace between tokens. Defaults to
+        `/[\s\n\t]+/'
+
+    * comment
+        Pattern used to skip comments between tokens. Undefined by default.
+
+    * int
+        Pattern used to parse an integer by `token_int'. Defaults to
+        `/-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/'. If `accept_0o_oct' is given,
+        then this will be expanded to match `/0o[0-7]+/' as well.
+
+    * float
+        Pattern used to parse a floating-point number by `token_float'.
+        Defaults to `/-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i'.
+
+    * ident
+        Pattern used to parse an identifier by `token_ident'. Defaults to
+        `/[[:alpha:]_]\w*/'
+
+    * string_delim
+        Pattern used to delimit a string by `token_string'. Defaults to
+        `/["']/'.
+
+METHODS
+  $result = $parser->from_string( $str )
+    Parse the given literal string and return the result from the toplevel
+    method.
+
+  $result = $parser->from_file( $file )
+    Parse the given file, which may be a pathname in a string, or an opened
+    IO handle, and return the result from the toplevel method.
+
+  $result = $parser->from_reader( \&reader )
+    Parse the input which is read by the `reader' function. This function
+    will be called in scalar context to generate portions of string to
+    parse, being passed the `$parser' object. The function should return
+    `undef' when it has no more string to return.
+
+     $reader->( $parser )
+
+    Note that because it is not generally possible to detect exactly when
+    more input may be required due to failed regexp parsing, the reader
+    function is only invoked during searching for skippable whitespace. This
+    makes it suitable for reading lines of a file in the common case where
+    lines are considered as skippable whitespace, or for reading lines of
+    input interactively from a user. It cannot be used in all cases (for
+    example, reading fixed-size buffers from a file) because two successive
+    invocations may split a single token across the buffer boundaries, and
+    cause parse failures.
+
+  $pos = $parser->pos
+    Returns the current parse position, as a character offset from the
+    beginning of the file or string.
+
+  ( $lineno, $col, $text ) = $parser->where
+    Returns the current parse position, as a line and column number, and the
+    entire current line of text. The first line is numbered 1, and the first
+    column is numbered 0.
+
+  $parser->fail( $message )
+  $parser->fail_from( $pos, $message )
+    Aborts the current parse attempt with the given message string. The
+    failure message will include the line and column position, and the line
+    of input that failed at the current parse position, or a position
+    earlier obtained using the `pos' method.
+
+  $eos = $parser->at_eos
+    Returns true if the parse position is at the end of the input string.
+
+  $level = $parser->scope_level
+    Returns the number of nested `scope_of' calls that have been made.
+
+STRUCTURE-FORMING METHODS
+    The following methods may be used to build a grammatical structure out
+    of the defined basic token-parsing methods. Each takes at least one code
+    reference, which will be passed the actual `$parser' object as its first
+    argument.
+
+  $ret = $parser->maybe( $code )
+    Attempts to execute the given `$code' reference in scalar context, and
+    returns what it returned. If the code fails to parse by calling the
+    `fail' method then none of the input string will be consumed; the
+    current parsing position will be restored. `undef' will be returned in
+    this case.
+
+    This may be considered to be similar to the `?' regexp qualifier.
+
+     sub parse_declaration
+     {
+        my $self = shift;
+
+        [ $self->parse_type,
+          $self->token_ident,
+          $self->maybe( sub {
+             $self->expect( "=" );
+             $self->parse_expression
+          } ),
+        ];
+     }
+
+  $ret = $parser->scope_of( $start, $code, $stop )
+    Expects to find the `$start' pattern, then attempts to execute the given
+    `$code' reference, then expects to find the `$stop' pattern. Returns
+    whatever the code reference returned.
+
+    While the code is being executed, the `$stop' pattern will be used by
+    the token parsing methods as an end-of-scope marker; causing them to
+    raise a failure if called at the end of a scope.
+
+     sub parse_block
+     {
+        my $self = shift;
+
+        $self->scope_of( "{", sub { $self->parse_statements }, "}" );
+     }
+
+    If the `$start' pattern is undefined, it is presumed the caller has
+    already checked for this. This is useful when the stop pattern needs to
+    be calculated based on the start pattern.
+
+     sub parse_bracketed
+     {
+        my $self = shift;
+
+        my $delim = $self->expect( qr/[\(\[\<\{]/ );
+        $delim =~ tr/([<{/)]>}/;
+
+        $self->scope_of( undef, sub { $self->parse_body }, $delim );
+     }
+
+  $ret = $parser->list_of( $sep, $code )
+    Expects to find a list of instances of something parsed by `$code',
+    separated by the `$sep' pattern. Returns an ARRAY ref containing a list
+    of the return values from the `$code'.
+
+    This method does not consider it an error if the returned list is empty;
+    that is, that the scope ended before any item instances were parsed from
+    it.
+
+     sub parse_numbers
+     {
+        my $self = shift;
+
+        $self->list_of( ",", sub { $self->token_int } );
+     }
+
+  $ret = $parser->sequence_of( $code )
+    A shortcut for calling `list_of' with an empty string as separator;
+    expects to find at least one instance of something parsed by `$code',
+    separated only by skipped whitespace.
+
+    This may be considered to be similar to the `+' or `*' regexp
+    qualifiers.
+
+     sub parse_statements
+     {
+        my $self = shift;
+
+        $self->sequence_of( sub { $self->parse_statement } );
+     }
+
+  $ret = $parser->any_of( @codes )
+    Expects that one of the given code references can parse something from
+    the input, returning what it returned. Each code reference may indicate
+    a failure to parse by calling the `fail' method.
+
+    This may be considered to be similar to the `|' regexp operator for
+    forming alternations of possible parse trees.
+
+     sub parse_statement
+     {
+        my $self = shift;
+
+        $self->any_of(
+           sub { $self->parse_declaration; $self->expect(";") },
+           sub { $self->parse_expression; $self->expect(";") },
+           sub { $self->parse_block },
+        );
+     }
+
+    Note: This method used to be called `one_of', but was renamed for
+    clarity. Currently this method is provided also as an alias by the old
+    name. Code using the old name should be rewritten to `any_of' instead,
+    as this backward-compatibility alias may be removed in a later version.
+
+  $parser->commit
+    Calling this method will cancel the backtracking behaviour of the
+    innermost `maybe', `list_of', `sequence_of', or `any_of' structure
+    forming method. That is, if later code then calls `fail', the exception
+    will be propagated out of `maybe', and no further code blocks will be
+    attempted by `any_of'.
+
+    Typically this will be called once the grammatical structure of an
+    alternation has been determined, ensuring that any further failures are
+    raised as real exceptions, rather than by attempting other alternatives.
+
+     sub parse_statement
+     {
+        my $self = shift;
+
+        $self->any_of(
+           ...
+           sub {
+              $self->scope_of( "{",
+                 sub { $self->commit; $self->parse_statements; },
+              "}" ),
+           },
+        );
+     }
+
+TOKEN PARSING METHODS
+    The following methods attempt to consume some part of the input string,
+    to be used as part of the parsing process.
+
+  $str = $parser->expect( $literal )
+  $str = $parser->expect( qr/pattern/ )
+  @groups = $parser->expect( qr/pattern/ )
+    Expects to find a literal string or regexp pattern match, and consumes
+    it. In scalar context, this method returns the string that was captured.
+    In list context it returns the matching substring and the contents of
+    any subgroups contained in the pattern.
+
+    This method will raise a parse error (by calling `fail') if the regexp
+    fails to match. Note that if the pattern could match an empty string
+    (such as for example `qr/\d*/'), the pattern will always match, even if
+    it has to match an empty string. This method will not consider a failure
+    if the regexp matches with zero-width.
+
+  $str = $parser->maybe_expect( ... )
+  @groups = $parser->maybe_expect( ... )
+    A convenient shortcut equivalent to calling `expect' within `maybe', but
+    implemented more efficiently, avoiding the exception-handling set up by
+    `maybe'. Returns `undef' or an empty list if the match fails.
+
+  $str = $parser->substring_before( $literal )
+  $str = $parser->substring_before( qr/pattern/ )
+    Expects to possibly find a literal string or regexp pattern match. If it
+    finds such, consume all the input text before but excluding this match,
+    and return it. If it fails to find a match before the end of the current
+    scope, consumes all the input text until the end of scope and return it.
+
+    This method does not consume the part of input that matches, only the
+    text before it. It is not considered a failure if the substring before
+    this match is empty. If a non-empty match is required, use the `fail'
+    method:
+
+     sub token_nonempty_part
+     {
+        my $self = shift;
+
+        my $str = $self->substring_before( "," );
+        length $str or $self->fail( "Expected a string fragment before ," );
+
+        return $str;
+     }
+
+    Note that unlike most of the other token parsing methods, this method
+    does not consume either leading or trailing whitespace around the
+    substring. It is expected that this method would be used as part of a
+    parser to read quoted strings, or similar cases where whitespace should
+    be preserved.
+
+  $val = $parser->generic_token( $name, $re, $convert )
+    Expects to find a token matching the precompiled regexp `$re'. If
+    provided, the `$convert' CODE reference can be used to convert the
+    string into a more convenient form. `$name' is used in the failure
+    message if the pattern fails to match.
+
+    If provided, the `$convert' function will be passed the parser and the
+    matching substring; the value it returns is returned from
+    `generic_token'.
+
+     $convert->( $parser, $substr )
+
+    If not provided, the substring will be returned as it stands.
+
+    This method is mostly provided for subclasses to define their own token
+    types. For example:
+
+     sub token_hex
+     {
+        my $self = shift;
+        $self->generic_token( hex => qr/[0-9A-F]{2}h/, sub { hex $_[1] } );
+     }
+
+  $int = $parser->token_int
+    Expects to find an integer in decimal, octal or hexadecimal notation,
+    and consumes it. Negative integers, preceded by `-', are also
+    recognised.
+
+  $float = $parser->token_float
+    Expects to find a number expressed in floating-point notation; a
+    sequence of digits possibly prefixed by `-', possibly containing a
+    decimal point, possibly followed by an exponent specified by `e'
+    followed by an integer. The numerical value is then returned.
+
+  $number = $parser->token_number
+    Expects to find a number expressed in either of the above forms.
+
+  $str = $parser->token_string
+    Expects to find a quoted string, and consumes it. The string should be
+    quoted using `"' or `'' quote marks.
+
+    The content of the quoted string can contain character escapes similar
+    to those accepted by C or Perl. Specifically, the following forms are
+    recognised:
+
+     \a               Bell ("alert")
+     \b               Backspace
+     \e               Escape
+     \f               Form feed
+     \n               Newline
+     \r               Return
+     \t               Horizontal Tab
+     \0, \012         Octal character
+     \x34, \x{5678}   Hexadecimal character
+
+    C's `\v' for vertical tab is not supported as it is rarely used in
+    practice and it collides with Perl's `\v' regexp escape. Perl's `\c' for
+    forming other control characters is also not supported.
+
+  $ident = $parser->token_ident
+    Expects to find an identifier, and consumes it.
+
+  $keyword = $parser->token_kw( @keywords )
+    Expects to find a keyword, and consumes it. A keyword is defined as an
+    identifier which is exactly one of the literal values passed in.
+
+EXAMPLES
+  Accumulating Results Using Variables
+    Although the structure-forming methods all return a value, obtained from
+    their nested parsing code, it can sometimes be more convenient to use a
+    variable to accumulate a result in instead. For example, consider the
+    following parser method, designed to parse a set of `name: "value"'
+    assignments, such as might be found in a configuration file, or
+    YAML/JSON-style mapping value.
+
+     sub parse_dict
+     {
+        my $self = shift;
+ 
+        my %ret;
+        $self->list_of( ",", sub {
+           my $key = $self->token_ident;
+           exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" );
+ 
+           $self->expect( ":" );
+ 
+           $ret{$key} = $self->parse_value;
+        } );
+ 
+        return \%ret
+     }
+
+    Instead of using the return value from `list_of', this method
+    accumulates values in the `%ret' hash, eventually returning a reference
+    to it as its result. Because of this, it can perform some error checking
+    while it parses; namely, rejecting duplicate keys.
+
+TODO
+    *   Make unescaping of string constants more customisable. Possibly
+        consider instead a `parse_string_generic' using a loop over
+        `substring_before'.
+
+    *   Easy ability for subclasses to define more token types as methods.
+        Perhaps provide a class method such as
+
+         __PACKAGE__->has_token( hex => qr/[0-9A-F]+/i, sub { hex $_[1] } );
+
+    *   Investigate how well `from_reader' can cope with buffer splitting
+        across other tokens than simply skippable whitespace
+
+AUTHOR
+    Paul Evans <leonerd@leonerd.org.uk>
+
+The following licence applies to the example scripts in this directory
+----------------------------------------------------------------------
+
+
+The MIT License
+
+Copyright (c) 2011 Paul Evans <leonerd@leonerd.org.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

examples/eval-expr.pl

+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+package ExprParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my ( $self ) = @_;
+
+   # The whole input is a single additive term
+   return $self->parse_term;
+}
+
+sub parse_term
+{
+   my ( $self ) = @_;
+
+   # Running total, seeded with the leftmost factor
+   my $total = $self->parse_factor;
+
+   # Each operator alternative folds one more factor into the total and
+   # yields 1; the final alternative yields 0 to end the loop when no
+   # operator was found
+   my @alternatives = (
+      sub { $self->expect( "+" ); $self->commit; $total += $self->parse_factor; 1 },
+      sub { $self->expect( "-" ); $self->commit; $total -= $self->parse_factor; 1 },
+      sub { 0 },
+   );
+
+   while( $self->any_of( @alternatives ) ) {
+      # all the work happens inside the alternatives
+   }
+
+   return $total;
+}
+
+sub parse_factor
+{
+   my ( $self ) = @_;
+
+   # Running product, seeded with the leftmost atom
+   my $product = $self->parse_atom;
+
+   # Each operator alternative folds one more atom into the product and
+   # yields 1; the final alternative yields 0 to end the loop when no
+   # operator was found
+   my @alternatives = (
+      sub { $self->expect( "*" ); $self->commit; $product *= $self->parse_atom; 1 },
+      sub { $self->expect( "/" ); $self->commit; $product /= $self->parse_atom; 1 },
+      sub { 0 },
+   );
+
+   while( $self->any_of( @alternatives ) ) {
+      # all the work happens inside the alternatives
+   }
+
+   return $product;
+}
+
+sub parse_atom
+{
+   my ( $self ) = @_;
+
+   # An atom is either a parenthesised sub-expression or an integer literal
+   my $bracketed = sub { $self->scope_of( "(", sub { $self->commit; $self->parse }, ")" ) };
+   my $integer   = sub { $self->token_int };
+
+   return $self->any_of( $bracketed, $integer );
+}
+
+# Only run the read-evaluate-print loop when executed directly as a script
+unless( caller ) {
+   my $parser = __PACKAGE__->new;
+
+   while( defined( my $input = <STDIN> ) ) {
+      my $value = eval { $parser->from_string( $input ) };
+
+      # On failure, report the exception and move on to the next line
+      if( $@ ) {
+         print $@ and next;
+      }
+
+      print "$value\n";
+   }
+}
+
+1;

examples/parse-dict.pl

+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+package DictParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my ( $self ) = @_;
+
+   # A value is an integer, a quoted string, or a nested {...} dictionary
+   my @alternatives = (
+      sub { $self->token_int },
+      sub { $self->token_string },
+      sub {
+         $self->scope_of( "{", sub { $self->commit; $self->parse_dict }, "}" );
+      },
+   );
+
+   return $self->any_of( @alternatives );
+}
+
+sub parse_dict
+{
+   my ( $self ) = @_;
+
+   # Entries are accumulated here rather than taken from list_of's result
+   my %dict;
+
+   # One "name: value" entry
+   my $parse_entry = sub {
+      my $name = $self->token_ident;
+
+      $self->expect( ":" );
+
+      $dict{$name} = $self->parse;
+   };
+
+   $self->list_of( ",", $parse_entry );
+
+   return \%dict;
+}
+
+use Data::Dumper;
+
+# Only run the read-parse-dump loop when executed directly as a script
+unless( caller ) {
+   my $parser = __PACKAGE__->new;
+
+   while( defined( my $input = <STDIN> ) ) {
+      my $tree = eval { $parser->from_string( $input ) };
+
+      # On failure, report the exception and move on to the next line
+      if( $@ ) {
+         print $@ and next;
+      }
+
+      print Dumper( $tree );
+   }
+}
+
+1;

examples/parse-pod.pl

+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+package PodParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my ( $self ) = @_;
+
+   # A formatting code: a capital letter, one or more "<", a nested body,
+   # then the same number of ">". Yields a single-key hash of letter => body
+   my $formatting_code = sub {
+      my ( undef, $letter, $open ) = $self->expect( qr/([A-Z])(<+)/ );
+      $self->commit;
+
+      my $close = ">" x length $open;
+      return +{ $letter => $self->scope_of( undef, \&parse, $close ) };
+   };
+
+   # Otherwise, plain text up to the next formatting code
+   my $plain_text = sub { $self->substring_before( qr/[A-Z]</ ) };
+
+   return $self->sequence_of(
+      sub { $self->any_of( $formatting_code, $plain_text ) },
+   );
+}
+
+use Data::Dumper;
+
+# Only run the read-parse-dump loop when executed directly as a script
+unless( caller ) {
+   my $parser = __PACKAGE__->new;
+
+   while( defined( my $input = <STDIN> ) ) {
+      my $tree = eval { $parser->from_string( $input ) };
+
+      # On failure, report the exception and move on to the next line
+      if( $@ ) {
+         print $@ and next;
+      }
+
+      print Dumper( $tree );
+   }
+}
+
+1;

examples/parse-xml.pl

+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+# DO NOT RELY ON THIS AS A REAL XML PARSER!
+
+# It is not intended to be used actually as an XML parser, simply to stand as
+# an example of how you might use Parser::MGC to parse an XML-like syntax
+
+# There are a great many things it doesn't do correctly; it lacks at least the
+# following features:
+#   Entities
+#   Processing instructions
+#   Comments
+#   CDATA
+
+package XmlParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my ( $self ) = @_;
+
+   my $root = $self->parse_node;
+
+   # The document must consist of a single <xml> element; the name is only
+   # inspected once the node is known to be an element
+   unless( $root->kind eq "element" and $root->name eq "xml" ) {
+      die "Expected XML root node";
+   }
+
+   return [ $root->children ];
+}
+
+sub parse_node
+{
+   my ( $self ) = @_;
+
+   # A "node" is either plaintext or an XML element subtree; plaintext is
+   # attempted first
+   my @node_kinds = ( \&parse_plaintext, \&parse_element );
+
+   return $self->any_of( @node_kinds );
+}
+
+sub parse_plaintext
+{
+   my ( $self ) = @_;
+
+   # Everything up to the next tag opener counts as text
+   my $text = $self->substring_before( '<' );
+
+   # An empty run of text is not a node at all
+   length $text or $self->fail( "No plaintext" );
+
+   return XmlParser::Node::Plain->new( $text );
+}
+
+# Parses one element: an opening tag, then (unless the tag was self-closing)
+# a sequence of child nodes followed by the matching closing tag. Returns an
+# XmlParser::Node::Element built from the tag name, its attributes and the
+# child nodes.
+sub parse_element
+{
+   my $self = shift;
+
+   my $tag = $self->parse_tag;
+
+   # Once a whole opening tag has been read this must be an element, so any
+   # later failure should be reported rather than backtracked over
+   $self->commit;
+
+   # A self-closing tag has neither children nor a closing tag
+   return XmlParser::Node::Element->new( $tag->{name}, $tag->{attrs} ) if $tag->{selfclose};
+
+   my $childlist = $self->sequence_of( \&parse_node );
+
+   # The closing tag has to name the element that was opened
+   $self->parse_close_tag->{name} eq $tag->{name}
+      or $self->fail( "Expected $tag->{name} to be closed" );
+
+   return XmlParser::Node::Element->new( $tag->{name}, $tag->{attrs}, @$childlist );
+}
+
+sub parse_tag
+{
+   my ( $self ) = @_;
+
+   $self->expect( '<' );
+   my $name = $self->token_ident;
+
+   # Zero or more name="value" pairs, each as a two-element ARRAY ref
+   my $pairs = $self->sequence_of( \&parse_tag_attr );
+
+   # An optional "/" just before the ">" marks a self-closing tag
+   my $selfclose = $self->maybe_expect( '/' );
+   $self->expect( '>' );
+
+   # Collapse the pairs into a hash; a repeated name keeps its last value
+   my %attrs;
+   $attrs{ $_->[0] } = $_->[1] for @$pairs;
+
+   return {
+      name      => $name,
+      attrs     => \%attrs,
+      selfclose => $selfclose,
+   };
+}
+
+sub parse_close_tag
+{
+   my ( $self ) = @_;
+
+   # A closing tag is "</", the element name, then ">"
+   $self->expect( '</' );
+   my $name = $self->token_ident;
+   $self->expect( '>' );
+
+   return { name => $name };
+}
+
+sub parse_tag_attr
+{
+   my ( $self ) = @_;
+
+   # An attribute is an identifier, "=", then a value
+   my $name = $self->token_ident;
+   $self->expect( '=' );
+
+   return [ $name, $self->parse_tag_attr_value ];
+}
+
+sub parse_tag_attr_value
+{
+   my ( $self ) = @_;
+
+   # TODO: This sucks
+   # Attribute values are read as ordinary quoted string tokens
+   return $self->token_string;
+}
+
+
+use Data::Dumper;
+
+# Only parse standard input when executed directly as a script
+unless( caller ) {
+   my $parser = __PACKAGE__->new;
+
+   print Dumper( scalar $parser->from_file( \*STDIN ) );
+}
+
+
+# Base class: a node is a blessed ARRAY ref of the constructor's arguments
+package XmlParser::Node;
+sub new
+{
+   my ( $class, @fields ) = @_;
+   return bless [ @fields ], $class;
+}
+
+# Plaintext node: element 0 holds the text
+package XmlParser::Node::Plain;
+use base qw( XmlParser::Node );
+sub kind { return "plain" }
+sub text { my ( $self ) = @_; return $self->[0] }
+
+# Element node: name, attributes, then any number of child nodes
+package XmlParser::Node::Element;
+use base qw( XmlParser::Node );
+sub kind     { return "element" }
+sub name     { my ( $self ) = @_; return $self->[0] }
+sub attrs    { my ( $self ) = @_; return $self->[1] }
+sub children { my ( $self ) = @_; return @{$self}[ 2 .. $#{$self} ] }
+
+1;

examples/synopsis.pl

+use strict;
+use warnings;
+
+package LispParser;
+use base qw( Parser::MGC );
+
+use constant pattern_ident => qr{[[:alnum:]+*/._:-]+};
+
+sub parse
+{
+   my ( $self ) = @_;
+
+   # The forms one item may take: an integer, a string, an identifier
+   # (returned as a SCALAR ref) or a parenthesised nested list
+   my @forms = (
+      sub { $self->token_int },
+      sub { $self->token_string },
+      sub { \$self->token_ident },
+      sub { $self->scope_of( "(", \&parse, ")" ) },
+   );
+
+   return $self->sequence_of( sub { $self->any_of( @forms ) } );
+}
+
+use Data::Dumper;
+
+# Only parse the named file when executed directly as a script
+unless( caller ) {
+   my $parser = __PACKAGE__->new;
+   my ( $path ) = @ARGV;
+
+   print Dumper( $parser->from_file( $path ) );
+}
+
+1;

lib/Parser/MGC.pm

+#  You may distribute under the terms of either the GNU General Public License
+#  or the Artistic License (the same terms as Perl itself)
+#
+#  (C) Paul Evans, 2010-2012 -- leonerd@leonerd.org.uk
+
+package Parser::MGC;
+
+use strict;
+use warnings;
+
+our $VERSION = '0.11';
+
+use Carp;
+
+use File::Slurp qw( slurp );
+
+=head1 NAME
+
+C<Parser::MGC> - build simple recursive-descent parsers
+
+=head1 SYNOPSIS
+
+ package My::Grammar::Parser;
+ use base qw( Parser::MGC );
+
+ sub parse
+ {
+    my $self = shift;
+
+    $self->sequence_of( sub {
+       $self->any_of(
+          sub { $self->token_int },
+          sub { $self->token_string },
+          sub { \$self->token_ident },
+          sub { $self->scope_of( "(", \&parse, ")" ) }
+       );
+    } );
+ }
+
+ my $parser = My::Grammar::Parser->new;
+
+ my $tree = $parser->from_file( $ARGV[0] );
+
+ ...
+
+=head1 DESCRIPTION
+
+This base class provides a low-level framework for building recursive-descent
+parsers that consume a given input string from left to right, returning a
+parse structure. It takes its name from the C<m//gc> regexps used to implement
+the token parsing behaviour.
+
+It provides a number of token-parsing methods, which each extract a
+grammatical token from the string. It also provides wrapping methods that can
+be used to build up a possibly-recursive grammar structure, by applying a
+structure around other parts of parsing code. Each method, both token and
+structural, atomically either consumes a prefix of the string and returns its
+result, or fails and consumes nothing. This makes it simple to implement
+grammars that require backtracking.
+
+=cut
+
+=head1 CONSTRUCTOR
+
+=cut
+
+=head2 $parser = Parser::MGC->new( %args )
+
+Returns a new instance of a C<Parser::MGC> object. This must be called on a
+subclass that provides a method of the name given as C<toplevel>, which is
+C<parse> by default.
+
+Takes the following named arguments
+
+=over 8
+
+=item toplevel => STRING
+
+Name of the toplevel method to use to start the parse from. If not supplied,
+will try to use a method called C<parse>.
+
+=item patterns => HASH
+
+Keys in this hash should map to quoted regexp (C<qr//>) references, to
+override the default patterns used to match tokens. See C<PATTERNS> below
+
+=item accept_0o_oct => BOOL
+
+If true, the C<token_int> method will also accept integers with a C<0o> prefix
+as octal.
+
+=back
+
+=cut
+
+=head1 PATTERNS
+
+The following pattern names are recognised. They may be passed to the
+constructor in the C<patterns> hash, or provided as a class method under the
+name C<pattern_I<name>>.
+
+=over 4
+
+=item * ws
+
+Pattern used to skip whitespace between tokens. Defaults to C</[\s\n\t]+/>
+
+=item * comment
+
+Pattern used to skip comments between tokens. Undefined by default.
+
+=item * int
+
+Pattern used to parse an integer by C<token_int>. Defaults to
+C</-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/>. If C<accept_0o_oct> is given, then
+this will be expanded to match C</0o[0-7]+/> as well.
+
+=item * float
+
+Pattern used to parse a floating-point number by C<token_float>. Defaults to
+C</-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i>.
+
+=item * ident
+
+Pattern used to parse an identifier by C<token_ident>. Defaults to
+C</[[:alpha:]_]\w*/>
+
+=item * string_delim
+
+Pattern used to delimit a string by C<token_string>. Defaults to C</["']/>.
+
+=back
+
+=cut
+
+# Names of the token patterns; the default for each is supplied by the
+# correspondingly-named pattern_* constant below
+my @patterns = (
+   'ws',
+   'comment',
+   'int',
+   'float',
+   'ident',
+   'string_delim',
+);
+
+use constant {
+   pattern_ws           => qr/[\s\n\t]+/,
+   pattern_comment      => undef,
+   pattern_int          => qr/-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/,
+   pattern_float        => qr/-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i,
+   pattern_ident        => qr/[[:alpha:]_]\w*/,
+   pattern_string_delim => qr/["']/,
+};
+
+sub new
+{
+   my ( $class, %args ) = @_;
+
+   my $toplevel = $args{toplevel} || "parse";
+
+   # The toplevel method has to be supplied by the subclass
+   croak "Expected to be a subclass that can ->$toplevel"
+      unless $class->can( $toplevel );
+
+   my $self = bless {
+      toplevel    => $toplevel,
+      patterns    => {},
+      scope_level => 0,
+   }, $class;
+
+   # Use the caller's pattern where one was given, otherwise fall back on
+   # the pattern_* method of the same name
+   foreach my $name ( @patterns ) {
+      my $default_method = "pattern_$name";
+      $self->{patterns}{$name} = $args{patterns}{$name} || $self->$default_method;
+   }
+
+   # Optionally extend the integer pattern to recognise a 0o octal prefix
+   if( $args{accept_0o_oct} ) {
+      my $int = $self->{patterns}{int};
+      $self->{patterns}{int} = qr/0o[0-7]+|$int/;
+   }
+
+   return $self;
+}
+
+=head1 METHODS
+
+=cut
+
+=head2 $result = $parser->from_string( $str )
+
+Parse the given literal string and return the result from the toplevel method.
+
+=cut
+
+sub from_string
+{
+   my ( $self, $str ) = @_;
+
+   # Install the input and start from its beginning
+   $self->{str} = $str;
+   pos( $self->{str} ) = 0;
+
+   # Run the method named by the constructor's toplevel setting
+   my $method = $self->{toplevel};
+   my $result = $self->$method;
+
+   # Anything left unparsed is an error
+   $self->fail( "Expected end of input" ) unless $self->at_eos;
+
+   return $result;
+}
+
+=head2 $result = $parser->from_file( $file )
+
+Parse the given file, which may be a pathname in a string, or an opened IO
+handle, and return the result from the toplevel method.
+
+=cut
+
+sub from_file
+{
+   my ( $self, $filename ) = @_;
+
+   # Remember what was asked for, be it a path or a handle
+   $self->{filename} = $filename;
+
+   # Read the entire content as one string, then parse that
+   my $content = slurp( $filename );
+
+   return $self->from_string( $content );
+}
+
+=head2 $result = $parser->from_reader( \&reader )
+
+Parse the input which is read by the C<reader> function. This function will be
+called in scalar context to generate portions of string to parse, being passed
+the C<$parser> object. The function should return C<undef> when it has no more
+string to return.
+
+ $reader->( $parser )
+
+Note that because it is not generally possible to detect exactly when more
+input may be required due to failed regexp parsing, the reader function is
+only invoked during searching for skippable whitespace. This makes it suitable
+for reading lines of a file in the common case where lines are considered as
+skippable whitespace, or for reading lines of input interactively from a
+user. It cannot be used in all cases (for example, reading fixed-size buffers
+from a file) because two successive invocations may split a single token
+across the buffer boundaries, and cause parse failures.
+
+=cut
+
+# Parses input supplied incrementally by the $reader function and returns the
+# result of the toplevel method. The reader is installed only for the
+# duration of this call. Fails if input remains once the toplevel method
+# returns.
+sub from_reader
+{
+   my $self = shift;
+   my ( $reader ) = @_;
+
+   # local-ised so the reader is removed again however this call ends
+   local $self->{reader} = $reader;
+
+   # Start with an empty buffer; skip_ws calls the reader to extend it
+   $self->{str} = "";
+   pos $self->{str} = 0;
+
+   # Honour the constructor's toplevel setting, as from_string does, rather
+   # than always calling ->parse
+   my $toplevel = $self->{toplevel};
+   my $result = $self->$toplevel;
+
+   $self->at_eos or
+      $self->fail( "Expected end of input" );
+
+   return $result;
+}
+
+=head2 $pos = $parser->pos
+
+Returns the current parse position, as a character offset from the beginning
+of the file or string.
+
+=cut
+
+sub pos
+{
+   my ( $self ) = @_;
+
+   # The parse position is simply the regexp match position of the input
+   return pos( $self->{str} );
+}
+
+=head2 ( $lineno, $col, $text ) = $parser->where
+
+Returns the current parse position, as a line and column number, and
+the entire current line of text. The first line is numbered 1, and the first
+column is numbered 0.
+
+=cut
+
+sub where
+{
+   my ( $self, $pos ) = @_;
+
+   # Default to the current parse position
+   $pos = pos( $self->{str} ) unless defined $pos;
+
+   my $text    = $self->{str};
+   my $newline = qr/^[\r\n]$/;
+
+   # Find the start of the line. A position sitting on a linefeed character
+   # is taken to belong to the line it terminates
+   my $line_start = $pos;
+   if( $line_start > 0 and substr( $text, $line_start, 1 ) =~ $newline ) {
+      $line_start--;
+   }
+   while( $line_start > 0 and substr( $text, $line_start - 1, 1 ) !~ $newline ) {
+      $line_start--;
+   }
+
+   # Find the end of the line, not including its linefeed
+   my $line_end = $pos;
+   while( $line_end < length( $text ) and substr( $text, $line_end, 1 ) !~ $newline ) {
+      $line_end++;
+   }
+
+   my $line   = substr( $text, $line_start, $line_end - $line_start );
+   my $column = $pos - $line_start;
+
+   # Lines are numbered from 1, counting the "\n"s before the position
+   my $newlines_before = () = substr( $text, 0, $pos ) =~ m/\n/g;
+
+   return ( $newlines_before + 1, $column, $line );
+}
+
+=head2 $parser->fail( $message )
+
+=head2 $parser->fail_from( $pos, $message )
+
+Aborts the current parse attempt with the given message string. The failure
+message will include the line and column position, and the line of input that
+failed at the current parse position, or a position earlier obtained using the
+C<pos> method.
+
+=cut
+
+sub fail
+{
+   my ( $self, $message ) = @_;
+
+   # A failure at the current position
+   return $self->fail_from( $self->pos, $message );
+}
+
+sub fail_from
+{
+   my ( $self, $pos, $message ) = @_;
+
+   # Throw a failure object carrying the message, this parser and the
+   # position
+   die Parser::MGC::Failure->new( $message, $self, $pos );
+}
+
+=head2 $eos = $parser->at_eos
+
+Returns true if the current parse position, after skipping any whitespace, is
+at the end of the input string or at the end of the current scope.
+
+=cut
+
+sub at_eos
+{
+   my ( $self ) = @_;
+
+   # Remember pos() so that skipping whitespace here does not disturb the
+   # substring_before method
+   my $saved = pos $self->{str};
+
+   $self->skip_ws;
+
+   # End of string always counts; failing that, so does sitting at the
+   # end-of-scope pattern when one is in force
+   my $at_eos =
+      ( pos( $self->{str} ) >= length $self->{str} ) ? 1 :
+      ( defined $self->{endofscope} )                ? $self->{str} =~ m/\G$self->{endofscope}/ :
+                                                       0;
+
+   pos( $self->{str} ) = $saved;
+
+   return $at_eos;
+}
+
+=head2 $level = $parser->scope_level
+
+Returns the number of nested C<scope_of> calls that have been made.
+
+=cut
+
+sub scope_level
+{
+   my ( $self ) = @_;
+
+   # Plain accessor for the stored nesting depth
+   return $self->{scope_level};
+}
+
+=head1 STRUCTURE-FORMING METHODS
+
+The following methods may be used to build a grammatical structure out of the
+defined basic token-parsing methods. Each takes at least one code reference,
+which will be passed the actual C<$parser> object as its first argument.
+
+=cut
+
+=head2 $ret = $parser->maybe( $code )
+
+Attempts to execute the given C<$code> reference in scalar context, and
+returns what it returned. If the code fails to parse by calling the C<fail>
+method then none of the input string will be consumed; the current parsing
+position will be restored. C<undef> will be returned in this case.
+
+This may be considered to be similar to the C<?> regexp qualifier.
+
+ sub parse_declaration
+ {
+    my $self = shift;
+
+    [ $self->parse_type,
+      $self->token_ident,
+      $self->maybe( sub {
+         $self->expect( "=" );
+         $self->parse_expression
+      } ),
+    ];
+ }
+
+=cut
+
+sub maybe
+{
+   my ( $self, $code ) = @_;
+
+   # Where to rewind to should the attempt fail
+   my $start = pos $self->{str};
+
+   # ->commit within $code invokes this to disable the backtracking
+   my $committed = 0;
+   local $self->{committer} = sub { $committed++ };
+
+   my $ret;
+   my $ok = eval { $ret = $code->( $self ); 1 };
+   return $ret if $ok;
+
+   my $err = $@;
+
+   pos( $self->{str} ) = $start;
+
+   # Only an uncommitted parse failure is absorbed; anything else is
+   # rethrown
+   if( $committed or not eval { $err->isa( "Parser::MGC::Failure" ) } ) {
+      die $err;
+   }
+
+   return undef;
+}
+
+=head2 $ret = $parser->scope_of( $start, $code, $stop )
+
+Expects to find the C<$start> pattern, then attempts to execute the given
+C<$code> reference, then expects to find the C<$stop> pattern. Returns
+whatever the code reference returned.
+
+While the code is being executed, the C<$stop> pattern will be used by the
+token parsing methods as an end-of-scope marker; causing them to raise a
+failure if called at the end of a scope.
+
+ sub parse_block
+ {
+    my $self = shift;
+
+    $self->scope_of( "{", sub { $self->parse_statements }, "}" );
+ }
+
+If the C<$start> pattern is undefined, it is presumed the caller has already
+checked for this. This is useful when the stop pattern needs to be calculated
+based on the start pattern.
+
+ sub parse_bracketed
+ {
+    my $self = shift;
+
+    my $delim = $self->expect( qr/[\(\[\<\{]/ );
+    $delim =~ tr/([<{/)]>}/;
+
+    $self->scope_of( undef, sub { $self->parse_body }, $delim );
+ }
+
+=cut
+
+sub scope_of
+{
+   my ( $self, $start, $code, $stop ) = @_;
+
+   # A plain string stop marker is matched literally
+   $stop = qr/\Q$stop/ unless ref $stop;
+
+   # An undefined start means the caller has already consumed it
+   if( defined $start ) {
+      $self->expect( $start );
+   }
+
+   # Both are local-ised, so the enclosing scope's values come back when
+   # this call ends, however it ends
+   local $self->{endofscope}  = $stop;
+   local $self->{scope_level} = $self->{scope_level} + 1;
+
+   my $ret = $code->( $self );
+
+   $self->expect( $stop );
+
+   return $ret;
+}
+
+=head2 $ret = $parser->list_of( $sep, $code )
+
+Expects to find a list of instances of something parsed by C<$code>,
+separated by the C<$sep> pattern. Returns an ARRAY ref containing a list of
+the return values from the C<$code>.
+
+This method does not consider it an error if the returned list is empty; that
+is, that the scope ended before any item instances were parsed from it.
+
+ sub parse_numbers
+ {
+    my $self = shift;
+
+    $self->list_of( ",", sub { $self->token_int } );
+ }
+
+=cut
+
+sub list_of
+{
+   my ( $self, $sep, $code ) = @_;
+
+   # A plain string separator is matched literally
+   if( defined $sep and not ref $sep ) {
+      $sep = qr/\Q$sep/;
+   }
+
+   # ->commit within $code invokes this; the flag is reset for every item
+   my $committed;
+   local $self->{committer} = sub { $committed++ };
+
+   my @items;
+
+   ITEM: until( $self->at_eos ) {
+      $committed = 0;
+      my $start = pos $self->{str};
+
+      my $parsed = eval { push @items, $code->( $self ); 1 };
+
+      if( !$parsed ) {
+         my $err = $@;
+
+         # Rewind over the failed item; an uncommitted parse failure just
+         # ends the list, anything else is rethrown
+         pos( $self->{str} ) = $start;
+         die $err if $committed or not eval { $err->isa( "Parser::MGC::Failure" ) };
+         last ITEM;
+      }
+
+      # Without a separator, items simply follow one another
+      next ITEM unless defined $sep;
+
+      # Otherwise another item is only attempted after a separator
+      $self->skip_ws;
+      last ITEM unless $self->{str} =~ m/\G$sep/gc;
+   }
+
+   return \@items;
+}
+
+=head2 $ret = $parser->sequence_of( $code )
+
+A shortcut for calling C<list_of> with an empty string as separator; expects
+to find at least one instance of something parsed by C<$code>, separated only
+by skipped whitespace.
+
+This may be considered to be similar to the C<+> or C<*> regexp qualifiers.
+
+ sub parse_statements
+ {
+    my $self = shift;
+
+    $self->sequence_of( sub { $self->parse_statement } );
+ }
+
+=cut
+
+sub sequence_of
+{
+   my ( $self, $code ) = @_;
+
+   # A list with no separator between its items
+   return $self->list_of( undef, $code );
+}
+
+=head2 $ret = $parser->any_of( @codes )
+
+Expects that one of the given code references can parse something from the
+input, returning what it returned. Each code reference may indicate a failure
+to parse by calling the C<fail> method.
+
+This may be considered to be similar to the C<|> regexp operator for forming
+alternations of possible parse trees.
+
+ sub parse_statement
+ {
+    my $self = shift;
+
+    $self->any_of(
+       sub { $self->parse_declaration; $self->expect(";") },
+       sub { $self->parse_expression; $self->expect(";") },
+       sub { $self->parse_block },
+    );
+ }
+
+Note: This method used to be called C<one_of>, but was renamed for clarity.
+Currently this method is provided also as an alias by the old name. Code
+using the old name should be rewritten to C<any_of> instead, as this 
+backward-compatibility alias may be removed in a later version.
+
+=cut
+
+sub any_of
+{
+   my ( $self, @alternatives ) = @_;
+
+   foreach my $alternative ( @alternatives ) {
+      # Where to rewind to should this alternative fail
+      my $start = pos $self->{str};
+
+      # ->commit within the alternative invokes this to stop any further
+      # alternatives being attempted
+      my $committed = 0;
+      local $self->{committer} = sub { $committed++ };
+
+      my $ret;
+      my $ok = eval { $ret = $alternative->( $self ); 1 };
+      return $ret if $ok;
+
+      my $err = $@;
+
+      pos( $self->{str} ) = $start;
+
+      # Only an uncommitted parse failure moves on to the next
+      # alternative; anything else is rethrown
+      if( $committed or not eval { $err->isa( "Parser::MGC::Failure" ) } ) {
+         die $err;
+      }
+   }
+
+   $self->fail( "Found nothing parseable" );
+}
+
+*one_of = \&any_of;
+
+=head2 $parser->commit
+
+Calling this method will cancel the backtracking behaviour of the innermost
+C<maybe>, C<list_of>, C<sequence_of>, or C<any_of> structure forming method.
+That is, if later code then calls C<fail>, the exception will be propagated
+out of C<maybe>, and no further code blocks will be attempted by C<any_of>.
+
+Typically this will be called once the grammatical structure of an
+alternation has been determined, ensuring that any further failures are raised
+as real exceptions, rather than by attempting other alternatives.
+
+ sub parse_statement
+ {
+    my $self = shift;
+
+    $self->any_of(
+       ...
+       sub {
+          $self->scope_of( "{",
+             sub { $self->commit; $self->parse_statements; },
+          "}" ),
+       },
+    );
+ }
+
+=cut
+
+sub commit
+{
+   my ( $self ) = @_;
+
+   # A committer callback is only present while a backtracking structure
+   # (such as any_of) is running; without one there is nothing to commit.
+   my $committer = $self->{committer}
+      or croak "Cannot commit except within a backtrack-able structure";
+
+   $committer->();
+}
+
+=head1 TOKEN PARSING METHODS
+
+The following methods attempt to consume some part of the input string, to be
+used as part of the parsing process.
+
+=cut
+
+sub skip_ws
+{
+   my ( $self ) = @_;
+
+   my $ws_re      = $self->{patterns}{ws};
+   my $comment_re = $self->{patterns}{comment};
+
+   while( 1 ) {
+      # Consume alternating runs of whitespace and (if a comment pattern is
+      # configured) comments until neither matches at the current position.
+      while( 1 ) {
+         next if $self->{str} =~ m/\G$ws_re/gc;
+         next if $comment_re and $self->{str} =~ m/\G$comment_re/gc;
+         last;
+      }
+
+      # Still some unread input in the buffer: finished.
+      return if pos( $self->{str} ) < length $self->{str};
+
+      # Buffer exhausted; ask the reader callback, if any, for more input.
+      my $reader = $self->{reader} or return;
+
+      my $more = $reader->( $self );
+      unless( defined $more ) {
+         # The reader has nothing further to give, so drop it.
+         undef $self->{reader};
+         return;
+      }
+
+      # Appending to the string resets its pos(), so save and restore it
+      # around the append before going round again.
+      my $saved = pos( $self->{str} );
+      $self->{str} .= $more;
+      pos( $self->{str} ) = $saved;
+   }
+}
+
+=head2 $str = $parser->expect( $literal )
+
+=head2 $str = $parser->expect( qr/pattern/ )
+
+=head2 @groups = $parser->expect( qr/pattern/ )
+
+Expects to find a literal string or regexp pattern match, and consumes it.
+In scalar context, this method returns the string that was captured. In list
+context it returns the matching substring and the contents of any subgroups
+contained in the pattern.
+
+This method will raise a parse error (by calling C<fail>) if the regexp fails
+to match. Note that if the pattern could match an empty string (such as for
+example C<qr/\d*/>), the pattern will always match, even if it has to match an
+empty string. A zero-width match is not treated as a failure.
+
+=head2 $str = $parser->maybe_expect( ... )
+
+=head2 @groups = $parser->maybe_expect( ... )
+
+A convenient shortcut equivalent to calling C<expect> within C<maybe>, but
+implemented more efficiently, avoiding the exception-handling set up by
+C<maybe>. Returns C<undef> or an empty list if the match fails.
+
+=cut
+
+sub maybe_expect
+{
+   my $self = shift;
+   my ( $expect ) = @_;
+
+   # A plain string is matched literally; only a reference (a qr// object)
+   # is used as a pattern.
+   ref $expect or $expect = qr/\Q$expect/;
+
+   $self->skip_ws;
+
+   # On a failed match the position is left alone (/c) and nothing is
+   # returned: undef in scalar context, an empty list in list context.
+   $self->{str} =~ m/\G$expect/gc or return;
+
+   return substr( $self->{str}, $-[0], $+[0]-$-[0] ) if !wantarray;
+
+   # One element per group in the pattern, the whole match first. A group
+   # that took no part in the match (e.g. the untaken side of an
+   # alternation) has undefined offsets; yield undef for it rather than
+   # feeding undef to substr, which would warn and return "".
+   return map {
+      defined $-[$_] ? substr( $self->{str}, $-[$_], $+[$_]-$-[$_] ) : undef
+   } 0 .. $#+;
+}
+
+sub expect
+{
+   my ( $self, $expect ) = @_;
+
+   # Literal strings are quoted into a pattern up front so that the failure
+   # message below is built from the same value that gets matched.
+   $expect = qr/\Q$expect/ unless ref $expect;
+
+   unless( wantarray ) {
+      # Scalar (or void) context: maybe_expect signals "no match" by
+      # returning undef.
+      my $match = $self->maybe_expect( $expect );
+      $self->fail( "Expected $expect" ) unless defined $match;
+      return $match;
+   }
+
+   # List context: "no match" is signalled by an empty list.
+   my @groups = $self->maybe_expect( $expect );
+   $self->fail( "Expected $expect" ) unless @groups;
+   return @groups;
+}
+
+=head2 $str = $parser->substring_before( $literal )
+
+=head2 $str = $parser->substring_before( qr/pattern/ )
+
+Expects to possibly find a literal string or regexp pattern match. If it finds
+such, consume all the input text before but excluding this match, and return
+it. If it fails to find a match before the end of the current scope, consumes
+all the input text until the end of scope and return it.
+
+This method does not consume the part of input that matches, only the text
+before it. It is not considered a failure if the substring before this match
+is empty. If a non-empty match is required, use the C<fail> method:
+
+ sub token_nonempty_part
+ {
+    my $self = shift;
+
+    my $str = $self->substring_before( "," );
+    length $str or $self->fail( "Expected a string fragment before ," );
+
+    return $str;
+ }
+
+Note that unlike most of the other token parsing methods, this method does not
+consume either leading or trailing whitespace around the substring. It is
+expected that this method would be used as part of a parser to read quoted
+strings, or similar cases where whitespace should be preserved.
+
+=cut
+
+sub substring_before
+{
+   my ( $self, $expect ) = @_;
+
+   $expect = qr/\Q$expect/ unless ref $expect;
+
+   # Inside a scope, the scope's end pattern also terminates the substring.
+   my $scope_end = $self->{endofscope};
+   my $stop_re = defined $scope_end ? qr/$expect|$scope_end/ : $expect;
+
+   # Deliberately no skip_ws here: leading whitespace belongs to the result.
+
+   my $from = pos $self->{str};
+
+   # Stop just before the terminator if one is found, otherwise take
+   # everything up to the end of the string. The terminator itself is not
+   # consumed.
+   my $upto = ( $self->{str} =~ m/\G(?s:.*?)($stop_re)/ )
+      ? $-[1]
+      : length $self->{str};
+
+   pos( $self->{str} ) = $upto;
+   return substr( $self->{str}, $from, $upto - $from );
+}
+
+=head2 $val = $parser->generic_token( $name, $re, $convert )
+
+Expects to find a token matching the precompiled regexp C<$re>. If provided,
+the C<$convert> CODE reference can be used to convert the string into a more
+convenient form. C<$name> is used in the failure message if the pattern fails
+to match.
+
+If provided, the C<$convert> function will be passed the parser and the
+matching substring; the value it returns is returned from C<generic_token>.
+
+ $convert->( $parser, $substr )
+
+If not provided, the substring will be returned as it stands.
+
+This method is mostly provided for subclasses to define their own token types.
+For example:
+
+ sub token_hex
+ {
+    my $self = shift;
+    $self->generic_token( hex => qr/[0-9A-F]{2}h/, sub { hex $_[1] } );
+ }
+
+=cut
+
+sub generic_token
+{
+   my ( $self, $name, $re, $convert ) = @_;
+
+   # Fail straight away if at_eos reports there is nothing left to match.
+   $self->at_eos and $self->fail( "Expected $name" );
+
+   $self->skip_ws;
+   $self->{str} =~ m/\G$re/gc or
+      $self->fail( "Expected $name" );
+
+   # Extract the matched text using the offsets of the whole match.
+   my ( $from, $to ) = ( $-[0], $+[0] );
+   my $text = substr( $self->{str}, $from, $to - $from );
+
+   # Without a converter the raw text is the token's value.
+   return $text unless $convert;
+   return $convert->( $self, $text );
+}
+
+sub _token_generic
+{
+   my ( $self, %args ) = @_;
+
+   # A true "pattern" argument names an entry in the parser's pattern
+   # table; otherwise the regexp is taken directly from "re".
+   my $re;
+   if( $args{pattern} ) {
+      $re = $self->{patterns}{ $args{pattern} };
+   }
+   else {
+      $re = $args{re};
+   }
+
+   return $self->generic_token( $args{name}, $re, $args{convert} );
+}
+
+=head2 $int = $parser->token_int
+
+Expects to find an integer in decimal, octal or hexadecimal notation, and
+consumes it. Negative integers, preceded by C<->, are also recognised.
+
+=cut
+
+sub token_int
+{
+   my ( $self ) = @_;
+
+   # Turns the matched text into a number. The first argument (the parser)
+   # is not needed.
+   my $to_number = sub {
+      my ( undef, $digits ) = @_;
+
+      # Strip the sign so the base prefix is at the start of the string.
+      my $negative = ( $digits =~ s/^-// );
+
+      # Rewrite an "0o" prefix to a plain leading zero, which oct() reads
+      # as octal.
+      $digits =~ s/^0o/0/;
+
+      # Anything with a leading zero is left to oct() to decode; the rest is
+      # used as a decimal number.
+      my $magnitude = ( $digits =~ m/^0/ ) ? oct( $digits ) : $digits;
+
+      return ( $negative ? -1 : 1 ) * $magnitude;
+   };
+
+   $self->_token_generic(
+      name    => "int",
+      pattern => "int",
+      convert => $to_number,
+   );
+}
+
+=head2 $float = $parser->token_float
+
+Expects to find a number expressed in floating-point notation; a sequence of
+digits possibly prefixed by C<->, possibly containing a decimal point,
+possibly followed by an exponent specified by C<e> followed by an integer. The
+numerical value is then returned.
+
+=cut
+
+sub token_float
+{
+   my ( $self ) = @_;
+
+   # Adding zero forces the matched text to be evaluated as a number.
+   my $numify = sub {
+      my ( undef, $text ) = @_;
+      return $text + 0;
+   };
+
+   $self->_token_generic(
+      name    => "float",
+      pattern => "float",
+      convert => $numify,
+   );
+}
+
+=head2 $number = $parser->token_number