Commits

Mitsuhiro Nakamura  committed 8871919

version 17-14

  • Participants
  • Parent commits 15ee2c7
  • Tags v17-14

Comments (0)

Files changed (162)

 # Make file for the pgn-extract program.
 #    Program: pgn-extract: a Portable Game Notation (PGN) extractor.
-#    Copyright (C) 1994-2005 David Barnes
+#    Copyright (C) 1994-2013 David Barnes
 #    This program is free software; you can redistribute it and/or modify
 #    it under the terms of the GNU General Public License as published by
 #    the Free Software Foundation; either version 1, or (at your option)
 #    http://www.cs.kent.ac.uk/people/staff/djb/
 
 OBJS=grammar.o lex.o map.o decode.o moves.o lists.o apply.o output.o eco.o\
-	lines.o end.o main.o hashing.o argsfile.o mymalloc.o
+	lines.o end.o main.o hashing.o argsfile.o mymalloc.o fenmatcher.o
 DEBUGINFO=-g
 
 # These flags are particularly severe on checking warnings.
 # Linux users might need to add -D__linux__ to these in order to
 # use strcasecmp instead of strcmpi (cf output.c)
 
-# Mac OS X users might need to add  -D__unix__ to CFLAGS
+# Mac OS X users might need to add -D__unix__ to CFLAGS
 # and use CC=cc
 
 CFLAGS+=-c -pedantic -Wall -Wshadow -Wformat -Wpointer-arith \
 	-Wstrict-prototypes -Wmissing-prototypes -Wwrite-strings $(DEBUGINFO)\
-	-I/usr/local/lib/ansi-include
+	-I/usr/local/lib/ansi-include \
+	-O3
 CC=gcc
 
 # AIX 3.2 Users might like to use these alternatives for CFLAGS and CC.
 	$(CC) $(CFLAGS) mymalloc.c
 
 apply.o :  apply.c defs.h lex.h grammar.h typedef.h map.h bool.h apply.h taglist.h\
-	   eco.h decode.h moves.h hashing.h mymalloc.h output.h
+	   eco.h decode.h moves.h hashing.h mymalloc.h output.h fenmatcher.h
 	$(CC) $(CFLAGS) apply.c
 
 argsfile.o : argsfile.c argsfile.h bool.h defs.h typedef.h lines.h \
 		taglist.h tokens.h lex.h moves.h eco.h apply.h output.h \
-		lists.h
+		lists.h mymalloc.h
 	$(CC) $(CFLAGS) argsfile.c
 
 decode.o : decode.c defs.h typedef.h taglist.h lex.h bool.h decode.h lists.h \
            mymalloc.h
 	$(CC) $(CFLAGS) eco.c
 
-end.o : end.c end.h bool.h defs.h typedef.h lines.h tokens.h lex.h mymalloc.h
+end.o : end.c end.h bool.h defs.h typedef.h lines.h tokens.h lex.h mymalloc.h \
+        apply.h grammar.h
 	$(CC) $(CFLAGS) end.c
 
 grammar.o : grammar.c bool.h defs.h typedef.h lex.h taglist.h map.h lists.h\
 	$(CC) $(CFLAGS) map.c
 
 moves.o :  moves.c defs.h typedef.h lex.h bool.h map.h lists.h moves.h apply.h\
-	   lines.h taglist.h mymalloc.h
+	   lines.h taglist.h mymalloc.h fenmatcher.h
 	$(CC) $(CFLAGS) moves.c
 
+fenmatcher.o : fenmatcher.c apply.h bool.h defs.h fenmatcher.h mymalloc.h typedef.h
+	$(CC) $(CFLAGS) fenmatcher.c
+
 output.o :  output.c output.h taglist.h bool.h typedef.h defs.h lex.h grammar.h\
 	    apply.h mymalloc.h
 	$(CC) $(CFLAGS) output.c
 
+
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 #include "eco.h"
 #include "decode.h"
 #include "hashing.h"
+#include "fenmatcher.h"
 
 /* Define a positional search depth that should look at the
  * full length of a game.  This is used in play_moves().
 #define DEFAULT_POSITIONAL_DEPTH 300
 
         /* Prototypes of functions limited to this file. */
-static Boolean position_matches(HashCode current_hash_value);
+static Boolean position_matches(const Board *board);
 static Boolean play_moves(Game *game_details, Board *board, Move *moves,
-                unsigned max_depth);
+                unsigned max_depth, Boolean check_move_validity);
 static Boolean apply_variations(const Game *game_details,const Board *board,
-                                Variation *variation);
-static Boolean rewrite_variations(const Board *board,Variation *variation);
-static Boolean rewrite_moves(Board *board,Move *moves);
+                                Variation *variation, Boolean check_move_validity);
+static Boolean rewrite_variations(const Board *board,Variation *variation, Boolean null_move_found);
+static Boolean rewrite_moves(Board *board,Move *moves, Boolean null_move_found);
 static void append_evaluation(Move *move_details, const Board *board);
+static void append_FEN_comment(Move *move_details, const Board *board);
 static double evaluate(const Board *board);
 static double shannonEvaluation(const Board *board);
 
     short c = ColConvert(col);
 
     Piece coloured_piece = board->board[r][c];
-    switch(coloured_piece){
+    switch((int) coloured_piece){
         case W(PAWN):
         case W(KNIGHT):
         case W(BISHOP):
         Piece piece_to_move = move_details->piece_to_move;
 
         if(GlobalState.output_format == EPD){
-            move_details->epd = (char *)malloc(FEN_SPACE);
+            move_details->epd = (char *) MallocOrDie(FEN_SPACE);
             build_basic_EPD_string(board,move_details->epd);
         }
 
-        make_move(move_details->from_col,move_details->from_rank,
-                        move_details->to_col,move_details->to_rank,
-                        piece_to_move,colour,board);
+	if(move_details->class != NULL_MOVE) {
+	    make_move(move_details->from_col,move_details->from_rank,
+			    move_details->to_col,move_details->to_rank,
+			    piece_to_move,colour,board);
+	}
         /* See if there are any subsidiary actions. */
         switch(move_details->class){
             case PAWN_MOVE:
                             move_details->to_rank,
                             piece_to_move,colour,board);
             break;
+	case NULL_MOVE:
+	    /* Nothing more to do. */
+	    break;
         case UNKNOWN_MOVE:
         default:
             Ok = FALSE;
     return Ok;
 }
 
-
         /* Play out the moves on the given board.
          * game_details is updated with the final_ and cumulative_ hash
          * values.
+	 * Check move validity unless a NULL_MOVE has been found in this
+	 * variation.
          */
 static Boolean
-play_moves(Game *game_details, Board *board, Move *moves, unsigned max_depth)
+play_moves(Game *game_details, Board *board, Move *moves, unsigned max_depth,
+           Boolean check_move_validity)
 {   Boolean game_ok = TRUE;
     /* Force a match if we aren't looking for positional variations. */
     Boolean game_matches = GlobalState.positional_variations?FALSE:TRUE;
      * from a FEN string, rather than being the normal starting
      * position.
      */
-    if(!game_matches && position_matches(board->hash_value)){
-        game_matches = TRUE;
+    if(!game_matches && position_matches(board)){
+	game_matches = TRUE;
+        if(GlobalState.add_position_match_comments) {
+	    CommentList *comment = create_match_comment(next_move);
+	    comment->next = game_details->prefix_comment;
+	    game_details->prefix_comment = comment;
+        }
     }
     /* Keep going while the game is ok, and we have some more
      * moves and we haven't exceeded the search depth without finding
             /* See if there are any variations associated with this move. */
             if((next_move->Variants != NULL) && GlobalState.keep_variations){
                 game_matches |= apply_variations(game_details,board,
-                                            next_move->Variants);
+						 next_move->Variants,
+						 check_move_validity);
             }
             /* Now try the main move. */
-            if(apply_move(board->to_move,next_move,board)){
-               /* Don't try for a positional match if we already have one. */
-               if(!game_matches && position_matches(board->hash_value)){
-                   game_matches = TRUE;
-               }
-               /* Combine this hash value with the cumulative one. */
-               game_details->cumulative_hash_value += board->hash_value;
-               board->to_move = OPPOSITE_COLOUR(board->to_move);
-               if(board->to_move == WHITE){
-                   board->move_number++;
-               }
-               if(GlobalState.add_ECO && !GlobalState.parsing_ECO_file){
-                   int half_moves = half_moves_played(board);
-                   EcoLog *entry = eco_matches(
-                           board->hash_value,
-                           game_details->cumulative_hash_value,
-                           half_moves);
-                   if(entry != NULL){
-                       /* Consider keeping the match.
-                        * Try to avoid spurious matches which become
-                        * more likely with larger ECO files and
-                        * the longer a game goes on.
-                        * Try to mitigate this partly by preferring
-                        * an ECO line of exactly the same length as
-                        * the current game line.
-                        */
-                       if(eco_match == NULL){
-                           eco_match = entry;
-                       }
-                       else {
-                           /* This logic prefers a longer match
-                            * to a shorter, irrespective of whether
-                            * either match is exact or not.
-                            * This logic was followed in versions
-                            * up to and including v13.8.
-                            */
-                           eco_match = entry;
-                       }
-                   }
-               }
-               next_move = next_move->next;
+	    if(next_move->class == NULL_MOVE) {
+	        /* We might not be able to check the validity of
+		 * subsequent moves.
+		 */
+#if 0
+		check_move_validity = FALSE;
+#endif
+	    }
+            if(check_move_validity) {
+		if(apply_move(board->to_move,next_move,board)){
+		   /* Don't try for a positional match if we already have one. */
+		   if(!game_matches && position_matches(board)){
+		       game_matches = TRUE;
+		       if(GlobalState.add_position_match_comments) {
+			   CommentList *comment = create_match_comment(next_move);
+			   append_comments_to_move(next_move, comment);
+		       }
+		   }
+		   /* Combine this hash value with the cumulative one. */
+		   game_details->cumulative_hash_value += board->hash_value;
+		   if(GlobalState.fuzzy_match_duplicates) {
+			int plies = 2 * board->move_number - 1;
+			/* Check who has just moved. */
+			if(board->to_move == BLACK) {
+			    plies++;
+			}
+			/* Consider remembering this hash value for fuzzy matches. */
+			if(GlobalState.fuzzy_match_depth == plies) {
+			    /* Remember it. */
+			    game_details->fuzzy_duplicate_hash = board->hash_value;
+			}
+		   }
+		   board->to_move = OPPOSITE_COLOUR(board->to_move);
+		   if(board->to_move == WHITE){
+		       board->move_number++;
+		   }
+		   if(GlobalState.add_ECO && !GlobalState.parsing_ECO_file){
+		       int half_moves = half_moves_played(board);
+		       EcoLog *entry = eco_matches(
+			       board->hash_value,
+			       game_details->cumulative_hash_value,
+			       half_moves);
+		       if(entry != NULL){
+			   /* Consider keeping the match.
+			    * Could try to avoid spurious matches which become
+			    * more likely with larger ECO files and
+			    * the longer a game goes on.
+			    * Could be mitigated partly by preferring
+			    * an ECO line of exactly the same length as
+			    * the current game line.
+			    * Not currently implemented.
+			    */
+			   if(eco_match == NULL){
+			       /* We don't have one yet. */
+			       eco_match = entry;
+			   }
+			   else {
+			       /* Keep it anyway.
+			        * This logic always prefers a longer match
+				* to a shorter, irrespective of whether
+				* either match is exact or not.
+				* This logic was followed in versions
+				* up to and including v13.8.
+				*/
+			       eco_match = entry;
+			   }
+		       }
+		   }
+		   next_move = next_move->next;
+		}
+		else{
+		    print_error_context(GlobalState.logfile);
+		    fprintf(GlobalState.logfile,
+				    "Failed to make move %u%s %s in the game:\n",
+				    board->move_number,
+				    (board->to_move == WHITE)?".":"...",
+				    next_move->move);
+		    print_board(board,GlobalState.logfile);
+		    report_details(GlobalState.logfile);
+		    game_ok = FALSE;
+		}
             }
-            else{
-                print_error_context(GlobalState.logfile);
-                fprintf(GlobalState.logfile,
-                                "Failed to make move %u%s %s in the game:\n",
-                                board->move_number,
-                                (board->to_move == WHITE)?".":"...",
-                                next_move->move);
-                print_board(board,GlobalState.logfile);
-                report_details(GlobalState.logfile);
-                game_ok = FALSE;
-            }
+	    else {
+	        /* Go through the motions as if the move were checked. */
+	       board->to_move = OPPOSITE_COLOUR(board->to_move);
+	       if(board->to_move == WHITE){
+		   board->move_number++;
+	       }
+	       next_move = next_move->next;
+	    }
         }
         else{
             /* An empty move. */
 }
 
         /* Play out a variation.
-          * Return TRUE if the variation matches a position that
+	 * Check move validity unless a NULL_MOVE has been found in this
+	 * variation.
+         * Return TRUE if the variation matches a position that
          * we are looking for.
          */
 static Boolean
-apply_variations(const Game *game_details,const Board *board,Variation *variation)
+apply_variations(const Game *game_details,const Board *board,Variation *variation,
+                 Boolean check_move_validity)
 {   /* Force a match if we aren't looking for positional variations. */
     Boolean variation_matches = GlobalState.positional_variations?FALSE:TRUE;
     /* Allocate space for the copies.
          * later matches.
          */
         variation_matches |= play_moves(copy_game,copy_board,variation->moves,
-                                        DEFAULT_POSITIONAL_DEPTH);
+                                        DEFAULT_POSITIONAL_DEPTH,
+					check_move_validity);
         variation = variation->next;
     }
     (void) free((void *)copy_game);
 
         /* game_details contains a complete move score.
          * Try to apply each move on a new board.
-         * Store in number_or_moves the final value of
-         * board->move_number.
+         * Store in plycount the number of plies played.
          * Return TRUE if the game matches a variation that we are
          * looking for.
          */
 Boolean
-apply_move_list(Game *game_details,unsigned *number_of_moves)
+apply_move_list(Game *game_details,unsigned *plycount)
 {   Move *moves = game_details->moves;
     Board *board = new_game_board(game_details->tags[FEN_TAG]);
     Boolean game_matches;
     /* Start off the cumulative hash value. */
     game_details->cumulative_hash_value = 0;
 
-    /* Play through the moves and see if we have a match. */
-    game_matches = play_moves(game_details,board,moves,max_depth);
+    /* Play through the moves and see if we have a match.
+     * Check move validity.
+     */
+    game_matches = play_moves(game_details,board,moves,max_depth,TRUE);
 
     game_details->moves_checked = TRUE;
 
     /* Record how long the game was. */
     if(board->to_move == BLACK){
-        *number_of_moves = board->move_number;
+        *plycount = 2 * board->move_number - 1;
     }
     else{
         /* This move number hasn't been played. */
-        *number_of_moves = board->move_number-1;
+        *plycount = 2 * (board->move_number - 1);
     }
 
     if(game_matches) {
             case QUEENSIDE_CASTLE:
                 /* No move list to prepare. */
                 break;
+	    case NULL_MOVE:
+                /* No move list to prepare. */
+                break;
             case UNKNOWN_MOVE:
             default:
                 fprintf(GlobalState.logfile,
                                                 move_list,board);
         }
         if((move_list == NULL) && (class != KINGSIDE_CASTLE) &&
-                        (class != QUEENSIDE_CASTLE)){
+                        (class != QUEENSIDE_CASTLE) && (class != NULL_MOVE)){
             Ok = FALSE;
         }
         /* We should now have enough information in move_details to compose a
                 case QUEENSIDE_CASTLE:
                     strcpy((char *) new_move_str,"O-O-O");
                     break;
+                case NULL_MOVE:
+                    strcpy((char *) new_move_str, (char *) NULL_MOVE_STRING);
+                    break;
                 case UNKNOWN_MOVE:
                 default:
                     Ok = FALSE;
          * Return TRUE if the move is ok, FALSE otherwise.
          */
 static Boolean
-rewrite_move(Colour colour,Move *move_details, Board *board)
+rewrite_move(Colour colour,Move *move_details, Board *board, Boolean null_move_found)
 {   /* Assume success. */
     Boolean Ok = TRUE;
 
     if(rewrite_SAN_string(colour,move_details,board)){
         Piece piece_to_move = move_details->piece_to_move;
 
-        make_move(move_details->from_col,move_details->from_rank,
-                        move_details->to_col,move_details->to_rank,
-                        piece_to_move,colour,board);
+	if(move_details->class != NULL_MOVE) {
+	    make_move(move_details->from_col,move_details->from_rank,
+			    move_details->to_col,move_details->to_rank,
+			    piece_to_move,colour,board);
+	}
+	else {
+	    null_move_found = TRUE;
+	}
         /* See if there are any subsidiary actions. */
         switch(move_details->class){
             case PAWN_MOVE:
                             move_details->to_rank,
                             piece_to_move,colour,board);
             break;
+	case NULL_MOVE:
+	    /* Nothing more. */
+	    break;
         case UNKNOWN_MOVE:
         default:
             Ok = FALSE;
 
         /* Rewrite the list of moves by playing through the game. */
 static Boolean
-rewrite_moves(Board *board,Move *moves)
+rewrite_moves(Board *board,Move *moves, Boolean null_move_found)
 {   Boolean game_ok = TRUE;
     Move *move_details = moves;
 
             /* See if there are any variations associated with this move. */
             if((move_details->Variants != NULL) &&
                     GlobalState.keep_variations &&
-                    !rewrite_variations(board,move_details->Variants)){
+                    !rewrite_variations(board,move_details->Variants, null_move_found)){
                 /* Something wrong with the variations. */
                 game_ok = FALSE;
             }
-            else if(rewrite_move(board->to_move,move_details,board)){
+	    /* @@@ There was a else-if here; not sure why?! */
+	    if(move_details->class == NULL_MOVE) {
+	        null_move_found = TRUE;
+	    }
+            if(rewrite_move(board->to_move,move_details,board, null_move_found)){
                 board->to_move = OPPOSITE_COLOUR(board->to_move);
 
                 if(GlobalState.output_evaluation) {
                     append_evaluation(move_details, board);
                 }
 
+                if(GlobalState.add_FEN_comments) {
+                    /* Append an FEN comment with the new state of the board
+                     * with the move having been played.
+                     */
+                    append_FEN_comment(move_details, board);
+                }
+
                 move_details = move_details->next;
                 if(board->to_move == WHITE){
                     board->move_number++;
                 }
             }
-            else{
+            else {
                 fprintf(GlobalState.logfile,
                                 "Failed to rewrite move %u%s %s in the game:\n",
                                     board->move_number,
 }
 
         /* Rewrite the list of variations.
-          * Return TRUE if the variation are ok. a position that
+         * Return TRUE if the variations are ok.
          */
 static Boolean
-rewrite_variations(const Board *board,Variation *variation)
+rewrite_variations(const Board *board,Variation *variation, Boolean null_move_found)
 {   Board *copy_board = allocate_new_board();
     Boolean variations_ok = TRUE;
 
         /* Work on the copy. */
         *copy_board = *board;
 
-        variations_ok = rewrite_moves(copy_board,variation->moves);
+        variations_ok = rewrite_moves(copy_board,variation->moves, null_move_found);
         variation = variation->next;
     }
     (void) free_board((void *)copy_board);
 {   Board *board = new_game_board(fen);
     Boolean game_ok;
 
-    game_ok = rewrite_moves(board,moves);
+    /* No null-move found at the start of the game. */
+    game_ok = rewrite_moves(board,moves,FALSE);
     if(!game_ok){
         (void) free_board((void *)board);
         board = NULL;
         /* We don't include the cumulative hash value as the sequence
          * of moves to reach this position is not important.
          */
-        entry->combined_hash_value = board->hash_value;
+        entry->cumulative_hash_value = 0;
+	entry->final_hash_value = board->hash_value;
         /* Link it into the head at this index. */
-        entry->next =  codes_of_interest[ix];
+        entry->next = codes_of_interest[ix];
         codes_of_interest[ix] = entry;
     }
     (void) free_board((void *)board);
 }
 
-        /* Look in codes_of_interest for current_hash_value. */
+        /* Does the current board match a position of interest?
+	 * Look in codes_of_interest for the board's hash_value, comparing
+	 * it against the final_hash_value of each entry in the bucket
+	 * selected by hash_value % MAX_CODE.
+	 * If no entry matches, fall back to matchBoard(board) and treat a
+	 * non-NULL result as a match.
+	 * Return TRUE if either check succeeds, FALSE otherwise.
+	 */
 static Boolean
-position_matches(HashCode current_hash_value)
-{   unsigned ix = current_hash_value % MAX_CODE;
+position_matches(const Board *board)
+{
+    HashCode current_hash_value = board->hash_value;
+    unsigned ix = current_hash_value % MAX_CODE;
     Boolean found = FALSE;
     HashLog *entry;
 
     for(entry = codes_of_interest[ix]; !found && (entry != NULL);
                         entry = entry->next){
-        /* We can test against just the position value as there is no
-         * cumulative_hash_value stored against these positions.
-         */
-        if(entry->combined_hash_value == current_hash_value){
+        /* We can test against just the position value. */
+        if(entry->final_hash_value == current_hash_value){
             found = TRUE;
         }
     }
+    if(!found) {
+	const char *matching_pattern = matchBoard(board);
+        found = matching_pattern != NULL;
+    }
     return found;
 }
 
     epd[ix] = '\0';
 }
 
+    /* Build a FEN string from the given board.
+     * build_basic_EPD_string() writes the leading fields into fen; the
+     * halfmove clock and the full move number are then appended, each
+     * preceded by a single space.
+     * No bounds checking is done here, so fen must be large enough
+     * for the complete string.
+     * The full move number is board->move_number when Black is to
+     * move and board->move_number + 1 otherwise.
+     * NOTE(review): presumably the + 1 is because rewrite_moves() asks
+     * for the FEN comment after flipping to_move but before it
+     * increments move_number; a board on which no move has been
+     * played would then be numbered one too high -- TODO confirm
+     * against other callers.
+     * NOTE(review): full_move_number is declared int but is printed
+     * with %u; the types should agree.
+     */
+void
+build_FEN_string(const Board *board,char *fen)
+{   
+    size_t ix;
+    int full_move_number =
+            board->to_move == BLACK ? board->move_number : (board->move_number + 1);
+
+    build_basic_EPD_string(board,fen);
+    /* Append the (pseudo) half move count and the full move count. */
+    ix = strlen(fen);
+    fen[ix] = ' '; ix++;
+
+    /* Half moves since the last capture or pawn move. */
+    sprintf(&fen[ix], "%u", board->halfmove_clock);
+    ix = strlen(fen);
+    fen[ix] = ' '; ix++;
+
+    /* The full move number. */
+    sprintf(&fen[ix],"%u", full_move_number);
+}
+
+    /* Append to move_details a FEN comment of the board.
+     * The board state is immediately following application of the
+     * given move.
+     * A buffer of FEN_SPACE bytes and a CommentList node are obtained
+     * from MallocOrDie(); the buffer is filled by build_FEN_string()
+     * and the node is handed to append_comments_to_move().
+     * NOTE(review): the buffer is passed to SaveStringListItem()
+     * before build_FEN_string() has written to it, so this presumably
+     * relies on SaveStringListItem() keeping the pointer rather than
+     * copying the text -- confirm.
+     */
+static void
+append_FEN_comment(Move *move_details, const Board *board)
+{
+    char *FEN_comment = MallocOrDie(FEN_SPACE);
+    CommentList *comment = (CommentList* ) MallocOrDie(sizeof(*comment));
+    StringList *current_comment = SaveStringListItem(NULL, FEN_comment);
+    
+    build_FEN_string(board, FEN_comment);
+    comment->Comment = current_comment;
+    comment->next = NULL;
+    append_comments_to_move(move_details, comment);
+}
+
     /* Append to move_details an evaluation value for board.
      * The board state is immediately following application of the
      * given move.
     append_comments_to_move(move_details, comment);
 }
 
+    /* Create a comment indicating that a move resulted in a
+     * positional match.
+     * The text is a copy (made with StringCopy) of
+     * GlobalState.position_match_comment, wrapped in a newly allocated
+     * CommentList node whose next field is NULL.
+     * The node is returned rather than attached: move_details is not
+     * used in this function, so the caller must link the result to
+     * the move or game itself.
+     */
+CommentList *
+create_match_comment(Move *move_details)
+{
+    /* The comment string. */
+    char *match_comment = StringCopy(GlobalState.position_match_comment);
+    StringList *current_comment = SaveStringListItem(NULL, match_comment);
+    CommentList *comment = (CommentList* ) MallocOrDie(sizeof(*comment));
+    
+    comment->Comment = current_comment;
+    comment->next = NULL;
+    return comment;
+}
     /* Return an evaluation of board. */
 static double
 evaluate(const Board *board)
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
  */
 
 void store_hash_value(Move *move_details,const char *fen);
-Boolean apply_move_list(Game *game_details,unsigned *number_of_moves);
+Boolean apply_move_list(Game *game_details,unsigned *plycount);
 Boolean apply_eco_move_list(Game *game_details,unsigned *number_of_half_moves);
 Board *rewrite_game(Move *moves,const char *fen);
 char *StringCopy(const char *str);
 void build_basic_EPD_string(const Board *board,char *fen);
 char SAN_piece_letter(Piece piece);
 const char *piece_str(Piece piece);
+void build_FEN_string(const Board *board,char *fen);
+CommentList *create_match_comment(Move *move_details);
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2007 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 #include "apply.h"
 #include "output.h"
 #include "lists.h"
+#include "mymalloc.h"
 
-#define CURRENT_VERSION "v16.7"
+#define CURRENT_VERSION "v17-14"
 #define URL "http://www.cs.kent.ac.uk/people/staff/djb/pgn-extract/"
 
 /* The prefix of the arguments allowed in an argsfile.
 static int
 stringcompare(const char *s1, const char *s2)
 {
-#if defined(__unix__) || defined(__linux__)
+#if defined(__unix__) || defined(__linux__) || defined(__APPLE__)
     return strcasecmp(s1,s2);
 #else
     return _stricmp(s1,s2);
 }
 #endif
 
+    /* Skip over leading spaces from the string.
+     * Return a pointer to the first character of str that is not a
+     * space (' '), which is the terminating '\0' if str is empty or
+     * consists only of spaces.
+     * Only the space character is skipped; tabs and other white space
+     * are left in place.
+     * The result points into str: nothing is copied or modified.
+     * str is dereferenced without a check, so it must not be NULL.
+     */
+static const char *skip_leading_spaces(const char *str)
+{
+    while(*str == ' ') {
+        str++;
+    }
+    return str;
+}
+
         /* Print a usage message, and exit. */
 static void
-usage_and_exit(unsigned level)
-{ const char **help_data;
-  const char *level_0_help[] = {
+usage_and_exit(void)
+{
+    const char *help_data[] = {
+      "-7 -- output only the seven tag roster for each game. Other tags (apart",
+      "      from FEN and possibly ECO) are discarded (See -e).",
+      "-#num -- output num games per file, to files named 1.pgn, 2.pgn, etc.",
+
+      "",
+
       "-aoutputfile -- append extracted games to outputfile. (See -o).",
       "-Aargsfile -- read the program's arguments from argsfile.",
-      "-dduplicates -- write duplicate games to the file duplicates.",
-      "-D -- don't output duplicate games.",
-      "-ffile_list  -- file_list contains the list of PGN source files, one per line.",
-      "-h1 -- print details of further arguments.",
-      "-llogfile  -- Save the diagnostics in logfile rather than using stderr.",
-      "-ooutputfile -- write extracted games to outputfile (existing contents lost).",
-      "-r -- report any errors but don't extract.",
-      "-s -- silent mode: don't report each game as it is extracted.",
-      "-ttagfile -- file of tag extraction criteria.",
-      "-Tcriterion -- player, date, or result extraction criterion.",
-      "-U -- don't output games that only occur once. (See -d).",
-      "-vvariations -- the file variations contains the textual lines of interest.",
-      "-xvariations -- the file variations contains the lines resulting in",
-      "                positions of interest.",
-      /* Must be NULL terminated. */
-      (char *)NULL,
-  };
-  const char *level_1_help[] = {
-      "-7 -- output only the seven tag roster for each game. Other tags (apart",
-      "      from FEN and possibly ECO) are discarded (See -e).",
       "-b[elu]num -- restricted bounds on the number of moves in a game.",
       "       lnum set a lower bound of `num' moves,",
       "       unum set an upper bound of `num' moves,",
       "-cfile[.pgn] -- Use file.pgn as a check-file for duplicates or",
       "      contents of file (no pgn suffix) as a list of check-file names.",
       "-C -- don't include comments in the output. Ordinarily these are retained.",
+      "-dduplicates -- write duplicate games to the file duplicates.",
+      "-D -- don't output duplicate games.",
       "-eECO_file -- perform ECO classification of games. The optional",
       "      ECO_file should contain a PGN format list of ECO lines",
       "      Default is to use eco.pgn from the current directory.",
       "      refined division of games.",
       "      All files are opened in append mode.",
       "-F -- output a FEN string comment of the final game position.",
-      "-h -- print details of the main arguments.",
+      "-ffile_list  -- file_list contains the list of PGN source files, one per line.",
+      "-h -- print details of the arguments.",
+      "-llogfile  -- Save the diagnostics in logfile rather than using stderr.",
       "-Llogfile  -- Append all diagnostics to logfile, rather than overwriting.",
       "-M -- Match only games which end in checkmate.",
       "-noutputfile -- Write all valid games not otherwise output to outputfile.",
       "-N -- don't include NAGs in the output. Ordinarily these are retained.",
+      "-ooutputfile -- write extracted games to outputfile (existing contents lost).",
       "-P -- don't match permutations of the textual variations (-v).",
       "-Rtagorder -- Use the tag ordering specified in the file tagorder.",
+      "-r -- report any errors but don't extract.",
       "-S -- Use a simple soundex algorithm for some tag matches. If used",
       "      this option must precede the -t or -T options.",
+      "-s -- silent mode: don't report each game as it is extracted.",
+      "-ttagfile -- file of player, date, result or FEN extraction criteria.",
+      "-Tcriterion -- player, date, or result extraction criterion.",
+      "-U -- don't output games that only occur once. (See -d).",
+      "-vvariations -- the file variations contains the textual lines of interest.",
       "-V -- don't include variations in the output. Ordinarily these are retained.",
       "-wwidth -- set width as an approximate line width for output.",
       "-W[cm|epd|halg|lalg|elalg|san] -- specify the output format to use.",
       "      -Whalg is hyphenated long algebraic.",
       "      -Wlalg is long algebraic.",
       "      -Welalg is enhanced long algebraic.",
+      "      -Wuci is output compatible with the UCI protocol.",
+      "-xvariations -- the file variations contains the lines resulting in",
+      "                positions of interest.",
       "-zendings -- the file endings contains the end positions of interest.",
       "-Z -- use the file virtual.tmp as an external hash table for duplicates.",
       "      Use when MallocOrDie messages occur with big datasets.",
-      "-#num -- output num games per file, to files named 1.pgn, 2.pgn, etc.",
+
+      "",
+
+      "--append - see -a",
+      "--checkfile - see -c",
       "--checkmate - see -M",
+      "--duplicates - see -d",
       "--evaluation - include a position evaluation after each move",
+      "--fencomments - include a FEN string after each move",
+      "--fuzzydepth plies - positional duplicates match",
+      "--help - see -h",
       "--linelength - see -w",
+      "--markmatches - mark positional and material matches with a comment; see -t, -v, and -z",
+      "--nochecks - don't output + and # after moves.",
       "--nocomments - see -C",
       "--noduplicates - see -D",
       "--nomovenumbers - don't output move numbers.",
       "--notags - don't output any tags.",
       "--nounique - see -U",
       "--novars - see -V",
+      "--output - see -o",
       "--plylimit - limit the number of plies output.",
       "--seven - see -7",
       "--stalemate - only output games that end in stalemate.",
+      "--version - print the current version number and exit.",
+
       /* Must be NULL terminated. */
       (char *)NULL,
   };
 
-  /* Select the correct set of help information. */
-  if(level == 0){
-      help_data = level_0_help;
-  }
-  else{
-      help_data = level_1_help;
-  }
+  const char **data = help_data;
 
   fprintf(GlobalState.logfile,
           "pgn-extract %s (%s): a Portable Game Notation (PGN) manipulator.\n",
           CURRENT_VERSION,__DATE__);
   fprintf(GlobalState.logfile,
-          "Copyright (C) 1994-2007 David J. Barnes (d.j.barnes@kent.ac.uk)\n");
+          "Copyright (C) 1994-2013 David J. Barnes (d.j.barnes@kent.ac.uk)\n");
   fprintf(GlobalState.logfile,"%s\n\n",URL);
   fprintf(GlobalState.logfile,"Usage: pgn-extract [arguments] [file.pgn ...]\n");
-  fprintf(GlobalState.logfile,"Partial list of arguments ");
-  fprintf(GlobalState.logfile,"(see -%c for more):\n",HELP_ARGUMENT);
   
-  for(; *help_data != NULL; help_data++){
-      fprintf(GlobalState.logfile,"%s\n",*help_data);
+  for(; *data != NULL; data++){
+      fprintf(GlobalState.logfile,"%s\n",*data);
   }
   exit(1);
 }
                     case TAG_EXTRACTION_ARGUMENT:
                     case LINE_WIDTH_ARGUMENT:
                     case OUTPUT_FORMAT_ARGUMENT:
-                    case LONG_FORM_ARGUMENT:
-                    case HELP_ARGUMENT:
                         process_argument(line[argument_prefix_len],
                                          &line[argument_prefix_len+1]);
                         linetype = NO_ARGUMENT_MATCH;
                         break;
+                    case LONG_FORM_ARGUMENT:
+			{
+			    char *arg = &line[argument_prefix_len+1];
+			    char *space = strchr(arg, ' ');
+			    if(space != NULL) {
+				/* We need to drop an associated value from arg. */
+				int arglen = space - arg;
+				char *just_arg = (char *) MallocOrDie(arglen + 1);
+				strncpy(just_arg, arg, arglen);
+				just_arg[arglen] = '\0';
+				process_long_form_argument(just_arg,
+							   skip_leading_spaces(space));
+			    }
+			    else {
+				process_long_form_argument(arg, "");
+				linetype = NO_ARGUMENT_MATCH;
+			    }
+			}
+                        break;
 
                         /* Arguments with no additional
                          * argument value.
                          * line in the argument file.
                          */
                     case SEVEN_TAG_ROSTER_ARGUMENT:
+                    case HELP_ARGUMENT:
                     case ALTERNATIVE_HELP_ARGUMENT:
                     case DONT_KEEP_COMMENTS_ARGUMENT:
                     case DONT_KEEP_DUPLICATES_ARGUMENT:
      *                'o' and "file.pgn".
      * A zero-length string for associated_value is not necessarily
      * an error, e.g. -e has an optional following filename.
-     * If the associated_value is to be used beyond this function,
+     * NB: If the associated_value is to be used beyond this function,
      * it must be copied.
      */
 void
 {   /* Provide an alias for associated_value because it will
      * often represent a file name.
      */
-    const char *filename = associated_value;
-    /* Strip leading spaces from the filename. */
-    while(*filename == ' ') {
-        filename++;
-    }
+    const char *filename = skip_leading_spaces(associated_value);
 
     switch(arg_letter){
         case WRITE_TO_OUTPUT_FILE_ARGUMENT:
               unsigned length;
 
               if(sscanf(associated_value,"%u",&length) > 0){
-                  GlobalState.max_line_length = length;
-                  set_output_line_length(GlobalState.max_line_length);
+                  set_output_line_length(length);
               }
               else{
                   fprintf(GlobalState.logfile,
             }
             break;
         case HELP_ARGUMENT:
-            { /* Determine at which level help is required. */
-              unsigned level = 0;
-
-              (void) sscanf(associated_value,"%u",&level);
-              usage_and_exit(level);
-            }
+            usage_and_exit();
             break;
         case OUTPUT_FORMAT_ARGUMENT:
             /* Whether to use the source form of moves or
              * rewrite them into another format.
              */
-            GlobalState.output_format = which_output_format(associated_value);
+	    {
+		OutputFormat format = which_output_format(associated_value);
+		if(format == UCI) {
+		    /* Rewrite the game in a format suitable for input to
+		     * a UCI-compatible engine.
+		     * This is actually LALG but involves adjusting a lot of
+		     * the other statuses, too.
+		     */
+		    GlobalState.keep_NAGs = FALSE;
+		    GlobalState.keep_comments = FALSE;
+		    GlobalState.keep_move_numbers = FALSE;
+		    GlobalState.keep_checks = FALSE;
+		    GlobalState.keep_variations = FALSE;
+		    set_output_line_length(5000);
+		    format = LALG;
+		}
+		GlobalState.output_format = format;
+	    }
             break;
         case SEVEN_TAG_ROSTER_ARGUMENT:
             if(GlobalState.tag_output_format == ALL_TAGS ||
             break;
         case OUTPUT_FEN_STRING_ARGUMENT:
             /* Output a FEN string of the final position.
-             *  This is displayed in a comment.
+             * This is displayed in a comment.
              */
-            GlobalState.output_FEN_string = TRUE;
+	    if(GlobalState.add_FEN_comments) {
+		/* Already implied. */
+	        GlobalState.output_FEN_string = FALSE;
+	    }
+	    else {
+		GlobalState.output_FEN_string = TRUE;
+	    }
             break;
         case CHECK_ONLY_ARGUMENT:
             /* Report errors, but don't convert. */
 int
 process_long_form_argument(const char *argument, const char *associated_value)
 {
-    if(stringcompare(argument, "checkmate") == 0) {
+    if(stringcompare(argument, "append") == 0) {
+        process_argument(APPEND_TO_OUTPUT_FILE_ARGUMENT, associated_value);
+        return 2;
+    }
+    else if(stringcompare(argument, "checkfile") == 0) {
+        process_argument(CHECK_FILE_ARGUMENT, associated_value);
+        return 2;
+    }
+    else if(stringcompare(argument, "checkmate") == 0) {
         process_argument(MATCH_CHECKMATE_ARGUMENT, "");
         return 1;
     }
+    else if(stringcompare(argument, "duplicates") == 0) {
+        process_argument(DUPLICATES_FILE_ARGUMENT, associated_value);
+        return 2;
+    }
     else if(stringcompare(argument, "evaluation") == 0) {
         /* Output an evaluation is required with each move. */
         GlobalState.output_evaluation = TRUE;
         return 1;
     }
+    else if(stringcompare(argument, "fencomments") == 0) {
+        /* A FEN string is to be added as a comment after each move. */
+        GlobalState.add_FEN_comments = TRUE;
+	/* Turn off any separate setting of output_FEN_string. */
+	GlobalState.output_FEN_string = FALSE;
+        return 1;
+    }
+    else if(stringcompare(argument, "help") == 0) {
+        process_argument(HELP_ARGUMENT, "");
+        return 1;
+    }
+    else if(stringcompare(argument, "fuzzydepth") == 0) {
+        /* Extract the depth. */
+        int depth = 0;
+
+        if(sscanf(associated_value, "%d",&depth) == 1){
+            if(depth >= 0) {
+		GlobalState.fuzzy_match_duplicates = TRUE;
+                GlobalState.fuzzy_match_depth = depth;
+            }
+            else {
+                fprintf(GlobalState.logfile,
+                        "--%s requires a number greater than or equal to zero.\n", argument);
+                exit(1);
+            }
+        }
+        else {
+            fprintf(GlobalState.logfile,
+                    "--%s requires a number following it.\n", argument);
+            exit(1);
+        }
+        return 2;
+    }
     else if(stringcompare(argument, "linelength") == 0) {
         process_argument(LINE_WIDTH_ARGUMENT,
                          associated_value);
         return 2;
     }
+    else if(stringcompare(argument, "markmatches") == 0) {
+        if(*associated_value != '\0') {
+	    GlobalState.add_position_match_comments = TRUE;
+	    GlobalState.position_match_comment = StringCopy(associated_value);
+	}
+	else {
+              fprintf(GlobalState.logfile,
+                        "--markmatches requires a comment string following it.\n");
+              exit(1);
+	}
+	return 2;
+    }
+    else if(stringcompare(argument, "nochecks") == 0) {
+        GlobalState.keep_checks = FALSE;
+        return 1;
+    }
     else if(stringcompare(argument, "nocomments") == 0) {
         process_argument(DONT_KEEP_COMMENTS_ARGUMENT, "");
         return 1;
         process_argument(DONT_KEEP_VARIATIONS_ARGUMENT, "");
         return 1;
     }
+    else if(stringcompare(argument, "output") == 0) {
+        process_argument(WRITE_TO_OUTPUT_FILE_ARGUMENT, associated_value);
+        return 2;
+    }
     else if(stringcompare(argument, "plylimit") == 0) {
           int limit = 0;
 
           /* Extract the limit. */
           if(sscanf(associated_value, "%d",&limit) == 1){
-              if(limit > 0) {
+              if(limit >= 0) {
                   GlobalState.output_ply_limit = limit;
               }
               else {
                   fprintf(GlobalState.logfile,
-                        "--plylimit requires a number greater than zero.\n");
+                        "--plylimit requires a number greater than or equal to zero.\n");
                   exit(1);
               }
           }
           else {
               fprintf(GlobalState.logfile,
-                        "--plylimit requires a number following.\n");
+                        "--plylimit requires a number following it.\n");
               exit(1);
           }
           return 2;
         GlobalState.match_only_stalemate = TRUE;
         return 1;
     }
+    else if(stringcompare(argument, "version") == 0) {
+        fprintf(GlobalState.logfile, "pgn-extract %s\n", CURRENT_VERSION);
+	exit(0);
+        return 1;
+    }
     else {
         fprintf(GlobalState.logfile,
                 "Unrecognised long-form argument: --%s\n",
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2007 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
              Ok = FALSE;
          }
     }
+    else if(strcmp((char *) move_string, NULL_MOVE_STRING) == 0) {
+        class = NULL_MOVE;
+    }
     else{
         print_error_context(GlobalState.logfile);
         fprintf(GlobalState.logfile,"Unknown move %s.\n",move_string);
         Ok = FALSE;
     }
-    if(Ok){
+    if(Ok && class != NULL_MOVE){
         /* Allow trailing checks. */
         while(is_check(*move)){
             move++;
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 /* Different classes of move determined by the lexical analyser. */
 typedef enum { PAWN_MOVE, PAWN_MOVE_WITH_PROMOTION, ENPASSANT_PAWN_MOVE,
                PIECE_MOVE, KINGSIDE_CASTLE, QUEENSIDE_CASTLE,
+	       NULL_MOVE,
                UNKNOWN_MOVE
              } MoveClass;
 /* Types for algebraic rank and column. */
 #define OPPOSITE_COLOUR(colour) (!(colour))
 #define EXTRACT_COLOUR(coloured_piece) ((coloured_piece) & 0x01)
 #define EXTRACT_PIECE(coloured_piece) ((coloured_piece) >> PIECE_SHIFT)
+
+/* The string for internally representing the non-standard PGN
+ * notation for null moves.
+ */
+#define NULL_MOVE_STRING ("--")
+
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 #define ECO_TABLE_SIZE 4096
 static EcoLog **EcoTable;
 
-/* Rate the quality of an ECO match.
- * Currently unused.
- */
-static int eco_match_quality(EcoLog* entry,
-                      HashCode current_hash_value,
-                      HashCode cumulative_hash_value,
-                      int half_moves_played);
-
 #if INCLUDE_UNUSED_FUNCTIONS
 static void
 dumpEcoTable(void)
     return level;
 }
 
+/* Quality values for aspects of an ECO match.
+ * Currently unused.
+ */
+static int ECO_REQUIRED_HASH_VALUE = 1;
+static int ECO_HALF_MOVE_VALUE = 1;
+static int ECO_CUMULATIVE_HASH_VALUE = 0;
+
+/* Score how well an ECO table entry matches the current position.
+ * The score is zero unless entry->required_hash_value is equal to
+ * current_hash_value; otherwise it is accumulated from the
+ * ECO_*_VALUE weights.
+ * Currently unused.
+ */
+static int eco_match_quality(EcoLog* entry,
+                      HashCode current_hash_value,
+                      HashCode cumulative_hash_value,
+                      int half_moves_played)
+{
+    int score;
+
+    /* Without agreement on the required hash value nothing else counts. */
+    if(entry->required_hash_value != current_hash_value){
+        return 0;
+    }
+    score = ECO_REQUIRED_HASH_VALUE;
+    /* Credit entries whose half_moves is within ECO_HALF_MOVE_LIMIT
+     * of the number of half moves played.
+     */
+    if(abs(half_moves_played - entry->half_moves) <= ECO_HALF_MOVE_LIMIT) {
+        score += ECO_HALF_MOVE_VALUE;
+    }
+    /* Credit agreement on the cumulative hash value as well. */
+    if(entry->cumulative_hash_value == cumulative_hash_value){
+        score += ECO_CUMULATIVE_HASH_VALUE;
+    }
+    return score;
+}
 #endif
 
 void initEcoTable(void)
     }
 }
 
-/* Quality values for aspects of an ECO match.
- * Currently unused.
- */
-static int ECO_REQUIRED_HASH_VALUE = 1;
-static int ECO_HALF_MOVE_VALUE = 1;
-static int ECO_CUMULATIVE_HASH_VALUE = 0;
-
-/* Rate the quality of the given match.
- * Currently unused.
- */
-static int eco_match_quality(EcoLog* entry,
-                      HashCode current_hash_value,
-                      HashCode cumulative_hash_value,
-                      int half_moves_played)
-{
-    int quality = 0;
-    if(entry->required_hash_value == current_hash_value){
-        quality += ECO_REQUIRED_HASH_VALUE;
-        if(abs(half_moves_played - entry->half_moves) <= ECO_HALF_MOVE_LIMIT) {
-            quality += ECO_HALF_MOVE_VALUE;
-        }
-        if(entry->cumulative_hash_value == cumulative_hash_value){
-            quality += ECO_CUMULATIVE_HASH_VALUE;
-        }
-    }
-    return quality;
-}
 
         /* Look in EcoTable for current_hash_value.
          * Use cumulative_hash_value to refine the match.
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 1. Nc3 *
 
 [ECO "A00"]
-[Opening "Dunst (Sleipner,Heinrichsen) opening"]
+[Opening "Dunst (Sleipner, Heinrichsen) opening"]
 
 1. Nc3 e5 *
 
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 #include "tokens.h"
 #include "taglist.h"
 #include "lex.h"
+#include "apply.h"
+#include "grammar.h"
+
+/**
+ * Code to handle specifications describing the state of the board
+ * in terms of numbers of pieces and material balance between opponents.
+ *
+ * Games are then matched against these specifications.
+ */
 
 /* Define a type to represent classes of occurrance. */
-typedef enum { EXACTLY, NUM_OR_MORE, NUM_OR_LESS,
-                SAME_AS_OPPONENT, NOT_SAME_AS_OPPONENT,
-                LESS_THAN_OPPONENT, MORE_THAN_OPPONENT,
-                LESS_EQ_THAN_OPPONENT, MORE_EQ_THAN_OPPONENT
+typedef enum {
+    EXACTLY, NUM_OR_MORE, NUM_OR_LESS,
+    SAME_AS_OPPONENT, NOT_SAME_AS_OPPONENT,
+    LESS_THAN_OPPONENT, MORE_THAN_OPPONENT,
+    LESS_EQ_THAN_OPPONENT, MORE_EQ_THAN_OPPONENT
 } Occurs;
 
 /* Define a structure to hold details on the occurrances of
 look_for_ending(Move *moves,Ending_details *details_to_find)
 {   Boolean game_ok = TRUE;
     Boolean game_matches = FALSE;
+    Boolean match_comment_added = FALSE;
     Move *next_move = moves;
     Colour colour = WHITE;
     /* The initial game position has the full set of piece details. */
                 game_ok = FALSE;
             }
         }
+	else {
+	    /* Match.
+	       See whether a matching comment is required.
+	     */
+	    if(GlobalState.add_position_match_comments && !match_comment_added) {
+		if(next_move != NULL) {
+		    CommentList *comment = create_match_comment(next_move);
+		    append_comments_to_move(next_move, comment);
+		}
+	    }
+	}
     }
     if(!game_ok){
         game_matches = FALSE;
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)

File fenmatcher.c

+/*
+ *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
+ *  Copyright (C) 1994-2013 David Barnes
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 1, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  David Barnes may be contacted as D.J.Barnes@kent.ac.uk
+ *  http://www.cs.kent.ac.uk/people/staff/djb/
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "bool.h"
+#include "mymalloc.h"
+#include "defs.h"
+#include "typedef.h"
+#include "apply.h"
+#include "fenmatcher.h"
+
+/* Character on an encoded board representing an empty square. */
+#define EMPTY_SQUARE '_'
+/* Pattern meta characters. */
+#define NON_EMPTY_SQUARE '!'
+#define ANY_SQUARE_STATE '?'
+#define ZERO_OR_MORE_OF_ANYTHING '*'
+#define ANY_WHITE_PIECE 'A'
+#define ANY_BLACK_PIECE 'a'
+
+/* Symbols for closures. */
+#define CCL_START '['
+#define CCL_END ']'
+#define NCCL '^'
+
+/**
+ * Based on original pattern matching code by Rob Pike.
+ * Taken from:
+ *     http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html
+ * and ideas from Kernighan and Plauger's "Software Tools".
+ */
+
+static Boolean matchhere(const char *regexp, const char *text);
+static Boolean matchstar(const char *regexp, const char *text);
+static Boolean matchccl(const char *regexp, const char *text);
+static Boolean matchnccl(const char *regexp, const char *text);
+static Boolean matchone(char regchar, char textchar);
+static char *convert_board_to_text(const Board *board);
+
+/* The list of FEN-based patterns to match. */
+static StringList *fen_patterns = NULL;
+
+/*
+ * Check that fen_pattern has a reasonable syntax and, if it does,
+ * save a copy of it on the list of FEN patterns to be matched.
+ *
+ * A pattern is accepted when:
+ *   - it contains exactly seven rank dividers ('/');
+ *   - every rank contains at least one symbol other than the
+ *     closure punctuation characters;
+ *   - closures are not nested, every closure end has a matching
+ *     start, and every closure start has a matching end;
+ *   - the negation symbol only appears inside a closure.
+ *
+ * A rejected pattern is reported on GlobalState.logfile and is
+ * not saved.
+ */
+void
+add_fen_pattern(const char *fen_pattern)
+{
+    /* Count the number of rank dividers. */
+    int dividers = 0;
+    /* Count the number of symbols in each rank - must be
+     * at least one.
+     */
+    int rankSymbols = 0;
+    Boolean ok = TRUE;
+    Boolean in_closure = FALSE;
+    const char *p;
+
+    for(p = fen_pattern; *p != '\0' && ok; p++) {
+        if(*p == '/') {
+            dividers++;
+            if(rankSymbols == 0) {
+                /* Nothing on the previous rank. */
+                ok = FALSE;
+            }
+            rankSymbols = 0;
+        }
+        else if(*p == CCL_START) {
+            if(!in_closure) {
+                in_closure = TRUE;
+            }
+            else {
+                ok = FALSE;
+                fprintf(GlobalState.logfile,
+                        "Nested closures not allowed: %s\n",
+                        fen_pattern);
+            }
+        }
+        else if(*p == CCL_END) {
+            if(in_closure) {
+                in_closure = FALSE;
+            }
+            else {
+                ok = FALSE;
+                fprintf(GlobalState.logfile,
+                        "Missing %c to match %c: %s\n",
+                        CCL_START, CCL_END,
+                        fen_pattern);
+            }
+        }
+        else if(*p == NCCL) {
+            if(!in_closure) {
+                ok = FALSE;
+                fprintf(GlobalState.logfile,
+                        "%c not allowed outside %c...%c: %s\n",
+                        NCCL,
+                        CCL_START, CCL_END,
+                        fen_pattern);
+            }
+        }
+        else {
+            rankSymbols++;
+        }
+    }
+    if(ok && in_closure) {
+        /* The pattern ended while a closure was still open.
+         * Such a pattern must not reach the matcher, which would
+         * otherwise search beyond the end of the pattern for the
+         * closing symbol.
+         */
+        ok = FALSE;
+        fprintf(GlobalState.logfile,
+                "Missing %c to match %c: %s\n",
+                CCL_END, CCL_START,
+                fen_pattern);
+    }
+    /* Eight ranks means seven dividers, and the final rank
+     * must not be empty either.
+     */
+    if(dividers != 7 || rankSymbols == 0) {
+        ok = FALSE;
+    }
+    if(ok) {
+        const char *pattern = StringCopy(fen_pattern);
+        fen_patterns = SaveStringListItem(fen_patterns, pattern);
+    }
+    else {
+        fprintf(GlobalState.logfile, "FEN Pattern: %s badly formed.\n",
+                fen_pattern);
+    }
+}
+
+    /*
+     * Compare the board with each of the saved FEN patterns in turn.
+     * Return the first pattern that matches, or NULL when none does
+     * (which includes the case of no patterns having been saved).
+     */
+const char *
+matchBoard(const Board *board)
+{
+    const char *found = NULL;
+    const char *text;
+    StringList *item;
+
+    if(fen_patterns == NULL) {
+        /* Nothing to match against. */
+        return (const char *) NULL;
+    }
+    /* The textual form of the board is allocated afresh for each call
+     * and released again below.
+     */
+    text = convert_board_to_text(board);
+    for(item = fen_patterns; item != NULL && found == NULL; item = item->next) {
+        if(0) printf("Try %s against %s\n", item->str, text);
+        if(matchhere(item->str, text)) {
+            if(0) fprintf(stdout, "%s matches\n%s\n", item->str, text);
+            found = item->str;
+        }
+    }
+    (void) free((void *) text);
+    return found;
+}
+
+/**
+ * matchhere: try to match regexp against text, starting at the
+ * beginning of both. Success requires the pattern and the text
+ * to be exhausted together.
+ */
+static Boolean
+matchhere(const char *regexp, const char *text)
+{
+    const char symbol = regexp[0];
+
+    /* Pattern and text exhausted together. */
+    if (symbol == '\0' && text[0] == '\0') {
+        return TRUE;
+    }
+    if (symbol == ZERO_OR_MORE_OF_ANYTHING) {
+        return matchstar(regexp+1, text);
+    }
+    /* Every remaining kind of symbol needs some text to work on. */
+    if (text[0] == '\0') {
+        return FALSE;
+    }
+    if (symbol == ANY_SQUARE_STATE) {
+        return matchhere(regexp+1, text+1);
+    }
+    if (symbol == CCL_START) {
+        /* Closure: negated if the negation symbol follows the start. */
+        if(regexp[1] == NCCL) {
+            return matchnccl(regexp + 2, text);
+        }
+        return matchccl(regexp + 1, text);
+    }
+    if (symbol >= '1' && symbol <= '8') {
+        /* The number of consecutive empty squares required. */
+        const int required = symbol - '0';
+        /* The number found so far. */
+        int found = 0;
+
+        while(found < required && text[found] == EMPTY_SQUARE) {
+            found++;
+        }
+        if(found == required) {
+            return matchhere(regexp+1, text + found);
+        }
+        return FALSE;
+    }
+    if (symbol == NON_EMPTY_SQUARE ||
+            symbol == ANY_WHITE_PIECE ||
+            symbol == ANY_BLACK_PIECE) {
+        if(matchone(symbol, *text)) {
+            return matchhere(regexp+1, text+1);
+        }
+        return FALSE;
+    }
+    /* Anything else must match literally. */
+    if (symbol == *text) {
+        return matchhere(regexp+1, text+1);
+    }
+    /* No match. */
+    return FALSE;
+}
+
+/**
+ * matchstar: leftmost longest search on a single rank.
+ * regexp is the pattern following the ZERO_OR_MORE_OF_ANYTHING
+ * symbol and text is the current position in the board text.
+ * The symbol may absorb anything from the whole remainder of the
+ * current rank down to nothing at all; the longest candidate is
+ * tried first.
+ * Return TRUE as soon as the rest of the pattern matches, FALSE
+ * if no candidate leads to a match.
+ */
+static Boolean
+matchstar(const char *regexp, const char *text)
+{
+    const char *t;
+
+    /* Find the end of this rank. */
+    for (t = text; *t != '\0' && *t != '/'; t++) {
+        ;
+    }
+    /* Try from the longest match to the shortest until success.
+     * The pointer is only stepped back while it is still beyond
+     * the start of text, so it never leaves the text buffer.
+     */
+    for (;;) {
+        /* * matches zero or more */
+        if (matchhere(regexp, t)) {
+            return TRUE;
+        }
+        if (t == text) {
+            return FALSE;
+        }
+        t--;
+    }
+}
+
+    /*
+     * Decide whether the single pattern character regchar accepts
+     * the single board character textchar.
+     * Identical characters always match; otherwise regchar is
+     * interpreted as one of the pattern meta characters.
+     * Return TRUE if regchar matches textchar, FALSE otherwise.
+     */
+static Boolean
+matchone(char regchar, char textchar)
+{
+    Boolean accepted = FALSE;
+
+    if(regchar == textchar || regchar == ANY_SQUARE_STATE) {
+        accepted = TRUE;
+    }
+    else if(regchar == NON_EMPTY_SQUARE) {
+        accepted = textchar != EMPTY_SQUARE;
+    }
+    else if(regchar == ANY_WHITE_PIECE) {
+        /* Match any white piece. */
+        switch(textchar) {
+            case 'K': case 'Q': case 'R':
+            case 'N': case 'B': case 'P':
+                accepted = TRUE;
+                break;
+            default:
+                break;
+        }
+    }
+    else if(regchar == ANY_BLACK_PIECE) {
+        /* Match any black piece. */
+        switch(textchar) {
+            case 'k': case 'q': case 'r':
+            case 'n': case 'b': case 'p':
+                accepted = TRUE;
+                break;
+            default:
+                break;
+        }
+    }
+    return accepted;
+}
+
+    /*
+     * Match any of the character closure.
+     * regexp points just beyond the closure start symbol and text at
+     * the board character to be tested.
+     * If one of the closure's members accepts *text then the result
+     * is that of matching the pattern following the closure against
+     * the text following that character; otherwise it is FALSE.
+     * A closure with no terminating symbol never matches.
+     */
+static Boolean
+matchccl(const char *regexp, const char *text)
+{
+    /* Look for a member of the closure that accepts the character. */
+    while(*regexp != CCL_END && *regexp != '\0' &&
+          !matchone(*regexp,*text)) {
+        regexp++;
+    }
+    if(*regexp == CCL_END || *regexp == '\0') {
+        /* No member accepted the character. */
+        return FALSE;
+    }
+    /* Skip over the remaining members of the closure. */
+    while(*regexp != CCL_END && *regexp != '\0') {
+        regexp++;
+    }
+    if(*regexp == '\0') {
+        /* Unterminated closure: there is no pattern beyond it,
+         * and stepping over the terminator would leave the string.
+         */
+        return FALSE;
+    }
+    return matchhere(regexp + 1, text + 1);
+}
+
+    /*
+     * Match any of the characters not in the closure.
+     * regexp points just beyond the negation symbol and text at the
+     * board character to be tested.
+     * The closure fails if any member accepts *text, or if the
+     * pattern ends before the closure does. Otherwise the result is
+     * that of matching the pattern following the closure against the
+     * text following that character.
+     */
+static Boolean
+matchnccl(const char *regexp, const char *text)
+{
+    const char *member;
+
+    for(member = regexp; *member != CCL_END; member++) {
+        if(*member == '\0' || matchone(*member, *text)) {
+            return FALSE;
+        }
+    }
+    return matchhere(member + 1, text + 1);
+}
+
+
+    /* Build a textual form of the given board for pattern matching.
+     * Ranks are written from LASTRANK down to FIRSTRANK, separated
+     * by '/', and within each rank the columns run from FIRSTCOL
+     * to LASTCOL. An occupied square is written as the letter given
+     * by coloured_piece_to_SAN_letter() and an empty square as
+     * EMPTY_SQUARE.
+     * The string is allocated with MallocOrDie().
+     */
+static char *
+convert_board_to_text(const Board *board)
+{
+    /* Allow space for a full board, the '/' separators in between
+     * and the terminator.
+     */
+    char *text = (char *) MallocOrDie(8 * 8 + 8);
+    char *cursor = text;
+    Rank rank;
+
+    for(rank = LASTRANK; rank >= FIRSTRANK; rank--){
+        Col col;
+
+        /* Every rank but the first one written follows a separator. */
+        if(rank != LASTRANK){
+            *cursor++ = '/';
+        }
+        for(col = FIRSTCOL; col <= LASTCOL; col++){
+            int coloured_piece = board->board[RankConvert(rank)]
+                                             [ColConvert(col)];
+
+            *cursor++ = (coloured_piece != EMPTY) ?
+                            coloured_piece_to_SAN_letter(coloured_piece) :
+                            EMPTY_SQUARE;
+        }
+    }
+    *cursor = '\0';
+    return text;
+}

File fenmatcher.h

+/*
+ *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
+ *  Copyright (C) 1994-2013 David Barnes
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 1, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  David Barnes may be contacted as D.J.Barnes@kent.ac.uk
+ *  http://www.cs.kent.ac.uk/people/staff/djb/
+ *
+ */
+
+/* Check the syntax of fen_pattern and, if it is well formed,
+ * save a copy of it on the list of patterns used by matchBoard().
+ * A badly formed pattern is reported on the log file and not saved.
+ */
+void add_fen_pattern(const char *fen_pattern);
+/* Try to match the board against each of the saved FEN patterns.
+ * Return the matching pattern, if there is one, otherwise NULL.
+ */
+const char *matchBoard(const Board *board);
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2007 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 
 static TokenType Symbol = NO_TOKEN;
 int yyparse(SourceFileType file_type);
-/* Keep track of which RAV level we are at. This is used to
- * check whether a TERMINATING_RESULT is the final one.
+/* Keep track of which RAV level we are at.
+ * This is used to check whether a TERMINATING_RESULT is the final one
+ * and whether NULL_MOVEs are allowed.
  */
 static unsigned RAV_level = 0;
 
             Tags[RESULT_TAG] = result_tag;
         }
         else if((result_tag != NULL) &&
-                        (strcmp(result_tag,terminating_result) != 0)){
+                    (strcmp(terminating_result,"*") != 0) &&
+                    (strcmp(result_tag,terminating_result) != 0)){
+            print_error_context(GlobalState.logfile);
             fprintf(GlobalState.logfile,
                         "Inconsistent result strings in the following game.\n");
             report_details(GlobalState.logfile);
-            print_error_context(GlobalState.logfile);
         }
         else{
             /* Ok. */
     CommentList *prefix_comment;
     Move *move_list = NULL;
     char *result;
+    /* There shouldn't be a hanging comment before the result,
+     * but there sometimes is.
+     */
+    CommentList *hanging_comment;
 
     /* Assume that we won't return anything. */
     *returned_move_list = NULL;
     }
     /* @@@ Beware of comments and/or tags without moves. */
     move_list = ParseMoveList();
+
+    /* @@@ Look for a comment with no move text before the result. */
+    hanging_comment = ParseOptCommentList();
+    /* Append this to the final move, if there is one. */
+
     /* Look for a result, even if there were no moves. */
     result = ParseResult();
     if(move_list != NULL){
+        /* Find the last move. */
+        Move *last_move = move_list;
+
+        while(last_move->next != NULL){
+            last_move = last_move->next;
+        }
+        if(hanging_comment != NULL) {
+            append_comments_to_move(last_move,hanging_comment);
+        }
         if(result != NULL){
             /* Append it to the last move. */
-            Move *last_move= move_list;
-
-            while(last_move->next != NULL){
-                last_move = last_move->next;
-            }
             last_move->terminating_result = result;
             check_result(Game_Header.Tags,result);
             *returned_move_list = move_list;
         something_found = TRUE;
     }
     else{
+        /* @@@ Nothing to attach the comment to. */
+        (void) free((void *) hanging_comment);
+        hanging_comment = NULL;
         /*
          * Workaround for games with zero moves.
          * Check the result for consistency with the tags, but then
     if(Symbol == MOVE){
         move_details = yylval.move_details;
 
+	if(move_details->class == NULL_MOVE && RAV_level == 0) {
+            print_error_context(GlobalState.logfile);
+	    fprintf(GlobalState.logfile, "Null moves (--) only allowed in variations.\n");
+	}
+
         Symbol = next_token();
         if(Symbol == CHECK_SYMBOL){
             strcat((char *) move_details->move,"+");
         }
         moves = ParseMoveList();
         if(moves == NULL){
+            print_error_context(GlobalState.logfile);
             fprintf(GlobalState.logfile,"Missing move list in variation.\n");
-            print_error_context(GlobalState.logfile);
         }
         result = ParseResult();
         if((result != NULL) && (moves != NULL)){
          * return the head of the resulting list.
          */
 StringList *
-SaveStringListItem(StringList *list,char *str)
+SaveStringListItem(StringList *list,const char *str)
 {
     if(str != NULL){
       StringList *new_item;
      */
     Boolean game_output = FALSE;
     /* Passed by address to apply_move_list(); the resulting value is then
      * handed to check_move_bounds() and previous_occurance() below.
      */
-    unsigned number_of_moves;
+    unsigned plycount;
 
     /* Update the count of how many games handled. */
     GlobalState.num_games_processed++;
      * been checked.
      */
     if(CheckTagDetailsNotECO(current_game.tags,current_game.tags_length) &&
-             apply_move_list(&current_game,&number_of_moves) && 
-             check_move_bounds(number_of_moves) &&
+             apply_move_list(&current_game,&plycount) && 
+             check_move_bounds(plycount) &&
              check_textual_variations(current_game) &&
              check_for_ending(current_game.moves) &&
              check_for_only_checkmate(current_game.moves) &&
         /* If there is no original filename then the game is not a
          * duplicate.
          */
-        const char *original_filename = previous_occurance(current_game);
+        const char *original_filename = previous_occurance(current_game, plycount);
 
         if((original_filename == NULL) && GlobalState.suppress_originals){
             /* Don't output first occurrences. */
         /* The user wants to keep everything else. */
         if(!current_game.moves_checked){
              /* Make sure that the move text is in a reasonable state. */
-             (void) apply_move_list(&current_game,&number_of_moves);
+             (void) apply_move_list(&current_game,&plycount);
         }
         if(current_game.moves_ok){
             output_game(current_game,GlobalState.non_matching_file);
     free_tags();
     free_move_list(current_game.moves);
 }
+
         /* If file_type == ECOFILE we are dealing with a file of ECO
          * input rather than a normal game file.
          */
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
 /*
  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
- *  Copyright (C) 1994-2005 David Barnes
+ *  Copyright (C) 1994-2013 David Barnes
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 1, or (at your option)
  */
 static char VIRTUAL_FILE[] = "virtual.tmp";
 
+/* Define the size of the hash table.
+ * Size was 8191 but that seems unduly conservative these days.
+ */
+#define LOG_TABLE_SIZE 100003