Commits

David McClosky  committed d7ad7d3

Re-add allScript since the retraining the reranker currently requires this.

  • Participants
  • Parent commits f2cd76f

Comments (0)

Files changed (1)

File first-stage/TRAIN/allScript

+#!/bin/bash -u
+#
+# Trains the Charniak parser -- see usage() statement below.
+#
+# 05/12/05
+# * merged parser and language modeler training scripts
+# * replaced copied code with loops
+# * enabled running this script from outside TRAIN directory
+# * made the final "/" on the path argument optional
+# * no longer assumes "." is in $PATH
+# * specified shell at top (make script invariant to user shell)
+# * outputs program exit codes for better error detection
+# * added a usage statement
+#
+# 07/12/05
+# * distinguished parser/lm vs. language choice
+# * removed use of "realpath" not available on all systems
+# * removed "typeset -r" constant declarations due to some user
+#   reports of having this unsupported in their bash
+#
+# 11/30/05
+# * decoupled make from running (script no longer makes for you)
+#-----------------------------------------------------------------
+
+function usage () {
+    echo "Usage: `basename $0` [-lm/-parser] [-Ch/-En] DATA_dir train_trees dev_trees"
+    echo "       If no optional \"-\" flags are supplied, trains English parser (default behavior)"
+    exit 1
+}
+if [ $# -eq 0 ]; then usage; fi
+
+echo -e "\nInvocation: $0 $@"
+
+# Parser or Language model?
+if [ $1 = -lm ];       then MODE=lm; shift
+elif [ $1 = -parser ]; then MODE=parser; shift
+else                        MODE=parser
+fi
+
+# English or Chinese
+if [ $1 = -Ch ];   then LANG=Chinese; shift
+elif [ $1 = -En ]; then LANG=English; shift
+else                    LANG=English
+fi
+
+# traditional allScript training arguments
+if [ $# -ne 3 ]; then usage; fi
+DATA=`echo $1 | sed -e 's|/$||g'` # remove final "/" if present
+TRAIN="$2 ${DATA}/bugFix.txt"
+TUNE="$3 ${DATA}/bugFix.txt" 
+
+echo "* directory: $DATA"
+echo "* TRAIN file: $2"
+echo "* TUNE file: $3"
+
+# Set training mode and language 
+if [ $LANG = English ]; then
+    HEAD_PROG=pTgNt
+    if [ $MODE = parser ]; then
+	SWITCH="" # or equivalently -LEn
+    elif [ $MODE = lm ]; then
+	SWITCH="-M" 
+    fi
+elif [ $LANG = Chinese ]; then
+    HEAD_PROG=pSfgT
+    if [ $MODE = parser ]; then
+	SWITCH="-LCh"
+    elif [ $MODE = lm ]; then
+	echo "Chinese LM not supported!"
+	exit 1
+    fi
+fi
+
+# Cleanup old files
+for f in pSgT.txt pUgT.txt nttCounts.txt; do
+    rm -f $DATA/$f
+done
+
+# define helper function: run a command and print its exit code
+function run () {
+    echo -e "\nrun: $1\n-------------"
+    eval $1
+    local code=$?
+    if [ $code -ne 0 ]; then
+	echo "Exit code: $code"
+	exit $code
+    fi
+}
+
+# Training -----------------------------------------------------
+
+HERE=`dirname $0`
+
+for prog in pSgT pUgT $HEAD_PROG; do
+    run "cat $TRAIN | $HERE/$prog $SWITCH $DATA/"
+done
+
+for x in r m l u h lm ru rm tt; do
+
+    cutoff=50
+    if [ $x = ru ]; then
+	cutoff=98
+    elif [ $x = tt ]; then
+	cutoff=100
+    fi
+
+    run "cat $TRAIN | $HERE/rCounts $SWITCH $x $DATA/"
+    run "$HERE/selFeats $x $cutoff $DATA/" 
+    rm -f $DATA/$x.g
+    run "$HERE/iScale $x $DATA/"
+    run "cat $TUNE | $HERE/trainRs $SWITCH $x $DATA/" 
+    rm -f $DATA/$x.f $DATA/$x.ff
+    
+done
+
+# use Knesser-Ney smoothing with language model for trigram interpolation
+if [ $MODE = lm ]; then
+    run "cat $TRAIN | $HERE/kn3Counts ww $DATA/"
+fi
+
+echo -e "\nTraining completed successfully.\n"