Commits

vvcephei committed 54a70c4

output scripts

Files changed (3)

bin/compare-output.sh

+#!/bin/bash
+
+help()
+{
+cat <<EOF
+Usage: compare-output.sh {emnlp|reviews|all}
+
+This is a convenience script that runs a series of experiments and compares
+the results to the last known-good output.
+
+The available experiment sets are:
+- emnlp
+- reviews
+- all
+EOF
+}
+
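+# All run logs live under ~/.updown_output; the "gold" files hold the last
+# output that was accepted as known-good.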
+OUT=~/.updown_output
+mkdir -p $OUT
+OUTPUT=$OUT/output
+GOLD=$OUT/gold
+
+echo "Running the experiments defined in run-all.sh. This will take quite a while."
+
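+# Dispatch on the requested experiment set; each run is piped through tee so
+# progress shows on the terminal while the full log is captured.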
+case $1 in
+    emnlp) 
+        OUTPUT=$OUT/output_emnlp
+        GOLD=$OUT/gold_emnlp
+        $UPDOWN_DIR/bin/experiments/run-emnlp2011.sh 2>&1 | tee $OUTPUT
+        ;;
+    reviews)
+        OUTPUT=$OUT/output_reviews
+        GOLD=$OUT/gold_reviews
+        $UPDOWN_DIR/bin/experiments/run-reviews.sh 2>&1 | tee $OUTPUT
+        ;;
+    all)
+        OUTPUT=$OUT/output_all
+        GOLD=$OUT/gold_all
+        $UPDOWN_DIR/bin/experiments/run-emnlp2011.sh 2>&1 | tee $OUTPUT
+        $UPDOWN_DIR/bin/experiments/run-reviews.sh 2>&1 | tee -a $OUTPUT
+        ;;
+    *) echo "Unrecognized command: $1"; help; exit 1;;
+esac
+
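+# Diff the fresh output against the stored gold file and offer to promote the
+# new output to known-good if anything changed.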
+if [[ -e $GOLD ]]; then
+    echo "Comparing the results against the last known good output ($GOLD)."
+    RES=`diff $GOLD $OUTPUT`
+    if [[ "$RES" != "" ]]; then
+        echo "
+        
+        
+        $RES"
+        echo -n "Output has changed! Replace the known-good file? [yN] "
+        read RESPONSE
+        if [[ $RESPONSE == "y" ]]; then
+            cp $OUTPUT $GOLD
+        fi
+    fi
+else
+    echo "
+    
+    No known good output. Using the current output for future comparisons ($GOLD)."
+    cp $OUTPUT $GOLD
+fi
+
+
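+# Optionally archive this run's output under a timestamped name.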
+echo -n "
+
+Done! Save the output from this run? [yN] "
+read RESPONSE
+if [[ "$RESPONSE" == "y" ]]; then
+    NEW_OUTPUT="${OUTPUT}_`date +%y%m%d%H%M%S`"
+    cp $OUTPUT $NEW_OUTPUT
+    echo "Output has been saved to $NEW_OUTPUT"
+fi

bin/experiments/run-emnlp2011.sh

+#!/bin/bash
+
+OUTPUT=/tmp/updown
+mkdir -p $OUTPUT
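+# Unpack the pretrained maxent model for the evaluation runs below.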
+zcat $UPDOWN_DIR/models/maxent-eng.mxm.gz > $OUTPUT/maxent-eng.mxm
+
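+# Preprocess each corpus into the feature files the experiments consume.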
+echo "
+
+PREPROC Stanford" 1>&2
+updown preproc-stanford -i $UPDOWN_DIR/data/stanford/orig/testdata.manual.2009.05.25  -s $UPDOWN_DIR/src/main/resources/eng/dictionary/stoplist.txt > $OUTPUT/stanford-features.txt
+echo "
+
+PREPROC Shamma" 1>&2
+updown preproc-shamma   -i $UPDOWN_DIR/data/shamma/orig/debate08_sentiment_tweets.tsv -s $UPDOWN_DIR/src/main/resources/eng/dictionary/stoplist.txt > $OUTPUT/shamma-features.txt
+echo "
+
+PREPROC HCR Train" 1>&2
+updown preproc-hcr      -i $UPDOWN_DIR/data/hcr/train/orig/hcr-train.csv              -s $UPDOWN_DIR/src/main/resources/eng/dictionary/stoplist.txt -t $OUTPUT/hcr-train-targets.txt > $OUTPUT/hcr-train-features.txt
+echo "
+
+PREPROC HCR Dev" 1>&2
+updown preproc-hcr      -i $UPDOWN_DIR/data/hcr/dev/orig/hcr-dev.csv                  -s $UPDOWN_DIR/src/main/resources/eng/dictionary/stoplist.txt -t $OUTPUT/hcr-dev-targets.txt > $OUTPUT/hcr-dev-features.txt
+echo "
+
+PREPROC HCR Test" 1>&2
+updown preproc-hcr      -i $UPDOWN_DIR/data/hcr/test/orig/hcr-test.csv                -s $UPDOWN_DIR/src/main/resources/eng/dictionary/stoplist.txt -t $OUTPUT/hcr-test-targets.txt > $OUTPUT/hcr-test-features.txt
+
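+# Evaluation runs over the preprocessed Stanford features.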
+echo "
+
+lex-ratio" 1>&2
+updown lex-ratio        -g $OUTPUT/stanford-features.txt -p $UPDOWN_DIR/src/main/resources/eng/lexicon/subjclueslen1polar.tff
+echo "
+
+per-tweet-eval" 1>&2
+updown per-tweet-eval   -g $OUTPUT/stanford-features.txt -m $OUTPUT/maxent-eng.mxm
+echo "
+
+per-user-eval" 1>&2
+updown per-user-eval    -g $OUTPUT/stanford-features.txt -m $OUTPUT/maxent-eng.mxm
+
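+# The junto (label propagation) runs need more memory; the extra leading
+# argument is passed to the updown launcher (presumably a heap size in GB).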
+MEM=4
+echo "
+
+junto" 1>&2
+updown $MEM junto          -g $OUTPUT/stanford-features.txt -m $OUTPUT/maxent-eng.mxm -p $UPDOWN_DIR/src/main/resources/eng/lexicon/subjclueslen1polar.tff -f $UPDOWN_DIR/data/stanford/username-username-edges.txt -r $UPDOWN_DIR/src/main/resources/eng/model/ngramProbs.ser.gz > /dev/null
+echo "
+
+follower-graph" 1>&2
+updown $MEM junto          -g $OUTPUT/stanford-features.txt -m $OUTPUT/maxent-eng.mxm -p $UPDOWN_DIR/src/main/resources/eng/lexicon/subjclueslen1polar.tff -f $UPDOWN_DIR/data/stanford/username-username-edges.txt -r $UPDOWN_DIR/src/main/resources/eng/model/ngramProbs.ser.gz -e fm > /dev/null
+echo "
+
+targets" 1>&2
+updown $MEM junto          -g $OUTPUT/hcr-dev-features.txt  -m $OUTPUT/maxent-eng.mxm -p $UPDOWN_DIR/src/main/resources/eng/lexicon/subjclueslen1polar.tff -f $UPDOWN_DIR/data/hcr/username-username-edges.txt      -r $UPDOWN_DIR/src/main/resources/eng/model/ngramProbs.ser.gz -t $OUTPUT/hcr-dev-targets.txt > /dev/null

bin/experiments/run-reviews.sh

+#!/bin/bash
+
+OUTPUT=/tmp/updown
+mkdir -p $OUTPUT
+
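+# Each Pang & Lee corpus below is preprocessed twice (default unigram pipeline,
+# then a unigram+bigram pipeline) and evaluated with a 10-fold maxent experiment.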
+# polarity corpus ===============================================
+echo "
+
+preproc polarity"
+updown run updown.preproc.PreprocPangLeePolarityCorpus -i "/data/pang_lee_polarity_dataset_v2.0/0/pos->pos,/data/pang_lee_polarity_dataset_v2.0/0/neg->neg" > $OUTPUT/polarity_corpus.txt
+
+echo "
+
+10-fold maxent on polarity corpus" 1>&2
+updown run updown.app.NFoldMaxentExperiment -g $OUTPUT/polarity_corpus.txt > /dev/null
+
+echo "
+
+preproc polarity bigrams"
+updown run updown.preproc.PreprocPangLeePolarityCorpus -i "/data/pang_lee_polarity_dataset_v2.0/0/pos->pos,/data/pang_lee_polarity_dataset_v2.0/0/neg->neg" --textPipeline "splitSpace|addBiGrams|twokenizeSkipGtOneGrams|removeStopwords" > $OUTPUT/polarity_corpus_bi.txt
+
+echo "
+
+10-fold maxent on polarity corpus with bigrams" 1>&2
+updown run updown.app.NFoldMaxentExperiment -g $OUTPUT/polarity_corpus_bi.txt > /dev/null
+
+
+# sentence corpus ===============================================
+echo "
+
+preproc sentence"
+updown run updown.preproc.PreprocPangLeeSentenceCorpus -i "/data/pang_lee_sentence_corpus/0/rt-polarity.neg->neg,/data/pang_lee_sentence_corpus/0/rt-polarity.pos->pos" > $OUTPUT/sentence_corpus.txt
+echo "
+
+10-fold maxent on sentence corpus" 1>&2
+updown run updown.app.NFoldMaxentExperiment -g $OUTPUT/sentence_corpus.txt > /dev/null
+
+echo "
+
+preproc sentence bigrams"
+updown run updown.preproc.PreprocPangLeeSentenceCorpus -i "/data/pang_lee_sentence_corpus/0/rt-polarity.neg->neg,/data/pang_lee_sentence_corpus/0/rt-polarity.pos->pos" --textPipeline "splitSpace|addBiGrams|twokenizeSkipGtOneGrams|removeStopwords" > $OUTPUT/sentence_corpus_bi.txt
+echo "
+
+10-fold maxent on sentence corpus with bigrams" 1>&2
+updown run updown.app.NFoldMaxentExperiment -g $OUTPUT/sentence_corpus_bi.txt > /dev/null
+
+
+# scale corpus ===============================================
+echo "
+
+preproc scale data"
+updown run updown.preproc.PreprocPangLeeSentenceCorpus -i "/data/pang_lee_scale_data/1/0->neg,/data/pang_lee_scale_data/1/1->neu,/data/pang_lee_scale_data/1/2->pos" > $OUTPUT/scale_corpus_3.txt
+
+echo "
+
+10-fold maxent on 3 point scale corpus" 1>&2
+updown run updown.app.NFoldMaxentExperiment -g $OUTPUT/scale_corpus_3.txt > /dev/null
+
+
+echo "
+
+preproc scale data bigrams"
+updown run updown.preproc.PreprocPangLeeSentenceCorpus -i "/data/pang_lee_scale_data/1/0->neg,/data/pang_lee_scale_data/1/1->neu,/data/pang_lee_scale_data/1/2->pos" --textPipeline "splitSpace|addBiGrams|twokenizeSkipGtOneGrams|removeStopwords" > $OUTPUT/scale_corpus_3_bi.txt
+
+echo "
+
+10-fold maxent on 3 point scale corpus with bigrams" 1>&2
+updown run updown.app.NFoldMaxentExperiment -g $OUTPUT/scale_corpus_3_bi.txt > /dev/null