Commits

David McClosky committed 63ce8ba

More robust handling of some parse failures when using external POS tags.
If external POS tags are provided and the parse fails, we try to parse
again without any POS constraints.

Comments (0)

Files changed (3)

first-stage/PARSE/ExtPos.C

-
 #include "ExtPos.h"
 #include "utils.h"
 #include <sstream>
     push_back(vt);
   }
 }
+
+// Reports whether any entry of this ExtPos contains at least one Term,
+// i.e. whether any external POS tag was supplied for the sentence.
+//
+// Returns true as soon as a non-empty entry is found; returns false when
+// every entry is empty or when there are no entries at all.
+bool ExtPos::hasExtPos() {
+    for (size_t i = 0; i < size(); i++) {
+        // Inspect the entry in place: copying each vector<const Term*>
+        // only to look at its size is wasted work.
+        if (!operator[](i).empty()) {
+            return true;
+        }
+    }
+
+    return false;
+}

first-stage/PARSE/ExtPos.h

-
 #ifndef EXTPOS_H
 #define EXTPOS_H
 #include "Term.h"
 {
  public:
   void read(ifstream* ifs,SentRep& sr);
+  bool hasExtPos();
 };
 
 #endif

first-stage/PARSE/parseIt.C

  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * vi:ts=8
  */
 
 #include <pthread.h>
 static void* mainLoop (void* arg);
 static void printSkipped( SentRep *srp, MeChart *chart,PrintStack& pstk, printStruct& ps);
 static void workOnPrintStack(PrintStack* printStack);
+static bool decodeParses(int len, int locCount, SentRep* srp, MeChart* chart, printStruct& printS, 
+                         PrintStack& printStack);
 
 //-----------------------
 // Constants
       Item* topS = chart->topS();
       if(!topS)
 	{
-	  WARN( "Parse failed: !topS" );
-	  printSkipped(srp,chart,printStack,printS);
-	  delete chart;
-	  continue;
+          if (extPos.hasExtPos()) {
+              WARN("Parse failed: !topS -- reparsing without POS constraints");
+              chart = new MeChart(*srp, *id);
+              chart->parse();
+              topS = chart->topS();
+              if (!topS) {
+                  WARN("Reparsing without POS constraints failed too: !topS");
+                  printSkipped(srp, chart, printStack, printS);
+                  delete chart;
+                  continue;
+              }
+          } else {
+              WARN( "Parse failed: !topS" );
+              printSkipped(srp,chart,printStack,printS);
+              delete chart;
+              continue;
+          }
 	}
-      // compute the outside probabilities on the items so that we can
-      // skip doing detailed computations on the really bad ones 
-      chart->set_Alphas();
 
-      Bst& bst = chart->findMapParse();
-      if( bst.empty())
-	{
-	  WARN( "Parse failed: chart->findMapParse().empty()" );
-	  printSkipped(srp,chart,printStack,printS);
-	  delete chart;
-	  continue;
-	}
-      if(Feature::isLM)
-	{
-	  double lgram = log2(bst.sum());
-	  lgram -= (len*log600);
-	  double pgram = pow(2,lgram);
-	  double iptri =chart->triGram();;
-	  double ltri = (log2(iptri)-len*log600);
-	  double ptri = pow(2.0,ltri);
-	  double pcomb = (0.667 * pgram)+(0.333 * ptri);
-	  double lmix = log2(pcomb);
-	  if(locCount%10==9)cout << lgram << "\t" << ltri << "\t" << lmix << "\n";
-	}
-      int numVersions = 0;
-      Link diffs(0);
-      //cerr << "Need num diff: " << Bchart::Nth << endl;
-      for(numVersions = 0 ; ; numVersions++)
-	{
-	  short pos = 0;
-	  Val* v = bst.next(numVersions);
-	  if(!v) break;
-	  double vp = v->prob();
-	  if(vp == 0) break;
-	  if(isnan(vp)) break;
-	  if(isinf(vp)) break;
-	  InputTree* mapparse=inputTreeFromBsts(v,pos,*srp);
-	  bool isU;
-	  int cnt = 0;
-	  diffs.is_unique(mapparse, isU,cnt);
-	  if(cnt != len)
-	    {
-	      cerr << "Bad length parse for: " << *srp << endl;
-	      cerr << *mapparse << endl;
-	      assert(cnt == len);
-	    }
-	  if(isU)
-	    {
-	      printS.probs.push_back(v->prob());
-	      printS.trees.push_back(mapparse);
-	      printS.numDiff++;
-	    }
-	  else
-	    {
-	      delete mapparse;
-	    }
-	  if(printS.numDiff >= Bchart::Nth) break;
-	  if(numVersions > 20000) break;
-	}
+      bool failed = decodeParses(len, locCount, srp, chart, printS, printStack);
+      if (failed) {
+        continue;
+      }
+
       if( printS.numDiff == 0)
 	{
-	  WARN( "Parse failed from 0, inf or nan probabililty" );
-	  printSkipped(srp,chart,printStack,printS);
-	  delete chart;
-	  continue;
+          if (extPos.hasExtPos()) {
+              WARN("Parse failed from 0, inf or nan probabililty -- reparsing without POS constraints");
+              chart = new MeChart(*srp, *id);
+              chart->parse();
+
+              bool failed = decodeParses(len, locCount, srp, chart, printS, printStack);
+              if (failed || printS.numDiff == 0) {
+                WARN("Parse failed from 0, inf or nan probabililty -- failed even without POS constraints");
+                printSkipped(srp,chart,printStack,printS);
+                delete chart;
+                continue;
+              }
+          } else {
+              WARN("Parse failed from 0, inf or nan probabililty");
+              printSkipped(srp,chart,printStack,printS);
+              delete chart;
+              continue;
+          }
 	}
 
       /* put the sentence with which we just finished at the end of the printStack*/
   return 0;
 }
 
+// Pulls the n-best parses out of an already-parsed chart and stores them
+// in printS.
+//
+// Candidates are taken from the Bst returned by chart->findMapParse(), in
+// order. Each tree that the local Link reports as unique is appended to
+// printS.trees (with its probability in printS.probs) and counted in
+// printS.numDiff; non-unique trees are deleted. The walk stops when the
+// candidates run out, when a candidate's probability is 0, NaN or infinite,
+// when printS.numDiff reaches Bchart::Nth, or once the candidate index
+// exceeds 20000.
+//
+// Returns true only when findMapParse() yields an empty Bst. In that case
+// the sentence has ALREADY been handed to printSkipped() and chart has
+// ALREADY been deleted here, so the caller must neither use nor delete
+// chart afterwards. Returns false otherwise -- including when no parse was
+// added, so callers still have to check printS.numDiff themselves.
+static bool decodeParses(int len, int locCount, SentRep* srp, MeChart* chart, printStruct& printS,
+                         PrintStack& printStack) {
+  // Outside probabilities first, so that detailed computations can be
+  // skipped on the really bad items.
+  chart->set_Alphas();
+
+  Bst& best = chart->findMapParse();
+  if (best.empty()) {
+    WARN( "Parse failed: chart->findMapParse().empty()" );
+    printSkipped(srp, chart, printStack, printS);
+    delete chart;
+    return true;
+  }
+
+  if (Feature::isLM) {
+    // Language-model mode: mix the parser score with the chart's trigram
+    // score and report the three log values for every tenth sentence.
+    double lgram = log2(best.sum());
+    lgram -= (len*log600);
+    const double pgram = pow(2,lgram);
+    const double iptri = chart->triGram();
+    const double ltri = (log2(iptri)-len*log600);
+    const double ptri = pow(2.0,ltri);
+    const double pcomb = (0.667 * pgram)+(0.333 * ptri);
+    const double lmix = log2(pcomb);
+    if (locCount % 10 == 9) {
+      cout << lgram << "\t" << ltri << "\t" << lmix << "\n";
+    }
+  }
+
+  int candidate = 0;
+  Link seen(0);
+  while (true) {
+    Val* val = best.next(candidate);
+    if (!val) break;
+
+    // A zero, NaN or infinite probability ends the walk.
+    const double vp = val->prob();
+    if (vp == 0 || isnan(vp) || isinf(vp)) break;
+
+    short pos = 0;
+    InputTree* tree = inputTreeFromBsts(val, pos, *srp);
+    bool isUnique;
+    int cnt = 0;
+    seen.is_unique(tree, isUnique, cnt);
+    if (cnt != len) {
+      // Dump the offending sentence and tree before the assertion fires.
+      cerr << "Bad length parse for: " << *srp << endl;
+      cerr << *tree << endl;
+      assert(cnt == len);
+    }
+
+    if (!isUnique) {
+      delete tree;
+    } else {
+      printS.probs.push_back(val->prob());
+      printS.trees.push_back(tree);
+      printS.numDiff++;
+    }
+
+    if (printS.numDiff >= Bchart::Nth) break;
+    if (candidate > 20000) break;
+    candidate++;
+  }
+
+  return false;
+}
+
 //------------------------------
 
 static const ECString& getPOS(Wrd& w, MeChart *chart)