David McClosky avatar David McClosky committed 63ce8ba

More robust handling of some parse failures when using external POS tags.
If external POS tags are provided and the parse fails, we try to parse
again without any POS constraints.

Comments (0)

Files changed (3)

first-stage/PARSE/ExtPos.C

-
 #include "ExtPos.h"
 #include "utils.h"
 #include <sstream>
     push_back(vt);
   }
 }
+
+// Reports whether any entry of this container holds at least one Term.
+// Returns false when the container is empty or every entry is empty.
+// NOTE(review): presumably each entry corresponds to one word position of
+// the sentence and lists its externally supplied POS tags -- confirm
+// against ExtPos::read.
+bool ExtPos::hasExtPos() {
+    for (size_t i = 0; i < size(); i++) {
+        // Bind by const reference so that no per-entry vector copy is made
+        // merely to test whether the entry is empty.
+        const vector<const Term*>& terms = operator[](i);
+        if (!terms.empty()) {
+            return true;
+        }
+    }
+
+    return false;
+}

first-stage/PARSE/ExtPos.h

-
 #ifndef EXTPOS_H
 #define EXTPOS_H
 #include "Term.h"
 {
  public:
   void read(ifstream* ifs,SentRep& sr);
+  bool hasExtPos();
 };
 
 #endif

first-stage/PARSE/parseIt.C

  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * vi:ts=8
  */
 
 #include <pthread.h>
 static void* mainLoop (void* arg);
 static void printSkipped( SentRep *srp, MeChart *chart,PrintStack& pstk, printStruct& ps);
 static void workOnPrintStack(PrintStack* printStack);
+static bool decodeParses(int len, int locCount, SentRep* srp, MeChart* chart, printStruct& printS, 
+                         PrintStack& printStack);
 
 //-----------------------
 // Constants
       Item* topS = chart->topS();
       if(!topS)
 	{
-	  WARN( "Parse failed: !topS" );
-	  printSkipped(srp,chart,printStack,printS);
-	  delete chart;
-	  continue;
+          if (extPos.hasExtPos()) {
+              WARN("Parse failed: !topS -- reparsing without POS constraints");
+              chart = new MeChart(*srp, *id);
+              chart->parse();
+              topS = chart->topS();
+              if (!topS) {
+                  WARN("Reparsing without POS constraints failed too: !topS");
+                  printSkipped(srp, chart, printStack, printS);
+                  delete chart;
+                  continue;
+              }
+          } else {
+              WARN( "Parse failed: !topS" );
+              printSkipped(srp,chart,printStack,printS);
+              delete chart;
+              continue;
+          }
 	}
-      // compute the outside probabilities on the items so that we can
-      // skip doing detailed computations on the really bad ones 
-      chart->set_Alphas();
 
-      Bst& bst = chart->findMapParse();
-      if( bst.empty())
-	{
-	  WARN( "Parse failed: chart->findMapParse().empty()" );
-	  printSkipped(srp,chart,printStack,printS);
-	  delete chart;
-	  continue;
-	}
-      if(Feature::isLM)
-	{
-	  double lgram = log2(bst.sum());
-	  lgram -= (len*log600);
-	  double pgram = pow(2,lgram);
-	  double iptri =chart->triGram();;
-	  double ltri = (log2(iptri)-len*log600);
-	  double ptri = pow(2.0,ltri);
-	  double pcomb = (0.667 * pgram)+(0.333 * ptri);
-	  double lmix = log2(pcomb);
-	  if(locCount%10==9)cout << lgram << "\t" << ltri << "\t" << lmix << "\n";
-	}
-      int numVersions = 0;
-      Link diffs(0);
-      //cerr << "Need num diff: " << Bchart::Nth << endl;
-      for(numVersions = 0 ; ; numVersions++)
-	{
-	  short pos = 0;
-	  Val* v = bst.next(numVersions);
-	  if(!v) break;
-	  double vp = v->prob();
-	  if(vp == 0) break;
-	  if(isnan(vp)) break;
-	  if(isinf(vp)) break;
-	  InputTree* mapparse=inputTreeFromBsts(v,pos,*srp);
-	  bool isU;
-	  int cnt = 0;
-	  diffs.is_unique(mapparse, isU,cnt);
-	  if(cnt != len)
-	    {
-	      cerr << "Bad length parse for: " << *srp << endl;
-	      cerr << *mapparse << endl;
-	      assert(cnt == len);
-	    }
-	  if(isU)
-	    {
-	      printS.probs.push_back(v->prob());
-	      printS.trees.push_back(mapparse);
-	      printS.numDiff++;
-	    }
-	  else
-	    {
-	      delete mapparse;
-	    }
-	  if(printS.numDiff >= Bchart::Nth) break;
-	  if(numVersions > 20000) break;
-	}
+      bool failed = decodeParses(len, locCount, srp, chart, printS, printStack);
+      if (failed) {
+        continue;
+      }
+
       if( printS.numDiff == 0)
 	{
-	  WARN( "Parse failed from 0, inf or nan probabililty" );
-	  printSkipped(srp,chart,printStack,printS);
-	  delete chart;
-	  continue;
+          if (extPos.hasExtPos()) {
+              WARN("Parse failed from 0, inf or nan probabililty -- reparsing without POS constraints");
+              chart = new MeChart(*srp, *id);
+              chart->parse();
+
+              bool failed = decodeParses(len, locCount, srp, chart, printS, printStack);
+              if (failed || printS.numDiff == 0) {
+                WARN("Parse failed from 0, inf or nan probabililty -- failed even without POS constraints");
+                printSkipped(srp,chart,printStack,printS);
+                delete chart;
+                continue;
+              }
+          } else {
+              WARN("Parse failed from 0, inf or nan probabililty");
+              printSkipped(srp,chart,printStack,printS);
+              delete chart;
+              continue;
+          }
 	}
 
       /* put the sentence with which we just finished at the end of the printStack*/
   return 0;
 }
 
+// Extracts the distinct parses from an already-parsed chart and appends them
+// (tree plus probability) to printS.
+//
+//   len        - expected number of words in each extracted tree; compared
+//                against the count reported by Link::is_unique.
+//   locCount   - running sentence counter; only used to print the language
+//                model scores on every tenth sentence (locCount%10==9).
+//   srp        - sentence being parsed; used to build trees and in
+//                diagnostics.
+//   chart      - chart to decode. OWNERSHIP: when this function returns true
+//                it has already handed chart to printSkipped and deleted it,
+//                so the caller must not use or delete chart again.
+//   printS     - receives probs/trees and has numDiff incremented once per
+//                distinct parse found.
+//   printStack - forwarded to printSkipped on the failure path.
+//
+// Returns true only when chart->findMapParse() is empty (the sentence has
+// then been reported as skipped and chart freed). Returns false otherwise,
+// even if no tree was collected; callers detect that case by checking
+// printS.numDiff == 0.
+// NOTE(review): a caller that calls printSkipped/delete on chart after a
+// true return would touch freed memory -- verify all call sites.
+static bool decodeParses(int len, int locCount, SentRep* srp, MeChart* chart, printStruct& printS, 
+                         PrintStack& printStack) {
+  // compute the outside probabilities on the items so that we can
+  // skip doing detailed computations on the really bad ones 
+  chart->set_Alphas();
+  Bst& bst = chart->findMapParse();
+  if( bst.empty())
+    {
+      // Failure path: report the sentence as skipped and release the chart
+      // here, so the caller only needs to move on to the next sentence.
+      WARN( "Parse failed: chart->findMapParse().empty()" );
+      printSkipped(srp,chart,printStack,printS);
+      delete chart;
+      return true;
+    }
+  if(Feature::isLM)
+    {
+      // Language-model mode: mix the parser score (bst.sum()) with the
+      // chart's trigram score using fixed 0.667/0.333 weights, after
+      // subtracting len*log600 from each log2 score (log600 is defined
+      // elsewhere in the file).
+      double lgram = log2(bst.sum());
+      lgram -= (len*log600);
+      double pgram = pow(2,lgram);
+      double iptri =chart->triGram();;
+      double ltri = (log2(iptri)-len*log600);
+      double ptri = pow(2.0,ltri);
+      double pcomb = (0.667 * pgram)+(0.333 * ptri);
+      double lmix = log2(pcomb);
+      // Only every tenth sentence is written to stdout.
+      if(locCount%10==9)cout << lgram << "\t" << ltri << "\t" << lmix << "\n";
+    }
+  int numVersions = 0;
+  // diffs tracks the trees seen so far so that duplicates can be discarded.
+  Link diffs(0);
+  for(numVersions = 0 ; ; numVersions++)
+    {
+      short pos = 0;
+      Val* v = bst.next(numVersions);
+      // Stop at the end of the candidate list or at the first candidate
+      // whose probability is zero, NaN or infinite.
+      if(!v) break;
+      double vp = v->prob();
+      if(vp == 0) break;
+      if(isnan(vp)) break;
+      if(isinf(vp)) break;
+      InputTree* mapparse=inputTreeFromBsts(v,pos,*srp);
+      bool isUnique;
+      int cnt = 0;
+      // is_unique fills in isUnique and cnt (compared against len below).
+      diffs.is_unique(mapparse, isUnique,cnt);
+      if(cnt != len)
+        {
+          // Dump the offending sentence and tree before the assert fires.
+          cerr << "Bad length parse for: " << *srp << endl;
+          cerr << *mapparse << endl;
+          assert(cnt == len);
+        }
+      if(isUnique)
+        {
+          // printS keeps the tree pointer; it is not deleted here.
+          printS.probs.push_back(v->prob());
+          printS.trees.push_back(mapparse);
+          printS.numDiff++;
+        }
+      else
+        {
+          // Duplicate of an earlier tree: discard it.
+          delete mapparse;
+        }
+      // Stop once Bchart::Nth distinct parses are collected, or give up
+      // after a little over 20000 candidates have been examined.
+      if(printS.numDiff >= Bchart::Nth) break;
+      if(numVersions > 20000) break;
+    }
+
+    return false;
+}
+
 //------------------------------
 
 static const ECString& getPOS(Wrd& w, MeChart *chart)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.