Commits

David McClosky  committed 32d7c62

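Support for longer words (lowercasing buffers raised from 512 to 1024 bytes, i.e. words of up to 1023 characters) and longer sentences (fharray/lrarray capacity and their asserts raised from 256 to 400).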

  • Participants
  • Parent commits 7f0320e

Comments (0)

Files changed (15)

File first-stage/PARSE/Bchart.C

   lastWord[id]=lastKnownWord;
   int i,j;
   assert(len <= MAXSENTLEN);
-  char temp[512];
+  char temp[1024];
   for(i = 0 ; i < len ; i++)
     {
       ECString wl = langAwareToLower(sentence[i].lexeme().c_str(), temp);
   lastWord[id]=lastKnownWord;
   int i,j;
   assert(len <= MAXSENTLEN);
-  char temp[512];
+  char temp[1024];
   for(i = 0 ; i < len ; i++)
     {
       ECString wl = langAwareToLower(sentence[i].lexeme().c_str(), temp);

File first-stage/PARSE/BchartSm.C

   if(!ans.empty()) return ans;
   if(printDebug(500))
     cerr << "wordPlist " << *word << endl;
-  char temp[512];
+  char temp[1024];
   ECString head(word->lexeme());
   ECString headL(langAwareToLower(head.c_str(),temp));
   int wint = wtoInt(headL); 
     return 1;
   //cerr << "pCapgt = " << pcap << endl;
   if(shU->lexeme().length() < 2) return 1;  //ignore words of length 1;
-  char temp[512];
+  char temp[1024];
   ECString sh(langAwareToLower(shU->lexeme().c_str(),temp));
   bool cap = false;
   /* if all caps, ignore capitalization evidence */
   assert(ans >= 0);
   if(Term::fromInt(t)->openClass())
     {
-      char temp[512];
+      char temp[1024];
       ECString sh(langAwareToLower(shU->lexeme().c_str(),temp));
       float phegt = pegt(sh,t);
       if(phegt == 0) phegt = .00001;

File first-stage/PARSE/EgsFromTree.C

   if(trm->terminal_p())
     {
       ECString tmpW1 = tree->word();
-      char chars[512];
+      char chars[1024];
       ECString tmpW = langAwareToLower(tmpW1.c_str(), chars);
       
       int wInt = wtoInt(tmpW);

File first-stage/PARSE/FullHist.C

       int termInt = itm->term()->toInt();
       //cerr << "ebei " << termInt << endl;
       FullHist* st = new FullHist(termInt, this,itm);
-      assert(i < 256);
+      assert(i < 400);
       fharray[i++] = st;
       st->cpos = 0;
     }

File first-stage/PARSE/FullHist.h

   Bchart* cb;
   int hpos;
   int preTerm;
-  FullHist* fharray[256];
+  FullHist* fharray[400];
   int size;
 };
 

File first-stage/PARSE/GotIter.C

 {
   //if(pos_ > 20) cerr << "MOGI pos " << pos_ << " " << size_ << " " << *firstRight_<<endl;
   if(pos_ < 0) return false;
-  assert(pos_ < 256);
+  assert(pos_ < 400);
   itm = lrarray[pos_];
   //if(pos_ > 20) cerr << "MOGI itm " << *itm << endl;
   dir = dir_;
   int i = 0;
   while(gi.next(itm))
     {
-      assert(i < 256);
+      assert(i < 400);
       lrarray[i] = itm;
       //cerr << "lrgi " << *itm << endl;
       if(itm->start() == spos && !startRight)

File first-stage/PARSE/GotIter.h

   int     dir() { return dir_; }
  private:
   void         makelrgi(Edge* edge);
-  Item*        lrarray[256];
+  Item*        lrarray[400];
   int          pos_;
   int          size_;
   int          dir_;

File first-stage/PARSE/utils.C

 toLower(const char* str, char* temp)
 {
   int l = strlen(str);
-  assert(l < 512);
+  assert(l < 1024);
   for(int i = 0 ; i <= l ; i++)
     {
       char n = str[i];

File first-stage/TRAIN/Pst.C

 wordPlistConstruct(const ECString& head, int word_num)
 {
   list<double> ans;
-  char temp[512];
+  char temp[1024];
   ECString headL(langAwareToLower(head.c_str(), temp));
   const WordInfo* wi = useHeadC( headL );
   if( wi )
 Pst::
 pstt(ECString& shU, int t, int word_num)
 {
-  char temp[512];
+  char temp[1024];
   ECString sh(langAwareToLower(shU.c_str(), temp));
   const Term* tTerm = Term::fromInt(t);
   double phst = pHst(sh, t);
 Pst::
 psktt(const ECString& shU, int t, int word_num)
 {
-  char temp[512];
+  char temp[1024];
   ECString sh(langAwareToLower(shU.c_str(), temp));
   double ans = pHst(sh, t);
   double phcp = pCapgt(shU,t, word_num);
   ans *= .000001;
   if(Term::fromInt(t)->openClass())
     {
-      char temp[512];
+      char temp[1024];
       ECString sh(langAwareToLower(shU.c_str(),temp));
       float phegt = pegt(sh,t);
       if(phegt == 0) phegt = .00001;
   if(word_num == 0) return 1;
   //cerr << "pCapgt = " << pcap << endl;
   if(shU.length() < 2) return 1;  //ignore words of length 1;
-  char temp[512];
+  char temp[1024];
   ECString sh(langAwareToLower(shU.c_str(),temp));
   bool cap = false;
   if(shU[0] != sh[0] && shU[1] == sh[1]) cap = true;

File first-stage/TRAIN/pSfgT.C

       if(trm->openClass())
 	{
 	  ECString hdLexU(tree->word());
-	  char temp[512];
+	  char temp[1024];
 	  ECString hdLex(langAwareToLower(hdLexU.c_str(),temp));
 	  int len = hdLex.length();
 	  if(len >= 4)

File first-stage/TRAIN/pSgT.C

 void
 incrWordData(int lhsInt, ECString wupper)
 {
-  char temp[512];
+  char temp[1024];
   ECString w(langAwareToLower(wupper.c_str(), temp));
   numTerm[lhsInt]++;
   WordMap::iterator wmi = wordMap.find(w);

File first-stage/TRAIN/pTgNt.C

       if(trm->openClass()) 
 	{
 	  ECString hdLexU(tree->word());
-	  char temp[512];
+	  char temp[1024];
 	  ECString hdLex(langAwareToLower(hdLexU.c_str(),temp));
 	  int len = hdLex.length();
 	  if(len >= 3)

File first-stage/TRAIN/pUgT.C

   if( tree->word() != ""  )
     {
       ECString hdLexU(tree->word());
-      char temp[512];
+      char temp[1024];
       ECString hdLex(langAwareToLower(hdLexU.c_str(),temp));
       int len = hdLex.length();
       const WordInfo* wi = Pst::get(hdLex); //???;

File first-stage/TRAIN/treeHistSf.C

 headFromTree(InputTree* tree)
 {
 
-  char temp[512];
+  char temp[1024];
   string wrdStr(langAwareToLower(tree->head().c_str(), temp));
   const WordInfo* wi = Pst::get(wrdStr);
   if(!wi)
   pt = pt->parent();
   if(!pt) return topInt;
 
-  char temp[512];
+  char temp[1024];
   ECString wrdStr(langAwareToLower(pt->head().c_str(),temp));
   const WordInfo* wi = Pst::get(wrdStr);
   if(!wi)
       return nullWordInt;
     }
   ECString wrd = sentence[pos]->head();
-  char tmp[512];
+  char tmp[1024];
   ECString wrdl=langAwareToLower(wrd.c_str(), tmp);
   const WordInfo* wi = Pst::get(wrdl);
   assert(wi);

File first-stage/TRAIN/utils.C

 langAwareToLower(const char* str, char* temp)
 {
   int l = strlen(str);
-  assert(l < 512);
+  assert(l < 1024);
 
   /* Arabic doesn't get lowercased, all other languages do (for now) */
   if (Term::Language == "Ar") {
 toLower(const char* str, char* temp)
 {
   int l = strlen(str);
-  assert(l < 512);
+  assert(l < 1024);
   for(int i = 0 ; i <= l ; i++)
     {
       char n = str[i];