Commits

David McClosky committed 13e16d3

Merge in Matthew Gerber's changes to @ symbol handling.
@ symbols no longer get special treatment when they immediately follow an <s> sentence bracket. Previously, a sentence that began with @ right after <s> was discarded, but that behavior now has the effect of skipping many sentences from Twitter corpora.

Comments (0)

Files changed (1)

first-stage/PARSE/ewDciTokStrm.C

 	return "</s>";                          //  ing Error" (unbracketed
     }                                           //  text) that follows.
 
-    while( savedWrd_ == "<s>"               // Starting in 1989, lines brack-
-           &&  nextWrd_ == "@" )            //  eted as sentences but with an
-    {                                       //   @  following the  <s>  hold
-        //cerr << "before @ flush" << endl;				      
-	savedWrd_ = flush_to_sentence();    //  data arranged as charts or
-        //cerr << "aft @ flush" << endl;				      
-	if(useCin)
-	  {
-	    if( !cin )                        //  tables.  It is non-text-like
-	      nextWrd_ = "";                  //  and should be discarded.
-	    else cin >> nextWrd_;
-	  }
-	else
-	  {
-	    if( !istr_ )                      //  tables.  It is non-text-like
-	      nextWrd_ = "";                  //  and should be discarded.
-	    else istr_ >> nextWrd_;
-	  }
-    }
     if(docEnd)
       {
 	docEnd = 0;