1. Jörg Tiedemann
  2. Uplug

Commits

tiedeman  committed 9d5fc97

Added a space after each sentence appended to the source and target text in xces2moses

  • Participants
  • Parent commits 23fa8d6
  • Branches master

Comments (0)

Files changed (1)

File uplug-main/tools/xces2moses

View file
 		$srceof=0;
 		if ($sent=~/s [^\>]*id="$id"/s){
 		    &xml2txt($sent);
-		    $srctxt.=$sent;
+		    $srctxt.=$sent.' ';
 		    last;
 		}
 		$srceof=1;
 		$trgeof=0;
 		if ($sent=~/s [^\>]*id="$id"/s){
 		    &xml2txt($sent);
-		    $trgtxt.=$sent;
+		    $trgtxt.=$sent.' ';
 		    last;
 		}
 		$trgeof=1;
 	    }
 	}
 
+	$srctxt=~s/\s\s+/ /;
+	$trgtxt=~s/\s\s+/ /;
+
 	$srctxt=~s/\s+$//;
 	$trgtxt=~s/\s+$//;