Commits

tiedeman committed cc836f6

Removed call to Turkish tagger (fix this later!)

Comments (0)

Files changed (3)

uplug-cwb/scripts/opus-indexer.pl

 #   -i depth .. min depth for finding alignment file (0 otherwise)
 #   -u pattern  allowed structural patterns
 #   -p pattern  allowed positional patterns
+#   -M ........ skip creating monolingual index files
+#   -k ........ keep temp file for cwb encoding
 #
 #---------------------------------------------------------------------------
 # Copyright (C) 2004 Jörg Tiedemann  <joerg@stp.ling.uu.se>
 use XML::Parser;
 use Encode;
 
-use vars qw($opt_a $opt_i $opt_d $opt_r $opt_t $opt_c $opt_v $opt_x $opt_o $opt_y $opt_f $opt_m $opt_s $opt_u $opt_p);
+use vars qw($opt_a $opt_i $opt_d $opt_r $opt_t $opt_c $opt_v $opt_x $opt_o $opt_y $opt_f $opt_m $opt_s $opt_u $opt_p $opt_M $opt_k);
 use Getopt::Std;
-getopts('a:d:r:t:c:x:voyf:m:si:u:p:');
+getopts('a:d:r:t:c:x:voyf:m:si:u:p:Mk');
 
 
 # script arguments
 # make monolingual corpus indices
 
 # if (not $opt_m){
+unless ($opt_M){
 foreach my $l (@LANG){
 
     my $llc = lc($l);
     print STDERR "make CWB index for '$l'\n" if $VERBOSE;
     MakeCWBindex($llc,$cwbtok,$attr);
 
-    unlink $cwbtok;
+    unlink $cwbtok unless ($opt_k);
     
 }
-# }
+}
 
 ########################################################################
 # make alignment index for each language pair
 	@extra = `grep 'ALIGNED' $regdir/$lang`;
     }
     mkdir "$datdir/$lang",0755;
+
+    print STDERR "$ENCODE -R $regdir/$lang -d $datdir/$lang -f $cwbtok $attr\n";
     system ("$ENCODE -R $regdir/$lang -d $datdir/$lang -f $cwbtok $attr");
+    print STDERR "$CWBMAKEALL -r $regdir -V $lang\n";
     system ("$CWBMAKEALL -r $regdir -V $lang");
 
     ## ... and add them to the new registry file

uplug-main/share/systems/opus/tr/annotate

     'name' => 'Turkish pre-processing',
     'submodules' => [
         'pre/tok -l tr',
-        'pre/tr/tag',
+#        'pre/tr/tag',
     ],
     'submodule names' => [
-        'POS tagger',
+    	 'generic tokenizer',
+#        'POS tagger',
     ],
     'stdout' => 'text',
   },

uplug-main/share/systems/opus/tr/tag

     'name' => 'Turkish pre-processing',
     'submodules' => [
         'pre/tok -l tr',
-        'pre/tr/tag',
+#        'pre/tr/tag',
     ],
     'submodule names' => [
         'tokenizer',
-        'POS tagger',
+#        'POS tagger',
     ],
     'stdout' => 'text',
   },
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.