Commits

Lars Yencken  committed 113454a

[247] Parameterized the value of N (how many words are disambiguated in each step).

  • Participants
  • Parent commits 8a128bc

Comments (0)

Files changed (1)

File src/main.cpp

 
 //--------------------------------------------------------------------------//
 
// Value of N used when the command line carries no -n option: main() starts
// with n = g_defaultN and passes n as the second argument to TfIdfAligner.
+const int g_defaultN = 5;
+
 int main(int argc, char *argv[])
 {
 #ifdef DARWIN
 #else
     cout.imbue(locale(g_locale));
 #endif
-    if (argc != 3) {
+
+    char* inputFile = NULL;
+    char* outputFile = NULL;
+    int n = g_defaultN;
+
+    if (argc == 3) {
+        inputFile = argv[1];
+        outputFile = argv[2];
+    } else if (argc == 5) {
+        inputFile = argv[1];
+        if (strcmp(argv[2], "-n") != 0) {
+            usage();
+        }
+        n = atoi(argv[3]);
+        outputFile = argv[4];
+    } else {
         usage();
     }
-    const char* inputFile = argv[1];
-    const char* outputFile = argv[2];
 
     vector<BaseAlignment> alignments;
     {
         vector<Segment> entries;
         parseEntries(inputFile, entries);
 
-        TfIdfAligner aligner(0.8, 5);
+        TfIdfAligner aligner(0.8, n);
         aligner.alignSegments(entries, alignments);
     }
     dumpAlignments(outputFile, alignments);
// Writes the command-line help text to cerr and terminates the process via
// exit(1); it never returns to the caller.
// NOTE(review): the synopsis below advertises `align [-n N] inputFile outputFile`,
// but the argument handling shown in main() for argc == 5 reads argv[1] as the
// input file, requires argv[2] to be "-n", takes N from argv[3] and the output
// file from argv[4], i.e. `align inputFile -n N outputFile`. Confirm which
// ordering is intended and make the help text and the parser agree.
void usage()
 {
     cerr << 
-"Usage: align inputFile outputFile\n\n\
+"Usage: align [-n N] inputFile outputFile\n\n\
 Takes a file where every line contains a word and its reading and aligns\n\
 each word, producing the same output but segmented. The order of words in\n\
-the input is not preserved.\n";
+the input is not preserved.\n\
+\n\
+The -n option allows you to specify how many words to disambiguate in each\n\
+step. Expect at least an Nx speedup for whatever number you choose. The\n\
+default is 5.\n";
     exit(1);
 }