Source

gpalign-cpp / src / main.cpp

Full commit
//--------------------------------------------------------------------------//
// main.cpp
// Lars Yencken <lars.yencken@gmail.com>
// vim: ts=4 sw=4 sts=4 expandtab:
// Sat Oct  6 16:44:59 EST 2007
//--------------------------------------------------------------------------//

#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <locale>
#include <vector>

#include "tfIdfAligner.hpp"
#include "io.hpp"

using namespace std;

#ifdef DARWIN
#include <locale.h>
#endif

//--------------------------------------------------------------------------//

/**
 * Prints the command-line usage message to standard error and terminates
 * the process with exit status 1. Control never returns to the caller.
 */
void usage();

//--------------------------------------------------------------------------//

/**
 * Value used for n when the -n option is not given on the command line.
 * The usage text in usage() spells this default out literally, so the two
 * need to be kept in agreement.
 */
const int g_defaultN = 5;

int main(int argc, char *argv[])
{
#ifdef DARWIN
    setlocale(LC_CTYPE, g_locale);
#else
    cout.imbue(locale(g_locale));
#endif

    char* inputFile = NULL;
    char* outputFile = NULL;
    int n = g_defaultN;

    if (argc == 3) {
        inputFile = argv[1];
        outputFile = argv[2];
    } else if (argc == 5) {
        if (strcmp(argv[1], "-n") != 0) {
            cout << "unknown flag " << argv[1] << endl;
            usage();
        }
        n = atoi(argv[2]);
        inputFile = argv[3];
        outputFile = argv[4];
    } else {
        usage();
    }

    vector<BaseAlignment> alignments;
    {
        vector<Segment> entries;
        parseEntries(inputFile, entries);

        TfIdfAligner aligner(0.8, n);
        aligner.alignSegments(entries, alignments);
    }
    dumpAlignments(outputFile, alignments);
    return 0;
}

//--------------------------------------------------------------------------//

// Writes the help text to standard error, then ends the process with
// exit status 1.
void usage()
{
    // Adjacent string literals are joined by the compiler, giving the help
    // text one source line per output line.
    static const char* const helpText =
        "Usage: align [-n N] inputFile outputFile\n"
        "\n"
        "Takes a file where every line contains a word and its reading and aligns\n"
        "each word, producing the same output but segmented. The order of words in\n"
        "the input is not preserved.\n"
        "\n"
        "The -n option allows you to specify how many words to disambiguate in each\n"
        "step. Expect at least an Nx speedup for whatever number you choose. The\n"
        "default is 5.\n";

    cerr << helpText;
    exit(1);
}