Source

snowball / libstemmer.c

#include <stdlib.h>
#include <string.h>
#include "libstemmer.h"
#include "api.h"
#include "modules.h"

struct sb_stemmer {
    struct SN_env * (*create)(void);
    void (*close)(struct SN_env *);
    int (*stem)(struct SN_env *);

    struct SN_env * env;
};

extern const char **
sb_stemmer_list(void)
{
    return algorithm_names;
}

static stemmer_encoding_t
sb_getenc(const char * charenc)
{
    struct stemmer_encoding * encoding;
    if (charenc == NULL) return ENC_UTF_8;
    for (encoding = encodings; encoding->name != 0; encoding++) {
	if (strcmp(encoding->name, charenc) == 0) break;
    }
    if (encoding->name == NULL) return ENC_UNKNOWN;
    return encoding->enc;
}

extern struct sb_stemmer *
sb_stemmer_new(const char * algorithm, const char * charenc)
{
    stemmer_encoding_t enc;
    struct stemmer_modules * module;
    struct sb_stemmer * stemmer;

    enc = sb_getenc(charenc);
    if (enc == ENC_UNKNOWN) return NULL;

    for (module = modules; module->name != 0; module++) {
	if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
    }
    if (module->name == NULL) return NULL;
    
    stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
    if (stemmer == NULL) return NULL;

    stemmer->create = module->create;
    stemmer->close = module->close;
    stemmer->stem = module->stem;

    stemmer->env = stemmer->create();
    if (stemmer->env == NULL)
    {
        sb_stemmer_delete(stemmer);
        return NULL;
    }

    return stemmer;
}

void
sb_stemmer_delete(struct sb_stemmer * stemmer)
{
    if (stemmer == 0) return;
    if (stemmer->close == 0) return;
    stemmer->close(stemmer->env);
    stemmer->close = 0;
    free(stemmer);
}

const sb_symbol *
sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
{
    int ret;
    if (SN_set_current(stemmer->env, size, (const symbol *)(word)))
    {
        stemmer->env->l = 0;
        return NULL;
    }
    ret = stemmer->stem(stemmer->env);
    if (ret < 0) return NULL;
    stemmer->env->p[stemmer->env->l] = 0;
    return (const sb_symbol *)(stemmer->env->p);
}

int
sb_stemmer_length(struct sb_stemmer * stemmer)
{
    return stemmer->env->l;
}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.