pysvmlight / svmlight / svmlight.py

from ctypes import *

# Version identifiers (presumably those of the wrapped SVMlight release -- confirm).
VERSION = "V6.02"
VERSION_DATE = "14.08.08"

# Scalar type aliases used by the structure definitions below.
CFLOAT = FVAL = c_float
FNUM = c_long

MAXFEATNUM = 99999999

# Kernel type codes, stored in KERNEL_PARM.kernel_type.
LINEAR, POLY, RBF, SIGMOID = range(4)

# Learning type codes, stored in LEARN_PARM.type.
CLASSIFICATION, REGRESSION, RANKING, OPTIMIZATION = range(1, 5)

MAXSHRINK = 50000

# Handle to the native library; the "./" path is looked up relative to the
# current working directory, so importing this module fails elsewhere.
svm = CDLL("./svmlight.so")


class WORD(Structure):
    """ctypes record pairing a ``wnum`` (FNUM) with a ``weight`` (FVAL).

    Presumably one sparse feature entry (feature number and its value) --
    confirm against the C declaration used by the shared library.
    """

    _fields_ = [
        ("wnum", FNUM),
        ("weight", FVAL),
    ]


class SVECTOR(Structure):
    """ctypes record holding a pointer to WORD entries plus per-vector metadata.

    The ``next`` member is a pointer to another SVECTOR, so the field list is
    attached after the class statement rather than inside it.
    """


# Assigned after class creation because "next" refers back to SVECTOR itself,
# which is not defined until the class statement above has finished.
SVECTOR._fields_ = [
    ("words", POINTER(WORD)),
    ("twonorm_sq", c_double),
    ("userdefined", POINTER(c_char)),
    ("kernel_id", c_long),
    ("next", POINTER(SVECTOR)),
    ("factor", c_double),
]


class DOC(Structure):
    """ctypes record for one document: integer ids, a cost factor and a
    pointer to its SVECTOR (``fvec``).
    """

    _fields_ = [
        ("docnum", c_long),
        ("queryid", c_long),
        ("costfactor", c_double),
        ("slackid", c_long),
        ("fvec", POINTER(SVECTOR)),
    ]

class LEARN_PARM(Structure):
    """ctypes record of learning parameters.

    Most members are ``c_long`` or ``c_double`` scalars.  ``predfile`` and
    ``alphafile`` are fixed 200-byte character buffers and ``svm_cost`` is a
    pointer to doubles.  The order of the entries defines the memory layout,
    so it has to stay in step with whatever C declaration this mirrors.
    """

    _fields_ = [
        # One of the learning type codes (CLASSIFICATION, REGRESSION, ...).
        ("type", c_long),
        ("svm_c", c_double),
        ("eps", c_double),
        ("svm_costratio", c_double),
        ("transduction_posratio", c_double),
        ("biased_hyperplane", c_long),
        ("sharedslack", c_long),
        ("svm_maxqpsize", c_long),
        ("svm_newvarsinqp", c_long),
        ("kernel_cache_size", c_long),
        ("epsilon_crit", c_double),
        ("epsilon_shrink", c_double),
        ("svm_iter_to_shrink", c_long),
        ("maxiter", c_long),
        ("remove_inconsistent", c_long),
        ("skip_final_opt_check", c_long),
        ("compute_loo", c_long),
        ("rho", c_double),
        ("xa_depth", c_long),
        # Fixed-size character buffers.
        ("predfile", c_char * 200),
        ("alphafile", c_char * 200),
        ("epsilon_const", c_double),
        ("epsilon_a", c_double),
        ("opt_precision", c_double),
        ("svm_c_steps", c_long),
        ("svm_c_factor", c_double),
        ("svm_costratio_unlab", c_double),
        ("svm_unlabbound", c_double),
        ("svm_cost", POINTER(c_double)),
        ("totwords", c_long),
    ]

class KERNEL_PARM(Structure):
    """ctypes record of kernel parameters.

    ``kernel_type`` holds one of the kernel type codes (LINEAR, POLY, RBF,
    SIGMOID); ``custom`` is a fixed 50-byte character buffer.
    """

    _fields_ = [
        ("kernel_type", c_long),
        ("poly_degree", c_long),
        ("rbf_gamma", c_double),
        ("coef_lin", c_double),
        ("coef_const", c_double),
        ("custom", c_char * 50),
    ]

class MODEL(Structure):
    """ctypes record describing a model.

    ``supvec`` is a pointer to an array of DOC pointers, ``alpha`` /
    ``lin_weights`` point to doubles and ``index`` points to longs.  The
    kernel parameters are embedded by value, not referenced through a pointer.
    """

    _fields_ = [
        ("sv_num", c_long),
        ("at_upper_bound", c_long),
        ("b", c_double),
        ("supvec", POINTER(POINTER(DOC))),
        ("alpha", POINTER(c_double)),
        ("index", POINTER(c_long)),
        ("totwords", c_long),
        ("totdoc", c_long),
        # Whole KERNEL_PARM struct stored inline.
        ("kernel_parm", KERNEL_PARM),
        ("loo_error", c_double),
        ("loo_recall", c_double),
        ("loo_precision", c_double),
        ("xa_error", c_double),
        ("xa_recall", c_double),
        ("xa_precision", c_double),
        ("lin_weights", POINTER(c_double)),
        ("maxdiff", c_double),
    ]

class QP(Structure):
    """ctypes record with two ``c_long`` sizes (``opt_n``, ``opt_m``) followed
    by seven pointers to ``c_double`` data.

    Presumably the description of a quadratic program handed to the
    optimizer -- confirm against the C side.
    """

    _fields_ = [
        ("opt_n", c_long),
        ("opt_m", c_long),
        ("opt_ce", POINTER(c_double)),
        ("opt_ce0", POINTER(c_double)),
        ("opt_g", POINTER(c_double)),
        ("opt_g0", POINTER(c_double)),
        ("opt_xinit", POINTER(c_double)),
        ("opt_low", POINTER(c_double)),
        ("opt_up", POINTER(c_double)),
    ]

class KERNEL_CACHE(Structure):
    """ctypes record for the kernel cache: a CFLOAT buffer, several
    ``c_long`` index arrays (as pointers) and ``c_long`` counters.
    """

    _fields_ = [
        # Pointer members.
        ("index", POINTER(c_long)),
        ("buffer", POINTER(CFLOAT)),
        ("invindex", POINTER(c_long)),
        ("active2totdoc", POINTER(c_long)),
        ("totdoc2active", POINTER(c_long)),
        ("lru", POINTER(c_long)),
        ("occu", POINTER(c_long)),
        # Scalar members.
        ("elems", c_long),
        ("max_elems", c_long),
        ("time", c_long),
        ("activenum", c_long),
        ("buffsize", c_long),
    ]

class TIMING(Structure):
    """ctypes record of seven ``c_long`` counters, one per ``time_*`` member."""

    # Every member has the same type, so the list is built from the names.
    # A generator (not a list comprehension) keeps the loop variable out of
    # the class namespace on Python 2 as well.
    _fields_ = list(
        (counter, c_long)
        for counter in (
            "time_kernel",
            "time_opti",
            "time_shrink",
            "time_update",
            "time_model",
            "time_check",
            "time_select",
        )
    )

class SHRINK_STATE(Structure):
    """ctypes record of shrinking state: pointers to ``c_long`` / ``c_double``
    arrays, a pointer-to-pointer history (``a_history``) and two ``c_long``
    counters.
    """

    _fields_ = [
        ("active", POINTER(c_long)),
        ("inactive_since", POINTER(c_long)),
        ("deactnum", c_long),
        ("a_history", POINTER(POINTER(c_double))),
        ("maxhistory", c_long),
        ("last_a", POINTER(c_double)),
        ("last_lin", POINTER(c_double)),
    ]

''' This auxiliary function to svm_learn reads some parameters from the keyword
arguments passed to the function and fills in the rest with defaults (ported
from read_input_parameters() in svm_learn_main.c:109).

Returns an int.
'''

def _store_verbosity(verbosity, level):
    """Write ``level`` into the C integer that ``verbosity`` refers to.

    ``verbosity`` may be a ctypes pointer (written through ``.contents``) or
    a plain ctypes integer instance such as ``c_long`` (written directly).
    """
    target = verbosity.contents if hasattr(verbosity, "contents") else verbosity
    target.value = int(level)


def _lookup_code(table, key, message):
    """Return ``table[key]``; raise ``ValueError(message)`` for unknown keys."""
    try:
        return table[key]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which are just as invalid.
        raise ValueError(message)


def read_learning_parameters(verbosity, learn_parm, kernel_parm, **kwds):
    """Fill the parameter structs with defaults, then apply keyword overrides.

    Args:
        verbosity: ctypes pointer to (or instance of) a C integer that
            receives the verbosity level.
        learn_parm: LEARN_PARM instance, modified in place.
        kernel_parm: KERNEL_PARM instance, modified in place.
        **kwds: Optional overrides.  Recognised keys:
            ``type`` ('classification', 'regression', 'ranking',
            'optimization'), ``kernel`` ('linear', 'polynomial', 'rbf',
            'sigmoid'), ``verbosity`` (int), ``C`` (float),
            ``poly_degree`` (int), ``rbf_gamma`` (float),
            ``coef_lin`` (float), ``coef_const`` (float).
            Any other key is ignored.

    Returns:
        int: always 1.

    Raises:
        ValueError: if ``type`` or ``kernel`` is not one of the names above.
            (ValueError is an Exception subclass, so existing
            ``except Exception`` handlers keep working.)
    """
    learning_types = {
        "classification": CLASSIFICATION,
        "regression": REGRESSION,
        "ranking": RANKING,
        "optimization": OPTIMIZATION,
    }
    kernel_types = {
        "linear": LINEAR,
        "polynomial": POLY,
        "rbf": RBF,
        "sigmoid": SIGMOID,
    }
    # Marker meaning "svm_iter_to_shrink not chosen yet"; resolved at the end
    # once the kernel type is known.
    iter_to_shrink_unset = -9999

    # ---- defaults -------------------------------------------------------
    # c_char array fields only accept bytes on Python 3; b"..." literals are
    # plain str on Python 2, so they work on both.
    learn_parm.predfile = b"trans_predictions"
    learn_parm.alphafile = b""
    _store_verbosity(verbosity, 0)
    learn_parm.biased_hyperplane = 1
    learn_parm.sharedslack = 0
    learn_parm.remove_inconsistent = 0
    learn_parm.skip_final_opt_check = 0
    learn_parm.svm_maxqpsize = 10
    learn_parm.svm_newvarsinqp = 0
    learn_parm.svm_iter_to_shrink = iter_to_shrink_unset
    learn_parm.maxiter = 100000
    learn_parm.kernel_cache_size = 40
    learn_parm.svm_c = 0.0
    learn_parm.eps = 0.1
    learn_parm.transduction_posratio = -1.0
    learn_parm.svm_costratio = 1.0
    learn_parm.svm_costratio_unlab = 1.0
    learn_parm.svm_unlabbound = 1E-5
    learn_parm.epsilon_crit = 0.001
    learn_parm.epsilon_a = 1E-15
    learn_parm.compute_loo = 0
    learn_parm.rho = 1.0
    learn_parm.xa_depth = 0
    kernel_parm.kernel_type = LINEAR
    kernel_parm.poly_degree = 3
    kernel_parm.rbf_gamma = 1.0
    kernel_parm.coef_lin = 1.0
    kernel_parm.coef_const = 1.0
    kernel_parm.custom = b"empty"
    learn_parm.type = CLASSIFICATION

    # ---- keyword overrides ----------------------------------------------
    if "type" in kwds:
        learn_parm.type = _lookup_code(
            learning_types,
            kwds["type"],
            "unknown learning type specified. Valid types are: 'classification', 'regression', 'ranking' and 'optimization'.",
        )

    if "kernel" in kwds:
        kernel_parm.kernel_type = _lookup_code(
            kernel_types,
            kwds["kernel"],
            "unknown kernel type specified. Valid types are: 'linear', 'polynomial', 'rbf' and 'sigmoid'.",
        )

    if "verbosity" in kwds:
        _store_verbosity(verbosity, kwds["verbosity"])
    if "C" in kwds:
        learn_parm.svm_c = float(kwds["C"])
    if "poly_degree" in kwds:
        kernel_parm.poly_degree = int(kwds["poly_degree"])
    if "rbf_gamma" in kwds:
        kernel_parm.rbf_gamma = float(kwds["rbf_gamma"])
    if "coef_lin" in kwds:
        kernel_parm.coef_lin = float(kwds["coef_lin"])
    if "coef_const" in kwds:
        kernel_parm.coef_const = float(kwds["coef_const"])

    # ---- derived defaults -----------------------------------------------
    if learn_parm.svm_iter_to_shrink == iter_to_shrink_unset:
        if kernel_parm.kernel_type == LINEAR:
            learn_parm.svm_iter_to_shrink = 2
        else:
            learn_parm.svm_iter_to_shrink = 100

    return 1

def svm_learn2(*args, **kwds):
    """Work-in-progress ctypes port of ``svm_learn``.

    At this stage it only allocates the parameter structures and populates
    them through ``read_learning_parameters``; no documents are unpacked and
    no learner in the shared library is invoked yet.

    Args:
        *args: Currently unused (the C version takes the document list here).
        **kwds: Learning options, forwarded unchanged to
            ``read_learning_parameters``.

    Returns:
        tuple: ``(learn_parm, kernel_parm)`` -- the populated LEARN_PARM and
        KERNEL_PARM instances.
    """
    # ctypes needs *instances* here: pointer() rejects a bare type, and only
    # an instance owns memory that can be filled in.
    learn_parm = LEARN_PARM()
    kernel_parm = KERNEL_PARM()
    verbosity = c_long()

    # read_learning_parameters is the Python function defined in this module,
    # not a symbol of the shared library, so it is called directly.  It
    # writes the verbosity level through the pointer.
    read_learning_parameters(pointer(verbosity), learn_parm, kernel_parm, **kwds)

    # TODO: unpack the document list from args (docs, target, totwords,
    # totdoc), set up the kernel cache and a MODEL, then dispatch to the
    # matching svm_learn_* routine of the shared library.

    return (learn_parm, kernel_parm)

if __name__ == "__main__":
    # Smoke run: build the parameter structures with all defaults and show
    # them.  print(...) with a single argument behaves the same on Python 2
    # and Python 3, unlike the print statement.
    result = svm_learn2()
    print(result)


'''

static PyObject *svm_learn(PyObject *self, PyObject *args, PyObject *kwds)
{
    #DOC **docs;
    double* target;
    int totwords, totdoc;
    KERNEL_CACHE *kernel_cache;
    LEARN_PARM learn_parm;
    KERNEL_PARM kernel_parm;
    long verbosity;
    PyObject *doclist;
    MODEL *model;
    MODEL_AND_DOCS *result;

    if(!PyArg_ParseTuple(args, "O", &doclist))
        return NULL;
    read_learning_parameters(kwds, &verbosity, &learn_parm, &kernel_parm);
    if(!unpack_doclist(doclist, &docs, &target, &totwords, &totdoc))
        return NULL;

    model = malloc(sizeof(MODEL));
    if(kernel_parm.kernel_type == LINEAR)
        kernel_cache = NULL;
    else
        kernel_cache = kernel_cache_init(totdoc, learn_parm.kernel_cache_size);

	# DO THIS ONE!
    if(learn_parm.type == CLASSIFICATION) {
        svm_learn_classification(docs, target, totdoc, totwords, &learn_parm,
                                 &kernel_parm, kernel_cache, model, NULL /* alpha_in */);
    }
    else if(learn_parm.type == REGRESSION) {
        svm_learn_regression(docs, target, totdoc, totwords, &learn_parm,
                             &kernel_parm, &kernel_cache, model);
    }
    else if(learn_parm.type == RANKING) {
        svm_learn_ranking(docs, target, totdoc, totwords, &learn_parm,
                          &kernel_parm, &kernel_cache, model);
    }
    else if(learn_parm.type == OPTIMIZATION) {
        svm_learn_optimization(docs, target, totdoc, totwords, &learn_parm,
                               &kernel_parm, kernel_cache, model, NULL /* alpha_in */);
    }
'''
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.