Ceri Stagg committed 5d9f5e6

count_doclist and shell of unpack_doclist

Comments (0)

Files changed (1)

svmlight/svmlight.py

 		    ("kparm",  KERNEL_PARM)]
 # ----------------------------------------------
 
-'''
-def unpack_doclist( 
+def count_doclist( doclist ):
+	max_docs = len( doclist )
+	max_words = 0
+	for doctuple in iter( doclist ):
+		words_list = doctuple[1]
+		list_length = len( words_list )
+		if list_length > max_words:
+			max_words = list_length
 
-static int unpack_doclist(
-        PyObject *doclist, DOC ***docs, double **label, int *totwords, int *totdoc)
-{
-    long queryid, slackid, dnum = 0, wpos, max_docs, max_words;
-    WORD *words;
-    double doc_label, costfactor;
-    PyObject *iter, *item;
+	return ( max_docs, max_words )		
 
-    if(!PySequence_Check(doclist)) {
-        PyErr_SetString(PyExc_TypeError, "expected list of documents");
-        return 0;
-    }
-    count_doclist(doclist, &max_docs, &max_words);
-    (*docs) = (DOC **)malloc(sizeof(DOC*) * max_docs); /* Feature vectors */
-    (*label) = (double *)malloc(sizeof(double) * max_docs); /* Target values */
-    words = (WORD *)malloc(sizeof(WORD) * (max_words + 1));
 
-    (*totwords) = 0;
-    iter = PyObject_GetIter(doclist);
-    while((item = PyIter_Next(iter))) {
-        if(!unpack_document(item, words, &doc_label, &queryid, &slackid,
-                             &costfactor, &wpos, max_words))
-            return 0;
-        if((wpos > 1) && ((words[wpos - 2]).wnum > (*totwords)))
-            (*totwords)=(words[wpos-2]).wnum;
-        (*label)[dnum] = doc_label;
-        (*docs)[dnum] = create_example(dnum, queryid, slackid, costfactor,
-                                       create_svector(words, "", 1.0));
-        dnum++;
-        Py_DECREF(item);
-    }
-    Py_DECREF(iter);
+def unpack_doclist( doclist ):
+	
 
-    free(words);
-    (*totdoc) = dnum;
-    return 1;
-'''
+    try:
+	doc_iterator = iter(doclist)
+    except TypeError, te:
+	raise Exception("Not iterable")
+
+# ----------------------------------------------
+
 # ----------------------------------------------
 
 def print_client_data( client_data ):
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.