Ceri Stagg committed 148cbfd

Start cleaning up the code and making it more idiomatic.

Comments (0)

Files changed (1)

svmlight/svmlight.py

 from collections import namedtuple
 import pickle
 
-import localdata
-
 VERSION = "V6.02"
 VERSION_DATE = "14.08.08"
 
 
 MAXSHRINK = 50000
 svm = CDLL("./svmlight.so")
+# ----------------------------------------------
 
 class WORD(Structure):
     _fields_ = [("wnum",   FNUM),
 svm.sprod_ss.restype = c_double
 svm.classify_example_linear.restype = c_double
 svm.read_model.restype = POINTER( MODEL )
-
 # ----------------------------------------------
 
 ''' This auxiliary function to svm_learn reads some parameters from the keywords to
     return ( max_docs, max_words )
 # ----------------------------------------------
 
-
 UnpackData = namedtuple('Unpackdata', 'words doc_label queryid slackid costfactor')
 # ----------------------------------------------
 
     # be reading them from the feature pairs (don't really care).
     queryid, slackid, costfactor = 0, 0, 1
 
-    if type(docobj) != tuple:
+    if not isinstance( docobj, tuple ):
         raise Exception("document should be a tuple")
 
     label, words_list = docobj[0], docobj[1]
     if type(words_list) != list:
         raise Exception("expected list of feature pairs")
 
+    words = [WordTuple( int( feat0 ), feat1 ) for
+             feat0, feat1 in words_list[:max_words_doc]]
+    '''
     words = []
 
-    for feature_pair in words_list:
+    for (feat0, feat1) in words_list:
         if len( words ) >= max_words_doc:
             break 
-        wordtuple = WordTuple( int( feature_pair[0] ), feature_pair[1] )
+        wordtuple = WordTuple( int( feat0 ), feat1 )
         words.append( wordtuple )
+    '''
 
     # sentinel entry required by C code
     words.append( WordTuple( 0, 0.0 ) )
 
 def create_svector( words, userdefined, factor ):
 
-    '''
-    [("words",       POINTER(WORD)),
-                    ("twonorm_sq",  c_double),
-                    ("userdefined", POINTER(c_char)),
-                    ("kernel_id",   c_long),
-                    ("next",        POINTER(SVECTOR)),
-                    ("factor",      c_double)]
-                    '''
-
     result = SVECTOR()
     cwords = create_fixed_size_words( words )
 
-    # result = (WORD * len( words ))() -- cwords
-    # ("words",       POINTER(WORD)),
-
     result.words = cwords
     result.twonorm_sq = svm.sprod_ss( pointer(result), pointer(result) )
 
         result.append( dist )
 
     return result
-
+# ----------------------------------------------
         
 
 # -------------------- MAIN --------------------
+
+import localdata
+
 if __name__ == "__main__":
     training_data = localdata.train0
     test_data = localdata.test0
 
     with open("model.pickle", 'wb') as f:
         pickle.dump( learn_results_tuple.model, f)
+
 # ----------------------------------------------
 
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.