Ceri Stagg committed 7933957

Finally getting close

Comments (0)

Files changed (1)

svmlight/svmlight.py

 MAXSHRINK = 50000
 svm = CDLL("./svmlight.so")
 
+svm.sprod_ss.restype = c_double
 
 class WORD(Structure):
     _fields_ = [("wnum",   FNUM),
     if type(docobj) != tuple:
         raise Exception("document should be a tuple")
 
+    print ', '.join(map(str, docobj))
+
     label     = docobj[0]
     words_list = docobj[1]
     if len( docobj ) > 2:
             break 
         wordtuple = WordTuple( feature_pair[0], feature_pair[1] )
         words.append( wordtuple )
-        print( "Feature pair wnum %r weight %r" % ( wordtuple.wnum, wordtuple.weight ) )
+        # print( "Feature pair wnum %r weight %r" % ( wordtuple.wnum, wordtuple.weight ) )
 
     returnval = UnpackData( words, label, queryid, slackid, costfactor, len( words ) )
     return returnval
     return result
 # ----------------------------------------------
 
-def create_example_from_unpack(unpackdata, fvec):
-    return create_example( unpackdata.doc_label, unpackdata.queryid, unpackdata.slackid, unpackdata.costfactor, fvec )
+def create_example_from_unpack(unpackdata, currentsize, fvec):
+    return create_example( currentsize, unpackdata.queryid, unpackdata.slackid, unpackdata.costfactor, fvec )
 # ----------------------------------------------
 
 class DOCLISTDATA(Structure):
-    _fields_ = [("docs", POINTER(DOC)),
+    _fields_ = [("docs", POINTER(POINTER(DOC))),
                 ("labels", POINTER(c_double)),
                 ("totwords",  c_int),
                 ("totdoc",  c_int)]
                 totwords = candidatewords
 
         docnum = unpackdata.doc_label
+        print( "Docnum %r" % (docnum) )
      
         print "Creating svector"
         fvec = create_svector( unpackdata.words, "", 1.0 )
         print "....done svector."
-        newdoc = create_example_from_unpack( unpackdata, fvec )
-        tempdoclist.append( newdoc )
+        currentsize = len( tempdoclist )
+        newdoc = create_example_from_unpack( unpackdata, currentsize, fvec )
+        pdoc = pointer( newdoc )
+        tempdoclist.append( pdoc )
+
+        locdoc = tempdoclist[-1].contents
+        print( "locdoc num %r" % (locdoc.docnum ) )
+
         templabellist.append( docnum )
 
     totdoc = len( doclist )
 
-    carraydoc = ( DOC * totdoc )()
+    mungo = c_int( 7 )
+    pmungo = pointer( mungo )
+    mungoval = pmungo.contents
+
+    gubb = ( c_int * 10 )()
+    gubb[0] = c_int( 12 )
+    print( "Gubb %r" % ( gubb[0] ) )
+
+    whizz = ( POINTER( c_int ) * 10 )()
+    gong = c_int( 12 )
+    whizz[0] = pointer( gong )
+    print( "Whizz %r" % whizz[0].contents )
+
+    carraydoc = ( POINTER( DOC ) * totdoc )()
           
     counter = 0
     for item in iter( tempdoclist ):
+
         carraydoc[ counter ] = item
+
+        '''
+        locdoc2 = item.contents
+        print( "locdoc2 num %r" % (locdoc2.docnum ) )
+
+        
+        plocaldoc = POINTER( DOC )
+        plocaldoc = DOC()
+        plocaldoc = carraydoc[ counter ]
+        localdoc = plocaldoc.contents
+        print( "Doc %r docnum %r retry %r" % ( counter, item.docnum, localdoc.docnum ) )
+        '''
         counter += 1
 
     carraylabel = ( c_double * totdoc )() 
     result.labels = carraylabel
     result.totwords = totwords
     result.totdoc = totdoc
+
+    for i in range ( 0, totdoc ):
+        pdoc = result.docs[i]
+        print( "Docnum %r %r" % ( pdoc.contents.docnum, pdoc ) )
      
     return result
 # ----------------------------------------------
     for item in words:
         result[ index ].wnum = item.wnum
         result[ index ].weight = item.weight
-        print( "Converting %r %r" % ( result[ index ].wnum, result[ index ].weight ) )
+        # print( "Converting %r %r" % ( result[ index ].wnum, result[ index ].weight ) )
         index += 1
 
     return result
     # ("words",       POINTER(WORD)),
 
     result.words = cwords
+    print( "create_svector" )
     print( "Calling sprod_ss..." )
     result.twonorm_sq = svm.sprod_ss( pointer(result), pointer(result) )
     print( "...done (twonorm_sq was %f)" % (result.twonorm_sq) )
                                               client_data.plearn.kernel_cache_size )
         
 
+    for i in range ( 0, doclistdata.totdoc ):
+        pdoc = doclistdata.docs[i]
+        print( "Docnum %r %r" % ( pdoc.contents.docnum, pdoc ) )
+
     print( "Learn type: %r" % ( client_data.plearn.type ))
 
     if client_data.plearn.type == CLASSIFICATION:
Tip: Filter by directory path, e.g. /media app.js, to search for public/media/app.js.
Tip: Use camelCasing, e.g. ProjME, to search for ProjectModifiedEvent.java.
Tip: Filter by extension type, e.g. /repo .js, to search for all .js files in the /repo directory.
Tip: Separate your search with spaces, e.g. /ssh pom.xml, to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.