- attached segfault.txt
segmentation fault
Apparently, this library crashes randomly. Since I sometimes get a classifier that works very well, I think I am using the library correctly.
One example:
Program received signal SIGSEGV, Segmentation fault.
0x0000000000558175 in Kernel::dot (px=0xa36ff0, py=0xa36ff0) at svm.cpp:301
301 sum += px->value * py->value;
(gdb) where
#0 0x0000000000558175 in Kernel::dot (px=0xa36ff0, py=0xa36ff0) at svm.cpp:301
#1 0x000000000055a459 in SVC_Q::SVC_Q (
y_=0x9df4f0 '\001' <repeats 48 times>, '\377' <repeats 152 times>...,
param=..., prob=..., this=0x7fffffffd360) at svm.cpp:1276
#2 solve_c_svc (Cn=0.0031250000000000002, Cp=0.03125, si=0x7fffffffd330,
alpha=0x7fffb94fa010, param=0xa13000, prob=0x7fffffffd650) at svm.cpp:1458
#3 svm_train_one (prob=prob@entry=0x7fffffffd650, param=param@entry=0xa13000,
Cp=Cp@entry=0.03125, Cn=0.0031250000000000002) at svm.cpp:1656
#4 0x000000000055d893 in svm_train (prob=prob@entry=0x9f5ec0,
param=param@entry=0xa13000) at svm.cpp:2217
#5 0x0000000000556253 in svm_train_stub (v_prob=140736338603696,
v_param=140736376142384) at libsvm_stubs.cpp:400
#6 0x0000000000416df7 in camlLibsvm__train_inner_9202 () at lib/libsvm.ml:424
#7 0x0000000000410b8a in camlSvm__fun_3166 () at src/svm.ml:76
#8 0x000000000041232a in camlUtls__wall_clock_time_1858 () at src/utls.ml:258
#9 0x00000000004106b0 in camlSvm__train_test_2209 () at src/svm.ml:75
#10 0x0000000000411046 in camlSvm__entry () at src/svm.ml:159
#11 0x0000000000407a09 in caml_program ()
#12 0x000000000059feea in caml_start_program ()
#13 0x000000000058645d in caml_startup_exn (argv=0x7fffffffdb78)
at startup.c:144
#14 0x0000000000586499 in caml_startup (argv=<optimized out>) at startup.c:149
#15 0x00000000005864c2 in caml_main (argv=<optimized out>) at startup.c:158
#16 0x0000000000406cbc in main (argc=<optimized out>, argv=<optimized out>)
at main.c:37
Another one:
Program received signal SIGSEGV, Segmentation fault.
0x0000000000558145 in Kernel::dot (px=0x9d57f0, py=0xa4d000) at svm.cpp:297
297 svm.cpp: No such file or directory.
(gdb) where
#0 0x0000000000558145 in Kernel::dot (px=0x9d57f0, py=0xa4d000) at svm.cpp:297
#1 0x000000000055fb33 in SVC_Q::get_Q (this=0x7fffffffd360, i=47, len=20848)
at svm.cpp:1286
#2 0x0000000000556ff6 in Solver::select_working_set (this=0x7fffffffd3c0,
out_i=@0x7fffffffd270: 10576416, out_j=@0x7fffffffd274: 0) at svm.cpp:823
#3 0x0000000000558b7d in Solver::Solve (this=this@entry=0x7fffffffd3c0,
l=l@entry=20848, Q=..., p_=p_@entry=0x7fffb94d1010,
y_=y_@entry=0x9da980 '\001' <repeats 48 times>, '\377' <repeats 152 times>..., alpha_=alpha_@entry=0x7fffb94fa010, Cp=Cp@entry=0.03125,
Cn=Cn@entry=0.0031250000000000002, eps=eps@entry=0.001, si=0x7fffffffd330,
shrinking=1) at svm.cpp:576
#4 0x000000000055a4ba in solve_c_svc (Cn=0.0031250000000000002, Cp=0.03125,
si=0x7fffffffd330, alpha=0x7fffb94fa010, param=0xa15f50,
prob=0x7fffffffd650) at svm.cpp:1459
#5 svm_train_one (prob=prob@entry=0x7fffffffd650, param=param@entry=0xa15f50,
Cp=Cp@entry=0.03125, Cn=0.0031250000000000002) at svm.cpp:1656
#6 0x000000000055d893 in svm_train (prob=prob@entry=0xa00160,
param=param@entry=0xa15f50) at svm.cpp:2217
#7 0x0000000000556253 in svm_train_stub (v_prob=140736338603616,
v_param=140736376142328) at libsvm_stubs.cpp:400
#8 0x0000000000416df7 in camlLibsvm__train_inner_9202 () at lib/libsvm.ml:424
#9 0x0000000000410b8a in camlSvm__fun_3166 () at src/svm.ml:76
#10 0x000000000041232a in camlUtls__wall_clock_time_1858 () at src/utls.ml:258
#11 0x00000000004106b0 in camlSvm__train_test_2209 () at src/svm.ml:75
#12 0x0000000000411046 in camlSvm__entry () at src/svm.ml:159
#13 0x0000000000407a09 in caml_program ()
#14 0x000000000059feea in caml_start_program ()
#15 0x000000000058645d in caml_startup_exn (argv=0x7fffffffdb78)
at startup.c:144
#16 0x0000000000586499 in caml_startup (argv=<optimized out>) at startup.c:149
#17 0x00000000005864c2 in caml_main (argv=<optimized out>) at startup.c:158
#18 0x0000000000406cbc in main (argc=<optimized out>, argv=<optimized out>)
at main.c:37
My OCaml code: Svm.train ~c:0.03125 ~cachesize:4096.0 ~kernel:`LINEAR ~weights:[(-1, 0.1);(1, 1.0)] (* -1: inactive label; +1: active label *) problem
Regards, F.
Comments (7)
-
-
Same problem here. More precisely, I get a segfault when calling `Svm.Problem.create` twice in the same process, and I get the following message: `double free or corruption (!prev)`, which makes a lot of sense, but also shows that my problem is unrelated to François’s. In my case there might be something wrong with finalisation.
-
Commenting out this line prevents the error, so there is definitely something wrong with finalisation.
-
I think I found the problem, which is indeed related to garbage collection. Here is an example that fails for me:
open Core_kernel let () = let open Lacaml.D in let d = 50 in let n = 20 in let x_pos = Array.init n ~f:(fun _ -> Array.init d ~f:(fun _ -> Random.float 1.)) in let x_neg = Array.init n ~f:(fun _ -> Array.init d ~f:(fun _ -> Random.float 1.)) in let x = Mat.of_array (Array.append x_pos x_neg) in let y = Vec.init (2 * n) (fun i -> if i < n then 1. else 0.) in let _prob = Libsvm.Svm.Problem.create ~x ~y in Gc.full_major () ; ()
So the problem has to do with svm problem creation.
Let’s have a look there:
CAMLprim value svm_problem_x_set_stub(value v_prob, value v_x) { CAMLparam2(v_prob, v_x); Svm_problem_val(v_prob)->x = Svm_node_matrix_val(v_x); CAMLreturn(Val_unit); }
`v_prob` is passed the C data contained in `v_x`, but there is no way to tell the GC that it holds a pointer to this data. Now look here:
let create_gen x y ~f = let n_samples = Mat.dim1 x in let n_feats = Mat.dim2 x in let x' = Mat.transpose_copy x in let m = Stub.svm_node_matrix_create n_samples in let v = Stub.double_array_create n_samples in for i = 1 to n_samples do let x_row = Mat.col x' i in Stub.svm_node_matrix_set m (i-1) (f x_row); Stub.double_array_set v (i-1) y.{i} done; let prob = Stub.svm_problem_create () in Stub.svm_problem_l_set prob n_samples; Stub.svm_problem_x_set prob m; Stub.svm_problem_y_set prob v; { n_samples; n_feats; prob; }
After `create_gen` is called, the GC thinks it can reclaim `m` and its associated data, which it can't, since the output of the function holds a pointer to it. And when the call to `full_major` comes, you get an error from `free`.
-
I started a branch to fix the problem, and it involves quite a few changes: this pattern of two OCaml values sharing C memory is repeated for several data structures and IMHO has to be changed. I’m pretty confident I can produce a proposed fix, but as it still involves some work, I’d like you to confirm (or not) that you’re willing to consider a (big) patch. If possible, don’t wait too long to answer: I have a very tiny time frame to work on this, and it’s better that I resume while I still have things clear in mind!
-
repo owner Hi Philippe, I would be happy to accept a patch that solves this issue. Please go ahead when you have the time.
-
repo owner - changed status to closed
- Log in to comment