Commits

Jeff Squyres committed 338d359

Be a little more conservative about initializing devices and modules
(i.e., ensure that more data items get zeroed out/set to NULL) so that
if something goes wrong during initialization, we don't try to clean
up something that isn't there (and crash with a segmentation fault).

The chance of this happening on the trunk is very low (and will also
be low once the verbs improvements are brought over to v1.7). But it
can actually happen in the v1.6 branch (e.g., if no CPC is available,
we'll try to get the length of the endpoints list, but the endpoints
list is NULL).

Hence, even though the real goal is to get this functionality over to
v1.6, I figured I'd commit to the trunk and CMR to v1.7 just to try to
keep the openib BTL code common across all three branches (trunk, v1.7,
and v1.6) where possible.

Comments (0)

Files changed (2)

ompi/mca/btl/openib/btl_openib.c

     }
 
     /* Release all QPs */
-    for (ep_index=0;
-         ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
-         ep_index++) {
-        endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
-                                                  ep_index);
-        if(!endpoint) {
-            BTL_VERBOSE(("In finalize, got another null endpoint"));
-            continue;
+    if (NULL != openib_btl->device->endpoints) {
+        for (ep_index=0;
+             ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
+             ep_index++) {
+            endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
+                                                                              ep_index);
+            if(!endpoint) {
+                BTL_VERBOSE(("In finalize, got another null endpoint"));
+                continue;
+            }
+            if(endpoint->endpoint_btl != openib_btl) {
+                continue;
+            }
+            for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
+                if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
+                    openib_btl->device->eager_rdma_buffers[i] = NULL;
+                    OBJ_RELEASE(endpoint);
+                }
+            }
+            OBJ_RELEASE(endpoint);
         }
-        if(endpoint->endpoint_btl != openib_btl) {
-            continue;
-        }
-        for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
-            if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
-                openib_btl->device->eager_rdma_buffers[i] = NULL;
-                OBJ_RELEASE(endpoint);
-            }
-        }
-        OBJ_RELEASE(endpoint);
     }
 
     /* Release SRQ resources */

ompi/mca/btl/openib/btl_openib_component.c

         for(i = 0; i < mca_btl_openib_component.btls_per_lid; i++){
             char param[40];
 
-            openib_btl = (mca_btl_openib_module_t *) malloc(sizeof(mca_btl_openib_module_t));
+            openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t));
             if(NULL == openib_btl) {
                 BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
                 return OMPI_ERR_OUT_OF_RESOURCE;
     device->ib_channel = NULL;
 #endif
     device->btls = 0;
+    device->endpoints = NULL;
+    device->device_btls = NULL;
     device->ib_cq[BTL_OPENIB_HP_CQ] = NULL;
     device->ib_cq[BTL_OPENIB_LP_CQ] = NULL;
     device->cq_size[BTL_OPENIB_HP_CQ] = 0;
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.