Commits

Nathan Hjelm  committed fb79c55

Fix a bug in the rget header. Refactor start_rdma in ob1.

  • Participants
  • Parent commits 92a1413

Comments (0)

Files changed (3)

File ompi/mca/pml/bfo/pml_bfo_sendreq.c

                      sendreq->req_send.req_base.req_proc);
 
         /* copy segment data */
-        memmove (hdr + 1, src->des_src, seg_size);
+        memmove (&hdr->hdr_rget + 1, src->des_src, seg_size);
 
         des->des_cbfunc = mca_pml_bfo_send_ctl_completion;
 

File ompi/mca/pml/csum/pml_csum_sendreq.c

                      sendreq->req_send.req_base.req_proc);
 
         /* copy segment data */
-        memmove (hdr + 1, src->des_src, seg_size);
+        memmove (&hdr->hdr_rget + 1, src->des_src, seg_size);
 
         des->des_cbfunc = mca_pml_csum_send_ctl_completion;
 

File ompi/mca/pml/ob1/pml_ob1_sendreq.c

 {
     mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
     mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
-    size_t req_bytes_delivered = 0;
+    size_t req_bytes_delivered;
 
     /* count bytes of user data actually delivered and check for request completion */
     if (OPAL_LIKELY(OMPI_SUCCESS == status)) {
         req_bytes_delivered = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
                                                                   (void *) des->des_src,
                                                                   des->des_src_cnt, 0);
+        OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
     }
-    OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
     sendreq->src_des = NULL;
 
     send_request_pml_complete_check(sendreq);
      */
 
     mca_btl_base_descriptor_t *des, *src = NULL;
-    mca_btl_base_segment_t* segment;
-    mca_pml_ob1_hdr_t* hdr;
-    bool need_local_cb = false;
+    mca_pml_ob1_rget_hdr_t *hdr;
+    size_t seg_size;
     int rc;
 
     sendreq->src_des = NULL;
 
     bml_btl = sendreq->req_rdma[0].bml_btl;
-    if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
-        mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
-        size_t seg_size;
-        size_t old_position = sendreq->req_send.req_base.req_convertor.bConverted;
-
-        MEMCHECKER(
-            memchecker_call(&opal_memchecker_base_mem_defined,
-                            sendreq->req_send.req_base.req_addr,
-                            sendreq->req_send.req_base.req_count,
-                            sendreq->req_send.req_base.req_datatype);
-        );
-        /* prepare source descriptor/segment(s) */
-        /* PML owns this descriptor and will free it in */
-        /*  get_completion */
-        mca_bml_base_prepare_src( bml_btl, 
-                                  reg,
-                                  &sendreq->req_send.req_base.req_convertor,
-                                  MCA_BTL_NO_ORDER, 0, &size,
-                                  MCA_BTL_DES_FLAGS_GET | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP,
-                                  &src );
-        MEMCHECKER(
-            memchecker_call(&opal_memchecker_base_mem_noaccess,
-                            sendreq->req_send.req_base.req_addr,
-                            sendreq->req_send.req_base.req_count,
-                            sendreq->req_send.req_base.req_datatype);
-        );
-        if( OPAL_UNLIKELY(NULL == src) ) {
-            opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor,
-                                        &old_position);
-            return OMPI_ERR_OUT_OF_RESOURCE;
-        } 
-        src->des_cbfunc = mca_pml_ob1_rget_completion;
-        src->des_cbdata = sendreq;
-
-        sendreq->src_des = src;
-
-        seg_size = bml_btl->btl->btl_seg_size * src->des_src_cnt;
+    if (!(bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
+        /* This BTL does not support get. Use rendezvous to start the RDMA operation using put instead. */
+        return mca_pml_ob1_send_request_start_rndv (sendreq, bml_btl, 0, MCA_PML_OB1_HDR_FLAGS_CONTIG |
+                                                    MCA_PML_OB1_HDR_FLAGS_PIN);
+    }
 
-        /* allocate space for get hdr + segment list */
-        mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER,
-                           sizeof(mca_pml_ob1_rget_hdr_t) + seg_size,
-                           MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
-        if( OPAL_UNLIKELY(NULL == des) ) {
-            opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
-                                         &old_position );
-            mca_bml_base_free(bml_btl, src);
-            return OMPI_ERR_OUT_OF_RESOURCE;
-        }
-        segment = des->des_src;
-
-        /* build match header */
-        hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
-        hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
-        hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
-        hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
-        hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
-        hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
-        hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
-        hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
-        hdr->hdr_rndv.hdr_src_req.pval = sendreq;
-        hdr->hdr_rget.hdr_des.pval = src;
-        hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
-
-        ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RGET,
-                     sendreq->req_send.req_base.req_proc);
+    MEMCHECKER(
+               memchecker_call(&opal_memchecker_base_mem_defined,
+                               sendreq->req_send.req_base.req_addr,
+                               sendreq->req_send.req_base.req_count,
+                               sendreq->req_send.req_base.req_datatype);
+               );
+    /* prepare source descriptor/segment(s) */
+    /* PML owns this descriptor and will free it in */
+    /*  mca_pml_ob1_rget_completion */
+    mca_bml_base_prepare_src( bml_btl, sendreq->req_rdma[0].btl_reg,
+                              &sendreq->req_send.req_base.req_convertor,
+                              MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_GET |
+                              MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, &src );
+    MEMCHECKER(
+               memchecker_call(&opal_memchecker_base_mem_noaccess,
+                               sendreq->req_send.req_base.req_addr,
+                               sendreq->req_send.req_base.req_count,
+                               sendreq->req_send.req_base.req_datatype);
+               );
+    if( OPAL_UNLIKELY(NULL == src) ) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+ 
+    src->des_cbfunc = mca_pml_ob1_rget_completion;
+    src->des_cbdata = sendreq;
 
-        /* copy segment data */
-        memcpy (hdr + 1, src->des_src, seg_size);
+    sendreq->src_des = src;
 
-        des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
+    seg_size = bml_btl->btl->btl_seg_size * src->des_src_cnt;
 
-        /**
-         * Well, it's a get so we will not know when the peer get the data anyway.
-         * If we generate the PERUSE event here, at least we will know when do we
-         * sent the GET message ...
-         */
-        if( sendreq->req_send.req_bytes_packed > 0 ) {
-            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
-                                     &(sendreq->req_send.req_base), PERUSE_SEND );
-        }
+    /* allocate space for get hdr + segment list */
+    mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + seg_size,
+                       MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
+    if( OPAL_UNLIKELY(NULL == des) ) {
+        /* NTH: no need to reset the convertor here. It will be reset before it is retried. */
+        mca_bml_base_free(bml_btl, src);
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
 
-    } else {
+    /* build match header */
+    hdr = (mca_pml_ob1_rget_hdr_t *) des->des_src->seg_addr.pval;
+
+    hdr->hdr_rndv.hdr_match.hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
+    hdr->hdr_rndv.hdr_match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
+    hdr->hdr_rndv.hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
+    hdr->hdr_rndv.hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
+    hdr->hdr_rndv.hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
+    hdr->hdr_rndv.hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
+    hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
+    hdr->hdr_rndv.hdr_src_req.pval = sendreq;
+    hdr->hdr_des.pval = src;
+    hdr->hdr_seg_cnt = src->des_src_cnt;
 
-        /* allocate a rendezvous header - dont eager send any data 
-         * receiver will schedule rdma put(s) of the entire message
-         */
+    ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RGET, sendreq->req_send.req_base.req_proc);
 
-        mca_bml_base_alloc(bml_btl, &des, 
-                           MCA_BTL_NO_ORDER,
-                           sizeof(mca_pml_ob1_rendezvous_hdr_t),
-                           MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
-        if( OPAL_UNLIKELY(NULL == des)) {
-            return OMPI_ERR_OUT_OF_RESOURCE;
-        }
-        segment = des->des_src;
-            
-        /* build hdr */
-        hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
-        hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
-        hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
-        hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
-        hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
-        hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
-        hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
-        hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
-        hdr->hdr_rndv.hdr_src_req.pval = sendreq;
-
-        ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV,
-                     sendreq->req_send.req_base.req_proc);
+    /* copy segment data */
+    memcpy (hdr + 1, src->des_src, seg_size);
 
-        /* update lengths with number of bytes actually packed */
-        segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t);
-    
-        /* first fragment of a long message */
-        des->des_cbfunc = mca_pml_ob1_rndv_completion;
-        need_local_cb = true;
+    des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
+    des->des_cbdata = sendreq;
 
-        /* wait for ack and completion */
-        sendreq->req_state = 2;
+    /**
+     * Well, it's a get so we will not know when the peer will get the data anyway.
+     * If we generate the PERUSE event here, at least we will know when we
+     * sent the GET message ...
+     */
+    if( sendreq->req_send.req_bytes_packed > 0 ) {
+        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                                 &(sendreq->req_send.req_base), PERUSE_SEND );
     }
 
-    des->des_cbdata = sendreq;
-
     /* send */
-    rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type);
-    if( OPAL_LIKELY( rc >= 0 ) ) {
-        if( OPAL_LIKELY( 1 == rc ) && (true == need_local_cb)) {
-            mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, 0 );
+    rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RGET);
+    if (OPAL_UNLIKELY(rc < 0)) {
+        mca_bml_base_free(bml_btl, des);
+
+        if (sendreq->src_des) {
+            mca_bml_base_free (bml_btl, sendreq->src_des);
+            sendreq->src_des = NULL;
         }
-        return OMPI_SUCCESS;
-    }
-    mca_bml_base_free(bml_btl, des);
-    if (sendreq->src_des) {
-        mca_bml_base_free (bml_btl, sendreq->src_des);
-        sendreq->src_des = NULL;
+
+        return rc;
     }
 
-    return rc;
+    return OMPI_SUCCESS;
 }