1. petsc
  2. PETSc
  3. petsc

Commits

Karl Rupp  committed 4cf1874

ViennaCL: Added synchronization to bindings just as for CUSP in order to allow for correct results with log_summary

  • Participants
  • Parent commits 7121bf1
  • Branches master

Comments (0)

Files changed (5)

File include/petscsys.h

View file
  • Ignore whitespace
 PETSC_EXTERN PetscBool PetscInitializeCalled;
 PETSC_EXTERN PetscBool PetscFinalizeCalled;
 PETSC_EXTERN PetscBool PetscCUSPSynchronize;
+PETSC_EXTERN PetscBool PetscViennaCLSynchronize;
 
 PETSC_EXTERN PetscErrorCode PetscSetHelpVersionFunctions(PetscErrorCode (*)(MPI_Comm),PetscErrorCode (*)(MPI_Comm));
 PETSC_EXTERN PetscErrorCode PetscCommDuplicate(MPI_Comm,MPI_Comm*,int*);

File src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx

View file
  • Ignore whitespace
 
           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
         }
+        ViennaCLWaitForGPU();
       } catch(std::exception const & ex) {
         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
       }
         // copy nonzero entries directly to destination (no conversion required)
         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
 
+        ViennaCLWaitForGPU();
         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
       }
     } catch(std::exception const & ex) {
     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
     try {
       *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
+      ViennaCLWaitForGPU();
     } catch (std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
         if (zz == xx || zz == yy) { //temporary required
           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
           *zgpu = *ygpu + temp;
+          ViennaCLWaitForGPU();
         } else {
           *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
           *zgpu += *ygpu;
+          ViennaCLWaitForGPU();
         }
       }
 

File src/sys/objects/init.c

View file
  • Ignore whitespace
 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
 #endif
 /*
-  This is needed to turn on/off cusp synchronization
+  This is needed to turn on/off GPU synchronization
 */
 PetscBool PetscCUSPSynchronize = PETSC_FALSE;
+PetscBool PetscViennaCLSynchronize = PETSC_FALSE;
 
 /* ------------------------------------------------------------------------------*/
 /*
   ierr = PetscOptionsGetBool(NULL,"-cusp_synchronize",&flg1,NULL);CHKERRQ(ierr);
   if (flg1) PetscCUSPSynchronize = PETSC_TRUE;
 #endif
+#if defined(PETSC_HAVE_VIENNACL)
+  ierr = PetscOptionsHasName(NULL,"-log_summary",&flg3);CHKERRQ(ierr);
+  if (flg3) flg1 = PETSC_TRUE;
+  else flg1 = PETSC_FALSE;
+  ierr = PetscOptionsGetBool(NULL,"-viennacl_synchronize",&flg1,NULL);CHKERRQ(ierr);
+  if (flg1) PetscViennaCLSynchronize = PETSC_TRUE;
+#endif
   PetscFunctionReturn(0);
 }
 

File src/vec/vec/impls/seq/seqviennacl/vecviennacl.cxx

View file
  • Ignore whitespace
       try {
         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
-        //ierr = WaitForGPU();CHKERRViennaCL(ierr);  //copy does not return before data is safe. No need to wait.
+        ViennaCLWaitForGPU();
       } catch(std::exception const & ex) {
         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
       }
     try {
       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
-      //ierr = WaitForGPU();CHKERRViennaCL(ierr); //Reads in ViennaCL are blocking
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
     try {
       *ygpu = *xgpu + alpha * *ygpu;
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
     try {
       *ygpu += alpha * *xgpu;
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
     try {
       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
       }
       ierr = PetscLogFlops(2*win->map->n);CHKERRQ(ierr);
     }
+    ViennaCLWaitForGPU();
     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
     ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
     ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
       ierr = VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);CHKERRQ(ierr);
     }
   }
+  ViennaCLWaitForGPU();
   PetscFunctionReturn(0);
 }
 
     if (xin->map->n >0) {
       ierr = PetscLogFlops(2.0*xin->map->n-1);CHKERRQ(ierr);
     }
+    ViennaCLWaitForGPU();
     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
     ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
   } else *z = 0.0;
       ierr = VecViennaCLRestoreArrayRead(yyin[i],&ygpu);CHKERRQ(ierr);
     }
 
+    ViennaCLWaitForGPU();
     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
     ierr = PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));CHKERRQ(ierr);
   } else {
     ierr = VecViennaCLGetArrayWrite(xin,&xgpu);CHKERRQ(ierr);
     try {
       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     ierr = VecViennaCLGetArrayReadWrite(xin,&xgpu);CHKERRQ(ierr);
     try {
       *xgpu *= alpha;
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
   PetscFunctionBegin;
   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
   ierr = VecDot_SeqViennaCL(xin, yin, z);CHKERRQ(ierr);
+  ViennaCLWaitForGPU();
   PetscFunctionReturn(0);
 }
 
       ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
       try {
         *ygpu = *xgpu;
+        ViennaCLWaitForGPU();
       } catch(std::exception const & ex) {
         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
       }
     } else if (xin->valid_GPU_array == PETSC_VIENNACL_CPU) {
       /* copy in CPU if we are on the CPU*/
       ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
+      ViennaCLWaitForGPU();
     } else if (xin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
       if (yin->valid_GPU_array == PETSC_VIENNACL_CPU) {
         /* copy in CPU */
         ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
-
+        ViennaCLWaitForGPU();
       } else if (yin->valid_GPU_array == PETSC_VIENNACL_GPU) {
         /* copy in GPU */
         ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
         ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
         try {
           *ygpu = *xgpu;
+          ViennaCLWaitForGPU();
         } catch(std::exception const & ex) {
           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
         }
         ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
         try {
           *ygpu = *xgpu;
+          ViennaCLWaitForGPU();
         } catch(std::exception const & ex) {
           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
         }
         ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
       } else {
         ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
+        ViennaCLWaitForGPU();
       }
     }
   }
 
     try {
       viennacl::swap(*xgpu, *ygpu);
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
     try {
       *ygpu = *xgpu * alpha;
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
     try {
       *ygpu = *xgpu * alpha + *ygpu * beta;
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     try {
       if (beta == 0.0) {
         *zgpu = gamma * *zgpu;
+        ViennaCLWaitForGPU();
         ierr = PetscLogFlops(1.0*n);CHKERRQ(ierr);
       } else if (gamma == 0.0) {
         *zgpu = beta * *ygpu;
+        ViennaCLWaitForGPU();
         ierr = PetscLogFlops(1.0*n);CHKERRQ(ierr);
       } else {
         *zgpu = beta * *ygpu + gamma * *zgpu;
+        ViennaCLWaitForGPU();
         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
       }
     } catch(std::exception const & ex) {
     try {
       if (gamma == 0.0) {
         *zgpu = alpha * *xgpu;
+        ViennaCLWaitForGPU();
         ierr = PetscLogFlops(1.0*n);CHKERRQ(ierr);
       } else {
         *zgpu = alpha * *xgpu + gamma * *zgpu;
+        ViennaCLWaitForGPU();
         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
       }
     } catch(std::exception const & ex) {
   } else if (gamma == 0.0 && xin->map->n > 0) {
     try {
       *zgpu = alpha * *xgpu + beta * *ygpu;
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
       if (gamma != 1.0)
         *zgpu *= gamma;
       *zgpu += alpha * *xgpu + beta * *ygpu;
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     ierr = VecViennaCLGetArrayReadWrite(win,&wgpu);CHKERRQ(ierr);
     try {
       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
+      ViennaCLWaitForGPU();
     } catch(std::exception const & ex) {
       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
     }
     if (type == NORM_2 || type == NORM_FROBENIUS) {
       try {
         *z = viennacl::linalg::norm_2(*xgpu);
+        ViennaCLWaitForGPU();
       } catch(std::exception const & ex) {
         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
       }
       ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
       try {
         *z = viennacl::linalg::norm_inf(*xgpu);
+        ViennaCLWaitForGPU();
       } catch(std::exception const & ex) {
         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
       }
     } else if (type == NORM_1) {
       try {
         *z = viennacl::linalg::norm_1(*xgpu);
+        ViennaCLWaitForGPU();
       } catch(std::exception const & ex) {
         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
       }
       try {
         *z     = viennacl::linalg::norm_1(*xgpu);
         *(z+1) = viennacl::linalg::norm_2(*xgpu);
+        ViennaCLWaitForGPU();
       } catch(std::exception const & ex) {
         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
       }

File src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h

View file
  • Ignore whitespace
 
 #include "viennacl/vector.hpp"
 
+#define ViennaCLWaitForGPU() if (PetscViennaCLSynchronize) viennacl::backend::finish();
+
 typedef viennacl::vector<PetscScalar>    ViennaCLVector;
 
 
   *a   = 0;
   ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+  ViennaCLWaitForGPU();
   PetscFunctionReturn(0);
 }
 
   *a   = 0;
   ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+  ViennaCLWaitForGPU();
   PetscFunctionReturn(0);
 }
 
   *a   = 0;
   ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+  ViennaCLWaitForGPU();
   PetscFunctionReturn(0);
 }