Commits

Karl Rupp  committed 087a195 Merge

Merge branch 'karlrupp/fix-mpicusp'

* karlrupp/fix-mpicusp: This branch fixes problems with the CUSP bindings when using MPI.
It resolves two memory leaks and fixes an incorrect implementation of MatMultAdd()
when compressed row storage is used for off-processor values.

  • Participants
  • Parent commits cdce425, 29138a1

Comments (0)

Files changed (2)

File src/mat/impls/aij/seq/seqcusp/aijcusp.cu

 	  delete (CUSPMATRIXDIA *) cuspstruct->mat;
 	else
 	  delete (CUSPMATRIX *) cuspstruct->mat;
-        if (cuspstruct->tempvec) delete cuspstruct->tempvec;
 
       } catch(char *ex) {
         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
       }
 
       /* assign the compressed row indices */
+      if (cuspstruct->indices) delete (CUSPINTARRAYGPU*)cuspstruct->indices;
       cuspstruct->indices = new CUSPINTARRAYGPU;
       cuspstruct->indices->assign(ridx,ridx+m);
 
         ierr = PetscFree(ii);CHKERRQ(ierr);
         ierr = PetscFree(ridx);CHKERRQ(ierr);
       }
+      if (cuspstruct->tempvec) delete (CUSPARRAY*)cuspstruct->tempvec;
       cuspstruct->tempvec = new CUSPARRAY;
       cuspstruct->tempvec->resize(m);
     } catch(char *ex) {
       /* use compressed row format */
       CUSPMATRIX *mat = (CUSPMATRIX*)cuspstruct->mat;
       cusp::multiply(*mat,*xarray,*cuspstruct->tempvec);
-      ierr = VecSet_SeqCUSP(yy,0.0);CHKERRQ(ierr);
-      thrust::copy(cuspstruct->tempvec->begin(),cuspstruct->tempvec->end(),thrust::make_permutation_iterator(yarray->begin(),cuspstruct->indices->begin()));
+      thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(cuspstruct->tempvec->begin(),
+                                                                    thrust::make_permutation_iterator(zarray->begin(), cuspstruct->indices->begin()))),
+                       thrust::make_zip_iterator(thrust::make_tuple(cuspstruct->tempvec->end(),
+                                                                    thrust::make_permutation_iterator(zarray->end(),cuspstruct->indices->end()))),
+                       VecCUSPPlusEquals());
     } else { 
 
       if (cuspstruct->format==MAT_CUSP_ELL) {
 	delete (CUSPMATRIXDIA*)(cuspcontainer->mat);
       else
 	delete (CUSPMATRIX*)(cuspcontainer->mat);
+
+      if (cuspcontainer->indices) delete (CUSPINTARRAYGPU*)cuspcontainer->indices;
+      if (cuspcontainer->tempvec) delete (CUSPARRAY*)cuspcontainer->tempvec;
     }
     delete cuspcontainer;
     A->valid_GPU_matrix = PETSC_CUSP_UNALLOCATED;

File src/vec/vec/interface/rvector.c

   const PetscScalar *x;
 
   PetscFunctionBegin;
+#if defined(PETSC_HAVE_CUSP)
+  if ((vec->petscnative || vec->ops->getarray) && vec->valid_GPU_array != PETSC_CUSP_GPU) {
+#else
   if (vec->petscnative || vec->ops->getarray) {
+#endif
     ierr = VecGetLocalSize(vec,&n);CHKERRQ(ierr);
     ierr = VecGetArrayRead(vec,&x);CHKERRQ(ierr);
     for (i=0; i<n; i++) {