Commits

BarryFSmith  committed 73213c3

Added code to properly preallocate "diagonal" portion of scalar matrix derived from vector matrix in PCGAMGCreateGraph()

Reported-by: Fabian Jakub <Fabian.Jakub@physik.uni-muenchen.de>

  • Participants
  • Parent commits b77cb7c

Comments (0)

Files changed (1)

File src/ksp/pc/impls/gamg/tools.c

 PetscErrorCode PCGAMGCreateGraph(const Mat Amat, Mat *a_Gmat)
 {
   PetscErrorCode ierr;
-  PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
+  PetscInt       Istart,Iend,Ii,i,jj,kk,ncols,nloc,NN,MM,bs;
   PetscMPIInt    rank, size;
   MPI_Comm       comm;
   Mat            Gmat;
   if (bs > 1) {
     const PetscScalar *vals;
     const PetscInt    *idx;
-    PetscInt          *d_nnz, *o_nnz;
-    /* count nnz, there is sparcity in here so this might not be enough */
+    PetscInt          *d_nnz, *o_nnz,*blockmask,maskcnt;
+    /*
+       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
+       This is accurate for the "diagonal" block of the matrix but will be grossly high for the
+       off diagonal block most of the time but could be too low for the off-diagonal.
+
+       This should be fixed to be accurate for the off-diagonal portion. Cannot just use a mask
+       for the off-diagonal portion since for huge matrices that would require too much memory per
+       MPI process.
+    */
     ierr = PetscMalloc1(nloc, &d_nnz);CHKERRQ(ierr);
     ierr = PetscMalloc1(nloc, &o_nnz);CHKERRQ(ierr);
+    ierr = PetscMalloc1(nloc, &blockmask);CHKERRQ(ierr);
     for (Ii = Istart, jj = 0; Ii < Iend; Ii += bs, jj++) {
-      d_nnz[jj] = 0;
-      for (kk=0; kk<bs; kk++) {
-        ierr = MatGetRow(Amat,Ii+kk,&ncols,0,0);CHKERRQ(ierr);
-        if (ncols > d_nnz[jj]) {
-          d_nnz[jj] = ncols; /* very pessimistic but could be too low in theory */
+      o_nnz[jj] = 0;
+      ierr = PetscMemzero(blockmask,nloc*sizeof(PetscInt));CHKERRQ(ierr);
+      for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
+        ierr = MatGetRow(Amat,Ii+kk,&ncols,&idx,0);CHKERRQ(ierr);
+        for (i=0; i<ncols; i++) {
+          if (idx[i] >= Istart && idx[i] < Iend) {
+            blockmask[(idx[i] - Istart)/bs] = 1;
+          }
+        }
+        if (ncols > o_nnz[jj]) {
           o_nnz[jj] = ncols;
-          if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
           if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
         }
-        ierr = MatRestoreRow(Amat,Ii+kk,&ncols,0,0);CHKERRQ(ierr);
+        ierr = MatRestoreRow(Amat,Ii+kk,&ncols,&idx,0);CHKERRQ(ierr);
+      }
+      maskcnt = 0;
+      for (i=0; i<nloc; i++) {
+        if (blockmask[i]) maskcnt++;
       }
+      d_nnz[jj] = maskcnt;
     }
 
     /* get scalar copy (norms) of matrix -- AIJ specific!!! */