1. petsc
  2. PETSc
  3. petsc

Commits

Hong Zhang  committed e88777f

Clean up routines for MatTransposeColoring; make MatTransposeColoring->brows > 0 (row-block sweeping) the default

  • Participants
  • Parent commits f99a636
  • Branches master

Comments (0)

Files changed (4)

File include/petsc-private/matimpl.h

View file
 
   PetscInt       *colorforrow,*colorforcol;  /* pointer to rows and columns */
   PetscInt       *rows;                      /* lists the local rows for each color (using the local row numbering) */
-  PetscInt       *columnsforspidx,*den2sp;   /* maps (row,color) in the dense matrix to index of sparse matrix array a->a */
+  PetscInt       *den2sp;                    /* maps (row,color) in the dense matrix to index of sparse matrix array a->a */
   PetscInt       *columns;                   /* lists the local columns of each color (using global column numbering) */
+  PetscInt       brows;                      /* number of rows for efficient implementation of MatTransColoringApplyDenToSp() */
 };
 
 /*

File src/mat/examples/tests/makefile

View file
                                  ex138.PETSc ex138.rm ex139.PETSc runex139 ex139.rm ex141.PETSc runex141 ex141.rm \
                                  ex151.PETSc runex151 ex151.rm \
                                  ex159.PETSc runex159 runex159_nest ex159.rm \
-                                 ex160.PETSc runex160 ex160.rm  ex161.PETSc runex161 runex161_2 ex161.rm ex164.PETSc runex164 ex164.rm
+                                 ex160.PETSc runex160 ex160.rm  ex161.PETSc runex161 runex161_2 runex161_3 ex161.rm ex164.PETSc runex164 ex164.rm
 TESTEXAMPLES_C_X	       = ex2.PETSc runex2 ex2.rm ex7.PETSc runex7 ex7.rm \
                                  ex12.PETSc runex12 runex12_2 runex12_3 runex12_4 ex12.rm ex13.PETSc runex13 ex13.rm \
                                  ex17.PETSc runex17 ex17.rm ex19.PETSc runex19 ex19.rm ex24.PETSc ex24.rm ex25.PETSc \

File src/mat/impls/aij/seq/matmatmult.c

View file
   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)Csp->data;
   PetscScalar    *ca_den,*ca=csp->a;
   PetscInt       k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors;
-  PetscInt       brows=10; 
+  PetscInt       brows=matcoloring->brows,*den2sp=matcoloring->den2sp; 
 
   PetscFunctionBegin;
-  ierr = PetscOptionsGetInt(NULL,"-matden2sp_brows",&brows,NULL);CHKERRQ(ierr);
   ierr   = MatDenseGetArray(Cden,&ca_den);CHKERRQ(ierr);
  
   if (brows) {  /* rowblock-wise sweeping Cden - would be 40% faster than column-wise sweeping */
-    PetscInt       *den2sp=matcoloring->den2sp; 
     PetscInt       row_i,i,spidx;
-    for (i=0; i<m; i += brows) {  /* loop over rows of Csp */
-      for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */
+    for (i=0; i<m-brows; i += brows) {  /* loop over row blocks of Csp */
+      for (k=0; k<ncolors; k++) {       /* loop over colors (columns of Cden) */
         for (row_i=i; row_i<i+brows; row_i++) { 
-          if (row_i >= m) break;
           l = k*m + row_i; 
           spidx = den2sp[l];
           if ( spidx > -1 ) {
         }
       }
     }
+    for (; i<m; i++) { /* over extra rows of Csp */
+      for (k=0; k<ncolors; k++) { 
+        l = k*m + i;
+        spidx = den2sp[l];
+        if ( spidx > -1 ) {
+          ca[spidx] = ca_den[l];
+        }
+      }
+    }
   } else { /* column-wise sweeping Cden */
     PetscInt       nrows,*row,*idx;
-    PetscInt       *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow;
+    PetscInt       *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow;
     PetscScalar    *cp_den;
     cp_den = ca_den;
     for (k=0; k<ncolors; k++) {
       nrows = matcoloring->nrows[k];
       row   = rows  + colorforrow[k];
-      idx   = spidx + colorforrow[k];
+      idx   = den2sp + colorforrow[k];
       for (l=0; l<nrows; l++) {
         ca[idx[l]] = cp_den[row[l]];
       }
 
   ierr = MatDenseRestoreArray(Cden,&ca_den);CHKERRQ(ierr);
 #if defined(PETSC_USE_INFO)
-  ierr = PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);CHKERRQ(ierr);
+  if (matcoloring->brows) {
+    ierr = PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);CHKERRQ(ierr);
+  } else {
+    ierr = PetscInfo(Csp,"Loop over colors/columns of Cden, may not be efficient\n");CHKERRQ(ierr);
+  }
 #endif
   PetscFunctionReturn(0);
 }
 /*
  MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from
  MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output
- spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ().
+ spidx[], index of a->a, to be used in MatTransposeColoringCreate_SeqAIJ().
  */
 #undef __FUNCT__
 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color"
 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
 {
   PetscErrorCode ierr;
-  PetscInt       i,n,nrows,N,j,k,m,ncols,col,cm;
+  PetscInt       i,n,nrows,Nbs,j,k,m,ncols,col,cm;
   const PetscInt *is,*ci,*cj,*row_idx;
   PetscInt       nis = iscoloring->n,*rowhit,bs = 1;
   IS             *isa;
   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
-  PetscInt       *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx,*den2sp,*den2sp_i;
-  PetscInt       *colorforcol,*columns,*columns_i;
+  PetscInt       *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i;
+  PetscInt       *colorforcol,*columns,*columns_i,brows;
+  PetscBool      flg;
 
   PetscFunctionBegin;
   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
 
   /* bs >1 is not being tested yet! */
-  N         = mat->cmap->N/bs;
+  Nbs       = mat->cmap->N/bs;
   c->M      = mat->rmap->N/bs;  /* set total rows, columns and local rows */
-  c->N      = mat->cmap->N/bs;
-  c->m      = mat->rmap->N/bs;
+  c->N      = Nbs;
+  c->m      = c->M; 
   c->rstart = 0;
+  c->brows  = 100;
 
   c->ncolors = nis;
   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
   ierr       = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr);
-  ierr       = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*c->m*sizeof(PetscInt),&den2sp);CHKERRQ(ierr);
-  for (i=0; i<nis*c->m; i++) den2sp[i] = -1;
+  ierr       = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
+
+  brows = c->brows;
+  ierr = PetscOptionsGetInt(NULL,"-matden2sp_brows",&brows,&flg);CHKERRQ(ierr);
+  if (flg) c->brows = brows;
+  if (brows) {
+    ierr = PetscMalloc(nis*c->m*sizeof(PetscInt),&den2sp);CHKERRQ(ierr);
+    for (i=0; i<nis*c->m; i++) den2sp[i] = -1;
+  } else {
+    ierr = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr);
+  }
   
-  colorforrow[0]    = 0;
-  rows_i            = rows;
-  columnsforspidx_i = columnsforspidx;
-  den2sp_i          = den2sp;
+  colorforrow[0] = 0;
+  rows_i         = rows;
+  den2sp_i       = den2sp;
 
   ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr);
-  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr);
+  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr);
 
   colorforcol[0] = 0;
   columns_i      = columns;
     c->nrows[i]      = nrows;
     colorforrow[i+1] = colorforrow[i] + nrows;
 
-    nrows = 0;
-    for (j=0; j<cm; j++) {
-      if (rowhit[j]) {
-        rows_i[nrows]            = j;
-        columnsforspidx_i[nrows] = idxhit[j];
-        den2sp_i[j]              = idxhit[j];
-        nrows++;
-      }
-    } 
+    if (brows == 0) {
+      nrows = 0;
+      for (j=0; j<cm; j++) {
+        if (rowhit[j]) {
+          rows_i[nrows] = j;
+          den2sp_i[j]   = idxhit[j];
+          nrows++;
+        }
+      } 
+      den2sp_i += nrows;
+    } else {
+      nrows = 0;
+      for (j=0; j<cm; j++) {
+        if (rowhit[j]) {
+          rows_i[nrows] = j;
+          den2sp_i[j]   = idxhit[j];
+          nrows++;
+        }
+      } 
+      den2sp_i += cm;
+    }
     ierr    = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
     rows_i += nrows; 
-    columnsforspidx_i += nrows;
-    den2sp_i          += cm;
   }
   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
   ierr = PetscFree(rowhit);CHKERRQ(ierr);
   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
-#if defined(PETSC_USE_DEBUG)
   if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]);
-#endif
 
   c->colorforrow     = colorforrow;
   c->rows            = rows;
-  c->columnsforspidx = columnsforspidx;
   c->den2sp          = den2sp;
   c->colorforcol     = colorforcol;
   c->columns         = columns;

File src/mat/interface/matrix.c

View file
   ierr = PetscFree(matcolor->ncolumns);CHKERRQ(ierr);
   ierr = PetscFree(matcolor->nrows);CHKERRQ(ierr);
   ierr = PetscFree(matcolor->colorforrow);CHKERRQ(ierr);
-  ierr = PetscFree2(matcolor->rows,matcolor->columnsforspidx);CHKERRQ(ierr);
+  ierr = PetscFree(matcolor->rows);CHKERRQ(ierr);
   ierr = PetscFree(matcolor->den2sp);CHKERRQ(ierr);
   ierr = PetscFree(matcolor->colorforcol);CHKERRQ(ierr);
   ierr = PetscFree(matcolor->columns);CHKERRQ(ierr);