Commits

Hong Zhang committed a76eec0 Merge

Merge branch 'hzhang/snes-jacobiancoloropt'

  • Participants
  • Parent commits ee17fca, 6346027

Comments (0)

Files changed (39)

File include/finclude/petscmat.h

       PetscEnum MATOP_SET_BLOCK_SIZES
       PetscEnum MATOP_AYPX
       PetscEnum MATOP_RESIDUAL
+      PetscEnum MATOP_FDCOLORING_SETUP
 
       parameter(MATOP_SET_VALUES=0)
       parameter(MATOP_GET_ROW=1)
       parameter(MATOP_SET_BLOCK_SIZES=139)
       parameter(MATOP_AYPX=140)
       parameter(MATOP_RESIDUAL=141)
+      parameter(MATOP_FDCOLORING_SETUP=142)
 !
 !
 !

File include/petsc-private/matimpl.h

   PetscErrorCode (*setblocksizes)(Mat,PetscInt,PetscInt);
   PetscErrorCode (*aypx)(Mat,PetscScalar,Mat,MatStructure);
   PetscErrorCode (*residual)(Mat,Vec,Vec,Vec);
+  PetscErrorCode (*fdcoloringsetup)(Mat,ISColoring,MatFDColoring);
 };
 /*
     If you add MatOps entries above also add them to the MATOP enum
     columns       = {{0,2},{1},{3},{}}
     nrows         = {4,2,3,3}
     rows          = {{0,1,2,3},{0,1},{1,2,3},{0,1,2}}
-    columnsforrow = {{0,0,2,2},{1,1},{4,3,3},{5,5,5}}
-    vscaleforrow  = {{,,,},{,},{,,},{,,}}
     vwscale       = {dx(0),dx(1),dx(2),dx(3)}               MPI Vec
     vscale        = {dx(0),dx(1),dx(2),dx(3),dx(4),dx(5)}   Seq Vec
 
     columns       = {{6},{},{4},{5}}
     nrows         = {3,0,2,2}
     rows          = {{0,1,2},{},{1,2},{1,2}}
-    columnsforrow = {{6,0,6},{},{4,4},{5,5}}
-    vscaleforrow =  {{,,},{},{,},{,}}
     vwscale       = {dx(4),dx(5),dx(6)}              MPI Vec
     vscale        = {dx(0),dx(4),dx(5),dx(6)}        Seq Vec
 
     to compute the Jacobian.
 
 */
+typedef struct {
+  PetscInt     row;
+  PetscInt     col;
+  PetscScalar  *valaddr;   /* address of value */
+} MatEntry;
+
+typedef struct {
+  PetscInt     row;
+  PetscScalar  *valaddr;   /* address of value */
+} MatEntry2;
 
 struct  _p_MatFDColoring{
   PETSCHEADER(int);
   PetscInt       *ncolumns;        /* number of local columns for a color */
   PetscInt       **columns;        /* lists the local columns of each color (using global column numbering) */
   PetscInt       *nrows;           /* number of local rows for each color */
-  PetscInt       **rows;           /* lists the local rows for each color (using the local row numbering) */
-  PetscInt       **columnsforrow;  /* lists the corresponding columns for those rows (using the global column) */
+  MatEntry       *matentry;        /* holds (row, column, address of value) for Jacobian matrix entry */
+  MatEntry2      *matentry2;       /* holds (row, address of value) for Jacobian matrix entry */
+  PetscScalar    *dy;              /* store a block of F(x+dx)-F(x) when J is in BAIJ format */
   PetscReal      error_rel;        /* square root of relative error in computing function */
   PetscReal      umin;             /* minimum allowable u'dx value */
   Vec            w1,w2,w3;         /* work vectors used in computing Jacobian */
   PetscBool      fset;             /* indicates that the initial function value F(X) is set */
   PetscErrorCode (*f)(void);       /* function that defines Jacobian */
   void           *fctx;            /* optional user-defined context for use by the function f */
-  PetscInt       **vscaleforrow;   /* location in vscale for each columnsforrow[] entry */
   Vec            vscale;           /* holds FD scaling, i.e. 1/dx for each perturbed column */
   PetscInt       currentcolor;     /* color for which function evaluation is being done now */
-  const char     *htype;            /* "wp" or "ds" */
+  const char     *htype;           /* "wp" or "ds" */
   ISColoringType ctype;            /* IS_COLORING_GLOBAL or IS_COLORING_GHOSTED */
-
+  PetscInt       brows,bcols;      /* number of block rows or columns for speedup inserting the dense matrix into sparse Jacobian */
+  PetscBool      setupcalled;      /* true if setup has been called */
   void           *ftn_func_pointer,*ftn_func_cntx; /* serve the same purpose as *fortran_func_pointers in PETSc objects */
 };
 
 PETSC_EXTERN PetscLogEvent MAT_ILUFactorSymbolic, MAT_ICCFactorSymbolic, MAT_Copy, MAT_Convert, MAT_Scale, MAT_AssemblyBegin;
 PETSC_EXTERN PetscLogEvent MAT_AssemblyEnd, MAT_SetValues, MAT_GetValues, MAT_GetRow, MAT_GetRowIJ, MAT_GetSubMatrices, MAT_GetColoring, MAT_GetOrdering, MAT_GetRedundantMatrix;
 PETSC_EXTERN PetscLogEvent MAT_IncreaseOverlap, MAT_Partitioning, MAT_Coarsen, MAT_ZeroEntries, MAT_Load, MAT_View, MAT_AXPY, MAT_FDColoringCreate, MAT_TransposeColoringCreate;
-PETSC_EXTERN PetscLogEvent MAT_FDColoringApply, MAT_Transpose, MAT_FDColoringFunction;
+PETSC_EXTERN PetscLogEvent MAT_FDColoringSetUp, MAT_FDColoringApply, MAT_Transpose, MAT_FDColoringFunction;
 PETSC_EXTERN PetscLogEvent MAT_MatMult, MAT_MatSolve,MAT_MatMultSymbolic, MAT_MatMultNumeric,MAT_Getlocalmatcondensed,MAT_GetBrowsOfAcols,MAT_GetBrowsOfAocols;
 PETSC_EXTERN PetscLogEvent MAT_PtAP, MAT_PtAPSymbolic, MAT_PtAPNumeric,MAT_Seqstompinum,MAT_Seqstompisym,MAT_Seqstompi,MAT_Getlocalmat;
 PETSC_EXTERN PetscLogEvent MAT_RARt, MAT_RARtSymbolic, MAT_RARtNumeric;

File include/petscmat.h

 PETSC_EXTERN PetscErrorCode MatFDColoringApply(Mat,MatFDColoring,Vec,MatStructure*,void *);
 PETSC_EXTERN PetscErrorCode MatFDColoringSetF(MatFDColoring,Vec);
 PETSC_EXTERN PetscErrorCode MatFDColoringGetPerturbedColumns(MatFDColoring,PetscInt*,PetscInt*[]);
+PETSC_EXTERN PetscErrorCode MatFDColoringSetUp(Mat,ISColoring,MatFDColoring);
+PETSC_EXTERN PetscErrorCode MatFDColoringSetBlockSize(MatFDColoring,PetscInt,PetscInt);
+
 
 /*S
      MatTransposeColoring - Object for computing a sparse matrix product C=A*B^T via coloring
                MATOP_RART_NUMERIC=138,
                MATOP_SET_BLOCK_SIZES=139,
                MATOP_AYPX=140,
-               MATOP_RESIDUAL=141
+               MATOP_RESIDUAL=141,
+               MATOP_FDCOLORING_SETUP= 142
              } MatOperation;
 PETSC_EXTERN PetscErrorCode MatHasOperation(Mat,MatOperation,PetscBool *);
 PETSC_EXTERN PetscErrorCode MatShellSetOperation(Mat,MatOperation,void(*)(void));

File src/dm/examples/tests/ex26.c

   ierr = DMCreateMatrix(da,&J);CHKERRQ(ierr);
   ierr = DMCreateColoring(da,IS_COLORING_GHOSTED,&iscoloring);CHKERRQ(ierr);
   ierr = MatFDColoringCreate(J,iscoloring,&matfdcoloring);CHKERRQ(ierr);
+  ierr = MatFDColoringSetUp(J,iscoloring,matfdcoloring);CHKERRQ(ierr);
   ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
 
   /* free spaces */

File src/dm/examples/tests/ex9.c

   ierr = DMSetMatType(da,MATMPIAIJ);CHKERRQ(ierr);
   ierr = DMCreateMatrix(da,&mat);CHKERRQ(ierr);
   ierr = MatFDColoringCreate(mat,coloring,&fdcoloring);CHKERRQ(ierr);
+  ierr = MatFDColoringSetUp(mat,coloring,fdcoloring);CHKERRQ(ierr);
 
   ierr = DMCreateGlobalVector(da,&dvec);CHKERRQ(ierr);
   ierr = DMCreateLocalVector(da,&lvec);CHKERRQ(ierr);

File src/mat/examples/tutorials/ex16.c

     ierr = MatGetColoring(Asp,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(Asp,iscoloring,&matfdcoloring);CHKERRQ(ierr);
     ierr = MatFDColoringSetFromOptions(matfdcoloring);CHKERRQ(ierr);
+    ierr = MatFDColoringSetUp(Asp,iscoloring,matfdcoloring);CHKERRQ(ierr);
     /*ierr = MatFDColoringView(matfdcoloring,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);*/
     ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringDestroy(&matfdcoloring);CHKERRQ(ierr);

File src/mat/impls/aij/mpi/fdmpiaij.c

 
 #include <../src/mat/impls/aij/mpi/mpiaij.h>
+#include <../src/mat/impls/baij/mpi/mpibaij.h>
 
-extern PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat);
+#undef __FUNCT__
+#define __FUNCT__ "MatFDColoringApply_BAIJ"
+PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
+{
+  PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
+  PetscErrorCode ierr;
+  PetscInt       k,cstart,cend,l,row,col,nz,spidx,i,j;
+  PetscScalar    dx=0.0,*xx,*w3_array,*dy_i,*dy=coloring->dy;
+  PetscScalar    *vscale_array;
+  PetscReal      epsilon=coloring->error_rel,umin=coloring->umin,unorm;
+  Vec            w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale;
+  void           *fctx=coloring->fctx;
+  PetscInt       ctype=coloring->ctype,nxloc,nrows_k;
+  PetscScalar    *valaddr;
+  MatEntry       *Jentry=coloring->matentry;
+  MatEntry2      *Jentry2=coloring->matentry2;
+  const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
+  PetscInt       bs=J->rmap->bs;
+
+  PetscFunctionBegin;
+  /* (1) Set w1 = F(x1) */
+  if (!coloring->fset) {
+    ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+    ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
+    ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+  } else {
+    coloring->fset = PETSC_FALSE;
+  }
+
+  /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
+  ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr);
+  if (coloring->htype[0] == 'w') {
+    /* vscale = dx is a constant scalar */
+    ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
+    dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon);
+  } else {
+    ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
+    ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr);
+    for (col=0; col<nxloc; col++) {
+      dx = xx[col];
+      if (PetscAbsScalar(dx) < umin) {
+        if (PetscRealPart(dx) >= 0.0)      dx = umin;
+        else if (PetscRealPart(dx) < 0.0 ) dx = -umin;
+      }
+      dx               *= epsilon;
+      vscale_array[col] = 1.0/dx;
+    }
+    ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
+    ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr);
+  }
+  if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
+    ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+    ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+  }
+
+  /* (3) Loop over each color */
+  if (!coloring->w3) {
+    ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
+    ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
+  }
+  w3 = coloring->w3;
+
+  ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */
+  if (vscale) {
+    ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr);
+  }
+  nz   = 0;
+  for (k=0; k<ncolors; k++) {
+    coloring->currentcolor = k;
+
+    /*
+      (3-1) Loop over each column associated with color
+      adding the perturbation to the vector w3 = x1 + dx.
+    */
+    ierr = VecCopy(x1,w3);CHKERRQ(ierr);
+    dy_i = dy;
+    for (i=0; i<bs; i++) {     /* Loop over a block of columns */
+      ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
+      if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
+      if (coloring->htype[0] == 'w') {
+        for (l=0; l<ncolumns[k]; l++) {
+          col            = i + bs*coloring->columns[k][l];  /* local column (in global index!) of the matrix we are probing for */
+          w3_array[col] += 1.0/dx;
+          if (i) w3_array[col-1] -= 1.0/dx; /* resume original w3[col-1] */
+        }
+      } else { /* htype == 'ds' */
+        vscale_array -= cstart; /* shift pointer so global index can be used */
+        for (l=0; l<ncolumns[k]; l++) {
+          col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
+          w3_array[col] += 1.0/vscale_array[col];
+          if (i) w3_array[col-1] -=  1.0/vscale_array[col-1]; /* resume original w3[col-1] */
+        }
+        vscale_array += cstart;
+      }
+      if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
+      ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
+
+      /*
+       (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
+                           w2 = F(x1 + dx) - F(x1)
+       */
+      ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+      ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
+      ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
+      ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+      ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
+      ierr = VecResetArray(w2);CHKERRQ(ierr);
+      dy_i += nxloc; /* points to dy+i*nxloc */
+    }
+
+    /*
+     (3-3) Loop over rows of vector, putting results into Jacobian matrix
+    */
+    nrows_k = nrows[k];
+    if (coloring->htype[0] == 'w') {
+      for (l=0; l<nrows_k; l++) {
+        row     = bs*Jentry2[nz].row;   /* local row index */
+        valaddr = Jentry2[nz++].valaddr;
+        spidx   = 0;
+        dy_i    = dy;
+        for (i=0; i<bs; i++) {   /* column of the block */
+          for (j=0; j<bs; j++) { /* row of the block */
+            valaddr[spidx++] = dy_i[row+j]*dx;
+          }
+          dy_i += nxloc; /* points to dy+i*nxloc */
+        }
+      }
+    } else { /* htype == 'ds' */
+      for (l=0; l<nrows_k; l++) {
+        row     = bs*Jentry[nz].row;   /* local row index */
+        col     = bs*Jentry[nz].col;   /* local column index */
+        valaddr = Jentry[nz++].valaddr;
+        spidx   = 0;
+        dy_i    = dy;
+        for (i=0; i<bs; i++) {   /* column of the block */
+          for (j=0; j<bs; j++) { /* row of the block */
+            valaddr[spidx++] = dy_i[row+j]*vscale_array[col+i];
+          }
+          dy_i += nxloc; /* points to dy+i*nxloc */
+        }
+      }
+    }
+  }
+  ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+  ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+  if (vscale) {
+    ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr);
+  }
+
+  coloring->currentcolor = -1;
+  PetscFunctionReturn(0);
+}
 
 #undef __FUNCT__
-#define __FUNCT__ "MatFDColoringCreate_MPIAIJ"
-PetscErrorCode MatFDColoringCreate_MPIAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
+#define __FUNCT__ "MatFDColoringApply_AIJ"
+PetscErrorCode  MatFDColoringApply_AIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
+{
+  PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
+  PetscErrorCode ierr;
+  PetscInt       k,cstart,cend,l,row,col,nz;
+  PetscScalar    dx=0.0,*y,*xx,*w3_array;
+  PetscScalar    *vscale_array;
+  PetscReal      epsilon=coloring->error_rel,umin=coloring->umin,unorm;
+  Vec            w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale;
+  void           *fctx=coloring->fctx;
+  PetscInt       ctype=coloring->ctype,nxloc,nrows_k;
+  MatEntry       *Jentry=coloring->matentry;
+  MatEntry2      *Jentry2=coloring->matentry2;
+  const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
+
+  PetscFunctionBegin;
+  /* (1) Set w1 = F(x1) */
+  if (!coloring->fset) {
+    ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+    ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
+    ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+  } else {
+    coloring->fset = PETSC_FALSE;
+  }
+
+  /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
+  if (coloring->htype[0] == 'w') {
+    /* vscale = 1./dx is a constant scalar */
+    ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
+    dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon);
+  } else {
+    ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr);
+    ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
+    ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr);
+    for (col=0; col<nxloc; col++) {
+      dx = xx[col];
+      if (PetscAbsScalar(dx) < umin) {
+        if (PetscRealPart(dx) >= 0.0)      dx = umin;
+        else if (PetscRealPart(dx) < 0.0 ) dx = -umin;
+      }
+      dx               *= epsilon;
+      vscale_array[col] = 1.0/dx;
+    }
+    ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
+    ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr);
+  }
+  if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
+    ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+    ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+  }
+
+  /* (3) Loop over each color */
+  if (!coloring->w3) {
+    ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
+    ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
+  }
+  w3 = coloring->w3;
+
+  ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */
+  if (vscale) {
+    ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr);
+  }
+  nz   = 0;
+
+  if (coloring->bcols > 1) { /* use blocked insertion of Jentry */
+    PetscInt    i,m=J->rmap->n,nbcols,bcols=coloring->bcols;
+    PetscScalar *dy=coloring->dy,*dy_k;
+
+    nbcols = 0;
+    for (k=0; k<ncolors; k+=bcols) {
+      coloring->currentcolor = k;
+
+      /*
+       (3-1) Loop over each column associated with color
+       adding the perturbation to the vector w3 = x1 + dx.
+       */
+
+      dy_k = dy;
+      if (k + bcols > ncolors) bcols = ncolors - k;
+      for (i=0; i<bcols; i++) {
+
+        ierr = VecCopy(x1,w3);CHKERRQ(ierr);
+        ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
+        if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
+        if (coloring->htype[0] == 'w') {
+          for (l=0; l<ncolumns[k+i]; l++) {
+            col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */
+            w3_array[col] += 1.0/dx;
+          }
+        } else { /* htype == 'ds' */
+          vscale_array -= cstart; /* shift pointer so global index can be used */
+          for (l=0; l<ncolumns[k+i]; l++) {
+            col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */
+            w3_array[col] += 1.0/vscale_array[col];
+          }
+          vscale_array += cstart;
+        }
+        if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
+        ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
+
+        /*
+         (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
+                           w2 = F(x1 + dx) - F(x1)
+         */
+        ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+        ierr = VecPlaceArray(w2,dy_k);CHKERRQ(ierr); /* place w2 to the array dy_i */
+        ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
+        ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+        ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
+        ierr = VecResetArray(w2);CHKERRQ(ierr);
+        dy_k += m; /* points to dy+i*nxloc */
+      }
+
+      /*
+       (3-3) Loop over block rows of vector, putting results into Jacobian matrix
+       */
+      nrows_k = nrows[nbcols++];
+      ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
+
+      if (coloring->htype[0] == 'w') {
+        for (l=0; l<nrows_k; l++) {
+          row                      = Jentry2[nz].row;   /* local row index */
+          *(Jentry2[nz++].valaddr) = dy[row]*dx;
+        }
+      } else { /* htype == 'ds' */
+        for (l=0; l<nrows_k; l++) {
+          row                   = Jentry[nz].row;   /* local row index */
+          *(Jentry[nz].valaddr) = dy[row]*vscale_array[Jentry[nz].col];
+          nz++;
+        }
+      }
+      ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
+    }
+  } else { /* bcols == 1 */
+    for (k=0; k<ncolors; k++) {
+      coloring->currentcolor = k;
+
+      /*
+       (3-1) Loop over each column associated with color
+       adding the perturbation to the vector w3 = x1 + dx.
+       */
+      ierr = VecCopy(x1,w3);CHKERRQ(ierr);
+      ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
+      if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
+      if (coloring->htype[0] == 'w') {
+        for (l=0; l<ncolumns[k]; l++) {
+          col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
+          w3_array[col] += 1.0/dx;
+        }
+      } else { /* htype == 'ds' */
+        vscale_array -= cstart; /* shift pointer so global index can be used */
+        for (l=0; l<ncolumns[k]; l++) {
+          col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
+          w3_array[col] += 1.0/vscale_array[col];
+        }
+        vscale_array += cstart;
+      }
+      if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
+      ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
+
+      /*
+       (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
+                           w2 = F(x1 + dx) - F(x1)
+       */
+      ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+      ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
+      ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
+      ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
+
+      /*
+       (3-3) Loop over rows of vector, putting results into Jacobian matrix
+       */
+      nrows_k = nrows[k];
+      ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
+      if (coloring->htype[0] == 'w') {
+        for (l=0; l<nrows_k; l++) {
+          row                      = Jentry2[nz].row;   /* local row index */
+          *(Jentry2[nz++].valaddr) = y[row]*dx;
+        }
+      } else { /* htype == 'ds' */
+        for (l=0; l<nrows_k; l++) {
+          row                   = Jentry[nz].row;   /* local row index */
+          *(Jentry[nz].valaddr) = y[row]*vscale_array[Jentry[nz].col];
+          nz++;
+        }
+      }
+      ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
+    }
+  }
+
+  ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+  ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+  if (vscale) {
+    ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr);
+  }
+  coloring->currentcolor = -1;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "MatFDColoringSetUp_MPIXAIJ"
+PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
 {
-  Mat_MPIAIJ             *aij = (Mat_MPIAIJ*)mat->data;
   PetscErrorCode         ierr;
   PetscMPIInt            size,*ncolsonproc,*disp,nn;
-  PetscInt               i,n,nrows,j,k,m,ncols,col;
-  const PetscInt         *is,*A_ci,*A_cj,*B_ci,*B_cj,*rows = 0,*ltog;
-  PetscInt               nis = iscoloring->n,nctot,*cols;
-  PetscInt               *rowhit,M,cstart,cend,colb;
-  PetscInt               *columnsforrow,l;
+  PetscInt               i,n,nrows,nrows_i,j,k,m,ncols,col,*rowhit,cstart,cend,colb;
+  const PetscInt         *is,*A_ci,*A_cj,*B_ci,*B_cj,*row=NULL,*ltog=NULL;
+  PetscInt               nis=iscoloring->n,nctot,*cols;
   IS                     *isa;
-  PetscBool              done,flg;
-  ISLocalToGlobalMapping map   = mat->cmap->mapping;
-  PetscInt               ctype=c->ctype;
+  ISLocalToGlobalMapping map=mat->cmap->mapping;
+  PetscInt               ctype=c->ctype,*spidxA,*spidxB,nz,bs,bs2,spidx;
+  Mat                    A,B;
+  PetscScalar            *A_val,*B_val,**valaddrhit;
+  MatEntry               *Jentry;
+  MatEntry2              *Jentry2;
+  PetscBool              isBAIJ;
+  PetscInt               bcols=c->bcols;
+#if defined(PETSC_USE_CTABLE)
+  PetscTable             colmap=NULL;
+#else
+  PetscInt               *colmap=NULL;     /* local col number of off-diag col */
+#endif
 
   PetscFunctionBegin;
-  if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
-  if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
+  if (ctype == IS_COLORING_GHOSTED) {
+    if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
+    ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);
+  }
 
-  if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
-  else     ltog = NULL;
-  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
+  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
+  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);
+  if (isBAIJ) {
+    Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
+    Mat_SeqBAIJ *spA,*spB;
+    A = baij->A;  spA = (Mat_SeqBAIJ*)A->data; A_val = spA->a;
+    B = baij->B;  spB = (Mat_SeqBAIJ*)B->data; B_val = spB->a;
+    nz = spA->nz + spB->nz; /* total nonzero entries of mat */
+    if (!baij->colmap) {
+      ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
+      colmap = baij->colmap;
+    }
+    ierr = MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
+    ierr = MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
+
+    if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') {  /* create vscale for storing dx */
+      PetscInt    *garray;
+      ierr = PetscMalloc(B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
+      for (i=0; i<baij->B->cmap->n/bs; i++) {
+        for (j=0; j<bs; j++) {
+          garray[i*bs+j] = bs*baij->garray[i]+j;
+        }
+      }
+      ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
+      ierr = PetscFree(garray);CHKERRQ(ierr);
+    }
+  } else {
+    Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data;
+    Mat_SeqAIJ *spA,*spB;
+    A = aij->A;  spA = (Mat_SeqAIJ*)A->data; A_val = spA->a;
+    B = aij->B;  spB = (Mat_SeqAIJ*)B->data; B_val = spB->a;
+    nz = spA->nz + spB->nz; /* total nonzero entries of mat */
+    if (!aij->colmap) {
+      /* Allow access to data structures of local part of matrix
+       - creates aij->colmap which maps global column number to local number in part B */
+      ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
+      colmap = aij->colmap;
+    }
+    ierr = MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
+    ierr = MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
 
-  M         = mat->rmap->n;
-  cstart    = mat->cmap->rstart;
-  cend      = mat->cmap->rend;
-  c->M      = mat->rmap->N;         /* set the global rows and columns and local rows */
-  c->N      = mat->cmap->N;
-  c->m      = mat->rmap->n;
-  c->rstart = mat->rmap->rstart;
+    bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
+
+    if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
+      ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr);
+    }
+  }
+
+  m         = mat->rmap->n/bs;
+  cstart    = mat->cmap->rstart/bs;
+  cend      = mat->cmap->rend/bs;
 
-  c->ncolors = nis;
   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
-  ierr       = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
+  ierr       = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);
 
-  /* Allow access to data structures of local part of matrix */
-  if (!aij->colmap) {
-    ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
-  }
-  ierr = MatGetColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
-  ierr = MatGetColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
-
-  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
-  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
+  if (c->htype[0] == 'd') {
+    ierr       = PetscMalloc(nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
+    ierr       = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr);
+    c->matentry = Jentry;
+  } else if (c->htype[0] == 'w') {
+    ierr       = PetscMalloc(nz*sizeof(MatEntry2),&Jentry2);CHKERRQ(ierr);
+    ierr       = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));CHKERRQ(ierr);
+    c->matentry2 = Jentry2;
+  } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported");
 
-  for (i=0; i<nis; i++) {
+  ierr = PetscMalloc2(m+1,PetscInt,&rowhit,m+1,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
+  nz = 0;
+  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
+  for (i=0; i<nis; i++) { /* for each local color */
     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
 
-    c->ncolumns[i] = n;
+    c->ncolumns[i] = n; /* local number of columns of this color on this process */
     if (n) {
       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
     }
 
     if (ctype == IS_COLORING_GLOBAL) {
-      /* Determine the total (parallel) number of columns of this color */
+      /* Determine nctot, the total (parallel) number of columns of this color */
       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
 
+      /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
         disp[j] = disp[j-1] + ncolsonproc[j-1];
       }
 
-      /* Get complete list of columns for color on each processor */
+      /* Get cols, the complete list of columns for this color on each process */
       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
 
-    /*
-       Mark all rows affect by these columns
-    */
-    /* Temporary option to allow for debugging/testing */
-    flg  = PETSC_FALSE;
-    ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
-    if (!flg) { /*-----------------------------------------------------------------------------*/
-      /* crude, fast version */
-      ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
-      /* loop over columns*/
-      for (j=0; j<nctot; j++) {
-        if (ctype == IS_COLORING_GHOSTED) {
-          col = ltog[cols[j]];
-        } else {
-          col = cols[j];
+    /* Mark all rows affect by these columns */
+    ierr    = PetscMemzero(rowhit,m*sizeof(PetscInt));CHKERRQ(ierr);
+    bs2     = bs*bs;
+    nrows_i = 0;
+    for (j=0; j<nctot; j++) { /* loop over columns*/
+      if (ctype == IS_COLORING_GHOSTED) {
+        col = ltog[cols[j]];
+      } else {
+        col = cols[j];
+      }
+      if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
+        row      = A_cj + A_ci[col-cstart];
+        nrows    = A_ci[col-cstart+1] - A_ci[col-cstart];
+        nrows_i += nrows;
+        /* loop over columns of A marking them in rowhit */
+        for (k=0; k<nrows; k++) {
+          /* set valaddrhit for part A */
+          spidx            = bs2*spidxA[A_ci[col-cstart] + k];
+          valaddrhit[*row] = &A_val[spidx];
+          rowhit[*row++]   = col - cstart + 1; /* local column index */
         }
-        if (col >= cstart && col < cend) {
-          /* column is in diagonal block of matrix */
-          rows = A_cj + A_ci[col-cstart];
-          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
-        } else {
+      } else { /* column is in B, off-diagonal block of mat */
 #if defined(PETSC_USE_CTABLE)
-          ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
-          colb--;
+        ierr = PetscTableFind(colmap,col+1,&colb);CHKERRQ(ierr);
+        colb--;
 #else
-          colb = aij->colmap[col] - 1;
+        colb = colmap[col] - 1; /* local column index */
 #endif
-          if (colb == -1) {
-            m = 0;
-          } else {
-            rows = B_cj + B_ci[colb];
-            m    = B_ci[colb+1] - B_ci[colb];
-          }
+        if (colb == -1) {
+          nrows = 0;
+        } else {
+          colb  = colb/bs;
+          row   = B_cj + B_ci[colb];
+          nrows = B_ci[colb+1] - B_ci[colb];
         }
-        /* loop over columns marking them in rowhit */
-        for (k=0; k<m; k++) {
-          rowhit[*rows++] = col + 1;
+        nrows_i += nrows;
+        /* loop over columns of B marking them in rowhit */
+        for (k=0; k<nrows; k++) {
+          /* set valaddrhit for part B */
+          spidx            = bs2*spidxB[B_ci[colb] + k];
+          valaddrhit[*row] = &B_val[spidx];
+          rowhit[*row++]   = colb + 1 + cend - cstart; /* local column index */
         }
       }
+    }
+    c->nrows[i] = nrows_i;
 
-      /* count the number of hits */
-      nrows = 0;
-      for (j=0; j<M; j++) {
-        if (rowhit[j]) nrows++;
-      }
-      c->nrows[i] = nrows;
-      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
-      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
-      ierr        = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
-      nrows       = 0;
-      for (j=0; j<M; j++) {
+    if (c->htype[0] == 'd') {
+      for (j=0; j<m; j++) {
         if (rowhit[j]) {
-          c->rows[i][nrows]          = j;
-          c->columnsforrow[i][nrows] = rowhit[j] - 1;
-          nrows++;
+          Jentry[nz].row     = j;              /* local row index */
+          Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
+          Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
+          nz++;
         }
       }
-    } else { /*-------------------------------------------------------------------------------*/
-      /* slow version, using rowhit as a linked list */
-      PetscInt currentcol,fm,mfm;
-      rowhit[M] = M;
-      nrows     = 0;
-      /* loop over columns*/
-      for (j=0; j<nctot; j++) {
-        if (ctype == IS_COLORING_GHOSTED) {
-          col = ltog[cols[j]];
-        } else {
-          col = cols[j];
-        }
-        if (col >= cstart && col < cend) {
-          /* column is in diagonal block of matrix */
-          rows = A_cj + A_ci[col-cstart];
-          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
-        } else {
-#if defined(PETSC_USE_CTABLE)
-          ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
-          colb--;
-#else
-          colb = aij->colmap[col] - 1;
-#endif
-          if (colb == -1) {
-            m = 0;
-          } else {
-            rows = B_cj + B_ci[colb];
-            m    = B_ci[colb+1] - B_ci[colb];
-          }
-        }
-
-        /* loop over columns marking them in rowhit */
-        fm = M;    /* fm points to first entry in linked list */
-        for (k=0; k<m; k++) {
-          currentcol = *rows++;
-          /* is it already in the list? */
-          do {
-            mfm = fm;
-            fm  = rowhit[fm];
-          } while (fm < currentcol);
-          /* not in list so add it */
-          if (fm != currentcol) {
-            nrows++;
-            columnsforrow[currentcol] = col;
-            /* next three lines insert new entry into linked list */
-            rowhit[mfm]        = currentcol;
-            rowhit[currentcol] = fm;
-            fm                 = currentcol;
-            /* fm points to present position in list since we know the columns are sorted */
-          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
+    } else { /* c->htype == 'wp' */
+      for (j=0; j<m; j++) {
+        if (rowhit[j]) {
+          Jentry2[nz].row     = j;              /* local row index */
+          Jentry2[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
+          nz++;
         }
       }
-      c->nrows[i] = nrows;
-
-      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
-      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
-      ierr = PetscLogObjectMemory((PetscObject)c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
-      /* now store the linked list of rows into c->rows[i] */
-      nrows = 0;
-      fm    = rowhit[M];
-      do {
-        c->rows[i][nrows]            = fm;
-        c->columnsforrow[i][nrows++] = columnsforrow[fm];
-        fm                           = rowhit[fm];
-      } while (fm < M);
-    } /* ---------------------------------------------------------------------------------------*/
+    }
     ierr = PetscFree(cols);CHKERRQ(ierr);
   }
 
-  /* Optimize by adding the vscale, and scaleforrow[][] fields */
-  /*
-       vscale will contain the "diagonal" on processor scalings followed by the off processor
-  */
-  if (ctype == IS_COLORING_GLOBAL) {
-    ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),aij->A->rmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr);
-    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
-    for (k=0; k<c->ncolors; k++) {
-      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
-      for (l=0; l<c->nrows[k]; l++) {
-        col = c->columnsforrow[k][l];
-        if (col >= cstart && col < cend) {
-          /* column is in diagonal block of matrix */
-          colb = col - cstart;
-        } else {
-          /* column  is in "off-processor" part */
-#if defined(PETSC_USE_CTABLE)
-          ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
-          colb--;
-#else
-          colb = aij->colmap[col] - 1;
-#endif
-          colb += cend - cstart;
-        }
-        c->vscaleforrow[k][l] = colb;
-      }
-    }
-  } else if (ctype == IS_COLORING_GHOSTED) {
-    /* Get gtol mapping */
-    PetscInt N = mat->cmap->N,nlocal,*gtol;
-    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
-    for (i=0; i<N; i++) gtol[i] = -1;
-    ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
-    for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
-
-    c->vscale = 0; /* will be created in MatFDColoringApply() */
-    ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
-    for (k=0; k<c->ncolors; k++) {
-      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
-      for (l=0; l<c->nrows[k]; l++) {
-        col = c->columnsforrow[k][l];      /* global column index */
-
-        c->vscaleforrow[k][l] = gtol[col]; /* local column index */
-      }
-    }
-    ierr = PetscFree(gtol);CHKERRQ(ierr);
+  if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
+    ierr = MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);CHKERRQ(ierr);
   }
+
+  if (isBAIJ) {
+    ierr = MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
+    ierr = MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
+    ierr = PetscMalloc(bs*mat->rmap->n*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
+  } else {
+    ierr = MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
+    ierr = MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
+  }
+
   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
+  ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);
 
-  ierr = PetscFree(rowhit);CHKERRQ(ierr);
-  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
-  ierr = MatRestoreColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
-  ierr = MatRestoreColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
-  if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
+  if (ctype == IS_COLORING_GHOSTED) {
+    ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);
+  }
+  ierr = PetscInfo3(c,"ncolors %D, brows %D and bcols %D are used.\n",c->ncolors,c->brows,c->bcols);CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }
 
+#undef __FUNCT__
+#define __FUNCT__ "MatFDColoringCreate_MPIXAIJ"
+PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
+{
+  PetscErrorCode ierr;
+  PetscInt       bs,nis=iscoloring->n,m=mat->rmap->n;
+  PetscBool      isBAIJ;
 
+  PetscFunctionBegin;
+  /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian;
+   bcols is chosen s.t. dy-array takes 50% of memory space as mat */
+  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
+  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);
+  if (isBAIJ) {
+    c->brows = m;
+    c->bcols = 1;
+  } else { /* mpiaij matrix */
+    /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
+    Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data;
+    Mat_SeqAIJ *spA,*spB;
+    Mat        A,B;
+    PetscInt   nz,brows,bcols;
+    PetscReal  mem;
 
+    bs    = 1; /* only bs=1 is supported for MPIAIJ matrix */
 
+    A = aij->A;  spA = (Mat_SeqAIJ*)A->data;
+    B = aij->B;  spB = (Mat_SeqAIJ*)B->data;
+    nz = spA->nz + spB->nz; /* total local nonzero entries of mat */
+    mem = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt);
+    bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar)));
+    brows = 1000/bcols;
+    if (bcols > nis) bcols = nis;
+    if (brows == 0 || brows > m) brows = m;
+    c->brows = brows;
+    c->bcols = bcols;
+  }
 
-
+  c->M       = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
+  c->N       = mat->cmap->N/bs;
+  c->m       = mat->rmap->n/bs;
+  c->rstart  = mat->rmap->rstart/bs;
+  c->ncolors = nis;
+  PetscFunctionReturn(0);
+}

File src/mat/impls/aij/mpi/mpiaij.c

   PetscFunctionReturn(0);
 }
 
-extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
-
 #undef __FUNCT__
 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
                                        0,
                                        0,
                                        0,
-                                /*54*/ MatFDColoringCreate_MPIAIJ,
+                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                        0,
                                        MatSetUnfactored_MPIAIJ,
                                        MatPermute_MPIAIJ,
                                        0,
                                 /*139*/0,
                                        0,
-                                       0
+                                       0,
+                                       MatFDColoringSetUp_MPIXAIJ
 };
 
 /* ----------------------------------------------------------------------------------------*/

File src/mat/impls/aij/mpi/mpiaij.h

 PETSC_INTERN PetscErrorCode MatDisAssemble_MPIAIJ(Mat);
 PETSC_INTERN PetscErrorCode MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat*);
 PETSC_INTERN PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat,PetscInt,IS [],PetscInt);
-PETSC_INTERN PetscErrorCode MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
+PETSC_INTERN PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat,ISColoring,MatFDColoring);
+PETSC_INTERN PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat,ISColoring,MatFDColoring);
 PETSC_INTERN PetscErrorCode MatGetSubMatrices_MPIAIJ (Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
 PETSC_INTERN PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat,MatGetSubMatrixOption,MatReuse,Mat *[]);
 PETSC_INTERN PetscErrorCode MatGetSubMatricesParallel_MPIAIJ (Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
 PETSC_INTERN PetscErrorCode MatGetMultiProcBlock_MPIAIJ(Mat,MPI_Comm,MatReuse,Mat*);
 
 PETSC_INTERN PetscErrorCode MatLoad_MPIAIJ(Mat,PetscViewer);
+PETSC_INTERN PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat);
 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
 PETSC_INTERN PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
 PETSC_INTERN PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ_nonscalable(Mat,Mat,PetscReal,Mat*);

File src/mat/impls/aij/seq/aij.c

   PetscFunctionReturn(0);
 }
 
+/*
+ MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from
+ MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output
+ spidx[], index of a->a, to be used in MatTransposeColoringCreate_SeqAIJ() and MatFDColoringCreate_SeqXAIJ()
+*/
+#undef __FUNCT__
+#define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color"
+PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
+{
+  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
+  PetscErrorCode ierr;
+  PetscInt       i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n;
+  PetscInt       nz = a->i[m],row,*jj,mr,col;
+  PetscInt       *cspidx;
+
+  PetscFunctionBegin;
+  *nn = n;
+  if (!ia) PetscFunctionReturn(0);
+
+  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
+  ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
+  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
+  ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
+  ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr);
+  jj   = a->j;
+  for (i=0; i<nz; i++) {
+    collengths[jj[i]]++;
+  }
+  cia[0] = oshift;
+  for (i=0; i<n; i++) {
+    cia[i+1] = cia[i] + collengths[i];
+  }
+  ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
+  jj   = a->j;
+  for (row=0; row<m; row++) {
+    mr = a->i[row+1] - a->i[row];
+    for (i=0; i<mr; i++) {
+      col = *jj++;
+      cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
+      cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
+    }
+  }
+  ierr   = PetscFree(collengths);CHKERRQ(ierr);
+  *ia    = cia; *ja = cja;
+  *spidx = cspidx;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color"
+PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = MatRestoreColumnIJ_SeqAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
+  ierr = PetscFree(*spidx);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
 #undef __FUNCT__
 #define __FUNCT__ "MatSetValuesRow_SeqAIJ"
 PetscErrorCode MatSetValuesRow_SeqAIJ(Mat A,PetscInt row,const PetscScalar v[])
   PetscFunctionReturn(0);
 }
 
-#undef __FUNCT__
-#define __FUNCT__ "MatFDColoringApply_SeqAIJ"
-PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
-{
-  PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
-  PetscErrorCode ierr;
-  PetscInt       k,N,start,end,l,row,col,srow,**vscaleforrow;
-  PetscScalar    dx,*y,*xx,*w3_array;
-  PetscScalar    *vscale_array;
-  PetscReal      epsilon = coloring->error_rel,umin = coloring->umin;
-  Vec            w1,w2,w3;
-  void           *fctx = coloring->fctx;
-  PetscBool      flg   = PETSC_FALSE;
-
-  PetscFunctionBegin;
-  if (!coloring->w1) {
-    ierr = VecDuplicate(x1,&coloring->w1);CHKERRQ(ierr);
-    ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w1);CHKERRQ(ierr);
-    ierr = VecDuplicate(x1,&coloring->w2);CHKERRQ(ierr);
-    ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w2);CHKERRQ(ierr);
-    ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
-    ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
-  }
-  w1 = coloring->w1; w2 = coloring->w2; w3 = coloring->w3;
-
-  ierr = MatSetUnfactored(J);CHKERRQ(ierr);
-  ierr = PetscOptionsGetBool(((PetscObject)coloring)->prefix,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
-  if (flg) {
-    ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
-  } else {
-    PetscBool assembled;
-    ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
-    if (assembled) {
-      ierr = MatZeroEntries(J);CHKERRQ(ierr);
-    }
-  }
-
-  ierr = VecGetOwnershipRange(x1,&start,&end);CHKERRQ(ierr);
-  ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
-
-  if (!coloring->fset) {
-    ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
-    ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
-    ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
-  } else {
-    coloring->fset = PETSC_FALSE;
-  }
-
-  /*
-      Compute all the scale factors and share with other processors
-  */
-  ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);xx = xx - start;
-  ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);vscale_array = vscale_array - start;
-  for (k=0; k<coloring->ncolors; k++) {
-    /*
-       Loop over each column associated with color adding the
-       perturbation to the vector w3.
-    */
-    for (l=0; l<coloring->ncolumns[k]; l++) {
-      col = coloring->columns[k][l];    /* column of the matrix we are probing for */
-      dx  = xx[col];
-      if (dx == 0.0) dx = 1.0;
-      if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
-      else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
-      dx               *= epsilon;
-      vscale_array[col] = 1.0/dx;
-    }
-  }
-  vscale_array = vscale_array + start;
-
-  ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
-  ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
-  ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
-
-  /*  ierr = VecView(coloring->vscale,PETSC_VIEWER_STDOUT_WORLD);
-      ierr = VecView(x1,PETSC_VIEWER_STDOUT_WORLD);*/
-
-  if (coloring->vscaleforrow) vscaleforrow = coloring->vscaleforrow;
-  else                        vscaleforrow = coloring->columnsforrow;
-
-  ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
-  /*
-      Loop over each color
-  */
-  for (k=0; k<coloring->ncolors; k++) {
-    coloring->currentcolor = k;
-
-    ierr = VecCopy(x1,w3);CHKERRQ(ierr);
-    ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);w3_array = w3_array - start;
-    /*
-       Loop over each column associated with color adding the
-       perturbation to the vector w3.
-    */
-    for (l=0; l<coloring->ncolumns[k]; l++) {
-      col = coloring->columns[k][l];    /* column of the matrix we are probing for */
-      dx  = xx[col];
-      if (dx == 0.0) dx = 1.0;
-      if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
-      else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
-      dx *= epsilon;
-      if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
-      w3_array[col] += dx;
-    }
-    w3_array = w3_array + start; ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
-
-    /*
-       Evaluate function at x1 + dx (here dx is a vector of perturbations)
-    */
-
-    ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
-    ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
-    ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
-    ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
-
-    /*
-       Loop over rows of vector, putting results into Jacobian matrix
-    */
-    ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
-    for (l=0; l<coloring->nrows[k]; l++) {
-      row     = coloring->rows[k][l];
-      col     = coloring->columnsforrow[k][l];
-      y[row] *= vscale_array[vscaleforrow[k][l]];
-      srow    = row + start;
-      ierr    = MatSetValues_SeqAIJ(J,1,&srow,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
-    }
-    ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
-  }
-  coloring->currentcolor = k;
-
-  ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
-  xx   = xx + start;
-  ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
-  ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-  ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
 /*
    Computes the number of nonzeros per row needed for preallocation when X and Y
    have different nonzero structure.
   PetscFunctionReturn(0);
 }
 
-extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
 /* -------------------------------------------------------------------*/
 static struct _MatOps MatOps_Values = { MatSetValues_SeqAIJ,
                                         MatGetRow_SeqAIJ,
                                         MatRestoreRowIJ_SeqAIJ,
                                         MatGetColumnIJ_SeqAIJ,
                                         MatRestoreColumnIJ_SeqAIJ,
-                                /* 54*/ MatFDColoringCreate_SeqAIJ,
+                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
                                         0,
                                         0,
                                         MatPermute_SeqAIJ,
                                         MatRARtNumeric_SeqAIJ_SeqAIJ,
                                  /*139*/0,
                                         0,
-                                        0
+                                        0,
+                                        MatFDColoringSetUp_SeqXAIJ
 };
 
 #undef __FUNCT__

File src/mat/impls/aij/seq/aij.h

 PETSC_INTERN PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat,Vec,Vec,Vec);
 PETSC_INTERN PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat,Mat,Mat);
 PETSC_INTERN PetscErrorCode MatMatSolve_SeqAIJ(Mat,Mat,Mat);
-PETSC_INTERN PetscErrorCode MatEqual_SeqAIJ(Mat A,Mat B,PetscBool * flg);
-PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
+PETSC_INTERN PetscErrorCode MatEqual_SeqAIJ(Mat,Mat,PetscBool*);
+PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat,ISColoring,MatFDColoring);
+PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat,ISColoring,MatFDColoring);
+PETSC_INTERN PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat,MatFDColoring,PetscInt);
+PETSC_INTERN PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
 PETSC_INTERN PetscErrorCode MatLoad_SeqAIJ(Mat,PetscViewer);
 PETSC_INTERN PetscErrorCode RegisterApplyPtAPRoutines_Private(Mat);
 
 PETSC_INTERN PetscErrorCode MatRestoreRowIJ_SeqAIJ(Mat,PetscInt,PetscBool,PetscBool,PetscInt*,const PetscInt *[],const PetscInt *[],PetscBool*);
 PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqAIJ(Mat,PetscInt,PetscBool,PetscBool,PetscInt*,const PetscInt *[],const PetscInt *[],PetscBool*);
 PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqAIJ(Mat,PetscInt,PetscBool,PetscBool,PetscInt*,const PetscInt *[],const PetscInt *[],PetscBool*);
+PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat,PetscInt,PetscBool,PetscBool,PetscInt*,const PetscInt *[],const PetscInt *[],PetscInt *[],PetscBool*);
+PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat,PetscInt,PetscBool,PetscBool,PetscInt*,const PetscInt *[],const PetscInt *[],PetscInt *[],PetscBool*);
 PETSC_INTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
 PETSC_INTERN PetscErrorCode MatSetUp_SeqAIJ(Mat);
 PETSC_INTERN PetscErrorCode MatView_SeqAIJ(Mat,PetscViewer);

File src/mat/impls/aij/seq/fdaij.c

 
 #include <../src/mat/impls/aij/seq/aij.h>
+#include <../src/mat/impls/baij/seq/baij.h>
 
+/*
+    This routine is shared by SeqAIJ and SeqBAIJ matrices,
+    since it operators only on the nonzero structure of the elements or blocks.
+*/
 #undef __FUNCT__
-#define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
+#define __FUNCT__ "MatFDColoringCreate_SeqXAIJ"
+PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
+{
+  PetscErrorCode ierr;
+  PetscInt       bs,nis=iscoloring->n,m=mat->rmap->n;
+  PetscBool      isBAIJ;
+
+  PetscFunctionBegin;
+  /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */
+  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
+  ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
+  if (isBAIJ) {
+    c->brows = m;
+    c->bcols = 1;
+  } else { /* seqaij matrix */
+    /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */
+    Mat_SeqAIJ *spA = (Mat_SeqAIJ*)mat->data;
+    PetscReal  mem;
+    PetscInt   nz,brows,bcols;
+
+    bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
+
+    nz    = spA->nz;
+    mem   = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt);
+    bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar)));
+    brows = 1000/bcols;
+    if (bcols > nis) bcols = nis;
+    if (brows == 0 || brows > m) brows = m;
+    c->brows = brows;
+    c->bcols = bcols;
+  }
+
+  c->M       = mat->rmap->N/bs;   /* set total rows, columns and local rows */
+  c->N       = mat->cmap->N/bs;
+  c->m       = mat->rmap->N/bs;
+  c->rstart  = 0;
+  c->ncolors = nis;
+  c->ctype   = IS_COLORING_GHOSTED;
+  PetscFunctionReturn(0);
+}
+
 /*
-    This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
-    This is why it has the ugly code with the MatGetBlockSize()
+ Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance
+   Input Parameters:
++  mat - the matrix containing the nonzero structure of the Jacobian
+.  color - the coloring context
+-  nz - number of local non-zeros in mat
 */
-PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
+#undef __FUNCT__
+#define __FUNCT__ "MatFDColoringSetUpBlocked_AIJ_Private"
+PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat,MatFDColoring c,PetscInt nz)
 {
   PetscErrorCode ierr;
-  PetscInt       i,n,nrows,N,j,k,m,ncols,col;
-  const PetscInt *is,*rows,*ci,*cj;
-  PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
-  IS             *isa;
-  PetscBool      done,flg = PETSC_FALSE;
-  PetscBool      flg1,flg2;
+  PetscInt       i,j,nrows,nbcols,brows=c->brows,bcols=c->bcols,mbs=c->m,nis=c->ncolors;
+  PetscInt       *color_start,*row_start,*nrows_new,nz_new,row_end;
 
   PetscFunctionBegin;
-  if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
+  if (brows < 1 || brows > mbs) brows = mbs;
+  ierr = PetscMalloc2(bcols+1,PetscInt,&color_start,bcols,PetscInt,&row_start);CHKERRQ(ierr);
+  ierr = PetscMalloc(nis*sizeof(PetscInt),&nrows_new);CHKERRQ(ierr);
+  ierr = PetscMalloc(bcols*mat->rmap->n*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
+  ierr = PetscLogObjectMemory((PetscObject)c,bcols*mat->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
+
+  nz_new = 0;
+  nbcols = 0;
+  color_start[bcols] = 0;
+
+  if (c->htype[0] == 'd') { /* ----  c->htype == 'ds', use MatEntry --------*/
+    MatEntry       *Jentry_new,*Jentry=c->matentry;
+    ierr = PetscMalloc(nz*sizeof(MatEntry),&Jentry_new);CHKERRQ(ierr);
+    for (i=0; i<nis; i+=bcols) { /* loop over colors */
+      if (i + bcols > nis) {
+        color_start[nis - i] = color_start[bcols];
+        bcols                = nis - i;
+      }
+
+      color_start[0] = color_start[bcols];
+      for (j=0; j<bcols; j++) {
+        color_start[j+1] = c->nrows[i+j] + color_start[j];
+        row_start[j]     = 0;
+      }
+
+      row_end = brows;
+      if (row_end > mbs) row_end = mbs;
+
+      while (row_end <= mbs) {   /* loop over block rows */
+        for (j=0; j<bcols; j++) {       /* loop over block columns */
+          nrows = c->nrows[i+j];
+          nz    = color_start[j];
+          while (row_start[j] < nrows) {
+            if (Jentry[nz].row >= row_end) {
+              color_start[j] = nz;
+              break;
+            } else { /* copy Jentry[nz] to Jentry_new[nz_new] */
+              Jentry_new[nz_new].row     = Jentry[nz].row + j*mbs; /* index in dy-array */
+              Jentry_new[nz_new].col     = Jentry[nz].col;
+              Jentry_new[nz_new].valaddr = Jentry[nz].valaddr;
+              nz_new++; nz++; row_start[j]++;
+            }
+          }
+        }
+        if (row_end == mbs) break;
+        row_end += brows;
+        if (row_end > mbs) row_end = mbs;
+      }
+      nrows_new[nbcols++] = nz_new;
+    }
+    ierr = PetscFree(Jentry);CHKERRQ(ierr);
+    c->matentry = Jentry_new;
+  } else { /* ---------  c->htype == 'wp', use MatEntry2 ------------------*/
+    MatEntry2      *Jentry2_new,*Jentry2=c->matentry2;
+    ierr = PetscMalloc(nz*sizeof(MatEntry2),&Jentry2_new);CHKERRQ(ierr);
+    for (i=0; i<nis; i+=bcols) { /* loop over colors */
+      if (i + bcols > nis) {
+        color_start[nis - i] = color_start[bcols];
+        bcols                = nis - i;
+      }
+
+      color_start[0] = color_start[bcols];
+      for (j=0; j<bcols; j++) {
+        color_start[j+1] = c->nrows[i+j] + color_start[j];
+        row_start[j]     = 0;
+      }
+
+      row_end = brows;
+      if (row_end > mbs) row_end = mbs;
 
+      while (row_end <= mbs) {   /* loop over block rows */
+        for (j=0; j<bcols; j++) {       /* loop over block columns */
+          nrows = c->nrows[i+j];
+          nz    = color_start[j];
+          while (row_start[j] < nrows) {
+            if (Jentry2[nz].row >= row_end) {
+              color_start[j] = nz;
+              break;
+            } else { /* copy Jentry2[nz] to Jentry2_new[nz_new] */
+              Jentry2_new[nz_new].row     = Jentry2[nz].row + j*mbs; /* index in dy-array */
+              Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr;
+              nz_new++; nz++; row_start[j]++;
+            }
+          }
+        }
+        if (row_end == mbs) break;
+        row_end += brows;
+        if (row_end > mbs) row_end = mbs;
+      }
+      nrows_new[nbcols++] = nz_new;
+    }
+    ierr = PetscFree(Jentry2);CHKERRQ(ierr);
+    c->matentry2 = Jentry2_new;
+  } /* ---------------------------------------------*/
+
+  ierr = PetscFree2(color_start,row_start);CHKERRQ(ierr);
+
+  for (i=nbcols-1; i>0; i--) nrows_new[i] -= nrows_new[i-1];
+  ierr = PetscFree(c->nrows);CHKERRQ(ierr);
+  c->nrows = nrows_new;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "MatFDColoringSetUp_SeqXAIJ"
+PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
+{
+  PetscErrorCode ierr;
+  PetscInt       i,n,nrows,mbs=c->m,j,k,m,ncols,col,nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz;
+  const PetscInt *is,*row,*ci,*cj;
+  IS             *isa;
+  PetscBool      isBAIJ;
+  PetscScalar    *A_val,**valaddrhit;
+  MatEntry       *Jentry;
+  MatEntry2      *Jentry2;
+
+  PetscFunctionBegin;
   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
-  /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */
-  ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
-  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
-  if (flg1 || flg2) {
-    ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
-  }
 
-  N         = mat->cmap->N/bs;
-  c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
-  c->N      = mat->cmap->N/bs;
-  c->m      = mat->rmap->N/bs;
-  c->rstart = 0;
+  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
+  ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
+  if (isBAIJ) {
+    Mat_SeqBAIJ *spA = (Mat_SeqBAIJ*)mat->data;
+    A_val = spA->a;
+    nz    = spA->nz;
+  } else {
+    Mat_SeqAIJ  *spA = (Mat_SeqAIJ*)mat->data;
+    A_val = spA->a;
+    nz    = spA->nz;
+    bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
+  }
 
-  c->ncolors = nis;
   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
+  ierr       = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);
 
-  ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
-  if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
+  if (c->htype[0] == 'd') {
+    ierr       = PetscMalloc(nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
+    ierr       = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr);
+    c->matentry = Jentry;
+  } else if (c->htype[0] == 'w') {
+    ierr       = PetscMalloc(nz*sizeof(MatEntry2),&Jentry2);CHKERRQ(ierr);
+    ierr       = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));CHKERRQ(ierr);
+    c->matentry2 = Jentry2;
+  } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported");
 
-  /*
-     Temporary option to allow for debugging/testing
-  */
-  ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
+  if (isBAIJ) {
+    ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
+  } else {
+    ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
+  }
 
-  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
-  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
+  ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
+  ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
 
-  for (i=0; i<nis; i++) {
+  nz = 0;
+  for (i=0; i<nis; i++) { /* loop over colors */
     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
 
     c->ncolumns[i] = n;
     if (n) {
       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
+      ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
     } else {
       c->columns[i] = 0;
     }
 
-    if (!flg) { /* ------------------------------------------------------------------------------*/
-      /* fast, crude version requires O(N*N) work */
-      ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
-      /* loop over columns*/
-      for (j=0; j<n; j++) {
-        col  = is[j];
-        rows = cj + ci[col];
-        m    = ci[col+1] - ci[col];
-        /* loop over columns marking them in rowhit */
-        for (k=0; k<m; k++) {
-          rowhit[*rows++] = col + 1;
-        }
+    /* fast, crude version requires O(N*N) work */
+    bs2   = bs*bs;
+    nrows = 0;
+    for (j=0; j<n; j++) {  /* loop over columns */
+      col    = is[j];
+      row    = cj + ci[col];
+      m      = ci[col+1] - ci[col];
+      nrows += m;
+      for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
+        rowhit[*row]       = col + 1;
+        valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]];
       }
-      /* count the number of hits */
-      nrows = 0;
-      for (j=0; j<N; j++) {
-        if (rowhit[j]) nrows++;
-      }
-      c->nrows[i] = nrows;
-      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
-      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
-      nrows       = 0;
-      for (j=0; j<N; j++) {
+    }
+    c->nrows[i] = nrows; /* total num of rows for this color */
+
+    if (c->htype[0] == 'd') {
+      for (j=0; j<mbs; j++) { /* loop over rows */
         if (rowhit[j]) {
-          c->rows[i][nrows]          = j;
-          c->columnsforrow[i][nrows] = rowhit[j] - 1;
-          nrows++;
+          Jentry[nz].row     = j;              /* local row index */
+          Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
+          Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
+          nz++;
+          rowhit[j] = 0.0;                     /* zero rowhit for reuse */
         }
       }
-    } else {  /*-------------------------------------------------------------------------------*/
-      /* slow version, using rowhit as a linked list */
-      PetscInt currentcol,fm,mfm;
-      rowhit[N] = N;
-      nrows     = 0;
-      /* loop over columns */
-      for (j=0; j<n; j++) {
-        col  = is[j];
-        rows = cj + ci[col];
-        m    = ci[col+1] - ci[col];
-        /* loop over columns marking them in rowhit */
-        fm = N;    /* fm points to first entry in linked list */
-        for (k=0; k<m; k++) {
-          currentcol = *rows++;
-          /* is it already in the list? */
-          do {
-            mfm = fm;
-            fm  = rowhit[fm];
-          } while (fm < currentcol);
-          /* not in list so add it */
-          if (fm != currentcol) {
-            nrows++;
-            columnsforrow[currentcol] = col;
-            /* next three lines insert new entry into linked list */
-            rowhit[mfm]        = currentcol;
-            rowhit[currentcol] = fm;
-            fm                 = currentcol;
-            /* fm points to present position in list since we know the columns are sorted */
-          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
+    }  else { /* c->htype == 'wp' */
+      for (j=0; j<mbs; j++) { /* loop over rows */
+        if (rowhit[j]) {
+          Jentry2[nz].row     = j;              /* local row index */
+          Jentry2[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
+          nz++;
+          rowhit[j] = 0.0;                     /* zero rowhit for reuse */
         }
       }
-      c->nrows[i] = nrows;
-      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
-      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
-      /* now store the linked list of rows into c->rows[i] */
-      nrows = 0;
-      fm    = rowhit[N];
-      do {
-        c->rows[i][nrows]            = fm;
-        c->columnsforrow[i][nrows++] = columnsforrow[fm];
-        fm                           = rowhit[fm];
-      } while (fm < N);
-    } /* ---------------------------------------------------------------------------------------*/
+    }
     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
   }
-  ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
-
-  ierr = PetscFree(rowhit);CHKERRQ(ierr);
-  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
 
-  /* Optimize by adding the vscale, and scaleforrow[][] fields */
-  /*
-       see the version for MPIAIJ
-  */
-  ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
-  ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
-  for (k=0; k<c->ncolors; k++) {
-    ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
-    for (l=0; l<c->nrows[k]; l++) {
-      col = c->columnsforrow[k][l];
+  if (c->bcols > 1) {  /* reorder Jentry for faster MatFDColoringApply() */
+    ierr = MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);CHKERRQ(ierr);
+  }
 
-      c->vscaleforrow[k][l] = col;
-    }
+  if (isBAIJ) {
+    ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
+    ierr = PetscMalloc(bs*mat->rmap->n*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
+    ierr = PetscLogObjectMemory((PetscObject)c,bs*mat->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
+  } else {
+    ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
   }
+  ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);
   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
+
+  ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
+  ierr = PetscInfo3(c,"ncolors %D, brows %D and bcols %D are used.\n",c->ncolors,c->brows,c->bcols);CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }

File src/mat/impls/aij/seq/matmatmult.c

   PetscFunctionReturn(0);
 }
 
-/*
- MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from
- MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output
- spidx[], index of a->a, to be used in MatTransposeColoringCreate_SeqAIJ().
- */
-#undef __FUNCT__
-#define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color"
-PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
-{
-  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
-  PetscErrorCode ierr;
-  PetscInt       i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n;
-  PetscInt       nz = a->i[m],row,*jj,mr,col;
-  PetscInt       *cspidx;
-
-  PetscFunctionBegin;
-  *nn = n;
-  if (!ia) PetscFunctionReturn(0);
-  if (symmetric) {
-    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric");
-    ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,(PetscInt**)ia,(PetscInt**)ja);CHKERRQ(ierr);
-  } else {
-    ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
-    ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
-    ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
-    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
-    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr);
-    jj   = a->j;
-    for (i=0; i<nz; i++) {
-      collengths[jj[i]]++;
-    }
-    cia[0] = oshift;
-    for (i=0; i<n; i++) {
-      cia[i+1] = cia[i] + collengths[i];
-    }
-    ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
-    jj   = a->j;
-    for (row=0; row<m; row++) {
-      mr = a->i[row+1] - a->i[row];
-      for (i=0; i<mr; i++) {
-        col = *jj++;
-
-        cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
-        cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
-      }
-    }
-    ierr   = PetscFree(collengths);CHKERRQ(ierr);
-    *ia    = cia; *ja = cja;
-    *spidx = cspidx;
-  }
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color"
-PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  if (!ia) PetscFunctionReturn(0);
-
-  ierr = PetscFree(*ia);CHKERRQ(ierr);
-  ierr = PetscFree(*ja);CHKERRQ(ierr);
-  ierr = PetscFree(*spidx);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
 #undef __FUNCT__
 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ"
 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
     /* fast, crude version requires O(N*N) work */
     ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr);
 
-    /* loop over columns*/
-    for (j=0; j<n; j++) {
+    for (j=0; j<n; j++) { /* loop over columns*/
       col     = is[j];
       row_idx = cj + ci[col];
       m       = ci[col+1] - ci[col];
-      /* loop over columns marking them in rowhit */
-      for (k=0; k<m; k++) {
+      for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */
         idxhit[*row_idx]   = spidx[ci[col] + k];
         rowhit[*row_idx++] = col + 1;
       }
     colorforrow[i+1] = colorforrow[i] + nrows;
 
     nrows = 0;
-    for (j=0; j<cm; j++) {
+    for (j=0; j<cm; j++) { /* loop over rows */
       if (rowhit[j]) {
         rows_i[nrows]   = j;
         den2sp_i[nrows] = idxhit[j];

File src/mat/impls/baij/mpi/mpibaij.c

   PetscFunctionReturn(0);
 }
 
-extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
-
-#undef __FUNCT__
-#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
-/*
-    This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
-*/
-PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
-{
-  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
-  PetscErrorCode         ierr;
-  PetscMPIInt            size,*ncolsonproc,*disp,nn;
-  PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
-  const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
-  PetscInt               nis = iscoloring->n,nctot,*cols;
-  PetscInt               *rowhit,M,cstart,cend,colb;
-  PetscInt               *columnsforrow,l;
-  IS                     *isa;
-  PetscBool              done,flg;
-  ISLocalToGlobalMapping map = mat->cmap->bmapping;
-  PetscInt               ctype=c->ctype;
-
-  PetscFunctionBegin;
-  if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
-  if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
-
-  if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
-  else     ltog = NULL;
-  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
-  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
-
-  M         = mat->rmap->n/bs;
-  cstart    = mat->cmap->rstart/bs;
-  cend      = mat->cmap->rend/bs;
-  c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
-  c->N      = mat->cmap->N/bs;
-  c->m      = mat->rmap->n/bs;
-  c->rstart = mat->rmap->rstart/bs;
-
-  c->ncolors = nis;
-  ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
-  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
-  ierr       = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
-
-  /* Allow access to data structures of local part of matrix */
-  if (!baij->colmap) {
-    ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
-  }
-  ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
-  ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
-
-  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
-  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
-
-  for (i=0; i<nis; i++) {
-    ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
-    ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
-
-    c->ncolumns[i] = n;
-    if (n) {
-      ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
-      ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
-      ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
-    } else {
-      c->columns[i] = 0;
-    }
-
-    if (ctype == IS_COLORING_GLOBAL) {
-      /* Determine the total (parallel) number of columns of this color */
-      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
-      ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
-
-      ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
-      ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
-      nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
-      if (!nctot) {
-        ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
-      }
-
-      disp[0] = 0;
-      for (j=1; j<size; j++) {
-        disp[j] = disp[j-1] + ncolsonproc[j-1];
-      }
-
-      /* Get complete list of columns for color on each processor */
-      ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
-      ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
-      ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
-    } else if (ctype == IS_COLORING_GHOSTED) {
-      /* Determine local number of columns of this color on this process, including ghost points */
-      nctot = n;
-      ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
-      ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
-    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
-
-    /*
-       Mark all rows affect by these columns
-    */
-    /* Temporary option to allow for debugging/testing */
-    flg  = PETSC_FALSE;
-    ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
-    if (!flg) { /*-----------------------------------------------------------------------------*/
-      /* crude, fast version */
-      ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
-      /* loop over columns*/
-      for (j=0; j<nctot; j++) {
-        if (ctype == IS_COLORING_GHOSTED) {
-          col = ltog[cols[j]];
-        } else {
-          col = cols[j];
-        }
-        if (col >= cstart && col < cend) {
-          /* column is in diagonal block of matrix */
-          rows = A_cj + A_ci[col-cstart];
-          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
-        } else {
-#if defined(PETSC_USE_CTABLE)
-          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
-          colb--;
-#else
-          colb = baij->colmap[col] - 1;
-#endif
-          if (colb == -1) {
-            m = 0;
-          } else {
-            colb = colb/bs;
-            rows = B_cj + B_ci[colb];
-            m    = B_ci[colb+1] - B_ci[colb];
-          }
-        }
-        /* loop over columns marking them in rowhit */
-        for (k=0; k<m; k++) {
-          rowhit[*rows++] = col + 1;
-        }
-      }