Commits

Vijay Mahadevan  committed 5ae81b7 Merge

Merged petsc/petsc into master

  • Parent commits f122915, 603759c
  • Branches dmmoab

Files changed (67)

File config/BuildSystem/config/libraries.py

       self.logPrint('Warning: tgamma() not found')
     return
 
+  def checkMathFenv(self):
+    '''Checks if <fenv.h> can be used with FE_DFL_ENV'''
+    if self.math is not None and self.check(self.math, ['fesetenv'], prototype = ['#include <fenv.h>'], call = ['fesetenv(FE_DFL_ENV);']):
+      self.addDefine('HAVE_FENV_H', 1)
+    else:
+      self.logPrint('Warning: <fenv.h> with FE_DFL_ENV not found')
+    return
+
   def checkCompression(self):
     '''Check for libz, the compression library'''
     self.compression = None
     self.executeTest(self.checkMath)
     self.executeTest(self.checkMathErf)
     self.executeTest(self.checkMathTgamma)
+    self.executeTest(self.checkMathFenv)
     self.executeTest(self.checkCompression)
     self.executeTest(self.checkRealtime)
     self.executeTest(self.checkDynamic)
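
A sketch of how the new guard would be consumed in C source, assuming the usual PETSC_ prefix that addDefine() applies when writing petscconf.h:

    #include <petscconf.h>
    #if defined(PETSC_HAVE_FENV_H)
    #include <fenv.h>
    #endif

    /* Restore the default floating-point environment where <fenv.h> is available */
    static void ResetFPEnv(void)
    {
    #if defined(PETSC_HAVE_FENV_H)
      fesetenv(FE_DFL_ENV);
    #endif
    }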

File config/BuildSystem/config/package.py

         raise RuntimeError('Cannot use '+self.name+' without Fortran, make sure you do NOT have --with-fc=0')
       if self.noMPIUni and self.mpi.usingMPIUni:
         raise RuntimeError('Cannot use '+self.name+' with MPIUNI, you need a real MPI')
-      if not self.worksonWindows and self.setCompilers.isCygwin():
-        raise RuntimeError('External package '+self.name+' does not work on Microsoft Windows')
-      if self.download and self.framework.argDB.get('download-'+self.downloadname.lower()) and not self.downloadonWindows and self.setCompilers.isCygwin():
-        raise RuntimeError('External package '+self.name+' does not support --download-'+self.downloadname.lower()+' on Microsoft Windows')
+      if not self.worksonWindows and self.setCompilers.isWindows(self.setCompilers.CC):
+        raise RuntimeError('External package '+self.name+' does not work with Microsoft compilers')
+      if self.download and self.framework.argDB.get('download-'+self.downloadname.lower()) and not self.downloadonWindows and self.setCompilers.isWindows(self.setCompilers.CC):
+        raise RuntimeError('External package '+self.name+' does not support --download-'+self.downloadname.lower()+' with Microsoft compilers')
     if not self.download and self.framework.argDB.has_key('download-'+self.downloadname.lower()) and self.framework.argDB['download-'+self.downloadname.lower()]:
       raise RuntimeError('External package '+self.name+' does not support --download-'+self.downloadname.lower())
     return

File config/BuildSystem/config/packages/BlasLapack.py

         blib = glob.glob('/usr/lib/libblas.*')
         if blib != [] and not (os.path.isfile('/usr/lib/libblas.so') or os.path.isfile('/usr/lib/libblas.a')):
           raise RuntimeError('Incomplete BLAS install; Perhaps blas package is installed - but blas-dev/blas-devel is required?')
-        if hasattr(self.compilers, 'FC'): C = 'f'
-        else: C = 'c'
-        raise RuntimeError('Could not find a functional BLAS. Run with --with-blas-lib=<lib> to indicate the library containing BLAS.\n Or --download-'+C+'-blas-lapack=1 to have one automatically downloaded and installed\n')
+        if hasattr(self.compilers, 'FC') and (self.defaultPrecision != '__float128') : pkg = 'f-blas-lapack'
+        else: pkg = 'f2cblaslapack'
+        raise RuntimeError('Could not find a functional BLAS. Run with --with-blas-lib=<lib> to indicate the library containing BLAS.\n Or --download-'+pkg+'=1 to have one automatically downloaded and installed\n')
       if not self.foundLapack:
         # check for split blas/blas-dev packages
         import glob
         llib = glob.glob('/usr/lib/liblapack.*')
         if llib != [] and not (os.path.isfile('/usr/lib/liblapack.so') or os.path.isfile('/usr/lib/liblapack.a')):
           raise RuntimeError('Incomplete LAPACK install; Perhaps lapack package is installed - but lapack-dev/lapack-devel is required?')
-        if hasattr(self.compilers, 'FC'): C = 'f'
-        else: C = 'c'
-        raise RuntimeError('Could not find a functional LAPACK. Run with --with-lapack-lib=<lib> to indicate the library containing LAPACK.\n Or --download-'+C+'-blas-lapack=1 to have one automatically downloaded and installed\n')
+        if hasattr(self.compilers, 'FC') and (self.defaultPrecision != '__float128') : pkg = 'f-blas-lapack'
+        else: pkg = 'f2cblaslapack'
+        raise RuntimeError('Could not find a functional LAPACK. Run with --with-lapack-lib=<lib> to indicate the library containing LAPACK.\n Or --download-'+pkg+'=1 to have one automatically downloaded and installed\n')
 
     #  allow user to dictate which blas/lapack mangling to use (some blas/lapack libraries, like on Apple, provide several)
     if 'known-blaslapack-mangling' in self.argDB:

File config/BuildSystem/config/setCompilers.py

     yield (self.CC, ['-shared'], 'so')
     yield (self.CC, ['-dynamic'], 'so')
     yield (self.CC, ['-qmkshrobj'], 'so')
+    yield (self.CC, ['-shared'], 'dll')
     # Solaris default
     if Configure.isSolaris():
       if hasattr(self, 'CXX') and self.mainLanguage == 'Cxx':

File config/PETSc/Configure.py

                                             'unistd', 'sys/sysinfo', 'machine/endian', 'sys/param', 'sys/procfs', 'sys/resource',
                                             'sys/systeminfo', 'sys/times', 'sys/utsname','string', 'stdlib','memory',
                                             'sys/socket','sys/wait','netinet/in','netdb','Direct','time','Ws2tcpip','sys/types',
-                                            'WindowsX', 'cxxabi','float','ieeefp','stdint','fenv','sched','pthread','mathimf'])
-    functions = ['access', '_access', 'clock', 'drand48', 'getcwd', '_getcwd', 'getdomainname', 'gethostname', 'getpwuid',
+                                            'WindowsX', 'cxxabi','float','ieeefp','stdint','sched','pthread','mathimf'])
+    functions = ['access', '_access', 'clock', 'drand48', 'getcwd', '_getcwd', 'getdomainname', 'gethostname',
                  'gettimeofday', 'getwd', 'memalign', 'memmove', 'mkstemp', 'popen', 'PXFGETARG', 'rand', 'getpagesize',
                  'readlink', 'realpath',  'sigaction', 'signal', 'sigset', 'usleep', 'sleep', '_sleep', 'socket',
-                 'times', 'gethostbyname', 'uname','snprintf','_snprintf','_fullpath','lseek','_lseek','time','fork','stricmp',
+                 'times', 'gethostbyname', 'uname','snprintf','_snprintf','lseek','_lseek','time','fork','stricmp',
                  'strcasecmp', 'bzero', 'dlopen', 'dlsym', 'dlclose', 'dlerror','get_nprocs','sysctlbyname',
                  '_intel_fast_memcpy','_intel_fast_memset']
     libraries1 = [(['socket', 'nsl'], 'socket'), (['fpe'], 'handle_sigfpes')]
       raise RuntimeError('Could not find any unsigned integer type matching void*')
     self.popLanguage()
 
-  def configureInline(self):
-    '''Get a generic inline keyword, depending on the language'''
-    if self.languages.clanguage == 'C':
-      self.addDefine('STATIC_INLINE', self.compilers.cStaticInlineKeyword)
-      self.addDefine('RESTRICT', self.compilers.cRestrict)
-    elif self.languages.clanguage == 'Cxx':
-      self.addDefine('STATIC_INLINE', self.compilers.cxxStaticInlineKeyword)
-      self.addDefine('RESTRICT', self.compilers.cxxRestrict)
-
+  def configureRTLDDefault(self):
     if self.checkCompile('#include <dlfcn.h>\n void *ptr =  RTLD_DEFAULT;'):
       self.addDefine('RTLD_DEFAULT','1')
     return
       raise RuntimeError('PETSc requires a functional math library. Please send configure.log to petsc-maint@mcs.anl.gov.')
     if self.languages.clanguage == 'Cxx' and not hasattr(self.compilers, 'CXX'):
       raise RuntimeError('Cannot set C language to C++ without a functional C++ compiler.')
-    self.executeTest(self.configureInline)
+    self.executeTest(self.configureRTLDDefault)
     self.executeTest(self.configurePrefetch)
     self.executeTest(self.configureUnused)
     self.executeTest(self.configureDeprecated)
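
The new configureRTLDDefault test defines RTLD_DEFAULT (surfaced as PETSC_RTLD_DEFAULT in petscconf.h, assuming the usual PETSC_ prefix) when <dlfcn.h> provides the pseudo-handle. A hedged sketch of the guarded lookup this enables; the symbol name is a placeholder:

    #include <petscconf.h>
    #include <dlfcn.h>

    static void *LookupSymbol(void)
    {
    #if defined(PETSC_RTLD_DEFAULT)
      return dlsym(RTLD_DEFAULT, "some_symbol");  /* search every loaded object */
    #else
      return NULL;
    #endif
    }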

File config/PETSc/packages/sowing.py

 class Configure(PETSc.package.NewPackage):
   def __init__(self, framework):
     PETSc.package.NewPackage.__init__(self, framework)
-    self.download          = ['http://ftp.mcs.anl.gov/pub/petsc/externalpackages/sowing-1.1.16d.tar.gz']
+    self.download          = ['http://ftp.mcs.anl.gov/pub/petsc/externalpackages/sowing-1.1.16e.tar.gz']
     self.complex           = 1
     self.double            = 0
     self.requires32bitint  = 0

File config/PETSc/utilities/dataFilesPath.py

       self.datafilespath = os.path.join('/home','petsc','datafiles')
     elif os.path.isdir(os.path.join(self.petscdir.dir, '..', 'datafiles')) &  os.path.isdir(os.path.join(self.petscdir.dir, '..', 'datafiles', 'matrices')):
       self.datafilespath = os.path.join(self.petscdir.dir, '..', 'datafiles')
-    self.addMakeMacro('DATAFILESPATH',self.datafilespath)
+    if self.datafilespath is not None:
+      self.addMakeMacro('DATAFILESPATH',self.datafilespath)
     return
 
   def configure(self):

File include/finclude/petscmat.h

       PetscEnum MATOP_SETUP_PREALLOCATION
       PetscEnum MATOP_ILUFACTOR_SYMBOLIC
       PetscEnum MATOP_ICCFACTOR_SYMBOLIC
-      PetscEnum MATOP_GET_ARRAY
-      PetscEnum MATOP_RESTORE_ARRAY
+!     PetscEnum MATOP_PLACEHOLDER_32
+!     PetscEnum MATOP_PLACEHOLDER_33
       PetscEnum MATOP_DUPLICATE
       PetscEnum MATOP_FORWARD_SOLVE
       PetscEnum MATOP_BACKWARD_SOLVE
       PetscEnum MATOP_SCALE
       PetscEnum MATOP_SHIFT
       PetscEnum MATOP_DIAGONAL_SET
-      PetscEnum MATOP_ILUDT_FACTOR
-      PetscEnum MATOP_SET_BLOCK_SIZE
+      PetscEnum MATOP_ZERO_ROWS_COLUMNS
+      PetscEnum MATOP_SET_RANDOM
       PetscEnum MATOP_GET_ROW_IJ
       PetscEnum MATOP_RESTORE_ROW_IJ
       PetscEnum MATOP_GET_COLUMN_IJ
       PetscEnum MATOP_GET_ROW_MIN_ABS
       PetscEnum MATOP_CONVERT
       PetscEnum MATOP_SET_COLORING
-      PetscEnum MATOP_PLACEHOLDER
+!     PetscEnum MATOP_PLACEHOLDER_73
       PetscEnum MATOP_SET_VALUES_ADIFOR
       PetscEnum MATOP_FD_COLORING_APPLY
       PetscEnum MATOP_SET_FROM_OPTIONS
       PetscEnum MATOP_MULT_CONSTRAINED
       PetscEnum MATOP_MULT_TRANSPOSE_CONSTRAIN
-      PetscEnum MATOP_PERMUTE_SPARSIFY
+      PetscEnum MATOP_FIND_ZERO_DIAGONALS
       PetscEnum MATOP_MULT_MULTIPLE
       PetscEnum MATOP_SOLVE_MULTIPLE
       PetscEnum MATOP_GET_INERTIA
       PetscEnum MATOP_IS_SYMMETRIC
       PetscEnum MATOP_IS_HERMITIAN
       PetscEnum MATOP_IS_STRUCTURALLY_SYMMETRIC
-      PetscEnum MATOP_DUMMY
+      PetscEnum MATOP_SET_VALUES_BLOCKEDLOCAL
       PetscEnum MATOP_GET_VECS
       PetscEnum MATOP_MAT_MULT
       PetscEnum MATOP_MAT_MULT_SYMBOLIC
       PetscEnum MATOP_MAT_TRANSPOSE_MULT
       PetscEnum MATOP_MAT_TRANSPOSE_MULT_SYMBO
       PetscEnum MATOP_MAT_TRANSPOSE_MULT_NUMER
-      PetscEnum MATOP_PTAP_SYMBOLIC_SEQAIJ
-      PetscEnum MATOP_PTAP_NUMERIC_SEQAIJ
-      PetscEnum MATOP_PTAP_SYMBOLIC_MPIAIJ
-      PetscEnum MATOP_PTAP_NUMERIC_MPIAIJ
+!     PetscEnum MATOP_PLACEHOLDER_98
+!     PetscEnum MATOP_PLACEHOLDER_99
+!     PetscEnum MATOP_PLACEHOLDER_100
+!     PetscEnum MATOP_PLACEHOLDER_101
       PetscEnum MATOP_CONJUGATE
-      PetscEnum MATOP_SET_SIZES
+!     PetscEnum MATOP_PLACEHOLDER_103
       PetscEnum MATOP_SET_VALUES_ROW
       PetscEnum MATOP_REAL_PART
       PetscEnum MATOP_IMAGINARY_PART
       PetscEnum MATOP_MULT_HERMITIAN_TRANSPOSE
       PetscEnum MATOP_MULT_HERMITIAN_TRANS_ADD
       PetscEnum MATOP_GET_MULTI_PROC_BLOCK
+      PetscEnum MATOP_FIND_NONZERO_ROWS
       PetscEnum MATOP_GET_COLUMN_NORMS
+      PetscEnum MATOP_INVERT_BLOCK_DIAGONAL
+!     PetscEnum MATOP_PLACEHOLDER_127
       PetscEnum MATOP_GET_SUB_MATRICES_PARALLE
       PetscEnum MATOP_SET_VALUES_BATCH
+      PetscEnum MATOP_TRANSPOSE_MAT_MULT
+      PetscEnum MATOP_TRANSPOSE_MAT_MULT_SYMBO
+      PetscEnum MATOP_TRANSPOSE_MAT_MULT_NUMER
+      PetscEnum MATOP_TRANSPOSE_COLORING_CREAT
+      PetscEnum MATOP_TRANS_COLORING_APPLY_SPT
+      PetscEnum MATOP_TRANS_COLORING_APPLY_DEN
+      PetscEnum MATOP_RART
+      PetscEnum MATOP_RART_SYMBOLIC
+      PetscEnum MATOP_RART_NUMERIC
       PetscEnum MATOP_SET_BLOCK_SIZES
       PetscEnum MATOP_AYPX
       PetscEnum MATOP_RESIDUAL
       parameter(MATOP_SETUP_PREALLOCATION=29)
       parameter(MATOP_ILUFACTOR_SYMBOLIC=30)
       parameter(MATOP_ICCFACTOR_SYMBOLIC=31)
-      parameter(MATOP_GET_ARRAY=32)
-      parameter(MATOP_RESTORE_ARRAY=33)
+!     parameter(MATOP_PLACEHOLDER_32=32)
+!     parameter(MATOP_PLACEHOLDER_33=33)
       parameter(MATOP_DUPLICATE=34)
       parameter(MATOP_FORWARD_SOLVE=35)
       parameter(MATOP_BACKWARD_SOLVE=36)
       parameter(MATOP_SCALE=45)
       parameter(MATOP_SHIFT=46)
       parameter(MATOP_DIAGONAL_SET=47)
-      parameter(MATOP_ILUDT_FACTOR=48)
-      parameter(MATOP_SET_BLOCK_SIZE=49)
+      parameter(MATOP_ZERO_ROWS_COLUMNS=48)
+      parameter(MATOP_SET_RANDOM=49)
       parameter(MATOP_GET_ROW_IJ=50)
       parameter(MATOP_RESTORE_ROW_IJ=51)
       parameter(MATOP_GET_COLUMN_IJ=52)
       parameter(MATOP_GET_ROW_MIN_ABS=70)
       parameter(MATOP_CONVERT=71)
       parameter(MATOP_SET_COLORING=72)
-      parameter(MATOP_PLACEHOLDER=73)
+!     parameter(MATOP_PLACEHOLDER_73=73)
       parameter(MATOP_SET_VALUES_ADIFOR=74)
       parameter(MATOP_FD_COLORING_APPLY=75)
       parameter(MATOP_SET_FROM_OPTIONS=76)
       parameter(MATOP_MULT_CONSTRAINED=77)
       parameter(MATOP_MULT_TRANSPOSE_CONSTRAIN=78)
-      parameter(MATOP_PERMUTE_SPARSIFY=79)
+      parameter(MATOP_FIND_ZERO_DIAGONALS=79)
       parameter(MATOP_MULT_MULTIPLE=80)
       parameter(MATOP_SOLVE_MULTIPLE=81)
       parameter(MATOP_GET_INERTIA=82)
       parameter(MATOP_IS_SYMMETRIC=84)
       parameter(MATOP_IS_HERMITIAN=85)
       parameter(MATOP_IS_STRUCTURALLY_SYMMETRIC=86)
-      parameter(MATOP_DUMMY=87)
+      parameter(MATOP_SET_VALUES_BLOCKEDLOCAL=87)
       parameter(MATOP_GET_VECS=88)
       parameter(MATOP_MAT_MULT=89)
       parameter(MATOP_MAT_MULT_SYMBOLIC=90)
       parameter(MATOP_MAT_TRANSPOSE_MULT=95)
       parameter(MATOP_MAT_TRANSPOSE_MULT_SYMBO=96)
       parameter(MATOP_MAT_TRANSPOSE_MULT_NUMER=97)
-      parameter(MATOP_PTAP_SYMBOLIC_SEQAIJ=98)
-      parameter(MATOP_PTAP_NUMERIC_SEQAIJ=99)
-      parameter(MATOP_PTAP_SYMBOLIC_MPIAIJ=100)
-      parameter(MATOP_PTAP_NUMERIC_MPIAIJ=101)
+!     parameter(MATOP_PLACEHOLDER_98=98)
+!     parameter(MATOP_PLACEHOLDER_99=99)
+!     parameter(MATOP_PLACEHOLDER_100=100)
+!     parameter(MATOP_PLACEHOLDER_101=101)
       parameter(MATOP_CONJUGATE=102)
-      parameter(MATOP_SET_SIZES=103)
+!     parameter(MATOP_PLACEHOLDER_103=103)
       parameter(MATOP_SET_VALUES_ROW=104)
       parameter(MATOP_REAL_PART=105)
       parameter(MATOP_IMAGINARY_PART=106)
       parameter(MATOP_MULT_HERMITIAN_TRANSPOSE=121)
       parameter(MATOP_MULT_HERMITIAN_TRANS_ADD=122)
       parameter(MATOP_GET_MULTI_PROC_BLOCK=123)
+      parameter(MATOP_FIND_NONZERO_ROWS=124)
       parameter(MATOP_GET_COLUMN_NORMS=125)
+      parameter(MATOP_INVERT_BLOCK_DIAGONAL=126)
+!     parameter(MATOP_PLACEHOLDER_127=127)
       parameter(MATOP_GET_SUB_MATRICES_PARALLE=128)
       parameter(MATOP_SET_VALUES_BATCH=129)
+      parameter(MATOP_TRANSPOSE_MAT_MULT=130)
+      parameter(MATOP_TRANSPOSE_MAT_MULT_SYMBO=131)
+      parameter(MATOP_TRANSPOSE_MAT_MULT_NUMER=132)
+      parameter(MATOP_TRANSPOSE_COLORING_CREAT=133)
+      parameter(MATOP_TRANS_COLORING_APPLY_SPT=134)
+      parameter(MATOP_TRANS_COLORING_APPLY_DEN=135)
+      parameter(MATOP_RART=136)
+      parameter(MATOP_RART_SYMBOLIC=137)
+      parameter(MATOP_RART_NUMERIC=138)
       parameter(MATOP_SET_BLOCK_SIZES=139)
       parameter(MATOP_AYPX=140)
       parameter(MATOP_RESIDUAL=141)

File include/mpiuni/mpi.h

 
 #define MPI_Comm_f2c(comm) (MPI_Comm)(comm)
 #define MPI_Comm_c2f(comm) (MPI_Fint)(comm)
+#define MPI_Type_f2c(type) (MPI_Datatype)(type)
+#define MPI_Type_c2f(type) (MPI_Fint)(type)
 
 #define MPI_Send(buf,count,datatype,dest,tag,comm)  \
      (MPIUNI_TMP = (void*)(MPIUNI_INTPTR) (buf),\
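
The two new casts mirror the MPI standard's datatype handle conversions under MPIUni. A minimal round-trip sketch (hypothetical usage, not part of the commit):

    #include <mpiuni/mpi.h>

    static void RoundTrip(void)
    {
      MPI_Fint     ft = MPI_Type_c2f(MPI_DOUBLE);  /* C handle -> Fortran handle */
      MPI_Datatype dt = MPI_Type_f2c(ft);          /* ... and back again         */
      (void)dt;
    }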

File include/petsc-private/isimpl.h

   const char                  **fieldNames;   /* The field names */
   PetscInt                     *numFieldComponents; /* The number of components in each field */
   PetscSection                 *field;        /* A section describing the layout and constraints for each field */
+
+  PetscObject                   clObj;        /* Key for the closure (right now we only have one) */
+  PetscSection                  clSection;    /* Section for the closure index */
+  IS                            clIndices;    /* Indices for the closure index */
 };
 
 

File include/petsc-private/matimpl.h

   PetscErrorCode (*setup)(Mat);
   PetscErrorCode (*ilufactorsymbolic)(Mat,Mat,IS,IS,const MatFactorInfo*);
   PetscErrorCode (*iccfactorsymbolic)(Mat,Mat,IS,const MatFactorInfo*);
-  PetscErrorCode (*dummy29)(Mat);
-  PetscErrorCode (*dummy210)(Mat);
+  PetscErrorCode (*placeholder_32)(Mat);
+  PetscErrorCode (*placeholder_33)(Mat);
   /*34*/
   PetscErrorCode (*duplicate)(Mat,MatDuplicateOption,Mat*);
   PetscErrorCode (*forwardsolve)(Mat,Vec,Vec);
   PetscErrorCode (*getrowminabs)(Mat,Vec,PetscInt[]);
   PetscErrorCode (*convert)(Mat, MatType,MatReuse,Mat*);
   PetscErrorCode (*setcoloring)(Mat,ISColoring);
-  PetscErrorCode (*dummy3)(Mat,void*);
+  PetscErrorCode (*placeholder_73)(Mat,void*);
   /*74*/
   PetscErrorCode (*setvaluesadifor)(Mat,PetscInt,void*);
   PetscErrorCode (*fdcoloringapply)(Mat,MatFDColoring,Vec,MatStructure*,void*);
   PetscErrorCode (*mattransposemult)(Mat,Mat,MatReuse,PetscReal,Mat*);
   PetscErrorCode (*mattransposemultsymbolic)(Mat,Mat,PetscReal,Mat*);
   PetscErrorCode (*mattransposemultnumeric)(Mat,Mat,Mat);
-  PetscErrorCode (*dummy98)(Mat);
+  PetscErrorCode (*placeholder_98)(Mat);
   /*99*/
-  PetscErrorCode (*dummy99)(Mat);
-  PetscErrorCode (*dummy100)(Mat);
-  PetscErrorCode (*dummy101)(Mat);
+  PetscErrorCode (*placeholder_99)(Mat);
+  PetscErrorCode (*placeholder_100)(Mat);
+  PetscErrorCode (*placeholder_101)(Mat);
   PetscErrorCode (*conjugate)(Mat);                              /* complex conjugate */
-  PetscErrorCode (*dummy5)(void);
+  PetscErrorCode (*placeholder_103)(void);
   /*104*/
   PetscErrorCode (*setvaluesrow)(Mat,PetscInt,const PetscScalar[]);
   PetscErrorCode (*realpart)(Mat);
   PetscErrorCode (*findnonzerorows)(Mat,IS*);
   PetscErrorCode (*getcolumnnorms)(Mat,NormType,PetscReal*);
   PetscErrorCode (*invertblockdiagonal)(Mat,const PetscScalar**);
-  PetscErrorCode (*dummy4)(Mat,Vec,Vec,Vec);
+  PetscErrorCode (*placeholder_127)(Mat,Vec,Vec,Vec);
   PetscErrorCode (*getsubmatricesparallel)(Mat,PetscInt,const IS[], const IS[], MatReuse, Mat**);
   /*129*/
   PetscErrorCode (*setvaluesbatch)(Mat,PetscInt,PetscInt,PetscInt*,const PetscScalar*);

File include/petscdm.h

 PETSC_EXTERN PetscErrorCode DMConvert(DM,DMType,DM*);
 
 PETSC_EXTERN PetscErrorCode DMGetCoordinateDM(DM,DM*);
+PETSC_EXTERN PetscErrorCode DMSetCoordinateDM(DM,DM);
 PETSC_EXTERN PetscErrorCode DMGetCoordinates(DM,Vec*);
 PETSC_EXTERN PetscErrorCode DMSetCoordinates(DM,Vec);
 PETSC_EXTERN PetscErrorCode DMGetCoordinatesLocal(DM,Vec*);

File include/petscdmplex.h

 PETSC_EXTERN PetscErrorCode DMPlexInvertCell(PetscInt, PetscInt, int []);
 PETSC_EXTERN PetscErrorCode DMPlexInterpolate(DM, DM *);
 PETSC_EXTERN PetscErrorCode DMPlexCopyCoordinates(DM, DM);
-PETSC_EXTERN PetscErrorCode DMPlexDistribute(DM, const char[], PetscInt, DM*);
+PETSC_EXTERN PetscErrorCode DMPlexDistribute(DM, const char[], PetscInt, PetscSF*, DM*);
+PETSC_EXTERN PetscErrorCode DMPlexDistributeField(DM,PetscSF,PetscSection,Vec,PetscSection,Vec);
+PETSC_EXTERN PetscErrorCode DMPlexDistributeData(DM,PetscSF,PetscSection,MPI_Datatype,void*,PetscSection,void**);
 PETSC_EXTERN PetscErrorCode DMPlexLoad(PetscViewer, DM);
 PETSC_EXTERN PetscErrorCode DMPlexGetSubpointMap(DM, DMLabel*);
 PETSC_EXTERN PetscErrorCode DMPlexSetSubpointMap(DM, DMLabel);
 PETSC_EXTERN PetscErrorCode DMPlexVecRestoreClosure(DM, PetscSection, Vec, PetscInt, PetscInt *, PetscScalar *[]);
 PETSC_EXTERN PetscErrorCode DMPlexVecSetClosure(DM, PetscSection, Vec, PetscInt, const PetscScalar[], InsertMode);
 PETSC_EXTERN PetscErrorCode DMPlexMatSetClosure(DM, PetscSection, PetscSection, Mat, PetscInt, const PetscScalar[], InsertMode);
+PETSC_EXTERN PetscErrorCode DMPlexCreateClosureIndex(DM, PetscSection);
 
 PETSC_EXTERN PetscErrorCode DMPlexCreateExodus(MPI_Comm, PetscInt, PetscBool, DM *);
 PETSC_EXTERN PetscErrorCode DMPlexCreateCGNS(MPI_Comm, PetscInt, PetscBool, DM *);
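
Taken together, the new PetscSF output of DMPlexDistribute() feeds directly into DMPlexDistributeField()/DMPlexDistributeData(). A usage sketch under assumed declarations (dm, ierr, oldSection, oldVec, and a created newSection/newVec; the partitioner name "chaco" is illustrative):

    DM      dmDist = NULL;
    PetscSF sfDist = NULL;

    ierr = DMPlexDistribute(dm, "chaco", 0, &sfDist, &dmDist);CHKERRQ(ierr);
    if (dmDist) {
      /* Move user data laid out by oldSection onto the distributed mesh */
      ierr = DMPlexDistributeField(dmDist, sfDist, oldSection, oldVec, newSection, newVec);CHKERRQ(ierr);
      ierr = PetscSFDestroy(&sfDist);CHKERRQ(ierr);
      ierr = DMDestroy(&dm);CHKERRQ(ierr);
      dm   = dmDist;
    }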

File include/petscis.h

 PETSC_EXTERN PetscErrorCode PetscSectionGetPointLayout(MPI_Comm, PetscSection, PetscLayout *);
 PETSC_EXTERN PetscErrorCode PetscSectionGetValueLayout(MPI_Comm, PetscSection, PetscLayout *);
 
+PETSC_EXTERN PetscErrorCode PetscSectionSetClosureIndex(PetscSection, PetscObject, PetscSection, IS);
+PETSC_EXTERN PetscErrorCode PetscSectionGetClosureIndex(PetscSection, PetscObject, PetscSection *, IS *);
+
 /* PetscSF support */
 PETSC_EXTERN PetscErrorCode PetscSFConvertPartition(PetscSF, PetscSection, IS, ISLocalToGlobalMapping *, PetscSF *);
 PETSC_EXTERN PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF, PetscSection, PetscSection, PetscInt **);

File include/petscmat.h

 PETSC_EXTERN PetscErrorCode MatCreateSBAIJ(MPI_Comm,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[],Mat*);
 PETSC_EXTERN PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[],Mat *);
 PETSC_EXTERN PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
-PETSC_EXTERN PetscErrorCode MatXAIJSetPreallocation(Mat,PetscInt,const PetscInt*,const PetscInt*,const PetscInt*,const PetscInt*);
+PETSC_EXTERN PetscErrorCode MatXAIJSetPreallocation(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscInt[],const PetscInt[]);
 
 PETSC_EXTERN PetscErrorCode MatCreateShell(MPI_Comm,PetscInt,PetscInt,PetscInt,PetscInt,void *,Mat*);
 PETSC_EXTERN PetscErrorCode MatCreateNormal(Mat,Mat*);
                MATOP_SETUP_PREALLOCATION=29,
                MATOP_ILUFACTOR_SYMBOLIC=30,
                MATOP_ICCFACTOR_SYMBOLIC=31,
-               MATOP_GET_ARRAY=32,
-               MATOP_RESTORE_ARRAY=33,
+               /* MATOP_PLACEHOLDER_32=32, */
+               /* MATOP_PLACEHOLDER_33=33, */
                MATOP_DUPLICATE=34,
                MATOP_FORWARD_SOLVE=35,
                MATOP_BACKWARD_SOLVE=36,
                MATOP_SCALE=45,
                MATOP_SHIFT=46,
                MATOP_DIAGONAL_SET=47,
-               MATOP_ILUDT_FACTOR=48,
-               MATOP_SET_BLOCK_SIZE=49,
+               MATOP_ZERO_ROWS_COLUMNS=48,
+               MATOP_SET_RANDOM=49,
                MATOP_GET_ROW_IJ=50,
                MATOP_RESTORE_ROW_IJ=51,
                MATOP_GET_COLUMN_IJ=52,
                MATOP_GET_ROW_MIN_ABS=70,
                MATOP_CONVERT=71,
                MATOP_SET_COLORING=72,
-               MATOP_PLACEHOLDER=73,
+               /* MATOP_PLACEHOLDER_73=73, */
                MATOP_SET_VALUES_ADIFOR=74,
                MATOP_FD_COLORING_APPLY=75,
                MATOP_SET_FROM_OPTIONS=76,
                MATOP_MULT_CONSTRAINED=77,
                MATOP_MULT_TRANSPOSE_CONSTRAIN=78,
-               MATOP_PERMUTE_SPARSIFY=79,
+               MATOP_FIND_ZERO_DIAGONALS=79,
                MATOP_MULT_MULTIPLE=80,
                MATOP_SOLVE_MULTIPLE=81,
                MATOP_GET_INERTIA=82,
                MATOP_MAT_TRANSPOSE_MULT=95,
                MATOP_MAT_TRANSPOSE_MULT_SYMBO=96,
                MATOP_MAT_TRANSPOSE_MULT_NUMER=97,
-               MATOP_DUMMY98=98,
-               MATOP_DUMMY99=99,
-               MATOP_DUMMY100=100,
-               MATOP_DUMMY101=101,
+               /* MATOP_PLACEHOLDER_98=98, */
+               /* MATOP_PLACEHOLDER_99=99, */
+               /* MATOP_PLACEHOLDER_100=100, */
+               /* MATOP_PLACEHOLDER_101=101, */
                MATOP_CONJUGATE=102,
-               MATOP_SET_SIZES=103,
+               /* MATOP_PLACEHOLDER_103=103, */
                MATOP_SET_VALUES_ROW=104,
                MATOP_REAL_PART=105,
                MATOP_IMAGINARY_PART=106,
                MATOP_MULT_HERMITIAN_TRANSPOSE=121,
                MATOP_MULT_HERMITIAN_TRANS_ADD=122,
                MATOP_GET_MULTI_PROC_BLOCK=123,
+               MATOP_FIND_NONZERO_ROWS=124,
                MATOP_GET_COLUMN_NORMS=125,
+               MATOP_INVERT_BLOCK_DIAGONAL=126,
+               /* MATOP_PLACEHOLDER_127=127, */
                MATOP_GET_SUB_MATRICES_PARALLE=128,
                MATOP_SET_VALUES_BATCH=129,
                MATOP_TRANSPOSE_MAT_MULT=130,
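
Each MatOperation value indexes the corresponding slot of the function table in matimpl.h, so the renumbering above matters mainly when overriding a slot by name. A sketch using the existing MatShellSetOperation() API (A is an assumed shell matrix, MyMult a user-supplied routine):

    extern PetscErrorCode MyMult(Mat,Vec,Vec);

    /* Install a custom multiply into the shell matrix's operation table */
    ierr = MatShellSetOperation(A, MATOP_MULT, (void (*)(void))MyMult);CHKERRQ(ierr);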

File include/petscsys.h

 #  define PETSC_FUNCTION_NAME PETSC_FUNCTION_NAME_C
 #endif
 
+#if defined(__cplusplus)
+#  define PETSC_RESTRICT PETSC_CXX_RESTRICT
+#else
+#  define PETSC_RESTRICT PETSC_C_RESTRICT
+#endif
+
+#if defined(__cplusplus)
+#  define PETSC_STATIC_INLINE PETSC_CXX_STATIC_INLINE
+#else
+#  define PETSC_STATIC_INLINE PETSC_C_STATIC_INLINE
+#endif
+
 #if defined(_WIN32) && defined(PETSC_USE_SHARED_LIBRARIES) /* For Win32 shared libraries */
 #  define PETSC_DLLEXPORT __declspec(dllexport)
 #  define PETSC_DLLIMPORT __declspec(dllimport)
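
These language-dispatched macros replace the configure-generated STATIC_INLINE/RESTRICT defines removed from PETSc/Configure.py above. A minimal usage sketch:

    #include <petscsys.h>

    PETSC_STATIC_INLINE PetscScalar Dot2(const PetscScalar *PETSC_RESTRICT x,
                                         const PetscScalar *PETSC_RESTRICT y)
    {
      return x[0]*y[0] + x[1]*y[1];
    }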

File include/petscvec.h

 
 #if defined(PETSC_HAVE_CUSP)
 typedef struct _p_PetscCUSPIndices* PetscCUSPIndices;
-PETSC_EXTERN PetscErrorCode PetscCUSPIndicesCreate(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*);
-PETSC_EXTERN PetscErrorCode PetscCUSPIndicesDestroy(PetscCUSPIndices*);
+typedef struct _p_VecScatterCUSPIndices_StoS* VecScatterCUSPIndices_StoS;
+typedef struct _p_VecScatterCUSPIndices_PtoP* VecScatterCUSPIndices_PtoP;
 PETSC_EXTERN PetscErrorCode VecCUSPCopyToGPUSome_Public(Vec,PetscCUSPIndices);
 PETSC_EXTERN PetscErrorCode VecCUSPCopyFromGPUSome_Public(Vec,PetscCUSPIndices);
 PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter,Vec,ScatterMode);

File src/dm/impls/composite/pack.c

 PetscErrorCode  DMCompositeGetGlobalISs(DM dm,IS *is[])
 {
   PetscErrorCode         ierr;
-  PetscInt               cnt = 0,*idx,i;
+  PetscInt               cnt = 0;
   struct DMCompositeLink *next;
   PetscMPIInt            rank;
   DM_Composite           *com = (DM_Composite*)dm->data;
 
    /* loop over packed objects, handling one at a time */
   while (next) {
-    ierr = PetscMalloc(next->n*sizeof(PetscInt),&idx);CHKERRQ(ierr);
-    for (i=0; i<next->n; i++) idx[i] = next->grstart + i;
-    ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm),next->n,idx,PETSC_OWN_POINTER,&(*is)[cnt]);CHKERRQ(ierr);
+    ierr = ISCreateStride(PetscObjectComm((PetscObject)dm),next->n,next->grstart,1,&(*is)[cnt]);CHKERRQ(ierr);
     if (dm->fields) {
       MatNullSpace space;
       Mat          pmat;
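
ISCreateStride() yields the same contiguous index set as the deleted PetscMalloc/loop/ISCreateGeneral sequence, without materializing the index array. The equivalence, with comm, n, and first standing in for the communicator, next->n, and next->grstart:

    IS is;
    /* indices first, first+1, ..., first+n-1 (stride 1) */
    ierr = ISCreateStride(comm, n, first, 1, &is);CHKERRQ(ierr);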

File src/dm/impls/plex/examples/tests/ex1.c

       *dm  = refinedMesh;
     }
     /* Distribute mesh over processes */
-    ierr = DMPlexDistribute(*dm, partitioner, 0, &distributedMesh);CHKERRQ(ierr);
+    ierr = DMPlexDistribute(*dm, partitioner, 0, NULL, &distributedMesh);CHKERRQ(ierr);
     if (distributedMesh) {
       ierr = DMDestroy(dm);CHKERRQ(ierr);
       *dm  = distributedMesh;

File src/dm/impls/plex/examples/tests/ex3.c

       *dm  = refinedMesh;
     }
     /* Distribute mesh over processes */
-    ierr = DMPlexDistribute(*dm, partitioner, 0, &distributedMesh);CHKERRQ(ierr);
+    ierr = DMPlexDistribute(*dm, partitioner, 0, NULL, &distributedMesh);CHKERRQ(ierr);
     if (distributedMesh) {
       ierr = DMDestroy(dm);CHKERRQ(ierr);
       *dm  = distributedMesh;

File src/dm/impls/plex/examples/tests/ex4.c

     DM distributedMesh = NULL;
 
     /* Distribute mesh over processes */
-    ierr = DMPlexDistribute(*dm, partitioner, 0, &distributedMesh);CHKERRQ(ierr);
+    ierr = DMPlexDistribute(*dm, partitioner, 0, NULL, &distributedMesh);CHKERRQ(ierr);
     if (distributedMesh) {
       PetscInt cMax = PETSC_DETERMINE, fMax = PETSC_DETERMINE;
 

File src/dm/impls/plex/examples/tests/ex5.c

     DM distributedMesh = NULL;
 
     /* Distribute mesh over processes */
-    ierr = DMPlexDistribute(*dm, partitioner, 0, &distributedMesh);CHKERRQ(ierr);
+    ierr = DMPlexDistribute(*dm, partitioner, 0, NULL, &distributedMesh);CHKERRQ(ierr);
     if (distributedMesh) {
       PetscInt cMax = PETSC_DETERMINE, fMax = PETSC_DETERMINE;
 

File src/dm/impls/plex/examples/tests/ex7.c

     DM distributedMesh = NULL;
 
     /* Distribute mesh over processes */
-    ierr = DMPlexDistribute(*dm, partitioner, 0, &distributedMesh);CHKERRQ(ierr);
+    ierr = DMPlexDistribute(*dm, partitioner, 0, NULL, &distributedMesh);CHKERRQ(ierr);
     if (distributedMesh) {
       ierr = DMDestroy(dm);CHKERRQ(ierr);
       *dm  = distributedMesh;

File src/dm/impls/plex/ftn-custom/zplex.c

 
 /* Definitions of Fortran Wrapper routines */
 
-PETSC_EXTERN void PETSC_STDCALL dmplexdistribute_(DM *dm, CHAR name PETSC_MIXED_LEN(lenN), PetscInt *overlap, DM *dmParallel, int *ierr PETSC_END_LEN(lenN))
+PETSC_EXTERN void PETSC_STDCALL dmplexdistribute_(DM *dm, CHAR name PETSC_MIXED_LEN(lenN), PetscInt *overlap, PetscSF *sf, DM *dmParallel, int *ierr PETSC_END_LEN(lenN))
 {
   char *partitioner;
 
   FIXCHAR(name, lenN, partitioner);
-  *ierr = DMPlexDistribute(*dm, partitioner, *overlap, dmParallel);
+  *ierr = DMPlexDistribute(*dm, partitioner, *overlap, sf, dmParallel);
   FREECHAR(name, partitioner);
 }
 

File src/dm/impls/plex/plex.c

 
 #undef __FUNCT__
 #define __FUNCT__ "DMPlexDistributeField"
-/*
+/*@
+  DMPlexDistributeField - Distribute field data to match a given PetscSF, usually the SF from mesh distribution
+
+  Collective on DM
+
   Input Parameters:
-. originalSection
-, originalVec
++ dm - The DMPlex object
+. pointSF - The PetscSF describing the communication pattern
+. originalSection - The PetscSection for existing data layout
+- originalVec - The existing data
 
   Output Parameters:
-. newSection
-. newVec
-*/
++ newSection - The PetscSection describing the new data layout
+- newVec - The new data
+
+  Level: developer
+
+.seealso: DMPlexDistribute(), DMPlexDistributeData()
+@*/
 PetscErrorCode DMPlexDistributeField(DM dm, PetscSF pointSF, PetscSection originalSection, Vec originalVec, PetscSection newSection, Vec newVec)
 {
   PetscSF        fieldSF;
 }
 
 #undef __FUNCT__
+#define __FUNCT__ "DMPlexDistributeData"
+/*@
+  DMPlexDistributeData - Distribute data to match a given PetscSF, usually the SF from mesh distribution
+
+  Collective on DM
+
+  Input Parameters:
++ dm - The DMPlex object
+. pointSF - The PetscSF describing the communication pattern
+. originalSection - The PetscSection for existing data layout
+. datatype - The type of data
+- originalData - The existing data
+
+  Output Parameters:
++ newSection - The PetscSection describing the new data layout
+- newData - The new data
+
+  Level: developer
+
+.seealso: DMPlexDistribute(), DMPlexDistributeField()
+@*/
+PetscErrorCode DMPlexDistributeData(DM dm, PetscSF pointSF, PetscSection originalSection, MPI_Datatype datatype, void *originalData, PetscSection newSection, void **newData)
+{
+  PetscSF        fieldSF;
+  PetscInt      *remoteOffsets, fieldSize;
+  PetscMPIInt    dataSize;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = PetscSFDistributeSection(pointSF, originalSection, &remoteOffsets, newSection);CHKERRQ(ierr);
+
+  ierr = PetscSectionGetStorageSize(newSection, &fieldSize);CHKERRQ(ierr);
+  ierr = MPI_Type_size(datatype, &dataSize);CHKERRQ(ierr);
+  ierr = PetscMalloc(fieldSize * dataSize, newData);CHKERRQ(ierr);
+
+  ierr = PetscSFCreateSectionSF(pointSF, originalSection, remoteOffsets, newSection, &fieldSF);CHKERRQ(ierr);
+  ierr = PetscSFBcastBegin(fieldSF, datatype, originalData, *newData);CHKERRQ(ierr);
+  ierr = PetscSFBcastEnd(fieldSF, datatype, originalData, *newData);CHKERRQ(ierr);
+  ierr = PetscSFDestroy(&fieldSF);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
 #define __FUNCT__ "DMPlexDistribute"
 /*@C
   DMPlexDistribute - Distributes the mesh and any associated sections.
 - overlap - The overlap of partitions, 0 is the default
 
   Output Parameter:
-. parallelMesh - The distributed DMPlex object, or NULL
++ sf - The PetscSF used for point distribution
+- parallelMesh - The distributed DMPlex object, or NULL
 
  Note: If the mesh was not distributed, parallelMesh is set to NULL
 
 .keywords: mesh, elements
 .seealso: DMPlexCreate(), DMPlexDistributeByFace()
 @*/
-PetscErrorCode DMPlexDistribute(DM dm, const char partitioner[], PetscInt overlap, DM *dmParallel)
+PetscErrorCode DMPlexDistribute(DM dm, const char partitioner[], PetscInt overlap, PetscSF *sf, DM *dmParallel)
 {
   DM_Plex               *mesh   = (DM_Plex*) dm->data, *pmesh;
   MPI_Comm               comm;
 
   PetscFunctionBegin;
   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
-  PetscValidPointer(dmParallel,4);
+  if (sf) PetscValidPointer(sf,4);
+  PetscValidPointer(dmParallel,5);
 
   ierr = PetscLogEventBegin(DMPLEX_Distribute,dm,0,0,0);CHKERRQ(ierr);
   ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr);
   }
   ierr = PetscLogEventEnd(DMPLEX_DistributeSF,dm,0,0,0);CHKERRQ(ierr);
   /* Cleanup */
-  ierr = PetscSFDestroy(&pointSF);CHKERRQ(ierr);
+  if (sf) {*sf = pointSF;}
+  else    {ierr = PetscSFDestroy(&pointSF);CHKERRQ(ierr);}
   ierr = DMSetFromOptions(*dmParallel);CHKERRQ(ierr);
   ierr = PetscLogEventEnd(DMPLEX_Distribute,dm,0,0,0);CHKERRQ(ierr);
   PetscFunctionReturn(0);
     ierr = VecCreate(PetscObjectComm((PetscObject)dm), &coordinatesNew);CHKERRQ(ierr);
     ierr = PetscObjectSetName((PetscObject) coordinatesNew, "coordinates");CHKERRQ(ierr);
     ierr = VecSetSizes(coordinatesNew, coordSizeNew, PETSC_DETERMINE);CHKERRQ(ierr);
-    ierr = VecSetType(coordinatesNew,dm->vectype);CHKERRQ(ierr);
+    ierr = VecSetType(coordinatesNew,VECSTANDARD);CHKERRQ(ierr);
     ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
     ierr = VecGetArray(coordinatesNew, &coordsNew);CHKERRQ(ierr);
     /* Old vertices have the same coordinates */
 @*/
 PetscErrorCode DMPlexVecGetClosure(DM dm, PetscSection section, Vec v, PetscInt point, PetscInt *csize, PetscScalar *values[])
 {
+  PetscSection   clSection;
+  IS             clIndices;
   PetscScalar   *array, *vArray;
   PetscInt      *points = NULL;
   PetscInt       offsets[32];
   PetscFunctionBegin;
   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
   PetscValidHeaderSpecific(v, VEC_CLASSID, 3);
-  if (!section) {
-    ierr = DMGetDefaultSection(dm, &section);CHKERRQ(ierr);
+  if (!section) {ierr = DMGetDefaultSection(dm, &section);CHKERRQ(ierr);}
+  ierr = PetscSectionGetClosureIndex(section, (PetscObject) dm, &clSection, &clIndices);CHKERRQ(ierr);
+  if (clSection) {
+    const PetscInt *idx;
+    PetscInt        dof, off;
+
+    ierr = PetscSectionGetDof(clSection, point, &dof);CHKERRQ(ierr);
+    if (csize) *csize = dof;
+    if (values) {
+      if (!*values) {
+        ierr = DMGetWorkArray(dm, dof, PETSC_SCALAR, &array);CHKERRQ(ierr);
+        *values = array;
+      } else {
+        array = *values;
+      }
+      ierr = PetscSectionGetOffset(clSection, point, &off);CHKERRQ(ierr);
+      ierr = ISGetIndices(clIndices, &idx);CHKERRQ(ierr);
+      ierr = VecGetArray(v, &vArray);CHKERRQ(ierr);
+      for (p = 0; p < dof; ++p) array[p] = vArray[idx[off+p]];
+      ierr = VecRestoreArray(v, &vArray);CHKERRQ(ierr);
+      ierr = ISRestoreIndices(clIndices, &idx);CHKERRQ(ierr);
+    }
+    PetscFunctionReturn(0);
   }
   ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
   ierr = PetscSectionGetNumFields(section, &numFields);CHKERRQ(ierr);
 }
 
 #undef __FUNCT__
+#define __FUNCT__ "DMPlexCreateClosureIndex"
+/*@
+  DMPlexCreateClosureIndex - Calculate an index for the given PetscSection for the closure operation on the DM
+
+  Not collective
+
+  Input Parameters:
++ dm - The DM
+- section - The PetscSection describing the data layout, or NULL to use the default section
+
+  Note:
+  This should greatly improve the performance of the closure operations, at the cost of additional memory.
+
+  Level: intermediate
+
+.seealso: DMPlexVecGetClosure(), DMPlexVecRestoreClosure(), DMPlexVecSetClosure(), DMPlexMatSetClosure()
+@*/
+PetscErrorCode DMPlexCreateClosureIndex(DM dm, PetscSection section)
+{
+  PetscSection   closureSection;
+  IS             closureIS;
+  PetscInt       offsets[32], *clIndices;
+  PetscInt       depth, numFields, pStart, pEnd, point, clSize;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
+  if (!section) {ierr = DMGetDefaultSection(dm, &section);CHKERRQ(ierr);}
+  ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
+  ierr = PetscSectionGetNumFields(section, &numFields);CHKERRQ(ierr);
+  ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr);
+  if (numFields > 31) SETERRQ1(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of fields %D limited to 31", numFields);
+  ierr = PetscSectionCreate(PetscObjectComm((PetscObject) section), &closureSection);CHKERRQ(ierr);
+  ierr = PetscSectionSetChart(closureSection, pStart, pEnd);CHKERRQ(ierr);
+  for (point = pStart; point < pEnd; ++point) {
+    PetscInt *points = NULL, numPoints, p, dof, cldof = 0;
+
+    ierr = DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &numPoints, &points);CHKERRQ(ierr);
+    for (p = 0; p < numPoints*2; p += 2) {
+      if ((points[p] >= pStart) && (points[p] < pEnd)) {
+        ierr = PetscSectionGetDof(section, points[p], &dof);CHKERRQ(ierr);
+        cldof += dof;
+      }
+    }
+    ierr = DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &numPoints, &points);CHKERRQ(ierr);
+    ierr = PetscSectionSetDof(closureSection, point, cldof);CHKERRQ(ierr);
+  }
+  ierr = PetscSectionSetUp(closureSection);CHKERRQ(ierr);
+  ierr = PetscSectionGetStorageSize(closureSection, &clSize);CHKERRQ(ierr);
+  ierr = PetscMalloc(clSize * sizeof(PetscInt), &clIndices);CHKERRQ(ierr);
+  for (point = pStart; point < pEnd; ++point) {
+    PetscInt *points = NULL, numPoints, p, q, cldof, cloff, fdof, f;
+
+    ierr = PetscSectionGetDof(closureSection, point, &cldof);CHKERRQ(ierr);
+    ierr = PetscSectionGetOffset(closureSection, point, &cloff);CHKERRQ(ierr);
+    ierr = PetscMemzero(offsets, 32 * sizeof(PetscInt));CHKERRQ(ierr);
+    ierr = DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &numPoints, &points);CHKERRQ(ierr);
+    /* Compress out points not in the section, and create field offsets */
+    for (p = 0, q = 0; p < numPoints*2; p += 2) {
+      if ((points[p] >= pStart) && (points[p] < pEnd)) {
+        points[q*2]   = points[p];
+        points[q*2+1] = points[p+1];
+        for (f = 0; f < numFields; ++f) {
+          ierr = PetscSectionGetFieldDof(section, points[p], f, &fdof);CHKERRQ(ierr);
+          offsets[f+1] += fdof;
+        }
+        ++q;
+      }
+    }
+    numPoints = q;
+    for (f = 1; f < numFields; ++f) offsets[f+1] += offsets[f];
+    if (numFields && offsets[numFields] != cldof) SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Invalid size for closure %d should be %d", offsets[numFields], cldof);
+    /* Create indices */
+    for (p = 0; p < numPoints*2; p += 2) {
+      PetscInt o = points[p+1], dof, off, d;
+
+      ierr = PetscSectionGetDof(section, points[p], &dof);CHKERRQ(ierr);
+      ierr = PetscSectionGetOffset(section, points[p], &off);CHKERRQ(ierr);
+      if (numFields) {
+        PetscInt fdof, foff, fcomp, f, c;
+
+        for (f = 0, foff = 0; f < numFields; ++f) {
+          ierr = PetscSectionGetFieldDof(section, points[p], f, &fdof);CHKERRQ(ierr);
+          if (o >= 0) {
+            for (d = 0; d < fdof; ++d, ++offsets[f]) clIndices[cloff+offsets[f]] = off+foff+d;
+          } else {
+            ierr = PetscSectionGetFieldComponents(section, f, &fcomp);CHKERRQ(ierr);
+            for (d = fdof/fcomp-1; d >= 0; --d) {
+              for (c = 0; c < fcomp; ++c, ++offsets[f]) clIndices[cloff+offsets[f]] = off+foff+d*fcomp+c;
+            }
+          }
+          foff += fdof;
+        }
+      } else {
+        if (o >= 0) for (d = 0;     d < dof; ++d) clIndices[cloff+d] = off+d;
+        else        for (d = dof-1; d >= 0;  --d) clIndices[cloff+d] = off+d;
+      }
+    }
+    ierr = DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &numPoints, &points);CHKERRQ(ierr);
+  }
+  ierr = ISCreateGeneral(PETSC_COMM_SELF, clSize, clIndices, PETSC_OWN_POINTER, &closureIS);CHKERRQ(ierr);
+  ierr = PetscSectionSetClosureIndex(section, (PetscObject) dm, closureSection, closureIS);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
 #define __FUNCT__ "DMPlexPrintMatSetValues"
 PetscErrorCode DMPlexPrintMatSetValues(PetscViewer viewer, Mat A, PetscInt point, PetscInt numIndices, const PetscInt indices[], const PetscScalar values[])
 {
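
DMPlexCreateClosureIndex() precomputes, for each point, the flat vector indices of its transitive closure, so the new fast path in DMPlexVecGetClosure() can gather values without rewalking the closure. A usage sketch, assuming a DM whose default section is already set, a Vec v laid out by it, and a point number pt:

    PetscScalar *vals = NULL;
    PetscInt     csize;

    ierr = DMPlexCreateClosureIndex(dm, NULL);CHKERRQ(ierr);            /* build the index once */
    ierr = DMPlexVecGetClosure(dm, NULL, v, pt, &csize, &vals);CHKERRQ(ierr);
    /* ... use the csize entries of vals ... */
    ierr = DMPlexVecRestoreClosure(dm, NULL, v, pt, &csize, &vals);CHKERRQ(ierr);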

File src/dm/impls/plex/plexcgns.c

   ierr = VecCreate(comm, &coordinates);CHKERRQ(ierr);
   ierr = PetscObjectSetName((PetscObject) coordinates, "coordinates");CHKERRQ(ierr);
   ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-  ierr = VecSetType(coordinates,(*dm)->vectype);CHKERRQ(ierr);
+  ierr = VecSetType(coordinates,VECSTANDARD);CHKERRQ(ierr);
   ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
   if (!rank) {
     PetscInt off = 0;

File src/dm/impls/plex/plexcreate.c

   ierr = PetscSectionGetStorageSize(coordSection, &coordSize);CHKERRQ(ierr);
   ierr = VecCreate(PetscObjectComm((PetscObject)dm), &coordinates);CHKERRQ(ierr);
   ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-  ierr = VecSetType(coordinates,dm->vectype);CHKERRQ(ierr);
+  ierr = VecSetType(coordinates,VECSTANDARD);CHKERRQ(ierr);
   ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
   for (vy = 0; vy <= edges[1]; ++vy) {
     for (vx = 0; vx <= edges[0]; ++vx) {
   ierr = VecCreate(PetscObjectComm((PetscObject)dm), &coordinates);CHKERRQ(ierr);
   ierr = PetscObjectSetName((PetscObject) coordinates, "coordinates");CHKERRQ(ierr);
   ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-  ierr = VecSetType(coordinates,dm->vectype);CHKERRQ(ierr);
+  ierr = VecSetType(coordinates,VECSTANDARD);CHKERRQ(ierr);
   ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
   for (vz = 0; vz <= faces[2]; ++vz) {
     for (vy = 0; vy <= faces[1]; ++vy) {
     ierr = PetscSectionGetStorageSize(coordSection, &coordSize);CHKERRQ(ierr);
     ierr = VecCreate(PetscObjectComm((PetscObject)dm), &coordinates);CHKERRQ(ierr);
     ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-    ierr = VecSetType(coordinates,dm->vectype);CHKERRQ(ierr);
+    ierr = VecSetType(coordinates,VECSTANDARD);CHKERRQ(ierr);
     ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
     for (vy = 0; vy < numYVertices; ++vy) {
       for (vx = 0; vx < numXVertices; ++vx) {
   ierr = VecCreate(PetscObjectComm((PetscObject)dm), &coordinates);CHKERRQ(ierr);
   ierr = PetscObjectSetName((PetscObject) coordinates, "coordinates");CHKERRQ(ierr);
   ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-  ierr = VecSetType(coordinates,dm->vectype);CHKERRQ(ierr);
+  ierr = VecSetType(coordinates,VECSTANDARD);CHKERRQ(ierr);
   ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
   for (v = 0; v < numVertices; ++v) {
     for (d = 0; d < spaceDim; ++d) {
   ierr = VecCreate(PetscObjectComm((PetscObject)dm), &coordinates);CHKERRQ(ierr);
   ierr = PetscObjectSetName((PetscObject) coordinates, "coordinates");CHKERRQ(ierr);
   ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-  ierr = VecSetType(coordinates,dm->vectype);CHKERRQ(ierr);
+  ierr = VecSetType(coordinates,VECSTANDARD);CHKERRQ(ierr);
   ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
   for (v = 0; v < numPoints[0]; ++v) {
     PetscInt off;

File src/dm/impls/plex/plexexodusii.c

   ierr = VecCreate(comm, &coordinates);CHKERRQ(ierr);
   ierr = PetscObjectSetName((PetscObject) coordinates, "coordinates");CHKERRQ(ierr);
   ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-  ierr = VecSetType(coordinates,(*dm)->vectype);CHKERRQ(ierr);
+  ierr = VecSetType(coordinates,VECSTANDARD);CHKERRQ(ierr);
   ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
   if (!rank) {
     float *x, *y, *z;

File src/dm/impls/plex/plexinterpolate.c

   ierr = VecCreate(PetscObjectComm((PetscObject) dmB), &coordinatesB);CHKERRQ(ierr);
   ierr = PetscObjectSetName((PetscObject) coordinatesB, "coordinates");CHKERRQ(ierr);
   ierr = VecSetSizes(coordinatesB, coordSizeB, PETSC_DETERMINE);CHKERRQ(ierr);
-  ierr = VecSetType(coordinatesB,dmB->vectype);CHKERRQ(ierr);
+  ierr = VecSetType(coordinatesB,VECSTANDARD);CHKERRQ(ierr);
   ierr = VecGetArray(coordinatesA, &coordsA);CHKERRQ(ierr);
   ierr = VecGetArray(coordinatesB, &coordsB);CHKERRQ(ierr);
   for (v = 0; v < vEndB-vStartB; ++v) {

File src/dm/impls/plex/plexpreallocate.c

     PetscInt dof, off, d, q;
     PetscInt p = leaves[l], numAdj = maxAdjSize;
 
+    if ((p < pStart) || (p >= pEnd)) continue;
     ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
     ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
     ierr = DMPlexGetAdjacency_Internal(dm, p, useClosure, tmpClosure, &numAdj, tmpAdj);CHKERRQ(ierr);
     PetscInt dof, off, d, q;
     PetscInt p = leaves[l], numAdj = maxAdjSize;
 
+    if ((p < pStart) || (p >= pEnd)) continue;
     ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
     ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
     ierr = DMPlexGetAdjacency_Internal(dm, p, useClosure, tmpClosure, &numAdj, tmpAdj);CHKERRQ(ierr);
     ierr = PetscPrintf(comm, "Leaf adjacency indices\n");CHKERRQ(ierr);
     ierr = ISCreateGeneral(comm, adjSize, adj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
     ierr = ISView(tmp, NULL);CHKERRQ(ierr);
+    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
   }
   /* Gather adjacenct indices to root */
   ierr = PetscSectionGetStorageSize(rootSectionAdj, &adjSize);CHKERRQ(ierr);
     ierr = PetscPrintf(comm, "Root adjacency indices after gather\n");CHKERRQ(ierr);
     ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
     ierr = ISView(tmp, NULL);CHKERRQ(ierr);
+    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
   }
   /* Add in local adjacency indices for owned dofs on interface (roots) */
   for (p = pStart; p < pEnd; ++p) {
     ierr = PetscPrintf(comm, "Root adjacency indices\n");CHKERRQ(ierr);
     ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
     ierr = ISView(tmp, NULL);CHKERRQ(ierr);
+    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
   }
   /* Compress indices */
   ierr = PetscSectionSetUp(rootSectionAdj);CHKERRQ(ierr);
     ierr = PetscPrintf(comm, "Root adjacency indices after compression\n");CHKERRQ(ierr);
     ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
     ierr = ISView(tmp, NULL);CHKERRQ(ierr);
+    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
   }
   /* Build adjacency section: Maps global indices to sets of adjacent global indices */
   ierr = PetscSectionGetOffsetRange(sectionGlobal, &globalOffStart, &globalOffEnd);CHKERRQ(ierr);
     ierr = PetscPrintf(comm, "Column indices\n");CHKERRQ(ierr);
     ierr = ISCreateGeneral(comm, numCols, cols, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
     ierr = ISView(tmp, NULL);CHKERRQ(ierr);
+    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
   }
   /* Create allocation vectors from adjacency graph */
   ierr = MatGetLocalSize(A, &locRows, NULL);CHKERRQ(ierr);

File src/dm/impls/plex/plexsubmesh.c

   ierr = VecCreate(PetscObjectComm((PetscObject)dm), &newCoordinates);CHKERRQ(ierr);
   ierr = PetscObjectSetName((PetscObject) newCoordinates, "coordinates");CHKERRQ(ierr);
   ierr = VecSetSizes(newCoordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-  ierr = VecSetType(newCoordinates,dm->vectype);CHKERRQ(ierr);
+  ierr = VecSetType(newCoordinates,VECSTANDARD);CHKERRQ(ierr);
   ierr = DMSetCoordinatesLocal(dmNew, newCoordinates);CHKERRQ(ierr);
   ierr = DMGetCoordinatesLocal(dm, &coordinates);CHKERRQ(ierr);
   ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr);
     ierr = PetscSectionGetStorageSize(subCoordSection, &coordSize);CHKERRQ(ierr);
     ierr = VecCreate(comm, &subCoordinates);CHKERRQ(ierr);
     ierr = VecSetSizes(subCoordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-    ierr = VecSetType(subCoordinates,dm->vectype);CHKERRQ(ierr);
+    ierr = VecSetType(subCoordinates,VECSTANDARD);CHKERRQ(ierr);
     if (coordSize) {
       ierr = VecGetArray(coordinates,    &coords);CHKERRQ(ierr);
       ierr = VecGetArray(subCoordinates, &subCoords);CHKERRQ(ierr);
     ierr = PetscSectionGetStorageSize(subCoordSection, &coordSize);CHKERRQ(ierr);
     ierr = VecCreate(comm, &subCoordinates);CHKERRQ(ierr);
     ierr = VecSetSizes(subCoordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-    ierr = VecSetType(subCoordinates,dm->vectype);CHKERRQ(ierr);
+    ierr = VecSetType(subCoordinates,VECSTANDARD);CHKERRQ(ierr);
     ierr = VecGetArray(coordinates,    &coords);CHKERRQ(ierr);
     ierr = VecGetArray(subCoordinates, &subCoords);CHKERRQ(ierr);
     for (v = 0; v < numSubPoints[0]; ++v) {
     ierr = PetscSectionGetStorageSize(subCoordSection, &coordSize);CHKERRQ(ierr);
     ierr = VecCreate(comm, &subCoordinates);CHKERRQ(ierr);
     ierr = VecSetSizes(subCoordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-    ierr = VecSetType(subCoordinates,dm->vectype);CHKERRQ(ierr);
+    ierr = VecSetType(subCoordinates,VECSTANDARD);CHKERRQ(ierr);
     ierr = VecGetArray(coordinates,    &coords);CHKERRQ(ierr);
     ierr = VecGetArray(subCoordinates, &subCoords);CHKERRQ(ierr);
     for (v = 0; v < numSubVertices; ++v) {
     ierr = PetscSectionGetStorageSize(subCoordSection, &coordSize);CHKERRQ(ierr);
     ierr = VecCreate(comm, &subCoordinates);CHKERRQ(ierr);
     ierr = VecSetSizes(subCoordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr);
-    ierr = VecSetType(subCoordinates,dm->vectype);CHKERRQ(ierr);
+    ierr = VecSetType(subCoordinates,VECSTANDARD);CHKERRQ(ierr);
     ierr = VecGetArray(coordinates,    &coords);CHKERRQ(ierr);
     ierr = VecGetArray(subCoordinates, &subCoords);CHKERRQ(ierr);
     for (v = 0; v < numSubPoints[0]; ++v) {

File src/dm/interface/dm.c

 #undef __FUNCT__
 #define __FUNCT__ "DMGetCoordinateDM"
 /*@
-  DMGetCoordinateDM - Gets the DM that scatters between global and local coordinates
+  DMGetCoordinateDM - Gets the DM that prescribes coordinate layout and scatters between global and local coordinates
 
   Collective on DM
 
   Level: intermediate
 
 .keywords: distributed array, get, corners, nodes, local indices, coordinates
-.seealso: DMSetCoordinates(), DMSetCoordinatesLocal(), DMGetCoordinates(), DMGetCoordinatesLocal()
+.seealso: DMSetCoordinateDM(), DMSetCoordinates(), DMSetCoordinatesLocal(), DMGetCoordinates(), DMGetCoordinatesLocal()
 @*/
 PetscErrorCode DMGetCoordinateDM(DM dm, DM *cdm)
 {
 }
 
 #undef __FUNCT__
+#define __FUNCT__ "DMSetCoordinateDM"
+/*@
+  DMSetCoordinateDM - Sets the DM that prescribes coordinate layout and scatters between global and local coordinates
+
+  Logically Collective on DM
+
+  Input Parameters:
++ dm - the DM
+- cdm - coordinate DM
+
+  Level: intermediate
+
+.keywords: distributed array, get, corners, nodes, local indices, coordinates
+.seealso: DMGetCoordinateDM(), DMSetCoordinates(), DMSetCoordinatesLocal(), DMGetCoordinates(), DMGetCoordinatesLocal()
+@*/
+PetscErrorCode DMSetCoordinateDM(DM dm, DM cdm)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  PetscValidHeaderSpecific(dm,DM_CLASSID,1);
+  PetscValidHeaderSpecific(cdm,DM_CLASSID,2);
+  ierr = DMDestroy(&dm->coordinateDM);CHKERRQ(ierr);
+  dm->coordinateDM = cdm;
+  ierr = PetscObjectReference((PetscObject) dm->coordinateDM);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
 #define __FUNCT__ "DMLocatePoints"
 /*@
   DMLocatePoints - Locate the points in v in the mesh and return an IS of the containing cells
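
DMSetCoordinateDM() is the setter paired with DMGetCoordinateDM(); a short sketch of sharing one coordinate layout between two meshes (dmA and dmB are assumed to exist):

    DM cdm;

    ierr = DMGetCoordinateDM(dmA, &cdm);CHKERRQ(ierr);
    ierr = DMSetCoordinateDM(dmB, cdm);CHKERRQ(ierr);  /* dmB takes a reference; no copy is made */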

File src/docs/tex/petscapp.bib

 % LiteralHTML:
 % LiteralHTML:  <a name="geosciences"><H3><center>Geosciences</center></H3>
 % LiteralHTML:
+@article{ScheltingaMyersPietrzak10,
+  author={Terwisscha van Scheltinga, Arjen D. and Myers, Paul G. and Pietrzak, Julie D.},
+  title={A finite element sea ice model of the Canadian Arctic Archipelago},
+  journal={Ocean Dynamics},
+  volume={60},
+  number={6},
+  pages={1539-1558},
+  year={2010},
+  issn={1616-7341},
+  doi={10.1007/s10236-010-0356-5},
+  url={http://dx.doi.org/10.1007/s10236-010-0356-5},
+  publisher={Springer-Verlag},
+  keywords={Ocean modelling; Unstructured meshes; Sea ice; Arctic Ocean; Canadian Arctic Archipelago; Freshwater flux},
+  language={English}
+}
 @inbook{DeStefanoAndreasiSecchi13,
   author    = {M. De Stefano and F. Golfré Andreasi and A. Secchi},
   title     = {Towards preconditioned nonlinear conjugate gradients for generic geophysical inversions},
 number = {3},
 pages = {R69-R80},
 keywords = {geophysical image processing; inverse problems; seismology},
-url = {http://link.aip.org/link/?GPY/76/R69/1},
+url = {http://dx.doi.org/10.1190/1.3554652},
 doi = {10.1190/1.3554652}
 }
 @article{stefano:806,
 number = {1},
 pages = {806-810},
 keywords = {3D; imaging; inversion; magnetics; multiparameter},
-url = {http://link.aip.org/link/?SGA/30/806/1},
+url = {http://dx.doi.org/10.1190/1.3628198},
 doi = {10.1190/1.3628198}
 }
 @article{MayKnepley2011,
 % LiteralHTML:
 % LiteralHTML:  <a name="cfd"><H3><center>CFD</center></H3>
 % LiteralHTML:
+@article{BungartzGatzhammerLiebMehlNeckel11,
+  author={Bungartz, Hans-Joachim and Gatzhammer, Bernhard and Lieb, Michael and Mehl, Miriam and Neckel, Tobias},
+  title={Towards multi-phase flow simulations in the PDE framework Peano},
+  journal={Computational Mechanics},
+  volume={48},
+  number={3},
+  pages={365-376},
+  year={2011},
+  issn={0178-7675},
+  doi={10.1007/s00466-011-0626-1},
+  url={http://dx.doi.org/10.1007/s00466-011-0626-1},
+  publisher={Springer-Verlag},
+  keywords={PDE framework; Octree-like Cartesian grids; Computational fluid dynamics; Thermohydraulics; Two-phase flow},
+  language={English}
+}
+
 @article{bpr:cfl,
    author    ="H.~M. B{\"u}cker and B. Pollul and A. Rasch",
    title     ="{On {CFL} Evolution Strategies for  Implicit Upwind Methods in Linearized {E}uler Equations}",
 % LiteralHTML:
 % LiteralHTML:  <a name="otherapps"><H3><center>Other Application Areas</center></H3>
 % LiteralHTML:
-
+@inproceedings{YanRhodes11,
+  author = {Yan, Baoqiang and Rhodes, Philip J.},
+  title = {IDEA -- An API for Parallel Computing with Large Spatial Datasets},
+  booktitle = {Proceedings of the 2011 International Conference on Parallel Processing},
+  series = {ICPP '11},
+  pages = {355--364},
+  year = {2011},
+  isbn = {978-0-7695-4510-3},
+  numpages = {10},
+  url = {http://dx.doi.org/10.1109/ICPP.2011.70},
+  doi = {10.1109/ICPP.2011.70},
+  acmid = {2066965},
+  publisher = {IEEE Computer Society},
+  address = {Washington, DC, USA},
+  keywords = {cluster, parallelization, spatial dataset, dependency, data partitioning},
+}
 @Article{Hakansson2013MDStochasticLiouville,
   author ="Hakansson, Par and Nguyen, ThaoNguyen and Nair, Prasanth B. and Edge, Ruth and Stulz, Eugen",
   title  ="Cu(ii)-porphyrin molecular dynamics as seen in a novel EPR/Stochastic Liouville equation study",
 % LiteralHTML:
 % LiteralHTML:  <a name="packages"></a><H3><center>Software Packages that use or interface to PETSc</center></H3>
 % LiteralHTML:
+@article{Logg07,
+  author={Logg, Anders},
+  title={Automating the Finite Element Method},
+  journal={Archives of Computational Methods in Engineering},
+  volume={14},
+  number={2},
+  pages={93-138},
+  year={2007},
+  doi={10.1007/s11831-007-9003-9},
+  url={http://dx.doi.org/10.1007/s11831-007-9003-9},
+  publisher={Kluwer Academic Publishers},
+  issn={1134-3060},
+  language={English}
+}
+@article{JezequelCouturierDenis12,
+  author={Jezequel, Fabienne and Couturier, Rapha\"el and Denis, Christophe},
+  title={Solving large sparse linear systems in a grid environment: the GREMLINS code versus the PETSc library},
+  journal={The Journal of Supercomputing},
+  volume={59},
+  number={3},
+  pages={1517-1532},
+  year={2012},
+  issn={0920-8542},
+  doi={10.1007/s11227-011-0563-y},
+  url={http://dx.doi.org/10.1007/s11227-011-0563-y},
+  publisher={Springer US},
+  keywords={Asynchronous iterations; Grid computing; Iterative method; Multisplitting method; Sparse linear solver},
+  language={English}
+}
 
 @Unpublished{feap-web-page,
   title       = {FEAP: A Finite Element Analysis Program},
 % LiteralHTML:
 % LiteralHTML:  <a name="algorithm"><H3><center>Algorithm design and analysis</center></H3>
 % LiteralHTML:
+@ARTICLE{JhuraniDemkowicz12,
+   author = {{Jhurani}, C. and {Demkowicz}, L.},
+    title = "{Multiscale modeling using goal-oriented adaptivity and numerical homogenization. Part II: Algorithms for the Moore-Penrose pseudoinverse}",
+  journal = {Computer Methods in Applied Mechanics and Engineering},
+   volume = 213,
+    pages = {418--426},
+     year = 2012,
+    month = mar,
+      doi = {10.1016/j.cma.2011.06.003},
+   adsurl = {http://adsabs.harvard.edu/abs/2012CMAME.213..418J}
+}
+@article{YangCai11,
+  author = {Yang, Haijian and Cai, Xiao-Chuan},
+  title = {Parallel Two-Grid Semismooth Newton-Krylov-Schwarz Method for Nonlinear Complementarity Problems},
+  journal = {J. Sci. Comput.},
+  issue_date = {May 2011},
+  volume = {47},
+  number = {2},
+  month = may,
+  pages = {258--280},
+  year = {2011},
+  issn = {0885-7474},
+  numpages = {23},
+  url = {http://dx.doi.org/10.1007/s10915-010-9436-4},
+  doi = {10.1007/s10915-010-9436-4},
+  acmid = {1971246},
+  publisher = {Plenum Press},
+  address = {New York, NY, USA},
+  keywords = {Complementarity problem, Grid sequencing, Parallel computing, Schwarz preconditioners, Semismooth Newton, Two-level methods},
+}
+@article{Rheinbach09,
+  author={Rheinbach, Oliver},
+  title={Parallel Iterative Substructuring in Structural Mechanics},
+  journal={Archives of Computational Methods in Engineering},
+  volume={16},
+  number={4},
+  pages={425--463},
+  year={2009},
+  issn={1134-3060},
+  doi={10.1007/s11831-009-9035-4},
+  url={http://dx.doi.org/10.1007/s11831-009-9035-4},
+  publisher={Springer Netherlands},
+  language={English}
+}
 @Article{KnepleyTerrel2012,
   author   = {Matthew G. Knepley and Andy R. Terrel},
   title    = {Finite Element Integration on {GPUs}},

File src/ksp/ksp/impls/gmres/dgmres/dgmres.c

   PetscInt       N        = dgmres->max_k+1;
   PetscInt       n        = dgmres->it+1;
   PetscReal      alpha;
-#if !defined(PETSC_MISSING_LAPACK_GETRF)
-  PetscBLASInt info;
-#endif
 
   PetscFunctionBegin;
   ierr = PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);CHKERRQ(ierr);
 #if defined(PETSC_MISSING_LAPACK_GETRF)
   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
 #else
-  PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
-  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
+  {
+    PetscBLASInt info;
+    PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
+    if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
+  }
 #endif
 
   /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
   PetscBLASInt   liwork;
   PetscScalar    *Ht;           /* Transpose of the Hessenberg matrix */
   PetscScalar    *t;            /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
-  PetscBLASInt   nrhs = 1;      /* Number of right hand side */
   PetscBLASInt   *ipiv;         /* Permutation vector to be used in LAPACK */
-#if !defined(PETSC_MISSING_LAPACK_HSEQR) || !defined(PETSC_MISSING_LAPACK_TRSEN)
-  PetscBLASInt ilo = 1;
-  PetscBLASInt info;
-  PetscReal    CondEig;         /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
-  PetscReal    CondSub;         /* estimated reciprocal condition number of the specified invariant subspace. */
-  PetscBool    flag;            /* determine whether to use Ritz vectors or harmonic Ritz vectors */
-#endif
+  PetscBool      flag;            /* determine whether to use Ritz vectors or harmonic Ritz vectors */
 
   PetscFunctionBegin;
   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
 #if   defined(PETSC_MISSING_LAPACK_GESV)
     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
 #else
-    PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
-    if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
+    {
+      PetscBLASInt info;
+      PetscBLASInt nrhs = 1;
+      PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
+      if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
+    }
 #endif
     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
     for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
 #if defined(PETSC_MISSING_LAPACK_HSEQR)
   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
 #else
-  PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
-  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
+  {
+    PetscBLASInt info;
+    PetscBLASInt ilo = 1;
+    PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
+    if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
+  }
 #endif
   ierr = PetscFree(work);CHKERRQ(ierr);
 
 #if defined(PETSC_MISSING_LAPACK_TRSEN)
   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
 #else
-  PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
-  if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
+  {
+    PetscBLASInt info;
+    PetscReal    CondEig;         /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
+    PetscReal    CondSub;         /* estimated reciprocal condition number of the specified invariant subspace. */
+    PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
+    if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
+  }
 #endif
   ierr = PetscFree(select);CHKERRQ(ierr);
 
   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
   PetscInt       i, r     = dgmres->r;
   PetscErrorCode ierr;
-  PetscBLASInt   nrhs     = 1;
   PetscReal      alpha    = 1.0;
   PetscInt       max_neig = dgmres->max_neig;
   PetscBLASInt   br,bmax;
   PetscReal      lambda = dgmres->lambdaN;
-#if !defined(PETSC_MISSING_LAPACK_GERFS)
-  PetscReal    berr, ferr;
-  PetscBLASInt info;
-#endif
 
   PetscFunctionBegin;
   ierr = PetscBLASIntCast(r,&br);CHKERRQ(ierr);
 #if defined(PETSC_MISSING_LAPACK_GETRS)
   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
 #else
-  PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
-  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
+  {
+    PetscBLASInt info;
+    PetscBLASInt nrhs = 1;
+    PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
+    if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
+  }
 #endif
   /* Iterative refinement -- is it really necessary ?? */
   if (!WORK) {
 #if defined(PETSC_MISSING_LAPACK_GERFS)
   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
 #else
-  PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
-  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
+  {
+    PetscBLASInt info;
+    PetscReal    berr, ferr;
+    PetscBLASInt nrhs = 1;
+    PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
+    if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
+  }
 #endif
 
   for (i = 0; i < r; i++) X2[i] =  X1[i]/lambda - X2[i];
   PetscReal    *Z;             /*  orthogonal matrix of  (right) schur vectors */
   PetscReal    *work;          /* working vector */
   PetscBLASInt lwork;          /* size of the working vector */
-  PetscBLASInt info;           /* Output info from Lapack routines */
   PetscInt     *perm;          /* Permutation vector to sort eigenvalues */
   PetscReal    *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
   PetscInt     ierr;
   PetscBLASInt NbrEig = 0,nr,bm;
   PetscBLASInt *select;
   PetscBLASInt liwork, *iwork;
-#if !defined(PETSC_MISSING_LAPACK_TGSEN)
-  PetscReal    Dif[2];
-  PetscBLASInt ijob  = 2;
-  PetscBLASInt wantQ = 1, wantZ = 1;
-#endif
 
   PetscFunctionBegin;
   /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
 #if defined(PETSC_MISSING_LAPACK_GGES)
   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
 #else
-  PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
-  if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
+  {
+    PetscBLASInt info;
+    PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
+    if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
+  }
 #endif
   for (i=0; i<N; i++) {
     if (beta[i] !=0.0) {
 #if defined(PETSC_MISSING_LAPACK_TGSEN)
   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
 #else
-  PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
-  if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
+  {
+    PetscBLASInt info;
+    PetscReal    Dif[2];
+    PetscBLASInt ijob  = 2;
+    PetscBLASInt wantQ = 1, wantZ = 1;
+    PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
+    if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
+  }
 #endif
   ierr = PetscFree(select);CHKERRQ(ierr);
 
 #if defined(PETSC_MISSING_LAPACK_GETRF)
   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
 #else
-  PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
-  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
+  {
+    PetscBLASInt info;
+    PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
+    if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
+  }
 #endif
   /* Free Memory */
   ierr = PetscFree(wr);CHKERRQ(ierr);
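
The dgmres.c hunks all apply one refactoring: LAPACK temporaries such as info, nrhs, ilo, CondEig, and Dif were previously declared at function scope inside #if !defined(PETSC_MISSING_LAPACK_...) blocks that mirrored the guards around the calls; each temporary now lives in a block local to the single guarded call that uses it, so the declaration guard can never drift out of sync with the use guard and no unused variables remain when a routine is missing. A minimal sketch of the pattern, with a hypothetical routine LAPACKfoo_ standing in for GETRF/GESV/HSEQR/TRSEN/GGES/TGSEN:

  #if defined(PETSC_MISSING_LAPACK_FOO)
    SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"FOO - Lapack routine is unavailable.");
  #else
    {
      PetscBLASInt info;          /* declared only in the block that calls the routine */
      PetscBLASInt nrhs = 1;      /* per-call constants move inside as well */
      PetscStackCallBLAS("LAPACKfoo",LAPACKfoo_(&n,&nrhs,A,&lda,&info));
      if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_LIB,"Error in LAPACK routine XFOO INFO=%d",(int)info);
    }
  #endif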

File src/ksp/pc/impls/gamg/agg.c

     matA_2        = (Mat_SeqAIJ*)Gmat_2->data;
     lid_cprowID_1 = NULL;
   }
-  if (!(matA_1 && !matA_1->compressedrow.use)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"!(matA_1 && !matA_1->compressedrow.use)");
-  if (!(matB_1==0 || matB_1->compressedrow.use)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"!(matB_1==0 || matB_1->compressedrow.use)");
-  if (!(matA_2 && !matA_2->compressedrow.use)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"!(matA_2 && !matA_2->compressedrow.use)");
-  if (!(matB_2==0 || matB_2->compressedrow.use)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"!(matB_2==0 || matB_2->compressedrow.use)");
-
+  if (nloc>0) {
+    if (!(matA_1 && !matA_1->compressedrow.use)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"!(matA_1 && !matA_1->compressedrow.use)");
+    if (!(matB_1==0 || matB_1->compressedrow.use)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"!(matB_1==0 || matB_1->compressedrow.use)");
+    if (!(matA_2 && !matA_2->compressedrow.use)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"!(matA_2 && !matA_2->compressedrow.use)");
+    if (!(matB_2==0 || matB_2->compressedrow.use)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"!(matB_2==0 || matB_2->compressedrow.use)");
+  }
   /* get state of locals and selected gid for deleted */
   ierr = PetscMalloc(nloc*sizeof(NState), &lid_state);CHKERRQ(ierr);
   ierr = PetscMalloc(nloc*sizeof(PetscScalar), &lid_parent_gid);CHKERRQ(ierr);
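
The double negations in these checks are easy to misread: each one fails unless the diagonal blocks matA_1/matA_2 exist in plain (uncompressed) row format and the off-process blocks matB_1/matB_2 are either absent or compressed; on a rank that owns no rows (nloc==0) the blocks may legitimately be missing, hence the new guard. The same invariant spelled out positively, as a sketch over the same variables:

  if (nloc > 0) {
    PetscBool ok = (PetscBool)(matA_1 && !matA_1->compressedrow.use &&
                               (!matB_1 || matB_1->compressedrow.use) &&
                               matA_2 && !matA_2->compressedrow.use &&
                               (!matB_2 || matB_2->compressedrow.use));
    if (!ok) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"unexpected compressed-row layout in graph matrices");
  }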

File src/mat/impls/aij/seq/seqcusp/aijcusp.cu

       cuspstruct->nonzerorow=0;
       for (int j = 0; j<m; j++) cuspstruct->nonzerorow += ((a->i[j+1]-a->i[j])>0);
 
-      mat = new CUSPMATRIX;
       if (a->compressedrow.use) {
         m    = a->compressedrow.nrows;
         ii   = a->compressedrow.i;
         ridx = a->compressedrow.rindex;
-        mat->resize(m,A->cmap->n,a->nz);
-        mat->row_offsets.assign(ii,ii+m+1);
-        mat->column_indices.assign(a->j,a->j+a->nz);
-        mat->values.assign(a->a,a->a+a->nz);
-        cuspstruct->indices = new CUSPINTARRAYGPU;
-        cuspstruct->indices->assign(ridx,ridx+m);
+      } else {
+        /* Forcing compressed row on the GPU */
+        int k=0;
+        ierr = PetscMalloc((cuspstruct->nonzerorow+1)*sizeof(PetscInt), &ii);CHKERRQ(ierr);
+        ierr = PetscMalloc((cuspstruct->nonzerorow)*sizeof(PetscInt), &ridx);CHKERRQ(ierr);
+        ii[0]=0;
+        for (int j = 0; j<m; j++) {
+          if ((a->i[j+1]-a->i[j])>0) {
+            ii[k]  = a->i[j];
+            ridx[k]= j;
+            k++;
+          }
+        }
+        ii[cuspstruct->nonzerorow] = a->nz;
+        m = cuspstruct->nonzerorow;
+      }
+
+      /* now build matrix */
+      mat = new CUSPMATRIX;
+      mat->resize(m,A->cmap->n,a->nz);
+      mat->row_offsets.assign(ii,ii+m+1);
+      mat->column_indices.assign(a->j,a->j+a->nz);
+      mat->values.assign(a->a,a->a+a->nz);
+
+      /* convert to other formats if selected */
+      if (a->compressedrow.use || cuspstruct->format==MAT_CUSP_CSR) {
 	cuspstruct->mat = mat;
 	cuspstruct->format = MAT_CUSP_CSR;
       } else {
-        mat->resize(m,A->cmap->n,a->nz);
-        mat->row_offsets.assign(a->i,a->i+m+1);
-        mat->column_indices.assign(a->j,a->j+a->nz);
-        mat->values.assign(a->a,a->a+a->nz);
-
-	/* convert to other formats if selected */
-	if (cuspstruct->format!=MAT_CUSP_CSR) {
-	  if (cuspstruct->format==MAT_CUSP_ELL) {
-	    CUSPMATRIXELL *ellMat = new CUSPMATRIXELL(*mat);
-	    cuspstruct->mat = ellMat;
-	  } else {
-	    CUSPMATRIXDIA *diaMat = new CUSPMATRIXDIA(*mat);
-	    cuspstruct->mat = diaMat;
-	  }
-	  delete (CUSPMATRIX *) mat;
-	} else 
-	  cuspstruct->mat = mat;
+	if (cuspstruct->format==MAT_CUSP_ELL) {
+	  CUSPMATRIXELL *ellMat = new CUSPMATRIXELL(*mat);
+	  cuspstruct->mat = ellMat;
+	} else {
+	  CUSPMATRIXDIA *diaMat = new CUSPMATRIXDIA(*mat);
+	  cuspstruct->mat = diaMat;
+	}
+	delete (CUSPMATRIX*) mat;
+      }
+
+      /* assign the compressed row indices */
+      cuspstruct->indices = new CUSPINTARRAYGPU;
+      cuspstruct->indices->assign(ridx,ridx+m);
+
+      /* free the temporaries */
+      if (!a->compressedrow.use) {
+        ierr = PetscFree(ii);CHKERRQ(ierr);
+        ierr = PetscFree(ridx);CHKERRQ(ierr);
       }
       cuspstruct->tempvec = new CUSPARRAY;
       cuspstruct->tempvec->resize(m);
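
Forcing compressed rows on the GPU means the SpMV kernels never visit empty rows and tempvec needs only nonzerorow entries. A worked micro-example of the index construction above, assuming a 4-row matrix whose rows 0 and 2 are empty:

  /* a->i  = {0, 0, 3, 3, 5}   row pointers; rows 0 and 2 are empty
     keep each j with a->i[j+1] > a->i[j]:
     ii    = {0, 3, 5}         compressed pointers (nonzerorow+1 entries)
     ridx  = {1, 3}            original indices of the surviving rows
     m     = nonzerorow = 2    the device matrix stores only these rows */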
 {
   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
   PetscErrorCode ierr;
+  PetscBool      usecprow = a->compressedrow.use;
   Mat_SeqAIJCUSP *cuspstruct = (Mat_SeqAIJCUSP*)A->spptr;
   CUSPARRAY      *xarray     = NULL,*yarray=NULL,*zarray=NULL;
 
     ierr = VecCUSPGetArrayRead(yy,&yarray);CHKERRQ(ierr);
     ierr = VecCUSPGetArrayWrite(zz,&zarray);CHKERRQ(ierr);
 
-    if (a->compressedrow.use) {
+    if (usecprow) { 
+      /* use compressed row format */
       CUSPMATRIX *mat = (CUSPMATRIX*)cuspstruct->mat;
-      cusp::multiply(*mat,*xarray, *cuspstruct->tempvec);
-      thrust::for_each(
-        thrust::make_zip_iterator(
-          thrust::make_tuple(cuspstruct->tempvec->begin(),
-                             thrust::make_permutation_iterator(zarray->begin(), cuspstruct->indices->begin()))),
-        thrust::make_zip_iterator(
-          thrust::make_tuple(cuspstruct->tempvec->begin(),
-                             thrust::make_permutation_iterator(zarray->begin(),cuspstruct->indices->begin()))) + cuspstruct->tempvec->size(),
-        VecCUSPPlusEquals());
-    } else {
+      cusp::multiply(*mat,*xarray,*cuspstruct->tempvec);
+      ierr = VecSet_SeqCUSP(yy,0.0);CHKERRQ(ierr);
+      thrust::copy(cuspstruct->tempvec->begin(),cuspstruct->tempvec->end(),thrust::make_permutation_iterator(yarray->begin(),cuspstruct->indices->begin()));
+    } else { 
+
       if (cuspstruct->format==MAT_CUSP_ELL) {
 	CUSPMATRIXELL *mat = (CUSPMATRIXELL*)cuspstruct->mat;
 	cusp::multiply(*mat,*xarray,*cuspstruct->tempvec);
 	CUSPMATRIX *mat = (CUSPMATRIX*)cuspstruct->mat;
 	cusp::multiply(*mat,*xarray,*cuspstruct->tempvec);
       }
-      thrust::for_each(
-        thrust::make_zip_iterator(thrust::make_tuple(
-                                    cuspstruct->tempvec->begin(),
-                                    zarray->begin())),
-        thrust::make_zip_iterator(thrust::make_tuple(
-                                    cuspstruct->tempvec->end(),
-                                    zarray->end())),
-        VecCUSPPlusEquals());
+
+      if (zarray->size() == cuspstruct->indices->size()) {
+	thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(cuspstruct->tempvec->begin(),zarray->begin())),
+			 thrust::make_zip_iterator(thrust::make_tuple(cuspstruct->tempvec->end(),zarray->end())),
+			 VecCUSPPlusEquals());
+      } else {
+	thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(cuspstruct->tempvec->begin(),
+                             thrust::make_permutation_iterator(zarray->begin(), cuspstruct->indices->begin()))),
+			 thrust::make_zip_iterator(thrust::make_tuple(cuspstruct->tempvec->end(),
+                             thrust::make_permutation_iterator(zarray->end(),cuspstruct->indices->end()))),
+			 VecCUSPPlusEquals());
+      }
     }
     ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr);
     ierr = VecCUSPRestoreArrayRead(yy,&yarray);CHKERRQ(ierr);
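
Both multiply branches use the same Thrust idiom to move data between the compressed-length product in tempvec and the full-length vectors: a permutation iterator over the saved row indices makes the scattered elements look like one contiguous range. A stand-alone sketch of the scatter-add y[indices[i]] += t[i], assuming plain thrust::device_vector storage in place of the PETSc CUSP wrappers and distinct indices:

  #include <thrust/device_vector.h>
  #include <thrust/transform.h>
  #include <thrust/functional.h>
  #include <thrust/iterator/permutation_iterator.h>

  /* y[indices[i]] += t[i]: the permutation iterator presents the scattered
     elements of y as a contiguous sequence, so a plain transform can both
     read and write through it (the indices must be distinct). */
  static void ScatterAdd(const thrust::device_vector<double> &t,
                         const thrust::device_vector<int>    &indices,
                         thrust::device_vector<double>       &y)
  {
    thrust::transform(t.begin(),t.end(),
                      thrust::make_permutation_iterator(y.begin(),indices.begin()),
                      thrust::make_permutation_iterator(y.begin(),indices.begin()),
                      thrust::plus<double>());
  }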

File src/mat/impls/baij/seq/baij.c

     }
     b->free_a  = PETSC_TRUE;
     b->free_ij = PETSC_TRUE;
+#if defined(PETSC_THREADCOMM_ACTIVE)
+    ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
+#endif
   } else {
     b->free_a  = PETSC_FALSE;
     b->free_ij = PETSC_FALSE;
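
Zeroing the freshly allocated arrays when the threaded communicator is active is, presumably, a first-touch placement step: each thread writes the slice of a->a it will later read in MatMult, so on NUMA machines those pages are mapped next to the thread that uses them (the threaded kernels appear in baij2.c below). A hypothetical kernel sketching the idea for block size 1, where A->rmap->trstarts indexes rows directly; the actual MatZeroEntries_SeqBAIJ path may differ:

  PetscErrorCode MatZeroEntries_SeqBAIJ_1_Kernel(PetscInt thread_id,Mat A)
  {
    PetscErrorCode ierr;
    Mat_SeqBAIJ    *a        = (Mat_SeqBAIJ*)A->data;
    PetscInt       *trstarts = A->rmap->trstarts;
    PetscInt       nzstart   = a->i[trstarts[thread_id]];   /* first nonzero this thread owns */
    PetscInt       nzend     = a->i[trstarts[thread_id+1]]; /* one past its last nonzero */
    ierr = PetscMemzero(a->a+nzstart,(nzend-nzstart)*sizeof(MatScalar));CHKERRQ(ierr);
    return 0;
  }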

File src/mat/impls/baij/seq/baij2.c

 #include <petsc-private/kernels/blockinvert.h>
 #include <petscbt.h>
 #include <petscblaslapack.h>
+#if defined(PETSC_THREADCOMM_ACTIVE)
+#include <petscthreadcomm.h>
+#endif
 
 #undef __FUNCT__
 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
 /* Should check that shapes of vectors and matrices match */
 /* -------------------------------------------------------*/
 
+#if defined(PETSC_THREADCOMM_ACTIVE)
+PetscErrorCode MatMult_SeqBAIJ_1_Kernel(PetscInt thread_id,Mat A,Vec xx,Vec zz)
+{
+  PetscErrorCode    ierr;
+  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
+  PetscScalar       *z;
+  const PetscScalar *x;
+  const MatScalar   *aa;
+  PetscInt          *trstarts=A->rmap->trstarts;
+  PetscInt          n,start,end,i;
+  const PetscInt    *aj,*ai;
+  PetscScalar       sum;
+
+  ierr  = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
+  ierr  = VecGetArray(zz,&z);CHKERRQ(ierr);
+  start = trstarts[thread_id];
+  end   = trstarts[thread_id+1];
+  ai    = a->i;
+  for (i=start; i<end; i++) {
+    n   = ai[i+1] - ai[i];
+    aj  = a->j + ai[i];
+    aa  = a->a + ai[i];
+    sum = 0.0;
+    PetscSparseDensePlusDot(sum,x,aa,aj,n);
+    z[i] = sum;
+  }
+  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
+  ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
+  return 0;
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "MatMult_SeqBAIJ_1"
+PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
+{
+  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
+  PetscScalar       *z,sum;
+  const PetscScalar *x;
+  const MatScalar   *v;
+  PetscErrorCode    ierr;
+  PetscInt          mbs,i,n;
+  const PetscInt    *idx,*ii,*ridx=NULL;
+  PetscBool         usecprow=a->compressedrow.use;
+
+  PetscFunctionBegin;
+
+  if (usecprow) {
+    ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
+    ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
+    mbs  = a->compressedrow.nrows;
+    ii   = a->compressedrow.i;
+    ridx = a->compressedrow.rindex;
+    ierr = PetscMemzero(z,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
+    for (i=0; i<mbs; i++) {
+      n   = ii[i+1] - ii[i];
+      v   = a->a + ii[i];
+      idx = a->j + ii[i];
+      PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);   /* Indices for the next row (assumes same size as this one) */
+      PetscPrefetchBlock(v+1*n,1*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
+      sum = 0.0;
+      PetscSparseDensePlusDot(sum,x,v,idx,n);
+      z[ridx[i]] = sum;
+    }
+    ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
+    ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
+  } else {
+    ierr = PetscThreadCommRunKernel(PetscObjectComm((PetscObject)A),(PetscThreadKernel)MatMult_SeqBAIJ_1_Kernel,3,A,xx,zz);CHKERRQ(ierr);
+  }
+  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+#else
 #undef __FUNCT__
 #define __FUNCT__ "MatMult_SeqBAIJ_1"
 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }
+#endif
+
+#if defined(PETSC_THREADCOMM_ACTIVE)
+PetscErrorCode MatMult_SeqBAIJ_2_Kernel(PetscInt thread_id,Mat A,Vec xx,Vec zz)
+{
+  PetscErrorCode    ierr;
+  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
+  PetscScalar       *z,x1,x2,sum1,sum2;
+  const PetscScalar *x,*xb;
+  const MatScalar   *aa;
+  PetscInt          *trstarts=A->rmap->trstarts;
+  PetscInt          n,start,end,i,j;
+  const PetscInt    *aj,*ai;
+
+  ierr   = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
+  ierr   = VecGetArray(zz,&z);CHKERRQ(ierr);
+  start  = trstarts[thread_id] / 2;
+  end    = trstarts[thread_id+1] / 2;
+  ai     = a->i;
+  for (i=start; i<end; i++) {
+    n    = ai[i+1] - ai[i];
+    aj   = a->j + ai[i];
+    aa   = a->a + ai[i]*4;
+    sum1 = 0.0; sum2 = 0.0;
+    for (j=0; j<n; j++) {
+      xb = x + 2*aj[j]; x1 = xb[0]; x2 = xb[1];
+      sum1 += aa[4*j]*x1   + aa[4*j+2]*x2;
+      sum2 += aa[4*j+1]*x1 + aa[4*j+3]*x2;
+    }
+    z[2*i] = sum1; z[2*i+1] = sum2;
+  }
+  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
+  ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);