Commits

BarryFSmith committed bad7cb1

fixed all PCOPENMP that should be PCHMPI; same for pc_type openmp, which should be pc_type hmpi
fixed bug in PetscInitialize() for HMPI spawned processes: needed to set the PetscInitializeCalled flag

Hg-commit: 775ecf2b89e39a22b481dd7ba2844955cce151c1


Files changed (11)

include/finclude/petscpcdef.h

 #define PCPROMETHEUS 'prometheus'
 #define PCGALERKIN 'galerkin'
 #define PCEXOTIC 'exotic'
-#define PCOPENMP 'openmp'
+#define PCHMPI 'hmpi'
 #define PCSUPPORTGRAPH 'supportgraph'
 #define PCASA 'asa'
 #define PCCP 'cp'

include/petscpc.h

 #define PCPROMETHEUS      "prometheus"
 #define PCGALERKIN        "galerkin"
 #define PCEXOTIC          "exotic"
-#define PCOPENMP          "openmp"
+#define PCHMPI            "hmpi"
 #define PCSUPPORTGRAPH    "supportgraph"
 #define PCASA             "asa"
 #define PCCP              "cp"

include/private/pcimpl.h

   Mat            mat,pmat;
   Vec            diagonalscaleright,diagonalscaleleft; /* used for time integration scaling */
   PetscBool      diagonalscale;
-  PetscBool      nonzero_guess; /* used by PCKSP, PCREDUNDANT and PCOPENMP */
+  PetscBool      nonzero_guess; /* used by PCKSP, PCREDUNDANT and PCHMPI */
   PetscErrorCode (*modifysubmatrices)(PC,PetscInt,const IS[],const IS[],Mat[],void*); /* user provided routine */
   void           *modifysubmatricesP; /* context for user routine */
   void           *data;

src/ksp/ksp/examples/tutorials/makefile

 # spawn a parallel solver on 2 procs.
 # See http://www.mcs.anl.gov/petsc/petsc-as/documentation/faq.html#datafiles for how to obtain the datafiles used below
 runex10_22:
-	-@${MPIEXEC} -n 2 ./ex10 -options_left no -hmpi_merge_size 2 -pc_type openmp  -ksp_type preonly -hmpi_pc_type ksp -f0 ${DATAFILESPATH}/matrices/medium > ex10_22.tmp 2>&1;\
+	-@${MPIEXEC} -n 2 ./ex10 -options_left no -hmpi_merge_size 2 -pc_type hmpi  -ksp_type preonly -hmpi_pc_type ksp -f0 ${DATAFILESPATH}/matrices/medium > ex10_22.tmp 2>&1;\
 	   if (${DIFF} output/ex10_22.out ex10_22.tmp) then true; \
 	   else echo "Possible problem with ex10_22, diffs above"; fi; \
 	   ${RM} -f ex10_22.tmp
 runex10_23:
-	-@${MPIEXEC} -n 2 ./ex10 -options_left no -hmpi_merge_size 2 -pc_type openmp  -ksp_type preonly -hmpi_pc_type ksp -hmpi_ksp_pc_type bjacobi -hmpi_ksp_ksp_type gmres -f0 ${DATAFILESPATH}/matrices/medium > ex10_23.tmp 2>&1;\
+	-@${MPIEXEC} -n 2 ./ex10 -options_left no -hmpi_merge_size 2 -pc_type hmpi  -ksp_type preonly -hmpi_pc_type ksp -hmpi_ksp_pc_type bjacobi -hmpi_ksp_ksp_type gmres -f0 ${DATAFILESPATH}/matrices/medium > ex10_23.tmp 2>&1;\
 	   if (${DIFF} output/ex10_23.out ex10_23.tmp) then true; \
 	   else echo "Possible problem with ex10_23, diffs above"; fi; \
 	   ${RM} -f ex10_23.tmp
 
 runex10_24:
-	-@${MPIEXEC} -n 2 ./ex10 -options_left no -hmpi_merge_size 2 -pc_type openmp -hmpi_pc_type sor -f0 ${DATAFILESPATH}/matrices/medium -hmpi_ksp_monitor_short -initialguess -ksp_type gmres -ksp_monitor_short -ksp_view > ex10_24.tmp 2>&1;\
+	-@${MPIEXEC} -n 2 ./ex10 -options_left no -hmpi_merge_size 2 -pc_type hmpi -hmpi_pc_type sor -f0 ${DATAFILESPATH}/matrices/medium -hmpi_ksp_monitor_short -initialguess -ksp_type gmres -ksp_monitor_short -ksp_view > ex10_24.tmp 2>&1;\
 	   if (${DIFF} output/ex10_24.out ex10_24.tmp) then true; \
 	   else echo "Possible problem with ex10_24, diffs above"; fi; \
 	   ${RM} -f ex10_24.tmp
 
 # Start a parallel user code [on 4 nodes, assembling MPIAIJ with
 # np=4] and then spawn a parallel sub-domain-solver on each node
-# [with np=2]. This emulates mixed MPI/HMPI model [MPI between
-# nodes, HMPI within the nodes]
+# [with np=2]. This emulates mixed MPI-shared memory model [MPI between
+# nodes, MPI within the nodes]
 # See http://www.mcs.anl.gov/petsc/petsc-as/documentation/faq.html#datafiles for how to obtain the datafiles used below
 runex10_25:
-	-@${MPIEXEC} -n 8 ./ex10 -options_left no -hmpi_merge_size 2 -sub_pc_type openmp -f0 ${DATAFILESPATH}/matrices/medium -ksp_monitor> ex10_25.tmp 2>&1;\
+	-@${MPIEXEC} -n 8 ./ex10 -options_left no -hmpi_merge_size 2 -sub_pc_type hmpi -f0 ${DATAFILESPATH}/matrices/medium -ksp_monitor_short> ex10_25.tmp 2>&1;\
 	   if (${DIFF} output/ex10_25.out ex10_25.tmp) then true; \
 	   else echo "Possible problem with ex10_25, diffs above"; fi; \
 	   ${RM} -f ex10_25.tmp

src/ksp/ksp/examples/tutorials/output/ex10_24.out

+  Residual norms for hmpi_ solve.
+  0 KSP Residual norm 12.1429 
+  1 KSP Residual norm 1.54508 
+  2 KSP Residual norm 0.839491 
+  3 KSP Residual norm 0.457414 
+  4 KSP Residual norm 0.185251 
+  5 KSP Residual norm 0.0924385 
+  6 KSP Residual norm 0.0537803 
+  7 KSP Residual norm 0.028352 
+  8 KSP Residual norm 0.00760009 
+  9 KSP Residual norm 0.00294661 
+ 10 KSP Residual norm 0.00193829 
+ 11 KSP Residual norm 0.000707955 
+ 12 KSP Residual norm 0.000376235 
+ 13 KSP Residual norm 0.000202981 
+ 14 KSP Residual norm 7.25381e-05 
+  0 KSP Residual norm 14.842 
+  Residual norms for hmpi_ solve.
   0 KSP Residual norm 1.27897 
   1 KSP Residual norm 0.206458 
   2 KSP Residual norm 0.163216 
  11 KSP Residual norm 5.99255e-05 
  12 KSP Residual norm 3.94164e-05 
  13 KSP Residual norm 1.0674e-05 
-  0 KSP Residual norm 1.27897 
-  1 KSP Residual norm 0.206458 
-  2 KSP Residual norm 0.163216 
-  3 KSP Residual norm 0.0655381 
-  4 KSP Residual norm 0.0278102 
-  5 KSP Residual norm 0.00661712 
-  6 KSP Residual norm 0.003633 
-  7 KSP Residual norm 0.00171924 
-  8 KSP Residual norm 0.000494165 
-  9 KSP Residual norm 0.000310366 
- 10 KSP Residual norm 0.000122678 
- 11 KSP Residual norm 5.99255e-05 
- 12 KSP Residual norm 3.94164e-05 
- 13 KSP Residual norm 1.0674e-05 
-  0 KSP Residual norm 1.92584 
-  0 KSP Residual norm 0.664109 
-  1 KSP Residual norm 0.107204 
-  2 KSP Residual norm 0.0847506 
-  3 KSP Residual norm 0.0340308 
-  4 KSP Residual norm 0.0144405 
-  5 KSP Residual norm 0.00343554 
-  6 KSP Residual norm 0.00188651 
-  7 KSP Residual norm 0.000892832 
-  8 KSP Residual norm 0.000256605 
-  9 KSP Residual norm 0.000161189 
- 10 KSP Residual norm 6.3706e-05 
- 11 KSP Residual norm 3.1119e-05 
- 12 KSP Residual norm 2.04704e-05 
- 13 KSP Residual norm 5.5427e-06 
-  1 KSP Residual norm 3.31578e-05 
-  0 KSP Residual norm 0.322196 
-  1 KSP Residual norm 0.208119 
-  2 KSP Residual norm 0.136287 
-  3 KSP Residual norm 0.0414391 
-  4 KSP Residual norm 0.0271348 
-  5 KSP Residual norm 0.0157586 
-  6 KSP Residual norm 0.00669657 
-  7 KSP Residual norm 0.00347773 
-  8 KSP Residual norm 0.00152007 
-  9 KSP Residual norm 0.00102927 
- 10 KSP Residual norm 0.000546309 
- 11 KSP Residual norm 0.00028202 
- 12 KSP Residual norm 0.000103255 
- 13 KSP Residual norm 4.7164e-05 
- 14 KSP Residual norm 3.32956e-05 
- 15 KSP Residual norm 1.30229e-05 
- 16 KSP Residual norm 6.78885e-06 
- 17 KSP Residual norm 3.45866e-06 
- 18 KSP Residual norm 1.44967e-06 
-  2 KSP Residual norm 1.661e-10 
-KSP Object:
+  Residual norms for hmpi_ solve.
+  0 KSP Residual norm 0.818146 
+  1 KSP Residual norm 0.104102 
+  2 KSP Residual norm 0.056562 
+  3 KSP Residual norm 0.0308189 
+  4 KSP Residual norm 0.0124816 
+  5 KSP Residual norm 0.0062282 
+  6 KSP Residual norm 0.00362344 
+  7 KSP Residual norm 0.0019102 
+  8 KSP Residual norm 0.000512077 
+  9 KSP Residual norm 0.000198566 
+ 10 KSP Residual norm 0.000130618 
+ 11 KSP Residual norm 4.77006e-05 
+ 12 KSP Residual norm 2.53483e-05 
+ 13 KSP Residual norm 1.36759e-05 
+ 14 KSP Residual norm 4.88729e-06 
+  1 KSP Residual norm 0.00025953 
+  Residual norms for hmpi_ solve.
+  0 KSP Residual norm 0.290395 
+  1 KSP Residual norm 0.179956 
+  2 KSP Residual norm 0.0846513 
+  3 KSP Residual norm 0.0404771 
+  4 KSP Residual norm 0.0173691 
+  5 KSP Residual norm 0.00899778 
+  6 KSP Residual norm 0.00505029 
+  7 KSP Residual norm 0.00219384 
+  8 KSP Residual norm 0.00133311 
+  9 KSP Residual norm 0.000768785 
+ 10 KSP Residual norm 0.000375249 
+ 11 KSP Residual norm 0.000220589 
+ 12 KSP Residual norm 6.11265e-05 
+ 13 KSP Residual norm 2.9544e-05 
+ 14 KSP Residual norm 1.6586e-05 
+ 15 KSP Residual norm 5.04764e-06 
+ 16 KSP Residual norm 2.08214e-06 
+  2 KSP Residual norm 2.06012e-09 
+KSP Object: 1 MPI processes
   type: gmres
     GMRES: restart=30, using Classical (unmodified) Gram-Schmidt Orthogonalization with no iterative refinement
     GMRES: happy breakdown tolerance 1e-30
   maximum iterations=10000
   tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
   left preconditioning
-PC Object:
-  type: openmp
+  using nonzero initial guess
+  using PRECONDITIONED norm type for convergence test
+PC Object: 1 MPI processes
+  type: hmpi
     Size of solver nodes 2
     Parallel sub-solver given next
-  KSP Object:(openmp_)
+  KSP Object:  (hmpi_)   2 MPI processes  
     type: gmres
       GMRES: restart=30, using Classical (unmodified) Gram-Schmidt Orthogonalization with no iterative refinement
       GMRES: happy breakdown tolerance 1e-30
     maximum iterations=10000, initial guess is zero
     tolerances:  relative=1e-05, absolute=1e-50, divergence=10000
     left preconditioning
-  PC Object:(openmp_)
+    using PRECONDITIONED norm type for convergence test
+  PC Object:  (hmpi_)   2 MPI processes  
     type: sor
-      SOR: type = local_symmetric, iterations = 1, omega = 1
+      SOR: type = local_symmetric, iterations = 1, local iterations = 1, omega = 1
     linear system matrix = precond matrix:
-    Matrix Object:
+    Matrix Object:     2 MPI processes    
       type: mpiaij
       rows=181, cols=181
       total: nonzeros=2245, allocated nonzeros=2245
+      total number of mallocs used during MatSetValues calls =0
         using I-node (on process 0) routines: found 31 nodes, limit used is 5
   linear system matrix = precond matrix:
-  Matrix Object:
+  Matrix Object:   1 MPI processes  
     type: seqaij
     rows=181, cols=181
     total: nonzeros=2245, allocated nonzeros=2245
+    total number of mallocs used during MatSetValues calls =0
       using I-node routines: found 61 nodes, limit used is 5
 Number of iterations =   2
-Residual norm 8.35419e-08
+Residual norm 3.45716e-07

src/ksp/ksp/examples/tutorials/output/ex10_25.out

-  0 KSP Residual norm 1.430965438763e+00 
-  1 KSP Residual norm 1.564669696573e-01 
-  2 KSP Residual norm 1.339069271876e-01 
-  3 KSP Residual norm 6.484075454009e-02 
-  4 KSP Residual norm 3.615526410667e-02 
-  5 KSP Residual norm 2.818492111069e-02 
-  6 KSP Residual norm 2.479511353884e-02 
-  7 KSP Residual norm 1.033196123856e-02 
-  8 KSP Residual norm 7.790224820466e-03 
-  9 KSP Residual norm 3.832151402551e-03 
- 10 KSP Residual norm 1.548826984586e-03 
- 11 KSP Residual norm 7.942544915601e-04 
- 12 KSP Residual norm 5.452662389548e-04 
- 13 KSP Residual norm 3.250492800364e-04 
- 14 KSP Residual norm 2.142122042370e-04 
- 15 KSP Residual norm 1.376878254868e-04 
- 16 KSP Residual norm 7.424392986336e-05 
- 17 KSP Residual norm 4.881125828574e-05 
- 18 KSP Residual norm 2.516014972045e-05 
- 19 KSP Residual norm 1.435192028813e-05 
- 20 KSP Residual norm 8.640805306574e-06 
+  0 KSP Residual norm 1.43097 
+  1 KSP Residual norm 0.156467 
+  2 KSP Residual norm 0.133907 
+  3 KSP Residual norm 0.0648408 
+  4 KSP Residual norm 0.0361553 
+  5 KSP Residual norm 0.0281849 
+  6 KSP Residual norm 0.0247951 
+  7 KSP Residual norm 0.010332 
+  8 KSP Residual norm 0.00779022 
+  9 KSP Residual norm 0.00383215 
+ 10 KSP Residual norm 0.00154883 
+ 11 KSP Residual norm 0.000794254 
+ 12 KSP Residual norm 0.000545266 
+ 13 KSP Residual norm 0.000325049 
+ 14 KSP Residual norm 0.000214212 
+ 15 KSP Residual norm 0.000137688 
+ 16 KSP Residual norm 7.42439e-05 
+ 17 KSP Residual norm 4.88113e-05 
+ 18 KSP Residual norm 2.51601e-05 
+ 19 KSP Residual norm 1.43519e-05 
+ 20 KSP Residual norm 8.64081e-06 
 Number of iterations =  20
 Residual norm 0.000404315

src/ksp/pc/impls/openmp/openmp.c

 
 
 #undef __FUNCT__  
-#define __FUNCT__ "PCView_HMPI_HMPI"
+#define __FUNCT__ "PCView_HMPI_MP"
 /*
     Would like to have this simply call PCView() on the inner PC. The problem is
   that the outer comm does not live on the inside so cannot do this. Instead 
 
 /* -------------------------------------------------------------------------------------*/
 /*MC
-     PCOPENMP - Runs a preconditioner for a single process matrix across several MPI processes
+     PCHMPI - Runs a preconditioner for a single process matrix across several MPI processes
 
-$     This will usually be run with -pc_type openmp -ksp_type preonly
+$     This will usually be run with -pc_type hmpi -ksp_type preonly
 $     solver options are set with -hmpi_ksp_... and -hmpi_pc_... for example
 $     -hmpi_ksp_type cg would use cg as the Krylov method or -hmpi_ksp_monitor or
 $     -hmpi_pc_type hypre -hmpi_pc_hypre_type boomeramg
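
A minimal sketch (not part of the commit) of driving the renamed preconditioner from code rather than the command line; the helper name ConfigureHMPISolve and the cg/jacobi choices for the inner solver are illustrative only, and the KSP passed in is assumed to already have its operators set:

  #include <petscksp.h>

  /* Configure an existing KSP the way -ksp_type preonly -pc_type hmpi would;
     options with the hmpi_ prefix are picked up by the inner parallel solver. */
  PetscErrorCode ConfigureHMPISolve(KSP ksp)
  {
    PC             pc;
    PetscErrorCode ierr;

    PetscFunctionBegin;
    ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);                   /* outer KSP only applies the PC       */
    ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
    ierr = PCSetType(pc,PCHMPI);CHKERRQ(ierr);                         /* run the PC across several processes */
    ierr = PetscOptionsSetValue("-hmpi_ksp_type","cg");CHKERRQ(ierr);  /* same as giving these on the command line */
    ierr = PetscOptionsSetValue("-hmpi_pc_type","jacobi");CHKERRQ(ierr);
    ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }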

src/ksp/pc/interface/pcregis.c

   ierr = PCRegisterDynamic(PCFIELDSPLIT   ,path,"PCCreate_FieldSplit",PCCreate_FieldSplit);CHKERRQ(ierr);
   ierr = PCRegisterDynamic(PCGALERKIN     ,path,"PCCreate_Galerkin",PCCreate_Galerkin);CHKERRQ(ierr);
   ierr = PCRegisterDynamic(PCEXOTIC       ,path,"PCCreate_Exotic",PCCreate_Exotic);CHKERRQ(ierr);
-  ierr = PCRegisterDynamic(PCOPENMP       ,path,"PCCreate_HMPI",PCCreate_HMPI);CHKERRQ(ierr);
+  ierr = PCRegisterDynamic(PCHMPI         ,path,"PCCreate_HMPI",PCCreate_HMPI);CHKERRQ(ierr);
   ierr = PCRegisterDynamic(PCASA          ,path,"PCCreate_ASA",PCCreate_ASA);CHKERRQ(ierr);
   ierr = PCRegisterDynamic(PCCP           ,path,"PCCreate_CP",PCCreate_CP);CHKERRQ(ierr);
   ierr = PCRegisterDynamic(PCLSC          ,path,"PCCreate_LSC",PCCreate_LSC);CHKERRQ(ierr);

src/ksp/pc/interface/precon.c

    Notes:
     This is a weird function. Since PC's are linear operators on the right hand side they
     CANNOT use an initial guess. This function is for the "pass-through" preconditioners
-    PCKSP, PCREDUNDANT and PCOPENMP and causes the inner KSP object to use the nonzero
+    PCKSP, PCREDUNDANT and PCHMPI and causes the inner KSP object to use the nonzero
     initial guess. Not currently working for PCREDUNDANT, that has to be rewritten to use KSP.
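
At the user level this pass-through is normally exercised through the outer KSP (roughly what the -initialguess option in the ex10 runs above is for): a minimal sketch, assuming ksp, b and x already exist and x holds the guess:

    PC pc;

    ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
    ierr = PCSetType(pc,PCHMPI);CHKERRQ(ierr);                       /* a "pass-through" preconditioner        */
    ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr);  /* do not zero x before the solve         */
    ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);                          /* inner solve can see the nonzero guess  */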
 
 

src/sys/objects/mpinit.c

 $   PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time so the p CPUs 
 $   are always working on p tasks, never more than p.
 $
-$    See PCOPENMP for a PETSc preconditioner that can use this functionality
+$    See PCHMPI for a PETSc preconditioner that can use this functionality
 $
 
    For both PetscHMPISpawn() and PetscHMPIMerge() PETSC_COMM_WORLD consists of one process per "node", PETSC_COMM_LOCAL_WORLD
 $   PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time so the p CPUs 
 $   are always working on p tasks, never more than p.
 $
-$    See PCOPENMP for a PETSc preconditioner that can use this functionality
+$    See PCHMPI for a PETSc preconditioner that can use this functionality
 $
 
    For both PetscHMPISpawn() and PetscHMPIMerge() PETSC_COMM_WORLD consists of one process per "node", PETSC_COMM_LOCAL_WORLD
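
To make the layout concrete, a small driver sketch for the merge model (not from the commit; run as, e.g., mpiexec -n 4 ./main -hmpi_merge_size 2). This assumes PETSC_COMM_LOCAL_WORLD is visible from petscsys.h as these docs imply; with -hmpi_merge_size 2 only one "master" rank per 2-process node returns from PetscInitialize(), while the remaining ranks stay inside serving HMPI requests until finalization:

  #include <petscsys.h>

  int main(int argc,char **argv)
  {
    PetscErrorCode ierr;
    PetscMPIInt    wsize,lsize;

    PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);
    /* one process per "node": with -n 4 -hmpi_merge_size 2 this is 2 */
    ierr = MPI_Comm_size(PETSC_COMM_WORLD,&wsize);CHKERRQ(ierr);
    /* all processes of this node: equals the merge size, here 2 */
    ierr = MPI_Comm_size(PETSC_COMM_LOCAL_WORLD,&lsize);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_WORLD,"world size %d  local size %d\n",wsize,lsize);CHKERRQ(ierr);
    ierr = PetscFinalize();CHKERRQ(ierr);
    return 0;
  }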

src/sys/objects/pinit.c

     if (flg) {
       ierr = PetscHMPIMerge((PetscMPIInt) nodesize,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 
       if (PetscHMPIWorker) { /* if worker then never enter user code */
+        PetscInitializeCalled = PETSC_TRUE;
         ierr = PetscEnd(); 
       }
     }
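
For context (not part of the commit): PetscEnd() shuts the worker down through PetscFinalize(), which expects PetscInitializeCalled to have been set; without it the worker would not finalize cleanly, which appears to be the bug the commit message refers to. A commented sketch of the intended worker path, under that reading:

    if (flg) {                                /* presumably -hmpi_merge_size <n> was given  */
      ierr = PetscHMPIMerge((PetscMPIInt)nodesize,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
      if (PetscHMPIWorker) {                  /* worker ranks never enter user code          */
        PetscInitializeCalled = PETSC_TRUE;   /* record that initialization completed so     */
        ierr = PetscEnd();                    /* PetscFinalize() inside PetscEnd() succeeds  */
      }
    }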