Commits

Hong Zhang committed cf3e443 Merge

Merge branch 'hzhang/pc-redistribute-example'

Comments (0)

Files changed (4)

src/ksp/pc/examples/tutorials/ex3.c

+
+static char help[] = "Test PC redistribute on matrix with imbalance load. \n\
+                      Modified from src/ksp/ksp/examples/tutorials/ex2.c.\n\
+Input parameters include:\n\
+  -random_exact_sol : use a random exact solution vector\n\
+  -view_exact_sol   : write exact solution vector to stdout\n\
+  -n <mesh_n>       : number of mesh points in y-direction\n\n";
+/*
+Example: 
+  mpiexec -n 8 ./ex3 -n 10000 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 -log_summary 
+  mpiexec -n 8 ./ex3 -n 10000 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 -log_summary 
+*/
+
+#include <petscksp.h>
+
+#undef __FUNCT__
+#define __FUNCT__ "main"
+int main(int argc,char **args)
+{
+  Vec            x,b,u;    /* approx solution, RHS, exact solution */
+  Mat            A;        /* linear system matrix */
+  KSP            ksp;      /* linear solver context */
+  PetscRandom    rctx;     /* random number generator context */
+  PetscReal      norm;     /* norm of solution error */
+  PetscInt       i,j,Ii,J,Istart,Iend,m,n = 7,its,nloc,matdistribute=0;
+  PetscErrorCode ierr;
+  PetscBool      flg = PETSC_FALSE;
+  PetscScalar    v;
+  PetscMPIInt    rank,size;
+#if defined(PETSC_USE_LOG)
+  PetscLogStage stage;
+#endif
+
+  PetscInitialize(&argc,&args,(char*)0,help);
+  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
+  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
+  if (size < 2) SETERRQ(PETSC_COMM_WORLD,1,"This example requires at least 2 MPI processes!");
+
+  ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
+  ierr = PetscOptionsGetInt(NULL,"-matdistribute",&matdistribute,NULL);CHKERRQ(ierr);
+  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+         Compute the matrix and right-hand-side vector that define
+         the linear system, Ax = b.
+     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+  switch(matdistribute) {
+  case 1: /* very imbalanced process load for matrix A */
+    m    = (1+size)*size;
+    nloc = (rank+1)*n;
+    if (rank == size-1) { /* proc[size-1] stores all remaining rows */
+      nloc = m*n;
+      for (i=0; i<size-1; i++){
+        nloc -= (i+1)*n;
+      }
+    }
+    break;
+  default: /* proc[0] and proc[1] load much smaller row blocks, the rest processes have same loads */
+    if (rank == 0 || rank == 1) {
+      nloc = n;
+    } else {
+      nloc = 10*n; /* 10x larger load */
+    }
+    m = 2 + (size-2)*10;
+    break;
+  }
+  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
+  ierr = MatSetSizes(A,nloc,nloc,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
+  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
+  ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
+  ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
+  ierr = MatSetUp(A);CHKERRQ(ierr);
+
+  ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
+  nloc = Iend-Istart;
+  ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%D] A Istart,Iend: %D %D; nloc %D\n",rank,Istart,Iend,nloc);CHKERRQ(ierr);
+  ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD);CHKERRQ(ierr);
+
+  ierr = PetscLogStageRegister("Assembly", &stage);CHKERRQ(ierr);
+  ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
+  for (Ii=Istart; Ii<Iend; Ii++) {
+    v = -1.0; i = Ii/n; j = Ii - i*n;
+    if (i>0)   {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
+    if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
+    if (j>0)   {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
+    if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
+    v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
+  }
+  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+  ierr = PetscLogStagePop();CHKERRQ(ierr);
+
+  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
+  ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
+
+  /* Create parallel vectors. */
+  ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
+  ierr = VecSetSizes(u,nloc,PETSC_DECIDE);CHKERRQ(ierr);
+  ierr = VecSetFromOptions(u);CHKERRQ(ierr);
+  ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
+  ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
+
+  /* Set exact solution; then compute right-hand-side vector. */
+  ierr = PetscOptionsGetBool(NULL,"-random_exact_sol",&flg,NULL);CHKERRQ(ierr);
+  if (flg) {
+    ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
+    ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
+    ierr = VecSetRandom(u,rctx);CHKERRQ(ierr);
+    ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
+  } else {
+    ierr = VecSet(u,1.0);CHKERRQ(ierr);
+  }
+  ierr = MatMult(A,u,b);CHKERRQ(ierr);
+
+  /* View the exact solution vector if desired */
+  flg  = PETSC_FALSE;
+  ierr = PetscOptionsGetBool(NULL,"-view_exact_sol",&flg,NULL);CHKERRQ(ierr);
+  if (flg) {ierr = VecView(u,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
+
+  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+                Create the linear solver and set various options
+     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
+  ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
+  ierr = KSPSetTolerances(ksp,1.e-2/((m+1)*(n+1)),1.e-50,PETSC_DEFAULT,
+                          PETSC_DEFAULT);CHKERRQ(ierr);
+  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
+
+  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+                      Solve the linear system
+     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
+
+  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+                      Check solution and clean up
+     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+  ierr = VecAXPY(x,-1.0,u);CHKERRQ(ierr);
+  ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
+  ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
+  ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %G iterations %D\n",
+                     norm,its);CHKERRQ(ierr);
+
+  /* Free work space. */
+  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
+  ierr = VecDestroy(&u);CHKERRQ(ierr);  ierr = VecDestroy(&x);CHKERRQ(ierr);
+  ierr = VecDestroy(&b);CHKERRQ(ierr);  ierr = MatDestroy(&A);CHKERRQ(ierr);
+  ierr = PetscFinalize();
+  return 0;
+}

src/ksp/pc/examples/tutorials/makefile

 	-${CLINKER}  -o ex2 ex2.o ${PETSC_KSP_LIB}
 	${RM} ex2.o
 
+ex3: ex3.o  chkopts
+	-${CLINKER}  -o ex3 ex3.o ${PETSC_KSP_LIB}
+	${RM} ex3.o
+
 #------------------------------------------------------------------------------------
 runex1:
 	-@${MPIEXEC} -n 1 ./ex1 -pc_factor_shift_type positive_definite
 	-@${RM} -f ex1.tmp
 
-TESTEXAMPLES_C	   = ex1.PETSc runex1 ex1.rm
+runex3:
+	-@${MPIEXEC} -n 8 ./ex3 -n 100 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 > ex3.tmp 2>&1;	  \
+	   if (${DIFF} output/ex3_1.out ex3.tmp) then true; \
+	   else echo ${PWD} ; echo "Possible problem with with ex3_1, diffs above \n========================================="; fi; \
+	   ${RM} -f ex3.tmp
+runex3_2:
+	-@${MPIEXEC} -n 8 ./ex3 -n 100 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 > ex3.tmp 2>&1;	  \
+	   if (${DIFF} output/ex3_2.out ex3.tmp) then true; \
+	   else echo ${PWD} ; echo "Possible problem with with ex3_2, diffs above \n========================================="; fi; \
+	   ${RM} -f ex3.tmp
+
+TESTEXAMPLES_C	 = ex1.PETSc runex1 ex1.rm ex3.PETSc runex3 runex3_2 ex3.rm 
 TESTEXAMPLES_C_X = ex2.PETSc ex2.rm
 
 include ${PETSC_DIR}/conf/test

src/ksp/pc/examples/tutorials/output/ex3_1.out

+[0] A Istart,Iend: 0 100; nloc 100
+[1] A Istart,Iend: 100 200; nloc 100
+[2] A Istart,Iend: 200 1200; nloc 1000
+[3] A Istart,Iend: 1200 2200; nloc 1000
+[4] A Istart,Iend: 2200 3200; nloc 1000
+[5] A Istart,Iend: 3200 4200; nloc 1000
+[6] A Istart,Iend: 4200 5200; nloc 1000
+[7] A Istart,Iend: 5200 6200; nloc 1000
+Norm of error 2.62698e-06 iterations 98

src/ksp/pc/examples/tutorials/output/ex3_2.out

+[0] A Istart,Iend: 0 100; nloc 100
+[1] A Istart,Iend: 100 200; nloc 100
+[2] A Istart,Iend: 200 1200; nloc 1000
+[3] A Istart,Iend: 1200 2200; nloc 1000
+[4] A Istart,Iend: 2200 3200; nloc 1000
+[5] A Istart,Iend: 3200 4200; nloc 1000
+[6] A Istart,Iend: 4200 5200; nloc 1000
+[7] A Istart,Iend: 5200 6200; nloc 1000
+Norm of error 2.09751e-06 iterations 1