Commits

Matt Knepley committed 466ffd9 Merge

Merge branch 'karlrupp/feature-snes-ex52-OpenCL' into next

* karlrupp/feature-snes-ex52-OpenCL:
SNES ex52: Added uniform parallel refinement - Added test
DMPlex: Uniform refinement does not work on uninterpolated meshes

Comments (0)

Files changed (5)

config/builder.py

                                                                {'numProcs': 1, 'args': '-dm_view -refinement_limit 0.0 -compute_function -batch -gpu -gpu_batches 2',
                                                                 'setup': './bin/pythonscripts/PetscGenerateFEMQuadrature.py 2 1 1 1 laplacian src/snes/examples/tutorials/ex52.h',
                                                                 'source': ['src/snes/examples/tutorials/ex52_integrateElementOpenCL.c'], 'requires': ['opencl']},
+                                                               # 2D Laplacian Parallel Refinement 36-37
+                                                               {'numProcs': 2, 'args': '-dm_view -interpolate -refinement_limit 0.0625 -refinement_uniform -compute_function -batch -gpu -gpu_batches 2',
+                                                                'setup': './bin/pythonscripts/PetscGenerateFEMQuadrature.py 2 1 1 1 laplacian src/snes/examples/tutorials/ex52.h',
+                                                                'source': ['src/snes/examples/tutorials/ex52_integrateElement.cu'], 'requires': ['cuda']},
+                                                               {'numProcs': 2, 'args': '-dm_view -interpolate -refinement_limit 0.0625 -refinement_uniform -compute_function -batch -gpu -gpu_batches 2',
+                                                                'setup': './bin/pythonscripts/PetscGenerateFEMQuadrature.py 2 1 1 1 laplacian src/snes/examples/tutorials/ex52.h',
+                                                                'source': ['src/snes/examples/tutorials/ex52_integrateElementOpenCL.c'], 'requires': ['opencl']},
                                                                ],
                         'src/snes/examples/tutorials/ex62':   [# 2D serial P1 tests 0-3
                                                                {'numProcs': 1, 'args': '-run_type test -refinement_limit 0.0    -bc_type dirichlet -interpolate 0 -show_initial -dm_plex_print_fem 1',

src/dm/impls/plex/plex.c

   ierr = DMPlexSetDimension(rdm, dim);CHKERRQ(ierr);
   /* Calculate number of new points of each depth */
   ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
+  if (depth != dim) SETERRQ(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Mesh must be fully interpolated for uniform refinement");
   ierr = PetscMalloc((depth+1) * sizeof(PetscInt), &depthSize);CHKERRQ(ierr);
   ierr = PetscMemzero(depthSize, (depth+1) * sizeof(PetscInt));CHKERRQ(ierr);
   ierr = CellRefinerGetSizes(cellRefiner, dm, depthSize);CHKERRQ(ierr);

src/snes/examples/tutorials/ex52.c

   PetscInt      dim;               /* The topological mesh dimension */
   PetscBool     interpolate;       /* Generate intermediate mesh elements */
   PetscReal     refinementLimit;   /* The largest allowable cell volume */
+  PetscBool     refinementUniform; /* Uniformly refine the mesh */
   char          partitioner[2048]; /* The graph partitioner */
   PetscBool     computeFunction;   /* The flag for computing a residual */
   PetscBool     computeJacobian;   /* The flag for computing a Jacobian */
   PetscErrorCode ierr;
 
   PetscFunctionBeginUser;
-  options->debug           = 0;
-  options->dim             = 2;
-  options->interpolate     = PETSC_FALSE;
-  options->refinementLimit = 0.0;
-  options->computeFunction = PETSC_FALSE;
-  options->computeJacobian = PETSC_FALSE;
-  options->batch           = PETSC_FALSE;
-  options->gpu             = PETSC_FALSE;
-  options->numBatches      = 1;
-  options->numBlocks       = 1;
-  options->op              = LAPLACIAN;
-  options->showResidual    = PETSC_TRUE;
-  options->showJacobian    = PETSC_TRUE;
+  options->debug             = 0;
+  options->dim               = 2;
+  options->interpolate       = PETSC_FALSE;
+  options->refinementLimit   = 0.0;
+  options->refinementUniform = PETSC_FALSE;
+  options->computeFunction   = PETSC_FALSE;
+  options->computeJacobian   = PETSC_FALSE;
+  options->batch             = PETSC_FALSE;
+  options->gpu               = PETSC_FALSE;
+  options->numBatches        = 1;
+  options->numBlocks         = 1;
+  options->op                = LAPLACIAN;
+  options->showResidual      = PETSC_TRUE;
+  options->showJacobian      = PETSC_TRUE;
 
   ierr = MPI_Comm_size(comm, &options->numProcs);CHKERRQ(ierr);
   ierr = MPI_Comm_rank(comm, &options->rank);CHKERRQ(ierr);
   ierr = PetscOptionsInt("-dim", "The topological mesh dimension", "ex52.c", options->dim, &options->dim, NULL);CHKERRQ(ierr);
   ierr = PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex52.c", options->interpolate, &options->interpolate, NULL);CHKERRQ(ierr);
   ierr = PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex52.c", options->refinementLimit, &options->refinementLimit, NULL);CHKERRQ(ierr);
+  ierr = PetscOptionsBool("-refinement_uniform", "Uniformly refine the mesh", "ex52.c", options->refinementUniform, &options->refinementUniform, NULL);CHKERRQ(ierr);
   ierr = PetscStrcpy(options->partitioner, "chaco");CHKERRQ(ierr);
   ierr = PetscOptionsString("-partitioner", "The graph partitioner", "ex52.c", options->partitioner, options->partitioner, 2048, NULL);CHKERRQ(ierr);
   ierr = PetscOptionsBool("-compute_function", "Compute the residual", "ex52.c", options->computeFunction, &options->computeFunction, NULL);CHKERRQ(ierr);
 #define __FUNCT__ "CreateMesh"
 PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
 {
-  PetscInt       dim             = user->dim;
-  PetscBool      interpolate     = user->interpolate;
-  PetscReal      refinementLimit = user->refinementLimit;
-  const char     *partitioner    = user->partitioner;
+  PetscInt       dim               = user->dim;
+  PetscBool      interpolate       = user->interpolate;
+  PetscReal      refinementLimit   = user->refinementLimit;
+  PetscBool      refinementUniform = user->refinementUniform;
+  const char     *partitioner      = user->partitioner;
   PetscErrorCode ierr;
 
   PetscFunctionBeginUser;
       ierr = DMDestroy(dm);CHKERRQ(ierr);
       *dm  = distributedMesh;
     }
+    /* Use regular refinement in parallel */
+    if (refinementUniform) {
+      ierr = DMPlexSetRefinementUniform(*dm, refinementUniform);CHKERRQ(ierr);
+      ierr = DMRefine(*dm, comm, &refinedMesh);CHKERRQ(ierr);
+      if (refinedMesh) {
+        ierr = DMDestroy(dm);CHKERRQ(ierr);
+        *dm  = refinedMesh;
+      }
+    }
   }
   ierr = DMSetFromOptions(*dm);CHKERRQ(ierr);
   ierr = PetscLogEventEnd(user->createMeshEvent,0,0,0,0);CHKERRQ(ierr);

src/snes/examples/tutorials/output/ex52_36.out

+Mesh in 2 dimensions:
+  0-cells: 8 8
+  1-cells: 15 15
+  2-cells: 8 8
+Labels:
+  depth: 3 strata of sizes (8, 15, 8)
+  marker: 2 strata of sizes (9, 1)
+Mesh in 2 dimensions:
+  0-cells: 23 23
+  1-cells: 54 54
+  2-cells: 32 32
+Labels:
+  marker: 2 strata of sizes (17, 1)
+  depth: 3 strata of sizes (23, 54, 32)
+GPU layout grid(1,5,1) block(3,1,1) with 2 batches
+ N_t: 3, N_cb: 2
+GPU layout grid(1,5,1) block(3,1,1) with 2 batches
+ N_t: 3, N_cb: 2
+Residual:
+Vec Object: 1 MPI processes
+  type: seq
+-0.03125
+-0.0625
+0.21875
+0.09375
+0.1875
+0.34375
+-0.125
+-0.125
+-0.125
+-0.125
+-0.0625
+-0.125
+0.1875
+-0.125
+0.4375
+-0.125
+-0.125
+-0.0625
+0.1875
+-0.125
+-0.125
+-0.0625
+-0.125
+Vec Object: 1 MPI processes
+  type: seq
+-0.15625
+-0.3125
+0.09375
+0.21875
+0.4375
+0.46875
+-0.125
+-0.125
+-0.125
+-0.0625
+-0.125
+-0.3125
+-0.125
+0.4375
+-0.125
+-0.125
+-0.3125
+-0.125
+0.4375
+-0.125
+-0.125
+-0.125
+0.4375

src/snes/examples/tutorials/output/ex52_37.out

+Mesh in 2 dimensions:
+  0-cells: 8 8
+  1-cells: 15 15
+  2-cells: 8 8
+Labels:
+  depth: 3 strata of sizes (8, 15, 8)
+  marker: 2 strata of sizes (9, 1)
+Mesh in 2 dimensions:
+  0-cells: 23 23
+  1-cells: 54 54
+  2-cells: 32 32
+Labels:
+  marker: 2 strata of sizes (17, 1)
+  depth: 3 strata of sizes (23, 54, 32)
+GPU layout grid(1,5,1) block(3,1,1) with 2 batches
+ N_t: 3, N_cb: 2
+GPU layout grid(1,5,1) block(3,1,1) with 2 batches
+ N_t: 3, N_cb: 2
+Residual:
+Vec Object: 1 MPI processes
+  type: seq
+-0.03125
+-0.0625
+0.21875
+0.09375
+0.1875
+0.34375
+-0.125
+-0.125
+-0.125
+-0.125
+-0.0625
+-0.125
+0.1875
+-0.125
+0.4375
+-0.125
+-0.125
+-0.0625
+0.1875
+-0.125
+-0.125
+-0.0625
+-0.125
+Vec Object: 1 MPI processes
+  type: seq
+-0.15625
+-0.3125
+0.09375
+0.21875
+0.4375
+0.46875
+-0.125
+-0.125
+-0.125
+-0.0625
+-0.125
+-0.3125
+-0.125
+0.4375
+-0.125
+-0.125
+-0.3125
+-0.125
+0.4375
+-0.125
+-0.125
+-0.125
+0.4375