- edited description
NPB-BT - MPI
Issue #6
new
MPI
Get the src
- ssh -Y daint01
- git clone --single-branch -b npbmz https://github.com/eth-cscs/parallel-debuggers.git
- cd parallel-debuggers/npbmz.git/src/
Comments (11)
-
reporter -
reporter - attached pgi_mpi_vampir.png
- edited description
- attached gnu_mpi_vampir.png
- attached cce_mpi_vampir.png
- attached intel_mpi_vampir.png
-
reporter - edited description
-
reporter NPB-BT - Scorep/1.3 (MPI/OPENMP)
GNU
- make CLASS=A NPROCS=4 FFLAGS=-O3 F77="scorep --mpp=mpi --thread=omp ftn -fopenmp"
- export SCOREP_TOTAL_MEMORY=500000000
- export OMP_NUM_THREADS=2
- aprun -n4 -d2 ./GNU.DAINT.A.4
PGI
- make CLASS=A NPROCS=4 FFLAGS=-O3 F77="scorep --mpp=mpi --thread=omp ftn -mp"
- export SCOREP_TOTAL_MEMORY=500000000
- export OMP_NUM_THREADS=2
- aprun -n4 -d2 ./PGI.DAINT.A.4
INTEL
- make CLASS=A NPROCS=4 FFLAGS=-O3 F77="scorep --mpp=mpi --thread=omp ftn -openmp"
- export SCOREP_TOTAL_MEMORY=500000000
- export OMP_NUM_THREADS=2
- aprun -n4 -d2 ./INTEL.DAINT.A.4
- export OMP_NUM_THREADS=4
- aprun -n4 -d4 -j0 ./INTEL.DAINT.A.4
CCE
- make CLASS=A NPROCS=4 FFLAGS=-O3 F77="scorep --mpp=mpi --thread=omp ftn -homp"
- Ignore this warning:
sh: /opt/cray/cce/8.3.2/cray-binutils/x86_64-unknown-linux-gnu/bin/nm: No such file or directory
- export SCOREP_TOTAL_MEMORY=500000000
- export OMP_NUM_THREADS=4
- aprun -n4 -d4 -j0 ./CRAY.DAINT.A.4
Reports
-
reporter - attached intel_vampir.png
- attached gnu_vampir.png
- attached pgi_vampir.png
- attached intel_vampir2.png
- attached cce_vampir.png
Tracing
-
reporter - changed title to NPB-BT - MPI
-
reporter - edited description
-
reporter - edited description
-
reporter SCOREP/1.3 (MPI only)
Compile
CCE: module load PrgEnv-cray # **cce/8.2.4**
GNU: module swap PrgEnv-cray PrgEnv-gnu # **gcc/4.8.2**
PGI: module swap PrgEnv-cray PrgEnv-pgi # **pgi/14.1.0**
INTEL: module swap PrgEnv-cray PrgEnv-intel # **intel/14.0.1.106**
module load craype-accel-nvidia35 # CCE and GNU
module load cudatoolkit # PGI and INTEL
module load scorep/1.3
make clean
make CLASS=A NPROCS=16 FFLAGS=-O3 F77="scorep --mpp=mpi ftn"
Run
- cd ../bin/ # $SCRATCH
export SCOREP_ENABLE_PROFILING=true
export SCOREP_ENABLE_TRACING=false
export SCOREP_CUDA_ENABLE=no
export SCOREP_TOTAL_MEMORY=314572800
- salloc -N1
CCE: aprun -j0 -n16 ./CRAY.DAINT.A.16
GNU: aprun -j0 -n16 ./GNU.DAINT.A.16
PGI: aprun -j0 -n16 ./PGI.DAINT.A.16
INTEL: aprun -j0 -n16 ./INTEL.DAINT.A.16
- exit
NAS Parallel Benchmarks (NPB3.3-MZ-MPI) - BT-MZ MPI+OpenMP Benchmark
Reading from input file inputbt-mz.data
Number of zones: 4 x 4
Iterations: 25 dt: 0.000800
Number of active processes: 16
Use the default load factors with threads
Calculated speedup = 4.87
Time step 1
Time step 10
Time step 20
Verification being performed for class A
accuracy setting for epsilon = 0.1000000000000E-07
NITER does not match the reference value of 200
Comparison of RMS-norms of residual
FAILURE: 1 0.3656822233387E+06 0.5536703889522E+05 0.5604691719757E+01
FAILURE: 2 0.3552334550905E+05 0.5077835038405E+04 0.5995765959386E+01
FAILURE: 3 0.7032268350906E+05 0.1067391361067E+05 0.5588275498011E+01
FAILURE: 4 0.4091382037186E+05 0.6441179694972E+04 0.5351914138306E+01
FAILURE: 5 0.2235228128313E+06 0.4371926324069E+05 0.4112684804425E+01
Comparison of RMS-norms of solution error
FAILURE: 1 0.2365939164986E+05 0.6716797714343E+04 0.2522421346610E+01
FAILURE: 2 0.2058271812553E+04 0.6512687902160E+03 0.2160402960305E+01
FAILURE: 3 0.4419027575145E+04 0.1332930740128E+04 0.2315271710757E+01
FAILURE: 4 0.2764632766057E+04 0.7848302089180E+03 0.2522587095454E+01
FAILURE: 5 0.1798319782741E+05 0.5429053878818E+04 0.2312399955648E+01
Verification failed
BT-MZ Benchmark Completed.
Class = A
Size = 128x 128x 16
Iterations = 25
Time in seconds = 4.82
Total processes = 16
Total threads = 16
Mop/s total = 3792.47
Mop/s/thread = 237.03
Operation type = floating point
Verification = UNSUCCESSFUL
Version = 3.3.1
Compile date = 01 Sep 2014
Compile options:
F77 = ftn
FLINK = $(F77)
FLINK = $(F77)
F_LIB = (none)
F_INC = (none)
FFLAGS = -O1
FLINKFLAGS = $(LDFLAGS)
RAND = (none)
Please send all errors/feedbacks to:
NPB Development Team
npb@nas.nasa.gov
Reports
Sampling
- cube scorep-*/profile.cubex
Tracing
- export SCOREP_ENABLE_PROFILING=false
- export SCOREP_ENABLE_TRACING=true
- export SCOREP_TOTAL_MEMORY=314572800
- salloc -N1
- CCE: aprun -j0 -n16 ./CRAY.DAINT.A.16
- GNU: aprun -j0 -n16 ./GNU.DAINT.A.16
- PGI: aprun -j0 -n16 ./PGI.DAINT.A.16
- INTEL: aprun -j0 -n16 ./INTEL.DAINT.A.16
- exit
- vampir83 scorep-*/traces.otf2
-
reporter SCOREP/1.4.2
GNU/482
- make CLASS=A NPROCS=4 FFLAGS=-O3 F77="scorep --mpp=mpi --thread=omp ftn -fopenmp"
PROFILING
- export SCOREP_ENABLE_PROFILING=true
- export SCOREP_ENABLE_TRACING=false
- export SCOREP_TOTAL_MEMORY=314572800
- aprun -n4 -N4 -d2 -j1 ./GNU.SANTIS.A.4
- square scorep-20150820_1434_629451849381883/profile.cubex
TRACING (Vampir)
- export SCOREP_ENABLE_PROFILING=false
- export SCOREP_ENABLE_TRACING=true
- aprun -n4 -N4 -d2 -j1 ./GNU.SANTIS.A.4
- vampir850 scorep-20150820_1442_629964033662162/traces.otf2
TRACING (Scalasca)
- export SCOREP_ENABLE_PROFILING=false
- export SCOREP_ENABLE_TRACING=true
- export SCOREP_TOTAL_MEMORY=355MB
- scan -t aprun -n 4 -N 4 -d 2 -j1 ./GNU.SANTIS.A.4
S=C=A=N: Thu Aug 20 19:33:47 2015: Collect done (status=0) 7s
S=C=A=N: Thu Aug 20 19:33:47 2015: Analyze start
/opt/cray/alps/5.2.1-2.0502.9041.11.6.ari/bin/aprun -n 4 -N 4 -d 2 /apps/daint/5.2.UP02/scalasca/2.2.2/gnu482... /scorep_GNU_4p4x2_trace/traces.otf2
SCOUT Copyright (c) 1998-2015 Forschungszentrum Juelich GmbH
Copyright (c) 2009-2014 German Research School for Simulation Sciences GmbH
Analyzing experiment archive ./scorep_GNU_4p4x2_trace/traces.otf2
Opening experiment archive ... done (0.011s).
Reading definition data ... done (0.017s).
Reading event trace data ... done (3.367s).
Preprocessing ... done (1.827s).
Analyzing trace data ... done (11.385s).
Writing analysis report ... done (0.476s).
Max. memory usage : 878.684MB
Total processing time : 17.686s
Application 168399 resources: utime ~131s, stime ~3s, Rss ~899772, inblocks ~2603663, outblocks ~62910
S=C=A=N: Thu Aug 20 19:34:09 2015: Analyze done (status=0) 22s
Warning: 1.231GB of analyzed trace data retained in ./scorep_GNU_4p4x2_trace/traces!
S=C=A=N: ./scorep_GNU_4p4x2_trace complete.
real 28.97
- square scorep_GNU_4p4x2_trace
-
reporter - More documentation ?
- Log in to comment