papi_wrap

Issue #65 new
jg piccinali repo owner created an issue

cirra/gnu (F90)

setup

cd /users/piccinal/papi-wrap.git/
module use \
/users/piccinal/SAVESPACE.EB/cirra/eb26/easybuild/apps/modules/all
mll PAPI/5.4.1

compile

make -f makefile.gnu CXX=g++ F90=gfortran \
PAPI_PATH=$EBROOTPAPI \
PAPI_LIB_PATH=$EBROOTPAPI/lib  clean all
  • cd test/
make -f makefile.gnu F90=gfortran \
PAPI_PATH=$EBROOTPAPI \
PAPI_LIB_PATH=$EBROOTPAPI/lib clean saxpy

run

  • export CSCSPERF_EVENTS="PAPI_TOT_CYC|PAPI_TOT_INS"
  • export OMP_NUM_THREADS=1
  • ./saxpy
----------------------------------------------
   Collector init :: wall time 0.346686 seconds
 [      TOTAL ]
 [  595390457 ] PAPI_TOT_CYC
 [  800007227 ] PAPI_TOT_INS
-----------------------------------------------
   Collector saxpy :: wall time 0.125008 seconds
 [      TOTAL ]
 [  448084488 ] PAPI_TOT_CYC
 [  800002134 ] PAPI_TOT_INS
------------------------------------------------
 handles with values           0 and           1
 the saxpy loop took   
200000000 floating point operations   
100000000.50000000   
  • grep pw_ saxpy.f90
    call pw_new_collector('init', handle1)
    call pw_new_collector('saxpy', handle2)
    call pw_start_collector(handle1)
    call pw_stop_collector(handle1)
    call pw_start_collector(handle2)
    call pw_stop_collector(handle2)
    call pw_print()

CPI

cpi = PAPI_TOT_CYC/PAPI_TOT_INS
= 448084488 / 800002134 
= 0.56

cirra/gnu (C)

setup

cd /users/piccinal/papi-wrap.git/
module use \
/users/piccinal/SAVESPACE.EB/cirra/eb26/easybuild/apps/modules/all
mll PAPI/5.4.1

compile

  • cd test/
make -f makefile.gnu CC=gcc \
PAPI_PATH=$EBROOTPAPI \
PAPI_LIB_PATH=$EBROOTPAPI/lib clean dot

run

  • export CSCSPERF_EVENTS="PAPI_TOT_CYC|PAPI_TOT_INS"
  • export OMP_NUM_THREADS=1
  • ./dot
two collectors with handles 0 and 1
the sum is 3.33333e+11
------------------------------
   Collector initialize :: wall time 0.00921297 seconds
 [      TOTAL ]
 [    8266664 ] PAPI_TOT_CYC
 [    8003202 ] PAPI_TOT_INS
------------------------------
   Collector dot product :: wall time 0.00216699 seconds
 [      TOTAL ]
 [    3010844 ] PAPI_TOT_CYC
 [    6001928 ] PAPI_TOT_INS
------------------------------
  • grep pw_ dot.c
    handle1 =  pw_new_collector("initialize");
    handle2 =  pw_new_collector("dot product");
    pw_start_collector(handle1);
    pw_stop_collector(handle1);
    pw_start_collector(handle2);
    pw_stop_collector(handle2);
    pw_print();

CPI

cpi = PAPI_TOT_CYC/PAPI_TOT_INS
= 3010844 / 6001928
≈ 0.502

Comments (3)

  1. jg piccinali reporter

    Cray/XC

    src:

    • ~/reframe.git/cscs-checks/tools/profiling_and_debugging/src/F90/

    build

    module load papi-wrap/1.0-CrayGNU-18.07
    export CRAYPE_LINK_TYPE=dynamic
    make FC="ftn -D_PAPIWRAP -fopenmp -std=c++11" \
      TOOL="-L$EBROOTPAPIMINWRAP/lib -L$PAT_BUILD_PAPI_BASEDIR/lib -lpapi -lpapi_wrap -lstdc++"
    

    run

    • OMP_NUM_THREADS=1 srun -Cgpu -n1 ./jacobi
    MPI-3.1#1
     Jacobi           1 MPI process(es) with           1 OpenMP-      201307  thread(s)/process
    --------------------------------------------------------------------------------
       Collector init :: wall time 0.063961 seconds
     [      TOTAL ]
     [  135147929 ] PAPI_TOT_CYC
     [  348038802 ] PAPI_TOT_INS
    --------------------------------------------------------------------------------
       Collector jacobi :: wall time 0.531846 seconds
     [      TOTAL ]
     [ 1782792727 ] PAPI_TOT_CYC
     [ 4012856105 ] PAPI_TOT_INS
    

    _main.F90

    !> Driver of the Jacobi example, optionally instrumented with papi-wrap.
    !>
    !> When built with -D_PAPIWRAP two collectors are created: 'init' brackets
    !> the call to InitializeMatrix and 'jacobi' brackets the call to Jacobi.
    !> pw_print() is then called once, just before Finish.
    program MAIN
    #ifdef _PAPIWRAP
        use m_papi_wrap
    #endif
        use VariableDef
        use JacobiMod
        implicit none
        include 'mpif.h'

        type(JacobiData) :: myData

    #ifdef _PAPIWRAP
        ! Handles returned by pw_new_collector, one per instrumented region.
        integer :: papiw_handle1, papiw_handle2
    #endif

        ! Init:
        !   * sets default values or reads from stdin
        !   * inits MPI and OpenMP if needed
        !   * distributes MPI data, calculates MPI bounds
        call Init(myData)

    #ifdef _PAPIWRAP
        ! Collectors are registered after Init and before any measured region.
        call pw_new_collector('init', papiw_handle1)
        call pw_new_collector('jacobi', papiw_handle2)
    #endif

        if ( allocated(myData%afU) .and. allocated(myData%afF) ) then
            ! matrix init (measured by the 'init' collector)
    #ifdef _PAPIWRAP
            call pw_start_collector(papiw_handle1)
    #endif
            call InitializeMatrix(myData)
    #ifdef _PAPIWRAP
            call pw_stop_collector(papiw_handle1)
    #endif

            ! starting timer
            myData%fTimeStart = MPI_Wtime()

            ! running calculations (measured by the 'jacobi' collector)
    #ifdef _PAPIWRAP
            call pw_start_collector(papiw_handle2)
    #endif
            call Jacobi(myData)
    #ifdef _PAPIWRAP
            call pw_stop_collector(papiw_handle2)
    #endif

            ! stopping timer
            myData%fTimeStop = MPI_Wtime()

            ! error checking
            call CheckError(myData)

            ! print result summary
            call PrintResults(myData)
        else
            ! Backslash is not an escape character in standard Fortran, so a
            ! trailing "\n" would be printed literally; list-directed output
            ! already terminates the record.
            write (*,*) " Memory allocation failed ..."
        end if

        ! cleanup: pw_print() is called on both the success and failure paths
    #ifdef _PAPIWRAP
        call pw_print()
    #endif
        call Finish(myData)

    end program MAIN
    
  2. Log in to comment