Commits

Jeff Squyres committed 9530977

Up to SVN r28543

Comments (0)

Files changed (126)

 source tree, and therefore activates a number of developer-only
 debugging features in the Open MPI code base.  
 
-The same debugging features are activated if you build in a Mercurial
-clone (with the hidden ".hg" meta directory).
+The same debugging features are activated if you build in a Git or
+Mercurial clone (with the hidden ".git" or ".hg" meta directory).
 
 By definition, debugging builds will perform [much] slower than
 optimized builds of Open MPI.  You should *NOT* conduct timing tests
      shell$ make all install
 
 2. Use a VPATH build.  Simply build Open MPI from a different
-   directory than the source tree -- one where the .svn / .hg
+   directory than the source tree -- one where the .svn / .git / .hg
    subdirectory is not present.  For example:
 
      shell$ svn co http://svn.open-mpi.org/svn/ompi/trunk ompi
   available from that group for Windows-based use.
 - Added MPI Java bindings
 
+- Wrapper compilers now add rpath support by default to generated
+  executables on systems that support it.  This behavior can be
+  disabled via --disable-wrapper-rpath.  See note in README about ABI
+  issues when using rpath in MPI applications.
+
 
 1.7.2
 -----
   Be sure to read the description of --without-memory-manager, below;
   it may have some effect on --enable-static.
 
+--disable-wrapper-rpath
+
+  By default, the wrapper compilers (e.g., mpicc) will enable "rpath"
+  support in generated executables on systems that support it.  That
+  is, they will include a file reference to the location of Open MPI's
+  libraries in the MPI application executable itself.  This means that
+  the user does not have to set LD_LIBRARY_PATH to find Open MPI's
+  libraries (e.g., if they are installed in a location that the
+  run-time linker does not search by default).
+
+  On systems that utilize the GNU ld linker, recent enough versions
+  will actually utilize "runpath" functionality, not "rpath".  There
+  is an important difference between the two:
+
+  "rpath": the location of the Open MPI libraries is hard-coded into
+      the MPI application and cannot be overridden at run-time.
+  "runpath": the location of the Open MPI libraries is hard-coded into
+      the MPI application, but can be overridden at run-time by
+      setting the LD_LIBRARY_PATH environment variable.
+
+  For example, consider that you install Open MPI vA.B.0 and
+  compile/link your MPI application against it.  Later, you install
+  Open MPI vA.B.1 to a different installation prefix (e.g.,
+  /opt/openmpi/A.B.1 vs. /opt/openmpi/A.B.0), and you leave the old
+  installation intact.
+
+  In the rpath case, your MPI application will always use the
+  libraries from your A.B.0 installation.  In the runpath case, you
+  can set the LD_LIBRARY_PATH environment variable to point to the
+  A.B.1 installation, and then your MPI application will use those
+  libraries.  
+
+  Note that in both cases, however, if you remove the original A.B.0
+  installation and set LD_LIBRARY_PATH to point to the A.B.1
+  installation, your application will use the A.B.1 libraries.
+
+  This rpath/runpath behavior can be disabled via
+  --disable-wrapper-rpath.
+
 --enable-dlopen
   Build all of Open MPI's components as standalone Dynamic Shared
   Objects (DSO's) that are loaded at run-time.  The opposite of this
 # only requirement is that it must be entirely printable ASCII 
 # characters and have no white space.
 
-greek=
+greek=a1
 
 # If want_repo_rev=1, then the repo revision number will be included
 # in the overall Open MPI version number in some form.
         }
     }
 
+    my $ifdef_string = uc "${project}_FRAMEWORKS_H";
     open(FRAMEWORKS_OUT, ">$project/include/$project/frameworks.h");
     printf FRAMEWORKS_OUT "%s", "/*
  * This file is autogenerated by autogen.pl. Do not edit this file by hand.
  */
+#ifndef $ifdef_string
+#define $ifdef_string
+
 #include <opal/mca/base/mca_base_framework.h>
 
 $framework_decl_output
 static mca_base_framework_t *${project}_frameworks[] = {
 $framework_array_output    NULL
-};\n";
+};
+
+#endif /* $ifdef_string */\n\n";
     close(FRAMEWORKS_OUT);
 }
 
     # Debugging output
     debug_dump($mca_found);
 
+    # Save (just) the list of MCA projects in the m4 file
+    my $str;
+    foreach my $p (@$projects) {
+        my $pname = $p->{name};
+        # Check if this project is an MCA project (contains MCA framework)
+        if (exists($mca_found->{$pname})) {
+            $str .= "$p->{name}, ";
+        }
+    }
+    $str =~ s/, $//;
+    $m4 .= "\ndnl List of MCA projects found by autogen.pl
+m4_define([mca_project_list], [$str])\n";
+
     #-----------------------------------------------------------------------
 
     $m4 .= "\n$dnl_line
 push(@{$projects}, { name => "ompi", dir => "ompi", need_base => 1 })
     if (!$no_ompi_arg);
 
-# Save the list of projects in the m4 file
-my $str;
-foreach my $p (@$projects) {
-    $str .= "$p->{name}, ";
-}
-$str =~ s/, $//;
-$m4 .= "dnl List of projects found by autogen.pl
-m4_define([mca_project_list], [$str])
-
-dnl Separate m4 define for each project\n";
+$m4 .= "dnl Separate m4 define for each project\n";
 foreach my $p (@$projects) {
     $m4 .= "m4_define([project_$p->{name}], [1])\n";
 }
 verbose "==> Running autoreconf\n";
 chdir("..");
 my $cmd = "autoreconf -ivf --warnings=all,no-obsolete,no-override -I config";
+foreach my $project (@{$projects}) {
+    $cmd .= " -I $project->{dir}/config"
+        if (-d "$project->{dir}/config");
+}
 safe_system($cmd);
 
 #---------------------------------------------------------------------------

config/ompi_check_openfabrics.m4

     #
     # Unconnect Datagram (UD) based connection manager
     #
-#    AC_ARG_ENABLE([openib-udcm],
-#        [AC_HELP_STRING([--enable-openib-udcm],
-#                        [Enable datagram connection support in openib BTL (default: enabled)])], 
-#                        [enable_openib_udcm="$enableval"], [enable_openib_udcm="yes"])
+    AC_ARG_ENABLE([openib-udcm],
+        [AC_HELP_STRING([--enable-openib-udcm],
+                        [Enable datagram connection support in openib BTL (default: enabled)])], 
+                        [enable_openib_udcm="$enableval"], [enable_openib_udcm="yes"])
     # Per discussion with Ralph and Nathan, disable UDCM for now.
     # It's borked and needs some surgery to get back on its feet.
-    enable_openib_udcm=no
+    # enable_openib_udcm=no
 
     #
     # Openfabrics RDMACM

config/ompi_get_libtool_linker_flags.m4

 dnl                         University of Stuttgart.  All rights reserved.
 dnl Copyright (c) 2004-2005 The Regents of the University of California.
 dnl                         All rights reserved.
+dnl Copyright (c) 2010-2012 Cisco Systems, Inc.  All rights reserved.
 dnl $COPYRIGHT$
 dnl 
 dnl Additional copyrights may follow
 # eat any extra whitespace in CC, as libtool will do the same
 tmpCC=`echo $CC | sed -e 's/\//\\\\\//g'`
 output=`echo $output | sed -e "s/^$tmpCC//"`
-eval "set $output"
 extra_ldflags=
-while test -n "[$]1"; do
-    case "[$]1" in
+for arg in $output ; do
+    case "$arg" in
     *.libs/bar*) ;;
     bar*) ;;
     -I*) ;;
     *.so) ;;
     *.a) ;;
     *)
-	extra_ldflags="$extra_ldflags [$]1"
+	extra_ldflags="$extra_ldflags $arg"
 	;;
     esac
     shift
     AC_MSG_RESULT([no extra flags])
 fi
 
+#
+# Now do something similar in order to capture the rpath flags: re-run
+# the link, but give the libtool --rpath argument.  Then capture the
+# difference between this output and the previous output.  Do this
+# separately from the above tests to ensure that we don't accidentally
+# remove -R if it's needed for rpath.
+#
+
+WRAPPER_RPATH_SUPPORT=disabled
+AS_IF([test "x$enable_wrapper_rpath" = "xyes"],
+      [AC_MSG_CHECKING([for libtool-supplied rpath arguments])
+       no_rpath_output=$output
+
+       cmd="$libtool --dry-run --mode=link --tag=CC $CC -rpath /ompi-bogus-test-dir bar.lo libfoo.la -o bar $extra_flags"
+       ompi_check_linker_flags_work yes
+
+       # eat any extra whitespace in CC, as libtool will do the same
+       tmpCC=`echo $CC | sed -e 's/\//\\\\\//g'`
+       output=`echo $output | sed -e "s/^$tmpCC//"`
+
+       rpath_args=
+       for rp in $output ; do
+           found=0
+           for nrp in $no_rpath_output ; do
+               AS_IF([test "$rp" = "$nrp"], [found=1])
+           done
+
+           # If we didn't find it, then it must be an rpath argument.
+           # Ensure to replace /ompi-bogus-test-dir with ${libdir} so
+           # that the wrapper can do proper replacement, later.
+           AS_IF([test "$found" = "0"], 
+                 [rpath_args="$rpath_args `echo $rp | sed -e 's@/ompi-bogus-test-dir@\@{libdir}@'`"])
+       done
+
+       # If there were no flags necessary, then we really aren't doing
+       # anything to enable rpath, so let's not claim that we are.
+       AS_IF([test "`echo $rpath_args`" = ""],
+             [rpath_args=
+              enable_wrapper_rpath=no
+              WRAPPER_RPATH_SUPPORT=unnecessary
+              msg="no extra flags"],
+             [wrapper_extra_ldflags="$wrapper_extra_ldflags $rpath_args"
+              WRAPPER_RPATH_SUPPORT=rpath
+              msg=$rpath_args])
+       AC_MSG_RESULT([$msg])
+      ])
+
+# We don't need to be in the subdir any more
 cd ..
-rm -rf conftest.$$])dnl
+rm -rf conftest.$$
+
+AS_IF([test "x$enable_wrapper_rpath" = "xyes"],
+      [
+       # Now that we have the rpath flags, check to see if the linker
+       # supports the DT_RUNPATH flags via --enable-new-dtags (a GNU
+       # ld-specific option).  These flags are more social than
+       # DT_RPATH -- they can be overridden by LD_LIBRARY_PATH (where
+       # a regular DT_RPATH cannot).
+       AC_MSG_CHECKING([if linker supports RUNPATH (vs. RPATH)])
+       LDFLAGS_save=$LDFLAGS
+       LDFLAGS="$LDFLAGS $rpath_args -Wl,--enable-new-dtags"
+       AC_LANG_PUSH([C])
+       AC_LINK_IFELSE([AC_LANG_PROGRAM([], [return 7;])],
+                      [msg=yes
+                       WRAPPER_RPATH_SUPPORT=runpath
+                       wrapper_extra_ldflags="$wrapper_extra_ldflags -Wl,--enable-new-dtags"],
+                      [msg=no])
+       AC_LANG_POP([C])
+       LDFLAGS=$LDFLAGS_save
+       AC_MSG_RESULT([$msg])
+      ])
+])dnl

config/opal_setup_wrappers.m4

 dnl Copyright (c) 2004-2005 The Regents of the University of California.
 dnl                         All rights reserved.
 dnl Copyright (c) 2006-2010 Oracle and/or its affiliates.  All rights reserved.
-dnl Copyright (c) 2009      Cisco Systems, Inc.  All rights reserved.
+dnl Copyright (c) 2009-2010 Cisco Systems, Inc.  All rights reserved.
 dnl $COPYRIGHT$
 dnl 
 dnl Additional copyrights may follow
                                 [Extra flags to add to LIBS when using wrapper compilers])])
     AS_IF([test "$with_wrapper_libs" = "yes" -o "$with_wrapper_libs" = "no"],
           [AC_MSG_ERROR([--with-wrapper-libs must have an argument.])])
+
+    AC_MSG_CHECKING([if want wrapper compiler rpath support])
+    AC_ARG_ENABLE([wrapper-rpath],
+                  [AS_HELP_STRING([--enable-wrapper-rpath],
+                  [enable rpath support in the wrapper compilers (default=no)])])
+    AS_IF([test "$enable_wrapper_rpath" != "no"], [enable_wrapper_rpath=yes])
+    AC_MSG_RESULT([$enable_wrapper_rpath])
 ])
 
 
        AC_DEFINE_UNQUOTED(WRAPPER_EXTRA_LIBS, "$OMPI_WRAPPER_EXTRA_LIBS",
            [Additional LIBS to pass through the wrapper compilers])
     ])
+
+    AC_DEFINE_UNQUOTED(WRAPPER_RPATH_SUPPORT, "$WRAPPER_RPATH_SUPPORT",
+        [Whether the wrapper compilers add rpath flags by default])
 ])

ompi/contrib/vt/vt/ChangeLog

+5.14.4openmpi
+	- fixed build issues due to API changes in MPI-3
+	- removed non-posix call to length(array) in AWK script for generating
+	  the C MPI wrapper functions
+	- fixed potential buffer overflow when reading the filter file
+	- enabled access to CUPTI counters for CUDA tracing via CUPTI
+	- enabled GPU memory usage tracing independent of the CUDA API
+	- enabled recording of CUDA synchronization and implicit synchronization in 
+	  blocking CUDA memory copies for CUDA tracing via CUPTI
+	- enabled recording of synchronous peer-to-peer CUDA memory copies for CUDA
+	  tracing via CUPTI
+	- consider CUDA data transfers as not idle for option 'pure_idle'
+	- fixed identification of the CUDA device ID for CUDA tracing via CUPTI
+	- fixed region filtering for applications using the CUDA runtime API wrapper
+	- compiler wrappers: add path to mpi.h to the PDT parser command and
+	  preprocessor flags
+
 5.14.3openmpi
 	- updated version of internal OTF to 1.12.3openmpi
 	  (see extlib/otf/ChangeLog)

ompi/contrib/vt/vt/VERSION

-5.14.3openmpi
+5.14.4openmpi

ompi/contrib/vt/vt/config/m4/acinclude.mpi.m4

 	AC_ARG_WITH(fmpi-inc-dir,
 		AC_HELP_STRING([--with-fmpi-inc-dir=FMPIINCDIR],
 		[give the path for Fortran MPI-include files, default: MPIINCDIR]),
-	[FMPIINCDIR="-I$withval/"],
-	[
-		FMPIINCDIR=$MPIINCDIR
-		AS_IF([test x"$FMPIINCDIR" = x], [FMPIINCDIR="-I/usr/include"])
-	])
+	[FMPIINCDIR="-I$withval/"], [FMPIINCDIR="$MPIINCDIR"])
 
 	AC_ARG_WITH(mpi-lib-dir,
 		AC_HELP_STRING([--with-mpi-lib-dir=MPILIBDIR],
 
 		AC_CHECK_PROGS(MPICC, mpicc hcc mpcc_r mpcc mpxlc_r mpxlc mpixlc_r mpixlc cmpicc mpiicc, $CC)
 
-dnl		check for mpi.h, if MPICC was not found
+dnl		check for mpi.h, using *CC*
 
-		AS_IF([test x"$MPICC" = x"$CC" -a x"$inside_openmpi" = "xno"],
+		AS_IF([test x"$inside_openmpi" = "xno"],
 		[
-			sav_CPPFLAGS=$CPPFLAGS
-			CPPFLAGS="$CPPFLAGS $MPIINCDIR"
-			AC_CHECK_HEADER([mpi.h], [],
+dnl			guess the MPI include directory based on MPICC's pathname
+
+			mpiincdir_guessed="no"
+			AS_IF([test x"$MPICC" != x"$CC" -a x"$MPIINCDIR" = x],
 			[
-				AC_MSG_NOTICE([error: no mpi.h found; check path for MPI package first...])
-				mpi_error="yes"
+				mpicc_pathname="`which $MPICC 2>/dev/null`"
+				AS_IF([test x"$mpicc_pathname" != x],
+				[
+					MPIINCDIR="-I`dirname $mpicc_pathname`/../include"
+					mpiincdir_guessed="yes"
+				],
+				[
+					AC_MSG_NOTICE([error: $MPICC not found; check path for MPI package first...])
+					mpi_error="yes"
+				])
 			])
-			CPPFLAGS=$sav_CPPFLAGS
+
+dnl			check for mpi.h; print a warning message if MPIINCDIR was guessed, otherwise trigger an error
+
+			AS_IF([test x"$mpi_error" = "xno"],
+			[
+				sav_CPPFLAGS=$CPPFLAGS
+				CPPFLAGS="$CPPFLAGS $MPIINCDIR"
+				AC_CHECK_HEADER([mpi.h],
+				[
+					AS_IF([test x"$mpiincdir_guessed" = "xyes" -a x"$FMPIINCDIR" = x],
+					[FMPIINCDIR=$MPIINCDIR])
+				],
+				[
+					AS_IF([test x"$mpiincdir_guessed" = "xyes"],
+					[
+						AC_MSG_WARN([could not determine the MPI include directory based on $MPICC; use the '--with-mpi-inc-dir' option to specify it...])
+						MPIINCDIR=
+					],
+					[
+						AC_MSG_NOTICE([error: no mpi.h found; check path for MPI package first...])
+						mpi_error="yes"
+					])
+				])
+			])
 		])
 
 dnl		check for MPICXX

ompi/contrib/vt/vt/config/mpigen/c_dont_generate.txt

 # conflicting argument types with SGI-MPT (noticed on version 1.26)
 MPI_Grequest_complete
 
+# conflicting argument types with SGI-MPT (noticed on version 2.06)
+MPI_Add_error_string
+
 # conflicting argument types with Platform MPI (noticed on version 7.1)
 MPI_Info_delete
 MPI_Info_get

ompi/contrib/vt/vt/config/mpigen/mk_c_wrapper.sh.in

 ##
 # @configure_input@
 #
-# make wrapper.c
+# make vtlib/vt_mpiwrap.gen.c
 ##
 
+# exit the script if any statement returns a non-true return value
+set -e
+
+
+
 export SRCDIR=@top_vt_srcdir@/config/mpigen
 
 have_mpi2_thread=@VT_MPIGEN_HAVE_MPI2_THREAD@
   
   if (NF > 2) {
     for (i=3; i<=NF; i++) {
-      split(\$i,typeandpara," ")
-      type[i-2]=typeandpara[1]
-      para[i-2]=typeandpara[2]
+      n=split(\$i,typeandpara," ")
+      j=1
       if (i > 3) printf ", "
+      if (n == 3) printf "%s ",typeandpara[j++]
+      type[i-2]=typeandpara[j++]
+      para[i-2]=typeandpara[j]
       printf "%s %s",type[i-2],para[i-2]
     }
   }

ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in

 ##
 # @configure_input@
 #
-# make fortran_wrapper.c
+# make vtlib/vt_fmpiwrap.gen.c
 ##
 
+# exit the script if any statement returns a non-true return value
+set -e
+
+
+
 export SRCDIR=@top_vt_srcdir@/config/mpigen
 
 export COMM_CONVERT=@VT_MPIGEN_HAVE_FC_CONV_COMM@
     -e 's/(/,/' \
     -e 's/);//' \
     -e 's/, /,/g' \
-    -e 's/,$//'
+    -e 's/,$//' \
+    -e 's/CONST//g'
 done
 
 cat <<End-of-File >$tmp.c

ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in

 ##
 # @configure_input@
 #
-# make vt_mpireg.[c|h]
+# make vtlib/vt_mpireg.gen.[c|h]
 ##
 
+# exit the script if any statement returns a non-true return value
+set -e
+
+
+
 export SRCDIR=@top_vt_srcdir@/config/mpigen
 
 have_mpi2_thread=@VT_MPIGEN_HAVE_MPI2_THREAD@

ompi/contrib/vt/vt/config/mpigen/mpi2_1sided.h

  *
  **/
 
-VT_MPI_INT MPI_Accumulate(void* origin_addr_CLASS_BUFFER, VT_MPI_INT origin_count, MPI_Datatype origin_datatype, VT_MPI_INT target_rank, MPI_Aint target_disp, VT_MPI_INT target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win);
+VT_MPI_INT MPI_Accumulate(CONST void* origin_addr_CLASS_BUFFER, VT_MPI_INT origin_count, MPI_Datatype origin_datatype, VT_MPI_INT target_rank, MPI_Aint target_disp, VT_MPI_INT target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win);
 VT_MPI_INT MPI_Get(void* origin_addr_CLASS_BUFFER, VT_MPI_INT origin_count, MPI_Datatype origin_datatype, VT_MPI_INT target_rank, MPI_Aint target_disp, VT_MPI_INT target_count, MPI_Datatype target_datatype, MPI_Win win);
-VT_MPI_INT MPI_Put(void* origin_addr_CLASS_BUFFER, VT_MPI_INT origin_count, MPI_Datatype origin_datatype, VT_MPI_INT target_rank, MPI_Aint target_disp, VT_MPI_INT target_count, MPI_Datatype target_datatype, MPI_Win win);
+VT_MPI_INT MPI_Put(CONST void* origin_addr_CLASS_BUFFER, VT_MPI_INT origin_count, MPI_Datatype origin_datatype, VT_MPI_INT target_rank, MPI_Aint target_disp, VT_MPI_INT target_count, MPI_Datatype target_datatype, MPI_Win win);
 VT_MPI_INT MPI_Win_complete(MPI_Win win);
 VT_MPI_INT MPI_Win_create(void* base, MPI_Aint size, VT_MPI_INT disp_unit, MPI_Info info, MPI_Comm comm, MPI_Win* win_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Win_fence(VT_MPI_INT assert, MPI_Win win);

ompi/contrib/vt/vt/config/mpigen/mpi2_extcoll.h

  *
  **/
 
-VT_MPI_INT MPI_Alltoallw(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype* sendtypes_CLASS_ARRAY_IN_sendcounts, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype* recvtypes_CLASS_ARRAY_IN_recvcounts, MPI_Comm comm); /*COLL_ALL2ALL*/
-VT_MPI_INT MPI_Exscan(void* sendbuf_CLASS_BUFFER, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Alltoallw(CONST void* sendbuf_CLASS_BUFFER, CONST VT_MPI_INT* sendcounts, CONST VT_MPI_INT* sdispls, CONST MPI_Datatype* sendtypes_CLASS_ARRAY_IN_sendcounts, void* recvbuf_CLASS_BUFFER, CONST VT_MPI_INT* recvcounts, CONST VT_MPI_INT* rdispls, CONST MPI_Datatype* recvtypes_CLASS_ARRAY_IN_recvcounts, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Exscan(CONST void* sendbuf_CLASS_BUFFER, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/

ompi/contrib/vt/vt/config/mpigen/mpi2_file.h

  *
  **/
 
-VT_MPI_INT MPI_File_open(MPI_Comm comm, char* filename_CLASS_SINGLE_IN, VT_MPI_INT amode, MPI_Info info, MPI_File* fh_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_open(MPI_Comm comm, CONST char* filename_CLASS_SINGLE_IN, VT_MPI_INT amode, MPI_Info info, MPI_File* fh_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_close(MPI_File* fh_CLASS_SINGLE_IO);
-VT_MPI_INT MPI_File_delete(char* filename_CLASS_SINGLE_IN, MPI_Info info);
+VT_MPI_INT MPI_File_delete(CONST char* filename_CLASS_SINGLE_IN, MPI_Info info);
 VT_MPI_INT MPI_File_set_size(MPI_File fh, MPI_Offset size);
 VT_MPI_INT MPI_File_preallocate(MPI_File fh, MPI_Offset size);
 VT_MPI_INT MPI_File_get_size(MPI_File fh, MPI_Offset* size_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_get_amode(MPI_File fh, VT_MPI_INT* amode);
 VT_MPI_INT MPI_File_set_info(MPI_File fh, MPI_Info info);
 VT_MPI_INT MPI_File_get_info(MPI_File fh, MPI_Info* info_used_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, char* datarep_CLASS_SINGLE_IN, MPI_Info info);
+VT_MPI_INT MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, CONST char* datarep_CLASS_SINGLE_IN, MPI_Info info);
 VT_MPI_INT MPI_File_get_view(MPI_File fh, MPI_Offset* disp_CLASS_SINGLE_OUT, MPI_Datatype* etype_CLASS_SINGLE_OUT, MPI_Datatype* filetype_CLASS_SINGLE_OUT, char* datarep_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read_at(MPI_File fh, MPI_Offset offset, void* buf, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write_at(MPI_File fh, MPI_Offset offset, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write_at(MPI_File fh, MPI_Offset offset, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read_all(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write_all(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write(MPI_File fh, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write_all(MPI_File fh, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_iread(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_iwrite(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_iwrite(MPI_File fh, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_seek(MPI_File fh, MPI_Offset offset, VT_MPI_INT whence);
 VT_MPI_INT MPI_File_get_position(MPI_File fh, MPI_Offset* offset_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, MPI_Offset* disp_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read_shared(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write_shared(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write_shared(MPI_File fh, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_iread_shared(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_iwrite_shared(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_iwrite_shared(MPI_File fh, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Request* request_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read_ordered(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write_ordered(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write_ordered(MPI_File fh, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, VT_MPI_INT whence);
 VT_MPI_INT MPI_File_get_position_shared(MPI_File fh, MPI_Offset* offset_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
 VT_MPI_INT MPI_File_read_at_all_end(MPI_File fh, void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
-VT_MPI_INT MPI_File_write_at_all_end(MPI_File fh, void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
+VT_MPI_INT MPI_File_write_at_all_end(MPI_File fh, CONST void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read_all_begin(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
 VT_MPI_INT MPI_File_read_all_end(MPI_File fh, void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write_all_begin(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
-VT_MPI_INT MPI_File_write_all_end(MPI_File fh, void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write_all_begin(MPI_File fh, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
+VT_MPI_INT MPI_File_write_all_end(MPI_File fh, CONST void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_read_ordered_begin(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
 VT_MPI_INT MPI_File_read_ordered_end(MPI_File fh, void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_File_write_ordered_begin(MPI_File fh, void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
-VT_MPI_INT MPI_File_write_ordered_end(MPI_File fh, void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_File_write_ordered_begin(MPI_File fh, CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype);
+VT_MPI_INT MPI_File_write_ordered_end(MPI_File fh, CONST void* buf, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_get_type_extent(MPI_File fh, MPI_Datatype datatype, MPI_Aint* extent_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_File_set_atomicity(MPI_File fh, VT_MPI_INT flag);
 VT_MPI_INT MPI_File_get_atomicity(MPI_File fh, VT_MPI_INT* flag);
 VT_MPI_INT MPI_File_sync(MPI_File fh);
-VT_MPI_INT MPI_Register_datarep(char* datarep_CLASS_SINGLE_IN, MPI_Datarep_conversion_function* read_conversion_fn, MPI_Datarep_conversion_function* write_conversion_fn, MPI_Datarep_extent_function* dtype_file_extent_fn, void* extra_state);
+VT_MPI_INT MPI_Register_datarep(CONST char* datarep_CLASS_SINGLE_IN, MPI_Datarep_conversion_function* read_conversion_fn, MPI_Datarep_conversion_function* write_conversion_fn, MPI_Datarep_extent_function* dtype_file_extent_fn, void* extra_state);

ompi/contrib/vt/vt/config/mpigen/mpi2_proc.h

  *
  **/
 
-VT_MPI_INT MPI_Close_port(char* port_name_CLASS_SINGLE_IN);
-VT_MPI_INT MPI_Comm_accept(char* port_name_CLASS_SINGLE_IN, MPI_Info info, VT_MPI_INT root, MPI_Comm comm, MPI_Comm* newcomm_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Comm_connect(char* port_name_CLASS_SINGLE_IN, MPI_Info info, VT_MPI_INT root, MPI_Comm comm, MPI_Comm* newcomm_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Close_port(CONST char* port_name_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Comm_accept(CONST char* port_name_CLASS_SINGLE_IN, MPI_Info info, VT_MPI_INT root, MPI_Comm comm, MPI_Comm* newcomm_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Comm_connect(CONST char* port_name_CLASS_SINGLE_IN, MPI_Info info, VT_MPI_INT root, MPI_Comm comm, MPI_Comm* newcomm_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Comm_disconnect(MPI_Comm* comm_CLASS_SINGLE_IO);
 VT_MPI_INT MPI_Comm_get_parent(MPI_Comm* parent_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Comm_join(VT_MPI_INT fd, MPI_Comm* intercomm_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Comm_spawn(char* command_CLASS_SINGLE_IN, char** argv, VT_MPI_INT maxprocs, MPI_Info info, VT_MPI_INT root, MPI_Comm comm, MPI_Comm* intercomm_CLASS_SINGLE_OUT, VT_MPI_INT* array_of_errcodes);
-VT_MPI_INT MPI_Comm_spawn_multiple(VT_MPI_INT count, char** array_of_commands, char*** array_of_argv, VT_MPI_INT* array_of_maxprocs, MPI_Info* array_of_info_CLASS_ARRAY_IN_count, VT_MPI_INT root, MPI_Comm comm, MPI_Comm* intercomm_CLASS_SINGLE_OUT, VT_MPI_INT* array_of_errcodes);
-VT_MPI_INT MPI_Lookup_name(char* service_name_CLASS_SINGLE_IN, MPI_Info info, char* port_name_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Comm_spawn(const char* command_CLASS_SINGLE_IN, char** argv, VT_MPI_INT maxprocs, MPI_Info info, VT_MPI_INT root, MPI_Comm comm, MPI_Comm* intercomm_CLASS_SINGLE_OUT, VT_MPI_INT* array_of_errcodes);
+VT_MPI_INT MPI_Comm_spawn_multiple(VT_MPI_INT count, char** array_of_commands, char*** array_of_argv, CONST VT_MPI_INT* array_of_maxprocs, CONST MPI_Info* array_of_info_CLASS_ARRAY_IN_count, VT_MPI_INT root, MPI_Comm comm, MPI_Comm* intercomm_CLASS_SINGLE_OUT, VT_MPI_INT* array_of_errcodes);
+VT_MPI_INT MPI_Lookup_name(CONST char* service_name_CLASS_SINGLE_IN, MPI_Info info, char* port_name_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Open_port(MPI_Info info, char* port_name_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Publish_name(char* service_name_CLASS_SINGLE_IN, MPI_Info info, char* port_name_CLASS_SINGLE_IN);
-VT_MPI_INT MPI_Unpublish_name(char* service_name_CLASS_SINGLE_IN, MPI_Info info, char* port_name_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Publish_name(CONST char* service_name_CLASS_SINGLE_IN, MPI_Info info, CONST char* port_name_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Unpublish_name(CONST char* service_name_CLASS_SINGLE_IN, MPI_Info info, CONST char* port_name_CLASS_SINGLE_IN);

ompi/contrib/vt/vt/config/mpigen/mpi2_standard.h

 VT_MPI_INT MPI_File_set_errhandler(MPI_File file, MPI_Errhandler errhandler);
 VT_MPI_INT MPI_Finalized(VT_MPI_INT* flag);
 VT_MPI_INT MPI_Free_mem(void* base);
-VT_MPI_INT MPI_Get_address(void* location, MPI_Aint* address_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Get_address(CONST void* location, MPI_Aint* address_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Info_create(MPI_Info* info_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Info_delete(MPI_Info info, char* key_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Info_delete(MPI_Info info, CONST char* key_CLASS_SINGLE_IN);
 VT_MPI_INT MPI_Info_dup(MPI_Info info, MPI_Info* newinfo_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Info_free(MPI_Info* info_CLASS_SINGLE_IO);
-VT_MPI_INT MPI_Info_get(MPI_Info info, char* key_CLASS_SINGLE_IN, VT_MPI_INT valuelen, char* value_CLASS_SINGLE_OUT, VT_MPI_INT* flag);
+VT_MPI_INT MPI_Info_get(MPI_Info info, CONST char* key_CLASS_SINGLE_IN, VT_MPI_INT valuelen, char* value_CLASS_SINGLE_OUT, VT_MPI_INT* flag);
 VT_MPI_INT MPI_Info_get_nkeys(MPI_Info info, VT_MPI_INT* nkeys);
 VT_MPI_INT MPI_Info_get_nthkey(MPI_Info info, VT_MPI_INT n, char* key_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Info_get_valuelen(MPI_Info info, char* key_CLASS_SINGLE_IN, VT_MPI_INT* valuelen, VT_MPI_INT* flag);
-VT_MPI_INT MPI_Info_set(MPI_Info info, char* key_CLASS_SINGLE_IN, char* value_CLASS_SINGLE_IN);
-VT_MPI_INT MPI_Pack_external(char* datarep_CLASS_SINGLE_IN, void* inbuf, VT_MPI_INT incount, MPI_Datatype datatype, void* outbuf, MPI_Aint outsize, MPI_Aint* position_CLASS_SINGLE_IO);
-VT_MPI_INT MPI_Pack_external_size(char* datarep_CLASS_SINGLE_IN, VT_MPI_INT incount, MPI_Datatype datatype, MPI_Aint* size_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Info_get_valuelen(MPI_Info info, CONST char* key_CLASS_SINGLE_IN, VT_MPI_INT* valuelen, VT_MPI_INT* flag);
+VT_MPI_INT MPI_Info_set(MPI_Info info, CONST char* key_CLASS_SINGLE_IN, CONST char* value_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Pack_external(CONST char* datarep_CLASS_SINGLE_IN, CONST void* inbuf, VT_MPI_INT incount, MPI_Datatype datatype, void* outbuf, MPI_Aint outsize, MPI_Aint* position_CLASS_SINGLE_IO);
+VT_MPI_INT MPI_Pack_external_size(CONST char* datarep_CLASS_SINGLE_IN, VT_MPI_INT incount, MPI_Datatype datatype, MPI_Aint* size_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Request_get_status(MPI_Request request, VT_MPI_INT* flag, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Type_create_darray(VT_MPI_INT size, VT_MPI_INT rank, VT_MPI_INT ndims, VT_MPI_INT* array_of_gsizes, VT_MPI_INT* array_of_distribs, VT_MPI_INT* array_of_dargs, VT_MPI_INT* array_of_psizes, VT_MPI_INT order, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Type_create_hindexed(VT_MPI_INT count, VT_MPI_INT* array_of_blocklengths, MPI_Aint* array_of_displacements_CLASS_ARRAY_IN_count, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Type_create_darray(VT_MPI_INT size, VT_MPI_INT rank, VT_MPI_INT ndims, CONST VT_MPI_INT* array_of_gsizes, CONST VT_MPI_INT* array_of_distribs, CONST VT_MPI_INT* array_of_dargs, CONST VT_MPI_INT* array_of_psizes, VT_MPI_INT order, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Type_create_hindexed(VT_MPI_INT count, CONST VT_MPI_INT* array_of_blocklengths, CONST MPI_Aint* array_of_displacements_CLASS_ARRAY_IN_count, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_create_hvector(VT_MPI_INT count, VT_MPI_INT blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Type_create_indexed_block(VT_MPI_INT count, VT_MPI_INT blocklength, VT_MPI_INT* array_of_displacements, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Type_create_indexed_block(VT_MPI_INT count, VT_MPI_INT blocklength, CONST VT_MPI_INT* array_of_displacements, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Type_create_struct(VT_MPI_INT count, VT_MPI_INT* array_of_blocklengths, MPI_Aint* array_of_displacements_CLASS_ARRAY_IN_count, MPI_Datatype* array_of_types_CLASS_ARRAY_IN_count, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Type_create_subarray(VT_MPI_INT ndims, VT_MPI_INT* array_of_sizes, VT_MPI_INT* array_of_subsizes, VT_MPI_INT* array_of_starts, VT_MPI_INT order, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Type_create_struct(VT_MPI_INT count, CONST VT_MPI_INT* array_of_blocklengths, CONST MPI_Aint* array_of_displacements_CLASS_ARRAY_IN_count, CONST MPI_Datatype* array_of_types_CLASS_ARRAY_IN_count, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Type_create_subarray(VT_MPI_INT ndims, CONST VT_MPI_INT* array_of_sizes, CONST VT_MPI_INT* array_of_subsizes, CONST VT_MPI_INT* array_of_starts, VT_MPI_INT order, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_get_extent(MPI_Datatype datatype, MPI_Aint* lb_CLASS_SINGLE_OUT, MPI_Aint* extent_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint* true_lb_CLASS_SINGLE_OUT, MPI_Aint* true_extent_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Unpack_external(char* datarep_CLASS_SINGLE_IN, void* inbuf, MPI_Aint insize, MPI_Aint* position_CLASS_SINGLE_IO, void* outbuf, VT_MPI_INT outcount, MPI_Datatype datatype);
+VT_MPI_INT MPI_Unpack_external(CONST char* datarep_CLASS_SINGLE_IN, CONST void* inbuf, MPI_Aint insize, MPI_Aint* position_CLASS_SINGLE_IO, void* outbuf, VT_MPI_INT outcount, MPI_Datatype datatype);
 VT_MPI_INT MPI_Win_create_errhandler(MPI_Win_errhandler_fn* function, MPI_Errhandler* errhandler_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler* errhandler_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler);
 /* External Interfaces */
 VT_MPI_INT MPI_Add_error_class(VT_MPI_INT* errorclass);
 VT_MPI_INT MPI_Add_error_code(VT_MPI_INT errorclass, VT_MPI_INT* errorcode);
-VT_MPI_INT MPI_Add_error_string(VT_MPI_INT errorcode, char* string_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Add_error_string(VT_MPI_INT errorcode, CONST char* string_CLASS_SINGLE_IN);
 VT_MPI_INT MPI_Comm_call_errhandler(MPI_Comm comm, VT_MPI_INT errorcode);
 VT_MPI_INT MPI_Comm_create_keyval(MPI_Comm_copy_attr_function* comm_copy_attr_fn, MPI_Comm_delete_attr_function* comm_delete_attr_fn, VT_MPI_INT* comm_keyval, void* extra_state);
 VT_MPI_INT MPI_Comm_delete_attr(MPI_Comm comm, VT_MPI_INT comm_keyval);
 VT_MPI_INT MPI_Comm_get_attr(MPI_Comm comm, VT_MPI_INT comm_keyval, void* attribute_val, VT_MPI_INT* flag);
 VT_MPI_INT MPI_Comm_get_name(MPI_Comm comm, char* comm_name_CLASS_SINGLE_OUT, VT_MPI_INT* resultlen);
 VT_MPI_INT MPI_Comm_set_attr(MPI_Comm comm, VT_MPI_INT comm_keyval, void* attribute_val);
-VT_MPI_INT MPI_Comm_set_name(MPI_Comm comm, char* comm_name_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Comm_set_name(MPI_Comm comm, CONST char* comm_name_CLASS_SINGLE_IN);
 VT_MPI_INT MPI_Grequest_complete(MPI_Request request);
 VT_MPI_INT MPI_Grequest_start(MPI_Grequest_query_function* query_fn, MPI_Grequest_free_function* free_fn, MPI_Grequest_cancel_function* cancel_fn, void* extra_state, MPI_Request* request_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Status_set_cancelled(MPI_Status* status_CLASS_SINGLE_IO, VT_MPI_INT flag);
 VT_MPI_INT MPI_Type_get_envelope(MPI_Datatype datatype, VT_MPI_INT* num_integers, VT_MPI_INT* num_addresses, VT_MPI_INT* num_datatypes, VT_MPI_INT* combiner);
 VT_MPI_INT MPI_Type_get_name(MPI_Datatype type, char* type_name_CLASS_SINGLE_OUT, VT_MPI_INT* resultlen);
 VT_MPI_INT MPI_Type_set_attr(MPI_Datatype type, VT_MPI_INT type_keyval, void* attribute_val);
-VT_MPI_INT MPI_Type_set_name(MPI_Datatype type, char* type_name_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Type_set_name(MPI_Datatype type, CONST char* type_name_CLASS_SINGLE_IN);
 VT_MPI_INT MPI_Win_call_errhandler(MPI_Win win, VT_MPI_INT errorcode);
 VT_MPI_INT MPI_Win_create_keyval(MPI_Win_copy_attr_function* win_copy_attr_fn, MPI_Win_delete_attr_function* win_delete_attr_fn, VT_MPI_INT* win_keyval, void* extra_state);
 VT_MPI_INT MPI_Win_delete_attr(MPI_Win win, VT_MPI_INT win_keyval);
 VT_MPI_INT MPI_Win_get_attr(MPI_Win win, VT_MPI_INT win_keyval, void* attribute_val, VT_MPI_INT* flag);
 VT_MPI_INT MPI_Win_get_name(MPI_Win win, char* win_name_CLASS_SINGLE_OUT, VT_MPI_INT* resultlen);
 VT_MPI_INT MPI_Win_set_attr(MPI_Win win, VT_MPI_INT win_keyval, void* attribute_val);
-VT_MPI_INT MPI_Win_set_name(MPI_Win win, char* win_name_CLASS_SINGLE_IN);
+VT_MPI_INT MPI_Win_set_name(MPI_Win win, CONST char* win_name_CLASS_SINGLE_IN);
 
 /* Language Bindings */
 VT_MPI_INT MPI_Type_create_f90_complex(VT_MPI_INT p, VT_MPI_INT r, MPI_Datatype* newtype_CLASS_SINGLE_OUT);

ompi/contrib/vt/vt/config/mpigen/mpi_standard.h

  **/
 
 VT_MPI_INT MPI_Abort(MPI_Comm comm, VT_MPI_INT errorcode);
-VT_MPI_INT MPI_Address(void* location, MPI_Aint* address_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Allgather(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
-VT_MPI_INT MPI_Allgatherv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* displs, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
-VT_MPI_INT MPI_Allreduce(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
-VT_MPI_INT MPI_Alltoall(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
-VT_MPI_INT MPI_Alltoallv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Address(CONST void* location, MPI_Aint* address_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Allgather(CONST void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Allgatherv(CONST void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, CONST VT_MPI_INT* recvcounts, CONST VT_MPI_INT* displs, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Allreduce(CONST void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Alltoall(CONST void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Alltoallv(CONST void* sendbuf_CLASS_BUFFER, CONST VT_MPI_INT* sendcounts, CONST VT_MPI_INT* sdispls, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, CONST VT_MPI_INT* recvcounts, CONST VT_MPI_INT* rdispls, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
 VT_MPI_INT MPI_Attr_delete(MPI_Comm comm, VT_MPI_INT keyval);
 VT_MPI_INT MPI_Attr_get(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val, VT_MPI_INT* flag);
 VT_MPI_INT MPI_Attr_put(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val);
 VT_MPI_INT MPI_Barrier(MPI_Comm comm); /*COLL_BARRIER*/
 VT_MPI_INT MPI_Bcast(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
 VT_MPI_INT MPI_Bsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
-VT_MPI_INT MPI_Bsend_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Bsend_init(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Buffer_attach(void* buf, VT_MPI_INT size);
 VT_MPI_INT MPI_Buffer_detach(void* buf, VT_MPI_INT* size);
 VT_MPI_INT MPI_Cancel(MPI_Request* request_CLASS_SINGLE_IN);
 VT_MPI_INT MPI_Cart_coords(MPI_Comm comm, VT_MPI_INT rank, VT_MPI_INT maxdims, VT_MPI_INT* coords);
-VT_MPI_INT MPI_Cart_create(MPI_Comm comm_old, VT_MPI_INT ndims, VT_MPI_INT* dims, VT_MPI_INT* periods, VT_MPI_INT reorder, MPI_Comm* comm_cart_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Cart_create(MPI_Comm comm_old, VT_MPI_INT ndims, CONST VT_MPI_INT* dims, CONST VT_MPI_INT* periods, VT_MPI_INT reorder, MPI_Comm* comm_cart_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Cart_get(MPI_Comm comm, VT_MPI_INT maxdims, VT_MPI_INT* dims, VT_MPI_INT* periods, VT_MPI_INT* coords);
-VT_MPI_INT MPI_Cart_map(MPI_Comm comm, VT_MPI_INT ndims, VT_MPI_INT* dims, VT_MPI_INT* periods, VT_MPI_INT* newrank);
-VT_MPI_INT MPI_Cart_rank(MPI_Comm comm, VT_MPI_INT* coords, VT_MPI_INT* rank);
+VT_MPI_INT MPI_Cart_map(MPI_Comm comm, VT_MPI_INT ndims, CONST VT_MPI_INT* dims, CONST VT_MPI_INT* periods, VT_MPI_INT* newrank);
+VT_MPI_INT MPI_Cart_rank(MPI_Comm comm, CONST VT_MPI_INT* coords, VT_MPI_INT* rank);
 VT_MPI_INT MPI_Cart_shift(MPI_Comm comm, VT_MPI_INT direction, VT_MPI_INT disp, VT_MPI_INT* rank_source, VT_MPI_INT* rank_dest);
-VT_MPI_INT MPI_Cart_sub(MPI_Comm comm, VT_MPI_INT* remain_dims, MPI_Comm* newcomm_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Cart_sub(MPI_Comm comm, CONST VT_MPI_INT* remain_dims, MPI_Comm* newcomm_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Cartdim_get(MPI_Comm comm, VT_MPI_INT* ndims);
 VT_MPI_INT MPI_Comm_compare(MPI_Comm comm1, MPI_Comm comm2, VT_MPI_INT* _result);
 VT_MPI_INT MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm* newcomm_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Error_class(VT_MPI_INT errorcode, VT_MPI_INT* errorclass);
 VT_MPI_INT MPI_Error_string(VT_MPI_INT errorcode, char* string_CLASS_SINGLE_OUT, VT_MPI_INT* resultlen);
 VT_MPI_INT MPI_Finalize(void);
-VT_MPI_INT MPI_Gather(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ALL2ONE*/
-VT_MPI_INT MPI_Gatherv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* displs, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ALL2ONE*/
-VT_MPI_INT MPI_Get_count(MPI_Status* status_CLASS_SINGLE_IN, MPI_Datatype datatype, VT_MPI_INT* count);
-VT_MPI_INT MPI_Get_elements(MPI_Status* status_CLASS_SINGLE_IN, MPI_Datatype datatype, VT_MPI_INT* count);
+VT_MPI_INT MPI_Gather(CONST void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ALL2ONE*/
+VT_MPI_INT MPI_Gatherv(CONST void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, CONST VT_MPI_INT* recvcounts, CONST VT_MPI_INT* displs, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ALL2ONE*/
+VT_MPI_INT MPI_Get_count(CONST MPI_Status* status_CLASS_SINGLE_IN, MPI_Datatype datatype, VT_MPI_INT* count);
+VT_MPI_INT MPI_Get_elements(CONST MPI_Status* status_CLASS_SINGLE_IN, MPI_Datatype datatype, VT_MPI_INT* count);
 VT_MPI_INT MPI_Get_processor_name(char* name_CLASS_SINGLE_OUT, VT_MPI_INT* resultlen);
 VT_MPI_INT MPI_Get_version(VT_MPI_INT* version, VT_MPI_INT* subversion);
-VT_MPI_INT MPI_Graph_create(MPI_Comm comm_old, VT_MPI_INT nnodes, VT_MPI_INT* index, VT_MPI_INT* edges, VT_MPI_INT reorder, MPI_Comm* comm_graph_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Graph_create(MPI_Comm comm_old, VT_MPI_INT nnodes, CONST VT_MPI_INT* index, CONST VT_MPI_INT* edges, VT_MPI_INT reorder, MPI_Comm* comm_graph_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Graph_get(MPI_Comm comm, VT_MPI_INT maxindex, VT_MPI_INT maxedges, VT_MPI_INT* index, VT_MPI_INT* edges);
-VT_MPI_INT MPI_Graph_map(MPI_Comm comm, VT_MPI_INT nnodes, VT_MPI_INT* index, VT_MPI_INT* edges, VT_MPI_INT* newrank);
+VT_MPI_INT MPI_Graph_map(MPI_Comm comm, VT_MPI_INT nnodes, CONST VT_MPI_INT* index, CONST VT_MPI_INT* edges, VT_MPI_INT* newrank);
 VT_MPI_INT MPI_Graph_neighbors(MPI_Comm comm, VT_MPI_INT rank, VT_MPI_INT maxneighbors, VT_MPI_INT* neighbors);
 VT_MPI_INT MPI_Graph_neighbors_count(MPI_Comm comm, VT_MPI_INT rank, VT_MPI_INT* nneighbors);
 VT_MPI_INT MPI_Graphdims_get(MPI_Comm comm, VT_MPI_INT* nnodes, VT_MPI_INT* nedges);
 VT_MPI_INT MPI_Group_compare(MPI_Group group1,MPI_Group group2, VT_MPI_INT* _result);
 VT_MPI_INT MPI_Group_difference(MPI_Group group1, MPI_Group group2, MPI_Group* newgroup_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Group_excl(MPI_Group group, VT_MPI_INT n, VT_MPI_INT* ranks, MPI_Group* newgroup_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Group_excl(MPI_Group group, VT_MPI_INT n, CONST VT_MPI_INT* ranks, MPI_Group* newgroup_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Group_free(MPI_Group* group_CLASS_SINGLE_IO);
-VT_MPI_INT MPI_Group_incl(MPI_Group group, VT_MPI_INT n, VT_MPI_INT* ranks, MPI_Group* newgroup_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Group_incl(MPI_Group group, VT_MPI_INT n, CONST VT_MPI_INT* ranks, MPI_Group* newgroup_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Group_intersection(MPI_Group group1, MPI_Group group2, MPI_Group* newgroup_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Group_range_excl(MPI_Group group, VT_MPI_INT n, VT_MPI_INT ranges[][3], MPI_Group* newgroup_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Group_range_incl(MPI_Group group, VT_MPI_INT n, VT_MPI_INT ranges[][3], MPI_Group* newgroup_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Group_rank(MPI_Group group, VT_MPI_INT* rank);
 VT_MPI_INT MPI_Group_size(MPI_Group group, VT_MPI_INT* size);
-VT_MPI_INT MPI_Group_translate_ranks(MPI_Group group1, VT_MPI_INT n, VT_MPI_INT* ranks1, MPI_Group group2, VT_MPI_INT* ranks2);
+VT_MPI_INT MPI_Group_translate_ranks(MPI_Group group1, VT_MPI_INT n, CONST VT_MPI_INT* ranks1, MPI_Group group2, VT_MPI_INT* ranks2);
 VT_MPI_INT MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group* newgroup_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Ibsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Ibsend(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Init(VT_MPI_INT* argc, char*** argv);
 VT_MPI_INT MPI_Initialized(VT_MPI_INT* flag);
 VT_MPI_INT MPI_Intercomm_create(MPI_Comm local_comm, VT_MPI_INT local_leader, MPI_Comm peer_comm, VT_MPI_INT remote_leader, VT_MPI_INT tag, MPI_Comm* newintercomm_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Intercomm_merge(MPI_Comm intercomm, VT_MPI_INT high, MPI_Comm* newintracomm_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Iprobe(VT_MPI_INT source, VT_MPI_INT tag, MPI_Comm comm, VT_MPI_INT* flag, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Irecv(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT source, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Irsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Isend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Issend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Irsend(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Isend(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Issend(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Keyval_create(MPI_Copy_function* copy_fn, MPI_Delete_function* delete_fn, VT_MPI_INT* keyval, void* extra_state);
 VT_MPI_INT MPI_Keyval_free(VT_MPI_INT* keyval);
 VT_MPI_INT MPI_Op_create(MPI_User_function* function, VT_MPI_INT commute, MPI_Op* op_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Op_free( MPI_Op* op_CLASS_SINGLE_IO);
-VT_MPI_INT MPI_Pack(void* inbuf_CLASS_BUFFER, VT_MPI_INT incount, MPI_Datatype datatype, void* outbuf, VT_MPI_INT outsize, VT_MPI_INT* position,  MPI_Comm comm);
+VT_MPI_INT MPI_Pack(CONST void* inbuf_CLASS_BUFFER, VT_MPI_INT incount, MPI_Datatype datatype, void* outbuf, VT_MPI_INT outsize, VT_MPI_INT* position,  MPI_Comm comm);
 VT_MPI_INT MPI_Pack_size(VT_MPI_INT incount, MPI_Datatype datatype, MPI_Comm comm, VT_MPI_INT* size);
 VT_MPI_INT MPI_Pcontrol(VT_MPI_INT level, ...);
 VT_MPI_INT MPI_Probe(VT_MPI_INT source, VT_MPI_INT tag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Recv(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT source, VT_MPI_INT tag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Recv_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT source, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Reduce(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, VT_MPI_INT root, MPI_Comm comm); /*COLL_ALL2ONE*/
-VT_MPI_INT MPI_Reduce_scatter(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Reduce(CONST void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, VT_MPI_INT root, MPI_Comm comm); /*COLL_ALL2ONE*/
+VT_MPI_INT MPI_Reduce_scatter(CONST void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, CONST VT_MPI_INT* recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
 VT_MPI_INT MPI_Request_free(MPI_Request* request_CLASS_SINGLE_IO);
-VT_MPI_INT MPI_Rsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
-VT_MPI_INT MPI_Rsend_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Scan(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
-VT_MPI_INT MPI_Scatter(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
-VT_MPI_INT MPI_Scatterv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
-VT_MPI_INT MPI_Send(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
-VT_MPI_INT MPI_Send_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Sendrecv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, VT_MPI_INT dest, VT_MPI_INT sendtag, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT source, VT_MPI_INT recvtag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Rsend(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
+VT_MPI_INT MPI_Rsend_init(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Scan(CONST void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Scatter(CONST void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
+VT_MPI_INT MPI_Scatterv(CONST void* sendbuf_CLASS_BUFFER, CONST VT_MPI_INT* sendcounts, CONST VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
+VT_MPI_INT MPI_Send(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
+VT_MPI_INT MPI_Send_init(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Sendrecv(CONST void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, VT_MPI_INT dest, VT_MPI_INT sendtag, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT source, VT_MPI_INT recvtag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Sendrecv_replace(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT sendtag, VT_MPI_INT source, VT_MPI_INT recvtag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Ssend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
-VT_MPI_INT MPI_Ssend_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Ssend(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
+VT_MPI_INT MPI_Ssend_init(CONST void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Start(MPI_Request* request_CLASS_SINGLE_IO);
 VT_MPI_INT MPI_Startall(VT_MPI_INT count, MPI_Request* array_of_requests_CLASS_ARRAY_IO_count);
 VT_MPI_INT MPI_Test(MPI_Request* request_CLASS_SINGLE_IO, VT_MPI_INT* flag, MPI_Status* status_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Test_cancelled(MPI_Status* status_CLASS_SINGLE_IN, VT_MPI_INT* flag);
+VT_MPI_INT MPI_Test_cancelled(CONST MPI_Status* status_CLASS_SINGLE_IN, VT_MPI_INT* flag);
 VT_MPI_INT MPI_Testall(VT_MPI_INT count, MPI_Request* array_of_requests_CLASS_ARRAY_IO_count, VT_MPI_INT* flag, MPI_Status* array_of_statuses_CLASS_ARRAY_OUT_count);
 VT_MPI_INT MPI_Testany(VT_MPI_INT count, MPI_Request* array_of_requests_CLASS_ARRAY_IO_count, VT_MPI_INT* index_CLASS_ARRAYINDEX_OUT_SINGLE_CONDITION_flag, VT_MPI_INT* flag, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Testsome(VT_MPI_INT incount, MPI_Request* array_of_requests_CLASS_ARRAY_IO_incount, VT_MPI_INT* outcount, VT_MPI_INT* array_of_indices_CLASS_ARRAYINDEX_OUT_ARRAY_outcount, MPI_Status* array_of_statuses_CLASS_ARRAY_OUT_outcount);
 VT_MPI_INT MPI_Type_contiguous(VT_MPI_INT count, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_extent(MPI_Datatype datatype, MPI_Aint* extent_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_free(MPI_Datatype* datatype_CLASS_SINGLE_IO);
-VT_MPI_INT MPI_Type_hindexed(VT_MPI_INT count, VT_MPI_INT* array_of_blocklengths, MPI_Aint* array_of_displacements_CLASS_ARRAY_IN_count, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Type_hindexed(VT_MPI_INT count, CONST VT_MPI_INT* array_of_blocklengths, CONST MPI_Aint* array_of_displacements_CLASS_ARRAY_IN_count, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_hvector(VT_MPI_INT count, VT_MPI_INT blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Type_indexed(VT_MPI_INT count, VT_MPI_INT* array_of_blocklengths, VT_MPI_INT* array_of_displacements, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Type_indexed(VT_MPI_INT count, CONST VT_MPI_INT* array_of_blocklengths, CONST VT_MPI_INT* array_of_displacements, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_lb(MPI_Datatype datatype, MPI_Aint* displacement_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_size(MPI_Datatype datatype, VT_MPI_INT* size);
-VT_MPI_INT MPI_Type_struct(VT_MPI_INT count, VT_MPI_INT* array_of_blocklengths, MPI_Aint* array_of_displacements_CLASS_ARRAY_IN_count, MPI_Datatype* array_of_types_CLASS_ARRAY_IN_count, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
+VT_MPI_INT MPI_Type_struct(VT_MPI_INT count, CONST VT_MPI_INT* array_of_blocklengths, CONST MPI_Aint* array_of_displacements_CLASS_ARRAY_IN_count, CONST MPI_Datatype* array_of_types_CLASS_ARRAY_IN_count, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_ub(MPI_Datatype datatype, MPI_Aint* displacement_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Type_vector(VT_MPI_INT count, VT_MPI_INT blocklength, VT_MPI_INT stride, MPI_Datatype oldtype, MPI_Datatype* newtype_CLASS_SINGLE_OUT);
-VT_MPI_INT MPI_Unpack(void* inbuf, VT_MPI_INT insize, VT_MPI_INT* position, void* outbuf_CLASS_BUFFER, VT_MPI_INT outcount, MPI_Datatype datatype, MPI_Comm comm);
+VT_MPI_INT MPI_Unpack(CONST void* inbuf, VT_MPI_INT insize, VT_MPI_INT* position, void* outbuf_CLASS_BUFFER, VT_MPI_INT outcount, MPI_Datatype datatype, MPI_Comm comm);
 VT_MPI_INT MPI_Wait(MPI_Request* request_CLASS_SINGLE_IO, MPI_Status* status_CLASS_SINGLE_OUT);
 VT_MPI_INT MPI_Waitall(VT_MPI_INT count, MPI_Request* array_of_requests_CLASS_ARRAY_IO_count, MPI_Status* array_of_statuses_CLASS_ARRAY_OUT_count);
 VT_MPI_INT MPI_Waitany(VT_MPI_INT count, MPI_Request* array_of_requests_CLASS_ARRAY_IO_count, VT_MPI_INT* index_CLASS_ARRAYINDEX_OUT_SINGLE_NOCONDITION, MPI_Status* status_CLASS_SINGLE_OUT);

ompi/contrib/vt/vt/configure.ac

 AC_CHECK_SIZEOF(int, 4)
 AC_CHECK_SIZEOF(long, 8)
 AC_CHECK_SIZEOF(mode_t, 4)
+AC_CHECK_SIZEOF(size_t, 8)
 SIZEOF_LONG=$ac_cv_sizeof_long
 AC_SUBST(SIZEOF_LONG)
 

ompi/contrib/vt/vt/doc/UserManual.html

 <P>
 
 <P>
-<B><BIG CLASS="XHUGE">VampirTrace 5.14.3&nbsp;User Manual</BIG></B>
+<B><BIG CLASS="XHUGE">VampirTrace 5.14.4&nbsp;User Manual</BIG></B>
 <BR>
 <BR>
 <BR>
 <P>
 After a successful tracing run, VampirTrace writes all collected data to a  
 trace file in the Open Trace Format (OTF)<A NAME="tex2html1"
-  HREF="#foot1601"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1614"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
 As a result, the information is available for post-mortem analysis and 
 visualization by various tools. 
 Most notably, VampirTrace provides the input data for the Vampir analysis  
 and visualization tool<A NAME="tex2html2"
-  HREF="#foot1602"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1615"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. 
 
 <P>
 VampirTrace is included in OpenMPI&nbsp;1.3 and later versions.
 If not disabled explicitly, VampirTrace is built automatically when installing
 OpenMPI<A NAME="tex2html3"
-  HREF="#foot1603"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1616"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
 
 <P>
   The names in between may contain wildcards as ``?'', ``*'', and ``#''; each entry gets a new line.
   The lists end with <TT>END[_FILE]_&lt;INCLUDE|EXCLUDE&gt;_LIST</TT>. For further information on selective 
   profiling have a look at the TAU documentation<A NAME="tex2html4"
-  HREF="#foot1627"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1640"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
   To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
 <PRE>
 The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to 
 instrument the application during runtime (binary instrumentation), by using 
 Dyninst<A NAME="tex2html5"
-  HREF="#foot1628"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1641"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
 Recompiling is not necessary for this kind of instrumentation,
 but relinking:
   VampirTrace is also capable of tracing calls to third-party libraries, which come with
   at least one C header file even without the library's source code. If VampirTrace was
   built with support for library tracing (the CTool library<A NAME="tex2html6"
-  HREF="#foot1629"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1642"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>  is required), the tool <TT>vtlibwrapgen</TT> can be used to
   generate a wrapper library to intercept each call to the actual library functions.
   This wrapper library can be linked to the application or used in combination with the
 <TD ALIGN="LEFT">Interval between two successive synchronization phases in s.</TD>
 <TD ALIGN="LEFT">120</TD>
 </TR>
+<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_GPUTRACE"></A><TT>VT_GPUTRACE</TT></TD>
+<TD ALIGN="LEFT">Comma-separated list of GPU tracing options. <BR>
+	&#8658; Section&nbsp;<A HREF="#sec:cuda">4.5</A></TD>
+<TD ALIGN="LEFT">no</TD>
+</TR>
 <TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_IOLIB_PATHNAME"></A><TT>VT_IOLIB_PATHNAME</TT></TD>
 <TD ALIGN="LEFT">Provides an alternative library to use for LIBC I/O calls.
 	&#8658; Section&nbsp;<A HREF="#sec:io_calls">4.8</A></TD>
 	<BR></TD>
 <TD></TD>
 </TR>
+<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_CUPTI_METRICS"></A><TT>VT_CUPTI_METRICS</TT></TD>
+<TD ALIGN="LEFT">Specify CUDA hardware counter metrics (CUPTI events) to be recorded with trace events 
+	as a colon/VT_METRICS_SEP-separated list of names.
+	<BR>
+	&#8658; Section&nbsp;<A HREF="#sec:cuda">4.5</A></TD>
+<TD ALIGN="LEFT">-</TD>
+</TR>
+<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_CUPTI_EVENTS_SAMPLING"></A><TT>VT_CUPTI_EVENTS_SAMPLING</TT></TD>
+<TD ALIGN="LEFT">Sample CUDA hardware counters during the execution of a kernel.
+	<BR>
+	&#8658; Section&nbsp;<A HREF="#sec:cuda">4.5</A></TD>
+<TD ALIGN="LEFT">no</TD>
+</TR>
 <TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_METRICS"></A><TT>VT_METRICS</TT></TD>
 <TD ALIGN="LEFT">Specify counter metrics to be recorded with trace events as a
 	colon/VT_METRICS_SEP-separated list of names.
 
 <UL>
 <LI>CLAPACK <A NAME="tex2html7"
-  HREF="#foot1639"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1652"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
 </LI>
 <LI>AMD ACML
 <TR><TD ALIGN="LEFT"><TT>idle</TT></TD>
 <TD ALIGN="LEFT">GPU compute idle time</TD>
 </TR>
+<TR><TD ALIGN="LEFT"><TT>pure_idle</TT></TD>
+<TD ALIGN="LEFT">GPU idle time - considering data transfers as not idle</TD>
+</TR>
+<TR><TD ALIGN="LEFT">&nbsp;</TD>
+<TD ALIGN="LEFT">(acts as <TT>idle</TT> for CUDA runtime API wrapper)</TD>
+</TR>
 <TR><TD ALIGN="LEFT"><TT>memcpy</TT></TD>
 <TD ALIGN="LEFT">CUDA memory copies</TD>
 </TR>
+<TR><TD ALIGN="LEFT"><TT>sync</TT></TD>
+<TD ALIGN="LEFT">enable recording of synchronization for tracing via CUPTI</TD>
+</TR>
 <TR><TD ALIGN="LEFT"><TT>stream_reuse</TT></TD>
 <TD ALIGN="LEFT">force reusing of CUDA streams after cudaStreamDestroy()</TD>
 </TR>
 <P>
 </DD>
 <DT></DT>
-<DD><TT>VT_CUDATRACE_SYNC=[0|1|2|<SPAN  CLASS="textbf">3</SPAN>]</TT>
-         <SMALL CLASS="SCRIPTSIZE">(CUDA runtime API wrapper only) </SMALL>
+<DD><TT>VT_GPUTRACE_SYNC=[0|1|2|<SPAN  CLASS="textbf">3</SPAN>]</TT>
 <BR>
 Controls how VampirTrace handles synchronizing CUDA API calls, especially 
-      <SPAN  CLASS="textit">cudaMemcpy</SPAN> and <SPAN  CLASS="textit">cudaThreadSynchronize</SPAN>.
+      CUDA memory copies and CUDA device synchronization. 
       At level 0 only the CUDA calls will be executed, messages will be 
       displayed from the beginning to the end of the <SPAN  CLASS="textit">cudaMemcpy</SPAN>, regardless how 
       long the <SPAN  CLASS="textit">cudaMemcpy</SPAN> call has to wait for a kernel until the actual 
 </DD>
 <DT></DT>
 <DD><TT>VT_CUPTI_METRICS</TT>
-            <SMALL CLASS="SCRIPTSIZE">(CUDA runtime API wrapper only) </SMALL>
 <BR>
 Capture CUDA CUPTI counters. Metrics are separated by default with ``:'' 
       or user specified by <TT>VT_METRICS_SEP</TT>.
 <P>
 </DD>
 <DT></DT>
-<DD><TT>VT_CUPTI_EVENTS_SAMPLING=[yes|<SPAN  CLASS="textbf">no</SPAN>]</TT> 
-	    <SMALL CLASS="SCRIPTSIZE">(CUDA runtime API wrapper only) </SMALL>
+<DD><TT>VT_CUPTI_EVENTS_SAMPLING=[yes|<SPAN  CLASS="textbf">no</SPAN>]</TT>
 <BR>
 Poll for CUPTI counter values during kernel execution, if set to <TT>yes</TT>.
 
 <P>
 </DD>
 <DT></DT>
-<DD><TT>VT_GPUTRACE_MEMUSAGE=[yes|2]</TT>
+<DD><TT>VT_GPUTRACE_MEMUSAGE=[<SPAN  CLASS="textbf">yes</SPAN>|2]</TT>
 <BR>
 Record GPU memory usage as counter ``gpu_mem_usage``, if set to <TT>yes</TT>, 
       which is the same as adding the option <TT>memusage</TT> to <TT>VT_GPUTRACE</TT>. 
   case the CUDA runtime library should be preloaded to reduce tracing overhead 
   (the dynamic linker can use  <TT>LD_PRELOAD=libcudart.so</TT>). Otherwise 
   the library wrapper intercepts every CUDA runtime API call and makes a short 
-  but unnecessary check, whether it is enabled. The CUPTI tracing method does 
-  not support recording of peer-to-peer memory copies. 
+  but unnecessary check of whether it is enabled. 
 <P></P>
 
 <P>
+Synchronous CUDA peer-to-peer memory copies will only be recorded
+  if the <TT>sync</TT> option is set and the synchronization level is 3 (default).
+
+<P>
 
 <H3><A NAME="SECTION00550200000000000000">
 CUDA Runtime API Wrapper Particularities</A>
 </H3>
-  To ensure measurement of correct data rates for synchronous CUDA 
-  memory copies, the VampirTrace CUDA runtime library wrapper inserts a CUDA 
-  synchronization before the memory copy call. 
-  Otherwise the implicit synchronization of the CUDA memory copy call could not 
-  be exposed and it was not possible to get correct transfer rates.
+  CUDA tracing via this method will always record the CUDA runtime API
+  calls. It is not possible to only record kernels, memory copies or memory 
+  usage. CUDA driver API programs cannot be traced with the CUDA runtime API 
+  wrapper. 
 <P></P>
 
 <P>
 <H3><A NAME="SECTION00550300000000000000">
 Counter via CUDA API</A>
 </H3>
-  If <TT>VT_GPUTRACE_MEMUSAGE</TT> is enabled, 
-  <SPAN  CLASS="textit">cudaMalloc</SPAN> and <SPAN  CLASS="textit">cudaFree</SPAN> functions will be tracked to write 
-  the GPU memory usage counter <TT>gpu_mem_usage</TT>. 
-  This counter does not need space in the CUDA buffer. The counter values 
-  will be written directly to the default CUDA stream '1'. This stream will be 
+  If <TT>VT_GPUTRACE_MEMUSAGE</TT> is enabled, CUDA memory allocations on the 
+  GPU will be tracked to write the GPU memory usage counter 
+  <TT>gpu_mem_usage</TT>. The counter values will be written directly to the 
+  default CUDA stream '1'. This stream will be 
   created, if it does not exist and does not have to contain any other CUDA 
   device activities. If the environment variable is set to <TT>2</TT>, missing 
   <SPAN  CLASS="textit">cudaFree()</SPAN> calls will be printed to stderr.
 <P>
 
 <H3><A NAME="SECTION00550400000000000000">
-CUDA Performance Counters via CUPTI Events 
-<BR>    <SMALL CLASS="FOOTNOTESIZE">(CUDA runtime API wrapper only!)</SMALL></A>
+CUDA Performance Counters via CUPTI Events</A>
 </H3>
   To capture performance counters in CUDA applications, CUPTI events can be 
   specified with the environment variable <TT>VT_CUPTI_METRICS</TT>. Counters 
   <TT>VT_METRICS_SEP</TT>. The <SPAN  CLASS="textit">CUPTI User's Guide - Event Reference</SPAN> 
   provides information about the available counters. Alternatively set 
   <TT>VT_CUPTI_METRICS=help</TT> to show a list of available counters 
-  (<TT>help_long</TT> to print the counter description as well).
+  (<TT>help_long</TT> to print the counter description as well). This will only 
+  take effect when a kernel is about to be executed. 
 
 <P>
 
 <P>
 
 <H3><A NAME="SECTION00550600000000000000">
-Tracing the NVIDIA CUDA SDK 3.x and 4.x</A>
+Tracing the NVIDIA CUDA Sample Applications</A>
 </H3>
-  To get some example traces, replace the compiler commands in the common 
+  <SMALL CLASS="SMALL"><SPAN  CLASS="textbf">CUDA 3.x and 4.x:</SPAN></SMALL>
+<BR>
+To get some example traces, replace the compiler commands in the common 
   Makefile include file (<TT>common/common.mk</TT>) with the corresponding 
   VampirTrace compiler wrappers (&#8658;<A HREF="#sec:compiler_wrappers">2.1</A>) for 
   automatic instrumentation:
   CC   := vtcc
   LINK := vtc++ #-vt:mt
 </PRE>
-  Use the compiler switches for MPI, multi-threaded 
+
+<P>
+<SMALL CLASS="SMALL"><SPAN  CLASS="textbf">CUDA 5.0:</SPAN></SMALL>
+<BR>
+Set the following environment variables for automatic instrumentation before 
+  running <TT>make</TT>:
+  <PRE>
+  export GCC=vtc++ #-vt:mt
+  export NVCC=vtnvcc #-vt:mt
+</PRE>
+
+<P>
+Use the compiler switches for MPI, multi-threaded 
   and hybrid programs, if necessary (e.g. the CUDA SDK example 
   <TT>simpleMultiGPU</TT> is a multi-threaded program, which needs to be linked 
   with a multi-threaded VampirTrace library). 
 
 <P>
 VampirTrace CUDA support has been successfully tested with CUDA 
-  toolkit version 3.x, 4.0 and 4.1.
+  toolkit version 3.x, 4.x and 5.0.
 
 <P>
 
 enable support for Dyninst instrumentation,
                               default: enable if found by configure.
                               <SPAN  CLASS="textbf">Note:</SPAN> Requires Dyninst<A NAME="tex2html8"
-  HREF="#foot1667"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1680"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> version 6.1 or higher!
 
 <P>
                               instrumentation by using TAU, default: enable if
                               found by configure.
                               <SPAN  CLASS="textbf">Note:</SPAN> Requires PDToolkit<A NAME="tex2html9"
-  HREF="#foot1668"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1681"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> or TAU<A NAME="tex2html10"
-  HREF="#foot1669"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1682"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>!
 
 <P>
 
 <P>
 To enable support for generating wrappers for third-party libraries, the C code parser CTool<A NAME="tex2html11"
-  HREF="#foot1670"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot1683"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is needed:
 
 <P>
 
 <P>
 When using the IOFSL integration, all write requests in OTF are issued using the zoidfs API<A NAME="tex2html12"
-  HREF="#foot3279"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot3310"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. Those writes are handled by the IOFSL forwarding servers and aggregated into a single file using the atomic append feature. The offset in the multifile is returned to OTF and stored in a second file, the so called index file, in order to maintain the mapping between written blocks and streams. For any block of a stream written into the multifile, the index file contains the ID of the stream, the start of the block, and its length. This allows for an efficient reading of blocks since only the index file has to be scanned for entries for a given stream ID. Additionally, a large number of logical files (streams) can be stored using only two physical files.
 
 <P>
 <P>
 In order to use this setup, IOFSL and VampirTrace have to be compiled in order.
 In the following sections, the directory <TT>&lt;install_dir&gt;</TT> should be replaced with a - possibly user-local - directory used for installation, e.g. <TT>$HOME/local</TT><A NAME="tex2html13"
-  HREF="#foot3283"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot3314"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
 The installation procedure for IOFSL is described at https://trac.mcs.anl.gov/projects/iofsl/wiki/Building.
 Currently the <TT>iofsl_vampir</TT> git branch is required.
 PBS Options</A>
 </H4>
 It is important to reserve a sufficient number of processor cores. The number of cores requested must be large enough to contain the number of application cores plus the number of cores required for the IOFSL server instances. Each IOFSL server will run on a dedicated node<A NAME="tex2html14"
-  HREF="#foot3327"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot3358"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
 SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. Thus N_allocated &#8805; ((N_IOFSL * 16) + N_Application) must hold.
 <P>
 Example using 64 server instances:
 The server is configured using a configuration file.
 At server start-up, this file is provided using the <TT>-config</TT> argument.
 The cray XK6 configuration file is provided in the package<A NAME="tex2html15"
-  HREF="#foot3467"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot3498"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
 For more information about the options available please refer to the IOFSL documentation<A NAME="tex2html16"
-  HREF="#foot3468"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
+  HREF="#foot3499"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
 The most important option is the <TT>serverlist</TT> entry in the <TT>bmi</TT> section which takes a list of server addresses, e.g. :
 <PRE>
 into the official VampirTrace package.
 <BR><HR><H4>Footnotes</H4>
 <DL>
-<DT><A NAME="foot1601">... (OTF)</A><A
+<DT><A NAME="foot1614">... (OTF)</A><A
  HREF="#tex2html1"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://www.tu-dresden.de/zih/otf
 
 </DD>
-<DT><A NAME="foot1602">... tool </A><A
+<DT><A NAME="foot1615">... tool </A><A
  HREF="#tex2html2"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://www.vampir.eu
 
 </DD>
-<DT><A NAME="foot1603">...
+<DT><A NAME="foot1616">...
 Open MPI </A><A
  HREF="#tex2html3"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://www.open-mpi.org/faq/?category=vampirtrace
 
 </DD>
-<DT><A NAME="foot1627">... documentation </A><A
+<DT><A NAME="foot1640">... documentation </A><A
  HREF="#tex2html4"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://www.cs.uoregon.edu/Research/tau/docs/newguide/bk05ch02.html#d0e3770
 
 </DD>
-<DT><A NAME="foot1628">...
+<DT><A NAME="foot1641">...
 Dyninst </A><A
  HREF="#tex2html5"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://www.dyninst.org
 
 </DD>
-<DT><A NAME="foot1629">... library </A><A
+<DT><A NAME="foot1642">... library </A><A
  HREF="#tex2html6"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://sourceforge.net/projects/ctool
 
 </DD>
-<DT><A NAME="foot1639">... CLAPACK</A><A
+<DT><A NAME="foot1652">... CLAPACK</A><A
  HREF="#tex2html7"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>www.netlib.org/clapack
 
 </DD>
-<DT><A NAME="foot1667">... Dyninst </A><A
+<DT><A NAME="foot1680">... Dyninst </A><A
  HREF="#tex2html8"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://www.dyninst.org
 
 </DD>
-<DT><A NAME="foot1668">... PDToolkit </A><A
+<DT><A NAME="foot1681">... PDToolkit </A><A
  HREF="#tex2html9"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://www.cs.uoregon.edu/research/pdt/home.php
 
 </DD>
-<DT><A NAME="foot1669">... TAU </A><A
+<DT><A NAME="foot1682">... TAU </A><A
  HREF="#tex2html10"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://tau.uoregon.edu
 
 </DD>
-<DT><A NAME="foot1670">... CTool </A><A
+<DT><A NAME="foot1683">... CTool </A><A
  HREF="#tex2html11"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>http://sourceforge.net/projects/ctool
 
 </DD>
-<DT><A NAME="foot3279">... API</A><A
+<DT><A NAME="foot3310">... API</A><A
  HREF="#tex2html12"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>The OTF master control file is written using POSIX I/O in any case.
 
 </DD>
-<DT><A NAME="foot3283">...$HOME/local</A><A
+<DT><A NAME="foot3314">...$HOME/local</A><A
  HREF="#tex2html13"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>The software packages can be installed in different directories.
 
 </DD>
-<DT><A NAME="foot3327">... node</A><A
+<DT><A NAME="foot3358">... node</A><A
  HREF="#tex2html14"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>The server makes use of all the nodes resources by multithreading and allocating large I/O buffers
 
 </DD>
-<DT><A NAME="foot3467">... package</A><A
+<DT><A NAME="foot3498">... package</A><A
  HREF="#tex2html15"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD><TT>tools/vtiofsl/platform/crayxk6-iofwd.cf</TT>
 
 </DD>
-<DT><A NAME="foot3468">... documentation</A><A
+<DT><A NAME="foot3499">... documentation</A><A
  HREF="#tex2html16"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
 <DD>https://trac.mcs.anl.gov/projects/iofsl/wiki/ConfigurationFile

ompi/contrib/vt/vt/doc/UserManual.pdf

Binary file modified.

ompi/contrib/vt/vt/rfg/rfg_filter.c

       break;
     }
 
+    filter->file_content_size = file_stat.st_size;
+
     /* allocate the buffer for storing the filter file content */
-    filter->file_content = (char*)malloc( file_stat.st_size * sizeof( char ) );
+    filter->file_content = (char*)malloc( (filter->file_content_size + 1) * sizeof( char ) );
     if( !filter->file_content )
     {
       ret = 0;
     }
 
     /* read the filter file */
-    if( read(fd, filter->file_content, file_stat.st_size ) == -1 )
+    if( read(fd, filter->file_content, filter->file_content_size ) == -1 )
     {
       ret = 0;
       break;
     }
 
+    filter->file_content[ filter->file_content_size ] = '\0';
+    
   } while( 0 );
 
   /* close the filter file */
 static int get_file_content_line( RFG_Filter* filter, char* buf,
                                   size_t bufsize, size_t* pos )
 {
-  size_t content_size;
   size_t i;
 
   if( !filter || !filter->file_content )
     return 0;
 
-  content_size = strlen( filter->file_content );
 
-  if( *pos >= content_size )
+  if( *pos >= filter->file_content_size )
     return 0;
 
-  for( i = 0; i < bufsize && *pos < content_size; i++ )
+  for( i = 0; i < bufsize && *pos < filter->file_content_size; i++ )
   {
     buf[i] = filter->file_content[(*pos)++];
     if( buf[i] == '\n' )

ompi/contrib/vt/vt/rfg/rfg_regions.c

   return ret;
 }
 
+/* Return the RFG filter object associated with the given regions object.
+ * Returns NULL if 'regions' is NULL or no filter has been attached. */
+RFG_Filter* RFG_Regions_getFilter( RFG_Regions* regions )
+{
+  if( !regions || !regions->filter )
+    return NULL;
+
+  return regions->filter;
+}
+
+/* Return the RFG groups object associated with the given regions object.
+ * Returns NULL if 'regions' is NULL or no groups object has been attached. */
+RFG_Groups* RFG_Regions_getGroups( RFG_Regions* regions )
+{
+  if( !regions || !regions->groups )
+    return NULL;
+
+  return regions->groups;
+}
+
 int RFG_Regions_setRegionIdGenFunc( RFG_Regions* regions,
                                     uint32_t (*func)(void) )
 {

ompi/contrib/vt/vt/rfg/rfg_regions.h

 /* cleanup RFG regions object */
 int RFG_Regions_free( RFG_Regions* regions );
 
+/* get RFG filter object associated with given regions object */
+RFG_Filter* RFG_Regions_getFilter( RFG_Regions* regions );
+
+/* get RFG groups object associated with given regions object
+   (parameter renamed from 'groups' to 'regions' to match the definition) */
+RFG_Groups* RFG_Regions_getGroups( RFG_Regions* regions );
+
 /* sets pointer to a function which generates region ids */
 int RFG_Regions_setRegionIdGenFunc( RFG_Regions* regions,
                                     uint32_t (*func)(void) );

ompi/contrib/vt/vt/tools/vtfilter/old/vt_tracefilter.cc

 			cerr << "Error while reading definitions. aborting" << endl;
 			OTF_WStream_close( fha.wstream );
 			OTF_RStream_close( defrstream );
+			OTF_HandlerArray_close( defhandlers );
 #ifdef VT_MPI
 			MPI_Abort( MPI_COMM_WORLD, 1 );
 #endif // VT_MPI
 				cerr << "Error while reading definitions. aborting" << endl;
 				OTF_WStream_close( fha.wstream );
 				OTF_RStream_close( defrstream );
+				OTF_HandlerArray_close( defhandlers );
 #ifdef VT_MPI
 				MPI_Abort( MPI_COMM_WORLD, 1 );
 #endif // VT_MPI

ompi/contrib/vt/vt/tools/vtfilter/vt_filter_gen.cc

     delete [] argv;
   }
 
-  if( envp )
+  //if( envp )
   {
     if( envp[0] )
       delete [] envp[0];

ompi/contrib/vt/vt/tools/vtfilter/vt_filter_trc.cc

         for( std::map<uint32_t, std::string>::const_iterator it = funcs.begin();
              it != funcs.end(); it++ )
         {
-          const uint32_t& func = it->first;
+          const uint32_t func = it->first;
           const std::string& func_name = it->second;
           int32_t limit;
 

ompi/contrib/vt/vt/tools/vtsetup/vtsetup-config.dtd

 *** VampirTrace
 *** http://www.tu-dresden.de/zih/vampirtrace
 ***
-*** Copyright (c) 2005-2012, ZIH, TU Dresden, Federal Republic of Germany
+*** Copyright (c) 2005-2013, ZIH, TU Dresden, Federal Republic of Germany
 ***
 *** Copyright (c) 1998-2005, Forschungszentrum Juelich GmbH, Federal
 *** Republic of Germany

ompi/contrib/vt/vt/vtlib/vt_cudart.c

 #include "vt_libwrap.h"     /* wrapping of CUDA Runtime API functions */
 #include "vt_cudartwrap.h"  /* CUDA wrapper functions for external use */
 #include "vt_gpu.h"         /* common for GPU */
-#include "vt_mallocwrap.h"     /* Switch memory tracing on/off */
+#include "vt_mallocwrap.h"  /* Switch memory tracing on/off */
 
 #if (defined(VT_CUPTI_EVENTS))
 #include "vt_cupti_events.h"       /* Support for CUPTI events */
 VTThrdMutex* VTThrdMutexCudart = NULL;
 #endif /* VT_MT || VT_HYB */
 
+/* Check the result of a CUDA runtime call: if _err is not cudaSuccess,
+ * report the error via __checkCUDACall() together with the source location. */
+#define VT_CUDART_CALL(_err, _msg) \
+  if(cudaSuccess != _err)          \
+    __checkCUDACall(_err, _msg, __FILE__,__LINE__)
+
 /*
  * Register the finalize function of the CUDA wrapper to be called before
  * the program exits and CUDA has done its implicit clean-up.
   vtDev = VTCUDAcheckThread(NULL, _ptid, &strm);                               \
   strmID = strm->tid;                                                          \
   if(_kind != cudaMemcpyHostToHost){                                           \
-    if(syncLevel > 2) VTCUDAflush(vtDev, _ptid);                               \
-    else if(syncLevel > 0){                                                    \
+    if(vt_gpu_sync_level > 2) VTCUDAflush(vtDev, _ptid);                       \
+    else if(vt_gpu_sync_level > 0){                                            \
       time = vt_pform_wtime();                                                 \
-      if(syncLevel > 1) vt_enter(_ptid, &time, rid_sync);                      \
+      if(vt_gpu_sync_level > 1) vt_enter(_ptid, &time, vt_gpu_rid_sync);       \
       VT_CUDART_CALL(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!");      \
-      if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(_ptid, &time);}       \
+      if(vt_gpu_sync_level > 1){time = vt_pform_wtime(); vt_exit(_ptid, &time);}\
     }                                                                          \
     CUDARTWRAP_LOCK();                                                         \
       if(_kind != cudaMemcpyDeviceToDevice)                                    \
       vt_gpu_prop[strmID] |= VTGPU_GPU_COMM;                                   \
     CUDARTWRAP_UNLOCK();                                                       \
   }                                                                            \
-  if(syncLevel == 1 && time != 0){ /* no hostTohost and sync==1 */             \
+  if(vt_gpu_sync_level == 1 && time != 0){ /* no hostTohost and sync==1 */     \
     do_traceE = vt_enter(_ptid, &time, VT_LIBWRAP_FUNC_ID);                    \
     time = vt_pform_wtime();                                                   \
   }else{                                                                       \
     }else if(_kind == cudaMemcpyDeviceToHost){                                 \
       vt_mpi_rma_get(_ptid, &time, VT_GPU_RANK_ID(strmID),                     \
                      vt_gpu_commCID, 0, (uint64_t)_bytes);                     \
-    }else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){              \
+    }else if(_kind == cudaMemcpyDeviceToDevice && vt_gpu_sync_level > 2){      \
       vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID),                    \
                      vt_gpu_commCID, 0, (uint64_t)_bytes);                     \
     }                                                                          \
   _call  /* the CUDA memcpy call itself */                                     \
   time = vt_pform_wtime();                                                     \
   if(do_traceE){                                                               \
-    if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){                    \
+    if(_kind == cudaMemcpyDeviceToDevice && vt_gpu_sync_level > 2){            \
       vt_mpi_rma_end(strmID, &time, vt_gpu_commCID, 0);                        \
     }else if(_kind != cudaMemcpyHostToHost){                                   \
       vt_mpi_rma_end(_ptid, &time, vt_gpu_commCID, 0);                         \
     }                                                                          \
   }                                                                            \
-  if(syncLevel > 2) vtDev->sync.lastTime = time;                               \
+  if(vt_gpu_sync_level > 2) vtDev->sync.lastTime = time;                       \
   REGISTER_FINALIZE;                                                           \
   vt_exit(_ptid, &time);                                                       \
 }
   uint8_t do_traceE = 0;\
   uint64_t time = vt_cudart_setupMemcpyPeer2Peer(_ptid, vtSrcDev, vtDstDev);\
 \
-  if(syncLevel == 1){\
+  if(vt_gpu_sync_level == 1){\
     do_traceE = vt_enter(_ptid, &time, VT_LIBWRAP_FUNC_ID);\
-    time = vt_pform_wtime();\
+    /*time = vt_pform_wtime();*/\
   }else{\
-    time = vt_pform_wtime();\
     do_traceE = vt_enter(_ptid, &time, VT_LIBWRAP_FUNC_ID);\
   }\
 \
   if(do_traceE){\
     vt_mpi_rma_end(vtSrcDev->strmList->tid, &time, vt_gpu_commCID, 0); \
   }\
-  if(syncLevel > 2){\
+  if(vt_gpu_sync_level > 2){\
     vtSrcDev->sync.lastTime = time;\
     vtDstDev->sync.lastTime = time;\
   }\
 #if (defined(VT_CUPTI_EVENTS))
 # define CUDA_MEMCPY_ASYNC(_kind, _bytes, _stream, _call)    \
   if(vt_cudart_trace_enabled){                               \
-    if(trace_cupti_events || _stream == NULL)                \
+    if(vt_cupti_events_enabled || _stream == NULL)           \
       CUDA_MEMCPY_ASYNC_CUPTI(_kind, _bytes, _stream, _call) \
     else                                                     \
       CUDA_MEMCPY_ASYNC_EVT(_kind, _bytes, _stream, _call)   \
   do_trace = vt_enter(ptid, &time, VT_LIBWRAP_FUNC_ID);                      \
   if(do_trace && vt_gpu_trace_mcpy){                                         \
     strmID = vtStrm->tid;                                                    \
-    if(syncLevel > 1) vt_enter(ptid, &time, rid_sync);                       \
+    if(vt_gpu_sync_level > 1) vt_enter(ptid, &time, vt_gpu_rid_sync);        \
     VT_CUDART_CALL(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!");      \
-    if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);}        \
+    if(vt_gpu_sync_level > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);}\
     if(_kind == cudaMemcpyHostToDevice){                                     \
       vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID),                    \
                      vt_gpu_commCID, 0, _bytes);                             \
     }else if(_kind == cudaMemcpyDeviceToHost){                               \
       vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID),                    \
                      vt_gpu_commCID, 0, _bytes);                             \
-    }else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){            \
+    }else if(_kind == cudaMemcpyDeviceToDevice && vt_gpu_sync_level > 2){    \
       vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID),                  \
                      vt_gpu_commCID, 0, _bytes);                             \
       CUDARTWRAP_LOCK();                                                     \
   if(do_trace && vt_gpu_trace_mcpy){                                         \
     VT_CUDART_CALL(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!");      \
     time = vt_pform_wtime();                                                 \
-    if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){                  \
+    if(_kind == cudaMemcpyDeviceToDevice && vt_gpu_sync_level > 2){          \
       vt_mpi_rma_end(strmID, &time, vt_gpu_commCID, 0);                      \
     }else if(_kind == cudaMemcpyHostToDevice || _kind == cudaMemcpyDeviceToHost){\
       vt_mpi_rma_end(ptid, &time, vt_gpu_commCID, 0);                        \
   vt_exit(ptid, &time);                                                      \
 }
 
-#define VT_CUDART_CALL(_err, _msg) \
-  if(cudaSuccess != _err)         \
-    __checkCUDACall(_err, _msg, __FILE__,__LINE__)
-
 /* library wrapper object */
 VTLibwrap* vt_cudart_lw = VT_LIBWRAP_NULL;
 
 /* library wrapper attributes */
-VTLibwrapAttr vt_cudart_lw_attr = VT_LIBWRAP_ATTR_INITIALIZER(vt_cudartwrap_lw_attr_init);
+VTLibwrapAttr vt_cudart_lw_attr = 
+        VT_LIBWRAP_ATTR_INITIALIZER(vt_cudartwrap_lw_attr_init);
 
 /* flag: cuda specific stuff initialized? */
 uint8_t vt_cudart_initialized = 0;
 /* flag: tracing of CUDA API enabled? */
 uint8_t vt_cudart_trace_enabled = 0;
 
-/* flag: synchronization and flush points during runtime enabled? */
-static uint8_t syncLevel = 3;
-
 /* region filter for kernel filtering */
 static RFG_Filter* vt_cudart_filter = NULL;
 
-/* flag: trace NVIDIA CUPTI events/counters */
-static uint8_t trace_cupti_events = 0;
-
-#if defined(VT_CUPTI_EVENTS)
-/* flag: sampling for CUPTI counter values enabled? */
-static uint8_t cupti_event_sampling = 0;
-#endif
-
 /* flag: event based tracing (kernels, memcpyAsync) enabled? */
 static uint8_t trace_events = 1;
 
 static uint8_t finalized = 0;
 
 /* global region IDs for wrapper internal tracing */
-static uint32_t rid_check, rid_create, rid_sync, rid_flush;
+static uint32_t rid_check, rid_create, rid_flush;
 
 /* global counter IDs */
 static uint32_t cid_blocksPerGrid;    /* number of blocks per grid */
  * 
  * @param ecode the CUDA error code
  * @param msg a message to get more detailed information about the error
- * @param the corresponding file
- * @param the line the error occurred
+ * @param file the corresponding file
+ * @param line the line the error occurred
  */
 static void __checkCUDACall(cudaError_t ecode, const char* msg,
                             const char *file, const int line)
     size_t minTaskSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAMemcpy);
     size_t minBufSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf);
     
-    syncLevel = (uint8_t)vt_env_cudatrace_sync();
+    vt_gpu_sync_level = (uint8_t)vt_env_gputrace_sync();
 
     trace_events = 0;
     
 #if defined(VT_CUPTI_EVENTS)
     if(vt_env_cupti_events() != NULL && vt_gpu_trace_kernels){
-      trace_cupti_events = 1;
-      cupti_event_sampling = (uint8_t)vt_env_cupti_sampling();
+      vt_cupti_events_enabled = 1;
     }else{
-      trace_cupti_events = 0;
+      vt_cupti_events_enabled = 0;
     }
     
     /* check whether CUPTI event gathering is enabled */
-    if(!trace_cupti_events)
+    if(!vt_cupti_events_enabled)
 #endif
     {
       if(vt_gpu_trace_mcpy){
                     sizeof(VTCUDAKernel), sizeof(VTCUDAMemcpy), maxEvtNum);
 
     }
-    
-    /* read filter file for CUDA kernel filtering */
-    {
-      const char *filter_file = vt_env_filter_spec();
-