In MHMXX we are encountering the following problem with static builds on Cori; the shared build works fine. The build also works with CMake 3.10.2 (which does not support library interfaces well, so our build does not use the UPCXX::upcxx library interface with it), but not with the default module, CMake 3.14.4, which does support them well and with which our build does use the interface.
I’m not sure whether this is a problem with CMake’s CUDA support or with UPC++’s CMake support…
[ 56%] Linking CUDA device code CMakeFiles/mhmxx.dir/cmake_device_link.o
cd /global/homes/r/regan/workspace-shared/mhmxx/build-gnu-gpu/src && /global/common/sw/cray/cnl7/haswell/cmake/3.14.4/gcc/8.2.0/2hef55n/bin/cmake -E cmake_link_script CMakeFiles/mhmxx.dir/dlink.txt --verbose=1
/usr/common/software/cuda/10.2.89/bin/nvcc -g -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink CMakeFiles/mhmxx.dir/main.cpp.o CMakeFiles/mhmxx.dir/merge_reads.cpp.o CMakeFiles/mhmxx.dir/kcount.cpp.o CMakeFiles/mhmxx.dir/dbjg_traversal.cpp.o CMakeFiles/mhmxx.dir/hash_funcs.c.o CMakeFiles/mhmxx.dir/klign.cpp.o CMakeFiles/mhmxx.dir/cgraph.cpp.o CMakeFiles/mhmxx.dir/build_ctg_graph.cpp.o CMakeFiles/mhmxx.dir/walk_ctg_graph.cpp.o CMakeFiles/mhmxx.dir/spanner.cpp.o CMakeFiles/mhmxx.dir/splinter.cpp.o CMakeFiles/mhmxx.dir/localassm.cpp.o CMakeFiles/mhmxx.dir/histogrammer.cpp.o CMakeFiles/mhmxx.dir/aln_depths.cpp.o -o CMakeFiles/mhmxx.dir/cmake_device_link.o -L/usr/common/software/cuda/10.2.89/targets/x86_64-linux/lib/stubs -L/usr/common/software/cuda/10.2.89/targets/x86_64-linux/lib ssw/libSSW_LIBRARY.a ../libMHMXX_VERSION_LIB.a adept-sw/libADEPT_SW_LIBRARY_static.a ../upcxx-utils/src/libUPCXX_UTILS_LIBRARY.a -lpthread -L/usr/common/ftg/upcxx/2020.3.2/craype-none/gpu/gnu/gcc-8.3.0/upcxx.debug.gasnet_seq.ibv/lib -lupcxx -L/usr/common/ftg/upcxx/2020.3.2/craype-none/gpu/gnu/gcc-8.3.0/gasnet.debug/lib -lgasnet-ibv-seq -L/global/homes/h/hargrove/lib -libverbs -L/opt/esslurm/lib64 -lpmi2 -lpthread -lrt -L/opt/gcc/8.3.0/snos/lib/gcc/x86_64-suse-linux/8.3.0 -lgcc -lm -Wl,--start-group -L/usr/common/software/cuda/10.2.89/bin/../targets/x86_64-linux/lib/stubs -L/usr/common/software/cuda/10.2.89/bin/../targets/x86_64-linux/lib -lcudadevrt -lcudart_static -lrt -lpthread -ldl -Wl,--end-group -lcuda -lcudadevrt -lcudart_static -lrt -ldl
nvcc fatal : Unknown option '-Wl,--start-group'
make[2]: *** [src/CMakeFiles/mhmxx.dir/build.make:286: src/CMakeFiles/mhmxx.dir/cmake_device_link.o] Error 1
make[2]: Leaving directory '/global/u2/r/regan/workspace-shared/mhmxx/build-gnu-gpu'
make[1]: *** [CMakeFiles/Makefile2:870: src/CMakeFiles/mhmxx.dir/all] Error 2
make[1]: Leaving directory '/global/u2/r/regan/workspace-shared/mhmxx/build-gnu-gpu'
I believe the --start-group / --end-group commands are being introduced by the cuda language support in CMake.
I tracked down the erroneous dependency that CMake picks up from the UPCXX::upcxx library interface which then makes its way into the above cuda device link dependency somehow.
UPCXX::upcxx library interface: UPCXX::upcxx;-L/usr/common/ftg/upcxx/2020.3.2/craype-none/gpu/gnu/gcc-8.3.0/upcxx.debug.gasnet_seq.ibv/lib -lupcxx -L/usr/common/ftg/upcxx/2020.3.2/craype-none/gpu/gnu/gcc-8.3.0/gasnet.debug/lib -lgasnet-ibv-seq -L/global/homes/h/hargrove/lib -libverbs -L/opt/esslurm/lib64 -lpmi2 -lpthread -lrt -L/opt/gcc/8.3.0/snos/lib/gcc/x86_64-suse-linux/8.3.0 -lgcc -lm -Wl,--start-group -L/usr/common/software/cuda/10.2.89/bin/../targets/x86_64-linux/lib/stubs -L/usr/common/software/cuda/10.2.89/bin/../targets/x86_64-linux/lib -lcudadevrt -lcudart_static -lrt -lpthread -ldl -Wl,--end-group -lcuda
Normally having extra libraries on the link line is fine, but here the “-Wl,--start-group” and “-Wl,--end-group” arguments break the nvcc device link. Replacing them with “-Xlinker --start-group” and “-Xlinker --end-group” works. So does removing everything between these arguments.
When I remove the offending arguments from the above nvcc command, it links fine and when restarting the build, it completes successfully.
In fact the only part of the cmake_device_link.o build command above which is required is the actual library file which is built for the gpu:
/usr/common/software/cuda/10.2.89/bin/nvcc -g -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink -o CMakeFiles/mhmxx.dir/cmake_device_link.o adept-sw/libADEPT_SW_LIBRARY_static.a
… so this may be an issue with how CMake applies link dependencies to the cmake_device_link.o step: when a library interface is involved in the build, it passes along libraries that the device link does not actually depend on.
This is the CMakeLists.txt for the only subdirectory that requires nvcc.
# CMake configuration for the ADEPT-SW library. All of its sources are compiled
# as CUDA, so this directory may only be configured when ENABLE_CUDA is set.
if(NOT ENABLE_CUDA)
  message(FATAL_ERROR "Trying to build ADEPT-SW but CUDA is not enabled")
endif()

find_package(OpenMP REQUIRED)

option(ADEPT_SW_SHARED "Adept-SW shared library" ON)
option(ADEPT_SW_STATIC "Adept-SW static library" ON)

# At least one flavour has to be built; otherwise the ADEPT_SW_LIBRARY alias
# defined at the bottom of this file would not exist.
if(NOT ADEPT_SW_SHARED AND NOT ADEPT_SW_STATIC)
  message(FATAL_ERROR
    "ADEPT-SW: enable at least one of ADEPT_SW_SHARED or ADEPT_SW_STATIC")
endif()

# Directory-scoped defaults that initialise the matching properties of the
# library targets created below.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

# Host-compiler warnings and the OpenMP flags are forwarded through
# -Xcompiler; device code is generated for compute capability 7.0.
# NOTE(review): this assumes OpenMP_CXX_FLAGS is a single flag without spaces
# (e.g. -fopenmp) - confirm for the compilers you support.
string(APPEND CMAKE_CUDA_FLAGS
  " -Xcompiler=-Wall,${OpenMP_CXX_FLAGS} -gencode arch=compute_70,code=sm_70")
message(STATUS "CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}")

# Single source list shared by the shared and static flavours.
set(adept_sw_sources
  driver.cpp
  kernel.cpp
  gpu_alns.cpp
  utils_gpu.cpp
)

# The sources use a .cpp extension, so they are explicitly marked as CUDA.
# LINKER_LANGUAGE is a target property, not a source-file property, so it is
# deliberately not set here.
set_source_files_properties(${adept_sw_sources} PROPERTIES LANGUAGE CUDA)

if(ADEPT_SW_SHARED)
  add_library(ADEPT_SW_LIBRARY_shared SHARED ${adept_sw_sources})
  # INTERFACE: OpenMP is added to the link line of consumers only.
  target_link_libraries(ADEPT_SW_LIBRARY_shared INTERFACE OpenMP::OpenMP_CXX)
  install(TARGETS ADEPT_SW_LIBRARY_shared LIBRARY DESTINATION lib)
endif()

if(ADEPT_SW_STATIC)
  add_library(ADEPT_SW_LIBRARY_static STATIC ${adept_sw_sources})
  # INTERFACE: OpenMP is added to the link line of consumers only.
  target_link_libraries(ADEPT_SW_LIBRARY_static INTERFACE OpenMP::OpenMP_CXX)
  install(TARGETS ADEPT_SW_LIBRARY_static ARCHIVE DESTINATION lib)
endif()

# ADEPT_SW_LIBRARY is the name consumers link against. The static flavour is
# preferred whenever it is built; the shared one is used otherwise.
if(ADEPT_SW_STATIC)
  add_library(ADEPT_SW_LIBRARY ALIAS ADEPT_SW_LIBRARY_static)
else()
  add_library(ADEPT_SW_LIBRARY ALIAS ADEPT_SW_LIBRARY_shared)
endif()
And I should note that the cmake_device_link.o file for this library builds just fine, because it does not have any dependencies.
And this is from the parent directory, which lists all the dependencies. I cannot figure out why CMake passes all the other build targets as dependencies when it generates the cmake_device_link.o file:
# ADEPT-SW refuses to configure without CUDA, so its subdirectory is only
# added for CUDA builds.
if(ENABLE_CUDA)
  # Directory-scoped on purpose: kept as in the original so that every target
  # defined in this directory continues to see the adept-sw headers.
  include_directories("adept-sw")
  add_subdirectory(adept-sw)
endif()

add_executable(mhmxx
  main.cpp
  merge_reads.cpp
  kcount.cpp
  dbjg_traversal.cpp
  hash_funcs.c
  klign.cpp
  cgraph.cpp
  build_ctg_graph.cpp
  walk_ctg_graph.cpp
  spanner.cpp
  splinter.cpp
  localassm.cpp
  histogrammer.cpp
  aln_depths.cpp
)

# Libraries required by every build flavour, listed once instead of being
# duplicated in both branches of the ENABLE_CUDA switch.
# NOTE(review): the PRIVATE keyword must not be mixed with keyword-less
# target_link_libraries(mhmxx ...) calls elsewhere - confirm there are none.
target_link_libraries(mhmxx
  PRIVATE
    Threads::Threads
    SSW_LIBRARY
    ${ZLIB_LIBRARIES}
    ${UPCXX_LIBRARIES}
    ${UPCXX_UTILS_LIBRARIES}
    MHMXX_VERSION_LIB
)

if(ENABLE_CUDA)
  set_property(TARGET mhmxx PROPERTY CUDA_SEPARABLE_COMPILATION ON)
  # Appended after the common libraries, so the link order is the same as in
  # the original CUDA branch (ADEPT_SW_LIBRARY last).
  target_link_libraries(mhmxx PRIVATE ADEPT_SW_LIBRARY)
endif()
This is the state of modules loaded when building:
Currently Loaded Modulefiles:
1) modules/3.2.11.4 11) ugni/6.0.14.0-7.0.1.1_7.33__ge78e5b0.ari 21) PrgEnv-gnu/6.0.5
2) altd/2.0 12) pmi/5.0.14 22) craype-hugepages2M
3) darshan/3.1.7 13) dmapp/7.1.1-7.0.1.1_4.48__g38cf134.ari 23) craype-network-aries
4) cray-mpich/7.7.10 14) gni-headers/5.0.12.0-7.0.1.1_6.28__g3b1768f.ari 24) craype-x86-skylake
5) git/2.21.0 15) xpmem/2.2.20-7.0.1.1_4.10__g0475745.ari 25) jdk/1.8.0_202
6) esslurm 16) job/2.2.4-7.0.1.1_3.36__g36b56f4.ari 26) cudnn/7.6.5
7) gcc/8.3.0 17) dvs/2.12_2.2.156-7.0.1.1_8.9__g5aab709e 27) cuda/10.2.89
8) craype/2.6.2 18) alps/6.6.58-7.0.1.1_6.4__g437d88db.ari 28) cmake/3.14.4
9) cray-libsci/19.06.1 19) rca/2.2.20-7.0.1.1_4.46__g8e3fb5b.ari 29) upcxx-gpu/2020.3.2
10) udreg/2.3.2-7.0.1.1_3.31__g8175d3d.ari 20) atp/2.1.3
So the root problem here is that `nvcc` has a different command-line syntax than the `g++`/`clang++`/`icpc` backend C++ compiler that it wraps. In addition, `nvcc` silently adds its own CUDA-related linker options that are not automatically added by the backend compiler.

This means that in order to provide correct link-stage flags, we need to know which linker wrapper is being used. Currently `upcxx` and `upcxx-meta` (and the CMake package wrapping them) assume the backend C++ compiler is being invoked to wrap the system linker (i.e. that's what `upcxx` invokes for linking, and `upcxx-meta CXX` is that linker). So we provide flags in the syntax the C++ compiler expects and explicitly add the CUDA-related linker options that are needed. However, passing these same flags to `nvcc` for linkage breaks because it has a different (and IMHO horribly broken/restricted) input syntax for flags.

I believe that an approach like this could work (at least in recent CMake versions). However, our scripts that generate the linker flags don't currently generate both versions - IOW I currently have the value of `${UPCXX_LINK_OPTIONS}` available, but no automated way to get a valid value for `${UPCXX_CUDA_LINK_OPTIONS}` needed above (at least not in the general case). In order to support this approach portably/robustly, we'd need to add logic to our configure script to build the second set of linker flags and export it via `upcxx-meta` for consumption in the CMake package.

TL;DR: we can solve this, but it requires some hacking in our build infrastructure before we can deploy such a solution in a robust/general way. If your current workaround is sufficient, I'd suggest sticking with that for now and hopefully we can deploy something better by next release.