|
# Nothing in this file applies to a build without GPU support.
if(NOT WITH_GPU)
  return()
endif()

# Space-separated GPU architecture lists. Each list extends the previous one:
#   paddle_known_gpu_archs7 -> selected below for CUDA 7.x
#   paddle_known_gpu_archs8 -> selected below for CUDA 8.x
#   paddle_known_gpu_archs  -> full default list
set(paddle_known_gpu_archs7 "30 35 50 52")
set(paddle_known_gpu_archs8 "${paddle_known_gpu_archs7} 60 61")
set(paddle_known_gpu_archs "${paddle_known_gpu_archs8} 70")
| 8 | + |
######################################################################################
# A function for automatic detection of the GPUs installed on the build machine.
#
# Usage:
#   detect_installed_gpus(out_variable)
#
# Writes a small CUDA program into ${PROJECT_BINARY_DIR}, builds and runs it with
# "nvcc --run", and keeps the last line of its output (space-separated
# "major.minor" entries, one per device). A successful result is stored in the
# internal cache variable CUDA_gpu_detect_output, so the probe is skipped on
# later configure runs.
#
# out_variable: name of the variable in the caller's scope that receives the
#   detected capabilities, or ${paddle_known_gpu_archs} when detection fails.
function(detect_installed_gpus out_variable)
  if(NOT CUDA_gpu_detect_output)
    set(cufile "${PROJECT_BINARY_DIR}/detect_cuda_archs.cu")

    file(WRITE "${cufile}" ""
      "#include <cstdio>\n"
      "int main() {\n"
      " int count = 0;\n"
      " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      " if (count == 0) return -1;\n"
      " for (int device = 0; device < count; ++device) {\n"
      " cudaDeviceProp prop;\n"
      " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      " std::printf(\"%d.%d \", prop.major, prop.minor);\n"
      " }\n"
      " return 0;\n"
      "}\n")

    execute_process(
      COMMAND "${CUDA_NVCC_EXECUTABLE}" "-ccbin=${CUDA_HOST_COMPILER}"
              "--run" "${cufile}"
      WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
      RESULT_VARIABLE nvcc_res
      OUTPUT_VARIABLE nvcc_out
      ERROR_QUIET
      OUTPUT_STRIP_TRAILING_WHITESPACE)

    # A zero exit code with no output (no device reported its properties) is
    # treated as a failed detection: list(GET) on an empty list is a hard error.
    if(nvcc_res EQUAL 0 AND NOT "${nvcc_out}" STREQUAL "")
      # Only keep the last line of nvcc_out. Literal ';' characters are escaped
      # first so that turning newlines into ';' yields one list item per line.
      string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
      string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
      list(GET nvcc_out -1 nvcc_out)
      # Rewrite capability 2.1 into the BIN(PTX) form understood by
      # select_nvcc_arch_flags.
      string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
      set(CUDA_gpu_detect_output "${nvcc_out}"
          CACHE INTERNAL "Returned GPU architectures from detect_installed_gpus tool" FORCE)
    endif()
  endif()

  if(NOT CUDA_gpu_detect_output)
    message(STATUS "Automatic GPU detection failed. Building for all known architectures.")
    set(${out_variable} "${paddle_known_gpu_archs}" PARENT_SCOPE)
  else()
    set(${out_variable} "${CUDA_gpu_detect_output}" PARENT_SCOPE)
  endif()
endfunction()
| 54 | + |
| 55 | + |
########################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
#
# Usage:
#   select_nvcc_arch_flags(out_variable)
#
# Reads the cache variable CUDA_ARCH_NAME (created here with default "All") and,
# for "Manual", the cache variables CUDA_ARCH_BIN / CUDA_ARCH_PTX.
#
# Sets in the caller's scope:
#   ${out_variable}           list of "-gencode arch=...,code=..." nvcc arguments
#   ${out_variable}_readable  space-separated summary (sm_XX / compute_XX)
#
# Fails with FATAL_ERROR when CUDA_ARCH_NAME is not one of the supported names.
function(select_nvcc_arch_flags out_variable)
  # List of arch names. "Volta" must be listed here, otherwise the validation
  # below rejects it before the "Volta" branch can ever be reached.
  set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual")
  set(archs_name_default "All")
  if(NOT CMAKE_CROSSCOMPILING)
    # Detection runs a program on the build machine, so it is only offered
    # when not cross-compiling.
    list(APPEND archs_names "Auto")
  endif()

  # set CUDA_ARCH_NAME strings (so it will be seen as a drop-down in CMake-Gui)
  set(CUDA_ARCH_NAME ${archs_name_default} CACHE STRING "Select target NVIDIA GPU architecture.")
  set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names})
  mark_as_advanced(CUDA_ARCH_NAME)

  # verify CUDA_ARCH_NAME value (exact match, not a regex match)
  list(FIND archs_names "${CUDA_ARCH_NAME}" arch_name_index)
  if(arch_name_index EQUAL -1)
    string(REPLACE ";" ", " archs_names "${archs_names}")
    message(FATAL_ERROR "Only ${archs_names} architecture names are supported.")
  endif()

  if("${CUDA_ARCH_NAME}" STREQUAL "Manual")
    set(CUDA_ARCH_BIN "${paddle_known_gpu_archs}" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
    set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    # The manual knobs only exist in the cache while "Manual" is selected.
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  if("${CUDA_ARCH_NAME}" STREQUAL "Kepler")
    set(cuda_arch_bin "30 35")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Maxwell")
    set(cuda_arch_bin "50")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Pascal")
    set(cuda_arch_bin "60 61")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Volta")
    set(cuda_arch_bin "70")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "All")
    set(cuda_arch_bin "${paddle_known_gpu_archs}")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Auto")
    detect_installed_gpus(cuda_arch_bin)
  else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
    set(cuda_arch_bin "${CUDA_ARCH_BIN}")
  endif()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${CUDA_ARCH_PTX}")
  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")
  list(REMOVE_DUPLICATES cuda_arch_bin)
  list(REMOVE_DUPLICATES cuda_arch_ptx)

  set(nvcc_flags "")
  set(nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
      list(APPEND nvcc_archs_readable sm_${arch})
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
    list(APPEND nvcc_archs_readable compute_${arch})
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  set(${out_variable} "${nvcc_flags}" PARENT_SCOPE)
  set(${out_variable}_readable "${nvcc_archs_readable}" PARENT_SCOPE)
endfunction()
| 138 | + |
message(STATUS "CUDA detected: " ${CUDA_VERSION})

# Pick the architecture list and version-specific nvcc flags for the detected
# CUDA toolkit. VERSION_LESS compares component-wise, and quoting keeps the
# condition well-formed even when CUDA_VERSION is empty.
if("${CUDA_VERSION}" VERSION_LESS 7.0)
  # paddle_known_gpu_archs keeps its full default list here.
  # NOTE(review): confirm whether a reduced list is intended for CUDA < 7.0.
elseif("${CUDA_VERSION}" VERSION_LESS 8.0) # CUDA 7.x
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs7})
  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
elseif("${CUDA_VERSION}" VERSION_LESS 9.0) # CUDA 8.x
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs8})
  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
  # CUDA 8 may complain that sm_20 is no longer supported. Suppress the
  # warning for now.
  list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
endif()

include_directories(${CUDA_INCLUDE_DIRS})
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
# Without WITH_DSO the cuDNN / cuBLAS / cuRAND / NCCL libraries are added to
# EXTERNAL_LIBS as well.
if(NOT WITH_DSO)
  list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
endif()

# setting nvcc arch flags
select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")

# Host compiler flags are not propagated to nvcc; the flags nvcc needs are
# appended to CUDA_NVCC_FLAGS explicitly below.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# Set C++11 support
list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
# Set --expt-relaxed-constexpr to suppress Eigen warnings
list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

# Release/Debug flags (such as -O3 -g -DNDEBUG) are not spelled out here;
# the per-configuration flags CMake already defines for C++ are reused.
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
  list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
  list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
  list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL})
endif()

mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
0 commit comments