/perf/kseta : revision 1

1

###############################################################################

2

# For more information, please see: http://software.sci.utah.edu

3

#

4

# The MIT License

5

#

6

7

# NVIDIA Corp.

8

#

9

10

# Scientific Computing and Imaging Institute, University of Utah

11

#

12

# License for the specific language governing rights and limitations under

13

# Permission is hereby granted, free of charge, to any person obtaining a

14

# copy of this software and associated documentation files (the "Software"),

15

# to deal in the Software without restriction, including without limitation

16

# the rights to use, copy, modify, merge, publish, distribute, sublicense,

17

# and/or sell copies of the Software, and to permit persons to whom the

18

# Software is furnished to do so, subject to the following conditions:

19

#

20

# The above copyright notice and this permission notice shall be included

21

# in all copies or substantial portions of the Software.

22

#

23

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

24

# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

25

# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL

26

# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

27

# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

28

# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

29

# DEALINGS IN THE SOFTWARE.

30

#

31

# This script locates the Nvidia Compute Unified Driver Architecture (CUDA)

32

# tools. It should work on linux, windows, and mac and should be reasonably

33

# up to date with cuda releases.

34

#

35

# This script makes use of the standard find_package arguments of <VERSION>,

36

# REQUIRED and QUIET. CUDA_FOUND will report if an acceptable version of CUDA

37

# was found.

38

#

39

# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if the

40

# prefix cannot be determined by the location of nvcc in the system path. To

41

# use a different installed version of the toolkit set the environment variable

42

# CUDA_BIN_PATH before running cmake (e.g. CUDA_BIN_PATH=/usr/local/cuda1.0

43

# instead of the default /usr/local/cuda).

44

#

45

# Set CUDA_BUILD_EMULATION to ON for Emulation mode. Defaults to OFF (device

46

# mode).

47

# _DEVICEEMU is defined when CUDA_BUILD_EMULATION is TRUE.

48

#

49

# Set CUDA_HOST_COMPILATION_CPP to OFF for C compilation of host code.

50

# Default TRUE.

51

#

52

# Set CUDA_BUILD_CUBIN to "ON" or "OFF" to enable an extra compilation pass

53

# with the -cubin option in Device mode. The output is parsed and register,

54

# shared memory usage is printed during build. Default ON.

55

#

56

# Set CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE to ON if you want the custom build

57

# rule to be attached to the source file in Visual Studio. Defaults to ON.

58

# Turn OFF if you add the same cuda file to multiple targets.

59

#

60

# This allows the user to build the target from the CUDA file, however bad

61

# things can happen if the CUDA source file is added to multiple targets. When

62

# performing parallel builds it is possible for the custom build command to be

63

# run more than once and in parallel causing cryptic build errors. This is

64

# because VS runs the rules for every source file in the target, and a source

65

# can have only one rule no matter how many projects it is added to. Therefore,

66

# the rule assigned to the source file really only applies to one target you get

67

# clashes when it is run from multiple targets. Eventually everything will get

68

# built, but if the user is unaware of this behavior, there may be confusion.

69

# It would be nice if we could detect the reuse of source files across multiple

70

# targets and turn the option off for the user, but no good solution could be

71

# found.

72

#

73

# Set CUDA_64_BIT_DEVICE_CODE to ON to compile for 64 bit devices. Defaults to

74

# match host bit size. Note that making this different than the host code when

75

# generating C files from CUDA code just won't work, because size_t gets defined

76

# by nvcc in the generated source. If you compile to PTX and then load the file

77

# yourself, you can mix bit sizes between device and host.

78

#

79

# Set CUDA_VERBOSE_BUILD to ON to see all the commands used when building the

80

# CUDA file. When using a Makefile generator the value defaults to VERBOSE (run

81

# make VERBOSE=1 to see output). You can override this by setting

82

# CUDA_VERBOSE_BUILD to ON.

83

#

84

# Set CUDA_GENERATED_OUTPUT_DIR to the path you wish to have the generated files

85

# placed. If it is blank output files will be placed in

86

# CMAKE_CURRENT_BINARY_DIR. Intermediate files will always be placed in

87

# CMAKE_CURRENT_BINARY_DIR.

88

#

89

# The script creates the following macros:

90

# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... )

91

# -- Sets the directories that should be passed to nvcc

92

# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu

93

# files.

94

#

95

# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... [OPTIONS ...] )

96

# -- Creates a shared library "cuda_target" which contains all of the source

97

# (*.c, *.cc, etc.) specified and all of the nvcc'ed .cu files specified.

98

# All of the specified source files and generated .cpp files are compiled

99

# using the standard CMake compiler, so the normal INCLUDE_DIRECTORIES,

100

# LINK_DIRECTORIES, and TARGET_LINK_LIBRARIES can be used to affect their

101

# build and link. In addition CUDA_INCLUDE_DIRS is automatically added

102

# to include_directories().

103

#

104

# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... [OPTIONS ...] )

105

# -- Same as CUDA_ADD_LIBRARY except that an executable is created.

106

#

107

# CUDA_COMPILE( generated_files file0 file1 ... [OPTIONS ...] )

108

# -- Returns a list of generated files from the input source files to be used

109

# with ADD_LIBRARY or ADD_EXECUTABLE.

110

#

111

# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] )

112

# -- Returns a list of PTX files generated from the input source files.

113

#

114

# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ...

115

# [OPTIONS ...] )

116

# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE,

117

# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this function

118

# under the hood.

119

#

120

# Given the list of files (file0 file1 ... fileN) this macro generates custom

121

# commands that generate either PTX or linkable objects (use "PTX" or "OBJ"

122

# for the format argument to switch). Files that don't end with .cu or have

123

# the HEADER_FILE_ONLY property are ignored.

124

#

125

# The arguments passed in after OPTIONS are extra command line options to

126

# give to NVCC. You can also specify per configuration options by specifying

127

# the name of the configuration followed by the options. General options

128

# must precede configuration specific options. Not all configurations need

129

# to be specified, only the ones provided will be used.

130

#

131

# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag"

132

# DEBUG -g

133

# RELEASE --use_fast_math

134

# RELWITHDEBINFO --use_fast_math;-g

135

# MINSIZEREL --use_fast_math

136

#

137

# For certain configurations (namely VS generating object files with

138

# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will

139

# be produced for the given cuda file. This is because when you add the cuda

140

# file to Visual Studio it knows that this file produces and will link in the

141

# resulting object file automatically.

142

#

143

# This script will also generate a separate cmake script that is used at

144

# build time to invoke nvcc. This is for several reasons.

145

#

146

# 1. nvcc can return negative numbers as return values which confuses

147

# Visual Studio into thinking that the command succeeded. The script now

148

# checks the error codes and produces errors when there was a problem

149

#

150

# 2. nvcc has been known to not delete intermediate results when it

151

# encounters problems. The build rules then don't complete, because there

152

# exists a partially written output file. The script now deletes the

153

# output files if there was an error.

154

#

155

# 3. By putting all the options that affect the build into a file and then

156

# make the build rule dependent on the file, when the options change the

157

# output files will be regenerated.

158

#

159

# CUDA_ADD_CUFFT_TO_TARGET( cuda_target )

160

# -- Adds the cufft library to the target. Handles whether you are in emulation

161

# mode or not.

162

#

163

# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target )

164

# -- Adds the cublas library to the target. Handles whether you are in emulation

165

# mode or not.

166

#

167

# CUDA_BUILD_CLEAN_TARGET()

168

# -- Creates a convenience target that deletes all the dependency files generated.

169

# You should make clean after running this target to ensure the dependency

170

# files get regenerated.

171

#

172

# The script defines the following variables:

173

#

174

# ( Note CUDA_ADD_* macros setup cuda/cut library dependencies automatically.

175

# These variables are only needed if a cuda API call must be made from code in

176

# an outside library or executable. )

177

#

178

# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc.

179

# CUDA_VERSION_MINOR -- The minor version.

180

# CUDA_VERSION

181

# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR

182

#

183

# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically

184

# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY.

185

# CUDA_LIBRARIES -- Cuda RT library.

186

# CUDA_CUT_INCLUDE_DIR -- Include directory for cuda SDK headers (cutil.h).

187

# CUDA_CUT_LIBRARIES -- SDK libraries.

188

# CUDA_NVCC_FLAGS -- Additional NVCC command line arguments. NOTE:

189

# multiple arguments must be semi-colon delimited

190

# e.g. --compiler-options;-Wall

191

# CUDA_NVCC_FLAGS_<CONFIG> -- Configuration specific flags for NVCC.

192

# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT

193

# implementation (alternative to:

194

# CUDA_ADD_CUFFT_TO_TARGET macro)

195

# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS

196

# implementation (alternative to:

197

# CUDA_ADD_CUBLAS_TO_TARGET macro).

198

#

199

#

200

# The script now builds object files instead of generating C files. In order to

201

# facilitate this, the script now makes use of the CMAKE_{C,CXX}_FLAGS along

202

# with their configuration dependent counterparts (i.e. CMAKE_C_FLAGS_DEBUG).

203

# These flags are passed through nvcc to the native compiler. In addition, on

204

# some systems special flags are added for building objects intended for shared

205

# libraries. FindCUDA make use of the CMake variable BUILD_SHARED_LIBS to

206

# determine if these flags should be used. Please set this variable according

207

# to how the objects are to be used before calling CUDA_ADD_LIBRARY. A

208

# preprocessor macro, <target_name>_EXPORTS is defined when BUILD_SHARED_LIBS is

209

# defined. In addition, flags passed into add_definitions with -D or /D are

210

# passed along to nvcc.

211

#

212

# Files with the HEADER_FILE_ONLY property set will not be compiled.

213

#

214

# It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain platforms,

215

# or to use a cuda runtime not installed in the default location. In newer

216

# versions of the toolkit the cuda library is included with the graphics

217

# driver- be sure that the driver version matches what is needed by the cuda

218

# runtime version.

219

#

220

# -- Abe Stephens SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html

221

# -- James Bigler NVIDIA Corp

222

###############################################################################

223

224

# FindCUDA.cmake

225

226

# We need at least CMake 2.6.2 to support the VERSION_LESS argument to 'if'.
# The "...3.5" upper bound keeps this request acceptable to CMake 4.x, which
# rejects policy versions older than 3.5; CMake releases older than 3.12 ignore
# the "...<max>" part and behave exactly as before.  The PUSH/POP pair keeps
# the policy settings made here from leaking into the including project.
cmake_policy(PUSH)
cmake_minimum_required(VERSION 2.6.2...3.5)
cmake_policy(POP)

230

231

# Locate a helper file that ships next to the list file currently being
# processed and record its full path in the cache variable CUDA_<_name>.
#
#   _name      -- base name of the helper file (also the cache variable suffix)
#   _extension -- file extension, without the leading dot
#
# When the file is missing, the failure is fatal if CUDA_FIND_REQUIRED is set,
# otherwise a status message is printed unless CUDA_FIND_QUIETLY is set.
# Being a macro, this also sets _full_name, error_message and
# CMAKE_CURRENT_LIST_DIR in the caller's scope.
macro(CUDA_FIND_HELPER_FILE _name _extension)
  set(_full_name "${_name}.${_extension}")

  # Derive the directory of the list file being processed from its full path;
  # the helper is searched for there and nowhere else.
  get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
  find_file(CUDA_${_name} ${_full_name} PATHS ${CMAKE_CURRENT_LIST_DIR} NO_DEFAULT_PATH)

  if(NOT CUDA_${_name})
    set(error_message "${_full_name} not found in CMAKE_MODULE_PATH")
    if(CUDA_FIND_REQUIRED)
      message(FATAL_ERROR "${error_message}")
    elseif(NOT CUDA_FIND_QUIETLY)
      message(STATUS "${error_message}")
    endif()
  endif()

  # Re-store the result as INTERNAL so it does not show up in the cache editor.
  set(CUDA_${_name} ${CUDA_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE)
endmacro()

252

253

#####################################################################

254

## CUDA_INCLUDE_NVCC_DEPENDENCIES

255

##

256

257

# So we want to try and include the dependency file if it exists. If

258

# it doesn't exist then we need to create an empty one, so we can

259

# include it.

260

261

# If it does exist, then we need to check to see if all the files it

262

# depends on exist. If they don't then we should clear the dependency

263

# file and regenerate it later. This covers the case where a header

264

# file has disappeared or moved.

265

266

# Include the given dependency file and decide whether it must be regenerated.
#
#   dependency_file -- path of the generated dependency file to include
#
# Results (set in the caller's scope, since this is a macro):
#   CUDA_NVCC_DEPEND            -- whatever the included file assigned to it
#   CUDA_NVCC_DEPEND_REGENERATE -- TRUE when CUDA_NVCC_DEPEND is empty or names
#                                  at least one file that no longer exists
# When regeneration is needed the dependency file is reset to a stub.
macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file)
  # Start from a clean slate so a previous call cannot influence this one.
  set(CUDA_NVCC_DEPEND)
  set(CUDA_NVCC_DEPEND_REGENERATE FALSE)

  # Make sure there is something to include.  The include() itself must stay:
  # it registers the file as a CMake input, so CMake reruns and picks up the
  # new information whenever the file changes.
  if(NOT EXISTS ${dependency_file})
    file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n")
  endif()
  include(${dependency_file})

  # An empty list means nothing has been generated yet; a listed file that has
  # vanished (e.g. a header was moved or deleted) makes the list stale.
  if(NOT CUDA_NVCC_DEPEND)
    set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
  else()
    foreach(_cuda_dep ${CUDA_NVCC_DEPEND})
      if(NOT EXISTS ${_cuda_dep})
        set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
      endif()
    endforeach()
  endif()

  # Reset the file to the stub so the rule depending on it is re-run.
  if(CUDA_NVCC_DEPEND_REGENERATE)
    file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n")
  endif()
endmacro()

315

316

###############################################################################

317

###############################################################################

318

# Setup default variables

319

###############################################################################

320

###############################################################################

321

322

#DS

323

# Measure the size of a pointer; the result is stored in CMAKE_SIZEOF_VOID_P
# and drives the 32/64 bit defaults below.
include(CheckTypeSize)
check_type_size("void*" CMAKE_SIZEOF_VOID_P)

# Emulation versus device mode.
option(CUDA_BUILD_EMULATION "Build in Emulation mode" OFF)

# Host compilation mode (C++ when ON).
option(CUDA_HOST_COMPILATION_CPP "Generated file extension" ON)

# Device code bit size: defaults to 64 bit exactly when pointers are 8 bytes.
set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON)
endif()
option(CUDA_64_BIT_DEVICE_CODE "Compile device code in 64 bit mode" ${CUDA_64_BIT_DEVICE_CODE_DEFAULT})

# Extra .cubin pass that prints information about the cuda file while building.
option(CUDA_BUILD_CUBIN "Generate and parse .cubin files in Device mode." ON)

# Additional flags handed to nvcc by the user.
set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.")

# Whether the custom build rule is attached to the source file in VS.
option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE "Attach the build rule to the CUDA source file. Enable only when the CUDA source file is added to at most one target." ON)

# Whether the commands used to compile the .cu file are echoed.
option(CUDA_VERBOSE_BUILD "Print out the commands run while compiling the CUDA source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF)

# Destination of the generated output.
set(CUDA_GENERATED_OUTPUT_DIR "" CACHE PATH "Directory to put all the output files. If blank it will default to the CMAKE_CURRENT_BINARY_DIR")

# Hide all of the settings above from the default cache view.
mark_as_advanced(
  CUDA_HOST_COMPILATION_CPP
  CUDA_64_BIT_DEVICE_CODE
  CUDA_NVCC_FLAGS
  CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
  CUDA_GENERATED_OUTPUT_DIR
  CUDA_BUILD_CUBIN
  CUDA_BUILD_EMULATION
  CUDA_VERBOSE_BUILD
  )

359

#EDS

360

361

# Create one CUDA_NVCC_FLAGS_<CONFIG> cache entry per build configuration.
# Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so
# CMAKE_BUILD_TYPE is added as well, followed by the standard set of four build
# types (Debug, MinSizeRel, Release and RelWithDebInfo) for completeness.  The
# loop is needed to accommodate extra configuration types; duplicate entries
# are dropped by REMOVE_DUPLICATES.
set(CUDA_configuration_types
  ${CMAKE_CONFIGURATION_TYPES}
  ${CMAKE_BUILD_TYPE}
  Debug
  MinSizeRel
  Release
  RelWithDebInfo
  )
list(REMOVE_DUPLICATES CUDA_configuration_types)

foreach(config ${CUDA_configuration_types})
  # The cache variable suffix is the upper-cased configuration name.
  string(TOUPPER ${config} config_upper)
  set(CUDA_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semi-colon delimit multiple arguments.")
  mark_as_advanced(CUDA_NVCC_FLAGS_${config_upper})
endforeach()

374

375

###############################################################################

376

###############################################################################

377

# Locate CUDA, Set Build Type, etc.

378

###############################################################################

379

###############################################################################

380

381

# Check to see if CUDA_TOOLKIT_ROOT_DIR or CUDA_SDK_ROOT_DIR changed since the
# last run (their previous values are kept in the *_INTERNAL cache entries).
# If so, clear every cache variable derived from them so that they are
# detected again further down.
if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}")
  unset(CUDA_NVCC_EXECUTABLE CACHE)
  # The version is cached as three separate entries.  All of them have to be
  # cleared: they are written back without FORCE, so a leftover MAJOR/MINOR
  # entry would survive the re-detection and keep describing the old toolkit.
  unset(CUDA_VERSION CACHE)
  unset(CUDA_VERSION_MAJOR CACHE)
  unset(CUDA_VERSION_MINOR CACHE)
  unset(CUDA_TOOLKIT_INCLUDE CACHE)
  unset(CUDA_CUDART_LIBRARY CACHE)
  unset(CUDA_CUDA_LIBRARY CACHE)
  unset(CUDA_cublas_LIBRARY CACHE)
  unset(CUDA_cublasemu_LIBRARY CACHE)
  unset(CUDA_cufft_LIBRARY CACHE)
  unset(CUDA_cufftemu_LIBRARY CACHE)
  unset(CUDA_npp_LIBRARY CACHE)
  unset(CUDA_nppemu_LIBRARY CACHE)
endif()

if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}")
  unset(CUDA_CUT_INCLUDE_DIR CACHE)
  unset(CUDA_CUT_LIBRARY CACHE)
endif()

401

402

# Locate the CUDA toolkit unless CUDA_TOOLKIT_ROOT_DIR has already been given.
if(NOT CUDA_TOOLKIT_ROOT_DIR)

  # The CUDA_BIN_PATH environment variable takes precedence; system paths are
  # not consulted in this first pass.
  find_path(CUDA_TOOLKIT_ROOT_DIR
    NAMES nvcc nvcc.exe
    PATHS ENV CUDA_BIN_PATH
    DOC "Toolkit location."
    NO_DEFAULT_PATH
    )

  # Second pass: the default search paths plus a few well-known locations.
  find_path(CUDA_TOOLKIT_ROOT_DIR
    NAMES nvcc nvcc.exe
    PATHS
      /usr/local/bin
      /usr/local/cuda/bin
      /opt/cuda/bin
    DOC "Toolkit location."
    )

  if(CUDA_TOOLKIT_ROOT_DIR)
    # find_path returned the directory containing nvcc; drop a trailing "bin"
    # component to obtain the toolkit root, then force it back into the cache.
    string(REGEX REPLACE "[/\\\\]?bin[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR})
    set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE)
  endif()

  # Report a missing toolkit according to the REQUIRED / QUIET arguments.
  if(NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR})
    if(CUDA_FIND_REQUIRED)
      message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR")
    elseif(NOT CUDA_FIND_QUIETLY)
      message("CUDA_TOOLKIT_ROOT_DIR not found or specified")
    endif()
  endif()

endif()

434

435

# CUDA_NVCC_EXECUTABLE
# Look in the toolkit's bin directory and CUDA_BIN_PATH first ...
find_program(CUDA_NVCC_EXECUTABLE
  NAMES nvcc
  PATHS "${CUDA_TOOLKIT_ROOT_DIR}/bin"
        ENV CUDA_BIN_PATH
  NO_DEFAULT_PATH
  )
# ... and only then fall back to the default search paths.
find_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)

# Compute the toolkit version from "nvcc --version" unless it is already cached.
# Sets the cache entries CUDA_VERSION, CUDA_VERSION_MAJOR and CUDA_VERSION_MINOR.
if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION)
  # execute_process replaces the deprecated exec_program.  Naming the same
  # variable for both pipes merges stdout and stderr, as exec_program did.
  execute_process(
    COMMAND "${CUDA_NVCC_EXECUTABLE}" --version
    OUTPUT_VARIABLE NVCC_OUT
    ERROR_VARIABLE NVCC_OUT
    )
  # Only trust the output if it really contains "release <major>.<minor>";
  # otherwise the REGEX REPLACE calls would hand back the whole text (or fail
  # outright on empty output) and cache garbage as the version.
  if(NVCC_OUT MATCHES "release [0-9]+\\.[0-9]+")
    string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
    string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
    set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
    set(CUDA_VERSION_MAJOR "${CUDA_VERSION_MAJOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
    set(CUDA_VERSION_MINOR "${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
    mark_as_advanced(CUDA_VERSION)
  elseif(NOT CUDA_FIND_QUIETLY)
    message(STATUS "Could not determine the CUDA version from the output of ${CUDA_NVCC_EXECUTABLE} --version")
  endif()
endif()

456

457

# Convenience alias that always mirrors CUDA_VERSION.
set(CUDA_VERSION_STRING "${CUDA_VERSION}")

# Decide whether the version that was found satisfies the request.  It is
# considered acceptable unless find_package() was given a version: with EXACT
# the versions must be equal, otherwise the found version must not be older
# than the requested one.
set(_cuda_version_acceptable TRUE)
if(CUDA_FIND_VERSION_EXACT AND NOT CUDA_VERSION VERSION_EQUAL CUDA_FIND_VERSION)
  set(_cuda_version_acceptable FALSE)
elseif(CUDA_FIND_VERSION AND CUDA_VERSION VERSION_LESS CUDA_FIND_VERSION)
  set(_cuda_version_acceptable FALSE)
endif()

# Tell the user about a mismatch; only an optional, quiet search stays silent.
if(NOT _cuda_version_acceptable)
  set(_cuda_error_message "Requested CUDA version ${CUDA_FIND_VERSION}, but found unacceptable version ${CUDA_VERSION}")
  if(CUDA_FIND_REQUIRED OR NOT CUDA_FIND_QUIETLY)
    message("${_cuda_error_message}")
  endif()
endif()

482

483

# CUDA_TOOLKIT_INCLUDE
# device_functions.h is a header included in the toolkit, so the directory
# holding it is the toolkit include directory.  The toolkit root and the
# CUDA_INC_PATH environment variable are tried before the default paths.
find_path(CUDA_TOOLKIT_INCLUDE
  device_functions.h
  PATHS "${CUDA_TOOLKIT_ROOT_DIR}/include"
        ENV CUDA_INC_PATH
  NO_DEFAULT_PATH
  )
find_path(CUDA_TOOLKIT_INCLUDE device_functions.h)
mark_as_advanced(CUDA_TOOLKIT_INCLUDE)

# Initialize the user supplied include arguments to nothing and publish the
# toolkit include directory.
set(CUDA_NVCC_INCLUDE_ARGS_USER "")
set(CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})

497

498

# Find a library, preferring the copy that belongs to the selected toolkit.
#
#   _result   -- name of the cache variable receiving the library path
#   _libnames -- library name(s) to look for
#   _docstr   -- documentation string for the cache entry
#
# The first pass is restricted to ${CUDA_TOOLKIT_ROOT_DIR}/lib and the
# CUDA_LIB_PATH environment variable; the default search paths are consulted
# only if that pass found nothing.
macro(FIND_LIBRARY_LOCAL_FIRST _result _libnames _docstr)
  find_library(${_result}
    NAMES ${_libnames}
    PATHS "${CUDA_TOOLKIT_ROOT_DIR}/lib"
          ENV CUDA_LIB_PATH
    DOC ${_docstr}
    NO_DEFAULT_PATH
    )
  find_library(${_result} NAMES ${_libnames} DOC ${_docstr})
endmacro()

509

510

# CUDA_LIBRARIES
# The runtime library always goes on the link line.
find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library")
set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})

# The 1.1 toolkit on linux doesn't appear to have a separate "cuda" library on
# some platforms, so it is appended to the link line only when it was found.
find_library_local_first(CUDA_CUDA_LIBRARY cuda "\"cuda\" library (older versions only).")
if(CUDA_CUDA_LIBRARY)
  list(APPEND CUDA_LIBRARIES ${CUDA_CUDA_LIBRARY})
endif()

mark_as_advanced(CUDA_CUDA_LIBRARY CUDA_CUDART_LIBRARY)

527

528

#######################

529

# Look up one of the toolkit helper libraries.
#
#   _lib -- library base name; the result is stored in the advanced cache
#           variable CUDA_<_lib>_LIBRARY
macro(FIND_CUDA_HELPER_LIBS _lib)
  find_library_local_first(CUDA_${_lib}_LIBRARY ${_lib} "\"${_lib}\" library")
  mark_as_advanced(CUDA_${_lib}_LIBRARY)
endmacro()

534

535

# Search for the cufft, cublas and npp libraries, in both their device and
# (where searched for) emulation flavours.
find_cuda_helper_libs(cufftemu)
find_cuda_helper_libs(cublasemu)
find_cuda_helper_libs(cufft)
find_cuda_helper_libs(cublas)
find_cuda_helper_libs(npp)

# Publish the flavour that matches the build mode.
if(CUDA_BUILD_EMULATION)
  set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY})
  # Previously missing: without this line CUDA_CUBLAS_LIBRARIES stayed unset in
  # emulation mode although the cublasemu library is searched for above.
  set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY})
  # NOTE(review): CUDA_nppemu_LIBRARY is not searched for in this section, so
  # this is empty unless the variable is provided elsewhere -- confirm intent.
  set(CUDA_NPP_LIBRARIES ${CUDA_nppemu_LIBRARY})
else()
  set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY})
  set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY})
  set(CUDA_NPP_LIBRARIES ${CUDA_npp_LIBRARY})
endif()

550

551

########################

552

# Look for the SDK stuff

553

554

#SDS

555

# Locate the SDK by looking for one of its headers: cutil.h for toolkits older
# than version 5, helper_cuda.h (searched below the toolkit's samples
# directory) otherwise.
if(CUDA_VERSION_MAJOR LESS 5)
  find_path(CUDA_SDK_ROOT_DIR cutil.h
    PATHS
      "$ENV{NVSDKCUDA_ROOT}"
      "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]"
      "/Developer/CUDA"
      "/usr/local/cuda/sdk"
      "/opt/cuda/sdk"
      "/usr/local/cuda/"
      "/opt/cuda/"
    PATH_SUFFIXES
      common/inc
      C/common/inc
    )
else()
  find_path(CUDA_SDK_ROOT_DIR helper_cuda.h
    PATHS
      "${CUDA_TOOLKIT_ROOT_DIR}/samples"
    PATH_SUFFIXES
      common/inc
    )
endif()

if(CUDA_SDK_ROOT_DIR)
  # find_path returned the header directory; strip the trailing
  # "[C/]common/inc" to get the SDK root and force it back into the cache.
  string(REGEX REPLACE "[/\\\\]?(C[/\\\\])?common[/\\\\]inc[/\\\\]?$" "" CUDA_SDK_ROOT_DIR ${CUDA_SDK_ROOT_DIR})
  set(CUDA_SDK_ROOT_DIR ${CUDA_SDK_ROOT_DIR} CACHE PATH "CUDA SDK location." FORCE)
endif()

583

584

#EDS

585

586

# Directories searched for SDK components.  CUDA_SDK_ROOT_DIR is kept first so
# that it can override everything else, including the environment variables.
set(CUDA_SDK_SEARCH_PATH
  "${CUDA_SDK_ROOT_DIR}"
  "${CUDA_TOOLKIT_ROOT_DIR}"
  "${CUDA_TOOLKIT_ROOT_DIR}/sdk"
  "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK"
  "${CUDA_TOOLKIT_ROOT_DIR}/samples"
  "$ENV{HOME}/NVIDIA_CUDA_SDK"
  "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX"
  "/Developer/CUDA"
  "/usr/local/cuda/sdk"
  "/opt/cuda/sdk"
  "/usr/local/cuda/"
  "/opt/cuda/"
  "/opt/cuda/samples/"
  )

# CUDA_CUT_INCLUDE_DIR
# The SDK header to look for depends on the toolkit version: cutil.h before
# version 5, helper_cuda.h from version 5 on.
if(CUDA_VERSION_MAJOR LESS 5)
  set(_cuda_cut_header cutil.h)
  find_path(CUDA_CUT_INCLUDE_DIR
    ${_cuda_cut_header}
    PATHS ${CUDA_SDK_SEARCH_PATH}
    PATH_SUFFIXES "common/inc" "C/common/inc"
    DOC "Location of ${_cuda_cut_header}"
    NO_DEFAULT_PATH
    )
else()
  set(_cuda_cut_header helper_cuda.h)
  find_path(CUDA_CUT_INCLUDE_DIR
    ${_cuda_cut_header}
    PATHS ${CUDA_SDK_SEARCH_PATH}
    PATH_SUFFIXES "common/inc"
    DOC "Location of ${_cuda_cut_header}"
    NO_DEFAULT_PATH
    )
endif()

# Now search the system paths, for the same header as above.  (The fallback
# used to look for cutil.h unconditionally, even for toolkits >= 5.)
find_path(CUDA_CUT_INCLUDE_DIR ${_cuda_cut_header} DOC "Location of ${_cuda_cut_header}")

mark_as_advanced(CUDA_CUT_INCLUDE_DIR)

632

633

634

# CUDA_CUT_LIBRARIES

# Besides the plain name "cutil", also accept the names that encode the word
# size of the build (e.g. the library is called cutil64 for 64 bit builds on
# windows), chosen from the pointer size.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(cuda_cutil_name cutil64 cutil_x86_64)
else()
  set(cuda_cutil_name cutil32 cutil_i386)
endif()

# Search the SDK locations first.  The new version of the sdk shows up in
# common/lib, but the old one is in lib.  ("cutil" used to be listed twice in
# NAMES; the redundant second entry has been dropped.)
find_library(CUDA_CUT_LIBRARY
  NAMES cutil ${cuda_cutil_name}
  PATHS ${CUDA_SDK_SEARCH_PATH}
  PATH_SUFFIXES "C/lib" "common/lib" "lib"
  DOC "Location of cutil library"
  NO_DEFAULT_PATH
  )
# Now search system paths.
find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library")
mark_as_advanced(CUDA_CUT_LIBRARY)
set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY})

659

660

#SDS

661

# Locate the CUDPP installation by looking for cudpp.h below the SDK locations.
# The keyword has to be PATH_SUFFIXES: the previous "SUFFIXES" is not a
# find_path keyword, so it and the entries after it were taken as additional
# search directories and the sub-directories were never tried.
find_path(CUDPP_ROOT_DIR cudpp.h
  PATHS
    "${CUDA_SDK_ROOT_DIR}"
    ${CUDA_SDK_SEARCH_PATH}
  PATH_SUFFIXES
    "include"
    "cudpp/include"
    "C/common/inc/cudpp"
  )

if(CUDPP_ROOT_DIR)
  # find_path returned the header directory; strip whichever of the suffixes
  # above matched to get back to the root directory.
  string(REGEX REPLACE "[/\\\\]?(C[/\\\\])?common[/\\\\]inc[/\\\\]cudpp[/\\\\]?$" "" CUDPP_ROOT_DIR "${CUDPP_ROOT_DIR}")
  string(REGEX REPLACE "[/\\\\]?(cudpp[/\\\\])?include[/\\\\]?$" "" CUDPP_ROOT_DIR "${CUDPP_ROOT_DIR}")
  # We need to force this back into the cache.
  set(CUDPP_ROOT_DIR ${CUDPP_ROOT_DIR} CACHE PATH "CUDPP location." FORCE)
endif()

677

678

#EDS

679

680

# Directory containing cudpp.h.
find_path(CUDPP_INCLUDE_DIR cudpp.h
  PATHS
    ${CUDPP_ROOT_DIR}
    ${CUDA_SDK_SEARCH_PATH}
  PATH_SUFFIXES
    "include"
    "cudpp/include"
    "C/common/inc/cudpp"
  )

# The word-size specific library name is tried before the plain "cudpp".
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(cuda_cudpp_name cudpp64)
else()
  set(cuda_cudpp_name cudpp32)
endif()

find_library(CUDPP_LIBRARY
  NAMES ${cuda_cudpp_name} cudpp
  PATHS
    ${CUDPP_ROOT_DIR}
    ${CUDA_SDK_SEARCH_PATH}
  PATH_SUFFIXES
    "lib"
    "cudpp/lib"
    "C/common/lib/linux"
    "C/common/lib"
  )

# CUDPP counts as found once both the library and the header are available;
# the link directory is the directory that holds the library.
if(CUDPP_LIBRARY AND CUDPP_INCLUDE_DIR)
  set(CUDPP_FOUND TRUE)
  get_filename_component(CUDPP_LINK_DIRECTORIES ${CUDPP_LIBRARY} PATH)
endif()

mark_as_advanced(
  CUDPP_INCLUDE_DIR
  CUDPP_LIBRARY
  CUDPP_LINK_DIRECTORIES
  )

721

722

#EDS

723

724

#############################

725

# Check for required components.
set(CUDA_FOUND TRUE)

# Remember the root directories used for this run in internal cache entries.
set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL
  "${CUDA_TOOLKIT_ROOT_DIR}"
  CACHE INTERNAL
  "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully."
  FORCE
  )
set(CUDA_SDK_ROOT_DIR_INTERNAL
  "${CUDA_SDK_ROOT_DIR}"
  CACHE INTERNAL
  "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully."
  FORCE
  )

# Hand the list of variables that must all be set to the standard handler.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CUDA DEFAULT_MSG
  CUDA_TOOLKIT_ROOT_DIR
  CUDA_NVCC_EXECUTABLE
  CUDA_INCLUDE_DIRS
  CUDA_CUDART_LIBRARY
  _cuda_version_acceptable
  )

741

742

743

744

###############################################################################

745

###############################################################################

746

# Macros

747

###############################################################################

748

###############################################################################

749

750

###############################################################################

751

# CUDA_INCLUDE_DIRECTORIES(dir1 [dir2 ...])
#
# Record additional include directories for the nvcc command.  Each argument
# is appended to CUDA_NVCC_INCLUDE_ARGS_USER in the form "-I<dir>".  Being a
# macro, the list is modified in the scope of the caller.
macro(CUDA_INCLUDE_DIRECTORIES)
  # ${ARGN} must be expanded textually here; inside a macro it is not a real
  # variable.
  foreach(dir ${ARGN})
    list(APPEND CUDA_NVCC_INCLUDE_ARGS_USER "-I${dir}")
  endforeach()
endmacro()

757

758

759

##############################################################################

760

# Look up the helper scripts parse_cubin.cmake, make2cmake.cmake and
# run_nvcc.cmake, in that order.
# NOTE(review): cuda_find_helper_file() is defined outside this section;
# CUDA_WRAP_SRCS reads CUDA_run_nvcc, presumably set by the last lookup.
foreach(_cuda_helper_script parse_cubin make2cmake run_nvcc)
  cuda_find_helper_file(${_cuda_helper_script} cmake)
endforeach()

763

764

##############################################################################

765

# CUDA_GET_SOURCES_AND_OPTIONS(<sources_var> <options_var> args...)
#
# Split an argument list at the keyword OPTIONS.  Everything before the first
# OPTIONS is stored in <sources_var>, everything after it in <options_var>;
# the keyword itself (and any repetition of it) is discarded.  Both output
# variables are cleared first and are set in the scope of the caller.
macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _options)
  set( ${_sources} )
  set( ${_options} )
  set( _found_options FALSE )
  foreach(arg ${ARGN})
    if(arg STREQUAL "OPTIONS")
      # From here on every argument is an option.
      set( _found_options TRUE )
    elseif( _found_options )
      list(APPEND ${_options} "${arg}")
    else()
      # Nothing seen yet that says otherwise: treat it as a file.
      list(APPEND ${_sources} "${arg}")
    endif()
  endforeach()
endmacro()

784

785

##############################################################################

786

# CUDA_PARSE_NVCC_OPTIONS(<option_prefix> args...)
#
# Distribute nvcc options over per-configuration lists.  An argument equal to
# the upper-cased name of one of CUDA_configuration_types (e.g. DEBUG) is a
# keyword: it is consumed and switches the destination to
# <option_prefix>_<CONFIG>.  Every other argument is appended to the current
# destination, which is <option_prefix> until the first keyword is seen.
# All variables are set in the scope of the caller.
macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix)
  set( _found_config )
  foreach(arg ${ARGN})
    # Determine if we are dealing with a per-configuration keyword.
    set( _cuda_arg_is_config FALSE )
    foreach(config ${CUDA_configuration_types})
      string(TOUPPER "${config}" config_upper)
      # The "x" prefix keeps if() from treating either side as a variable
      # name to dereference.
      if ("x${arg}" STREQUAL "x${config_upper}")
        set( _found_config "_${arg}" )
        set( _cuda_arg_is_config TRUE )
      endif()
    endforeach()

    # Use an explicit flag instead of testing the truthiness of the argument:
    # values such as 0, OFF or N are legitimate flag values and must not be
    # dropped.
    if ( NOT _cuda_arg_is_config )
      list(APPEND ${_option_prefix}${_found_config} "${arg}")
    endif()
  endforeach()
endmacro()

806

807

##############################################################################

808

# Helper to add the include directory for CUDA only once.
#
# Reads the INCLUDE_DIRECTORIES property of the current directory and calls
# include_directories(${CUDA_INCLUDE_DIRS}) unless one of the entries already
# equals the value of CUDA_INCLUDE_DIRS.
function(CUDA_ADD_CUDA_INCLUDE_ONCE)
  get_directory_property(_current_includes INCLUDE_DIRECTORIES)

  set(_already_listed FALSE)
  if(_current_includes)
    foreach(_entry ${_current_includes})
      if("${_entry}" STREQUAL "${CUDA_INCLUDE_DIRS}")
        set(_already_listed TRUE)
        break()
      endif()
    endforeach()
  endif()

  if(NOT _already_listed)
    include_directories(${CUDA_INCLUDE_DIRS})
  endif()
endfunction()

823

824

##############################################################################

825

# CUDA_WRAP_SRCS(cuda_target format generated_files file1 ... [OPTIONS ...])
#
# For every .cu source that is not marked HEADER_FILE_ONLY this macro
# configures a copy of the run_nvcc script and attaches a custom command that
# runs it to produce either an object file or a PTX file, depending on the
# format parameter.  The nvcc invocations themselves happen at build time
# inside the configured script.
# INPUT:
#   cuda_target     - Target name
#   format          - PTX or OBJ
#   FILE1 .. FILEN  - The remaining arguments are the sources to be wrapped.
#   OPTIONS         - Extra options to NVCC
# OUTPUT:
#   generated_files - List of generated files
#
# NOTE(review): this is a macro, so every variable set below lands in the
# scope of the caller, and the run_nvcc template presumably reads several of
# them through @VAR@ substitution - do not rename them without checking the
# template.
##############################################################################
##############################################################################

macro(CUDA_WRAP_SRCS cuda_target format generated_files)

  # Translate the requested output format into a switch.
  if( ${format} MATCHES "PTX" )
    set( compile_to_ptx ON )
  elseif( ${format} MATCHES "OBJ")
    set( compile_to_ptx OFF )
  else()
    message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS: '${format}'. Use OBJ or PTX.")
  endif()

  # Set up all the command line flags here, so that they can be overridden on
  # a per target basis.
  set(nvcc_flags "")

  # Emulation if the card isn't present.  Device mode needs no extra flags.
  if(CUDA_BUILD_EMULATION)
    list(APPEND nvcc_flags --device-emulation -D_DEVICEEMU -g)
  endif()

  # Host language of the generated code.
  if(NOT CUDA_HOST_COMPILATION_CPP)
    list(APPEND nvcc_flags --host-compilation C)
    set(CUDA_C_OR_CXX C)
  else()
    set(CUDA_C_OR_CXX CXX)
  endif()

  set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION})

  # Bitness of the device code.
  if(CUDA_64_BIT_DEVICE_CODE)
    list(APPEND nvcc_flags -m64)
  else()
    list(APPEND nvcc_flags -m32)
  endif()

  # Two things have to be deferred to build time for Visual Studio:
  #  * the compiler binary directory, because VS fills in VCInstallDir itself
  #    (only passed for 64 bit Windows builds), and
  #  * the configuration name, because a VS project does not know at
  #    configure time whether it will be built as debug or release.
  # All other generators use CMAKE_BUILD_TYPE.
  if(CMAKE_GENERATOR MATCHES "Visual Studio")
    if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
      set(ccbin_flags -D "\"CCBIN:PATH=$(VCInstallDir)bin\"" )
    endif()
    set( CUDA_build_configuration "$(ConfigurationName)" )
  else()
    set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}")
  endif()

  # Include arguments: the user supplied ones first, then the CUDA system
  # directory, then everything from the directory's INCLUDE_DIRECTORIES.
  set(CUDA_NVCC_INCLUDE_ARGS ${CUDA_NVCC_INCLUDE_ARGS_USER} "-I${CUDA_INCLUDE_DIRS}")
  get_directory_property(CUDA_NVCC_INCLUDE_DIRECTORIES INCLUDE_DIRECTORIES)
  if(CUDA_NVCC_INCLUDE_DIRECTORIES)
    foreach(dir ${CUDA_NVCC_INCLUDE_DIRECTORIES})
      list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
    endforeach()
  endif()

  # Clear the per-call option lists before parsing the OPTIONS of this call.
  set(CUDA_WRAP_OPTION_NVCC_FLAGS)
  foreach(config ${CUDA_configuration_types})
    string(TOUPPER ${config} config_upper)
    set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper})
  endforeach()

  CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_options ${ARGN})
  CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options})

  # CUDA_HOST_FLAGS
  # Objects that go into shared libraries need the extra shared library
  # compile flags of the host language.
  if(BUILD_SHARED_LIBS)
    set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS})
  endif()
  set(CUDA_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${CUDA_C_OR_CXX}_FLAGS} ${CUDA_HOST_SHARED_FLAGS})")
  set(CUDA_NVCC_FLAGS_CONFIG "# Build specific configuration flags")
  # Emit one set() line per configuration type into both snippets.
  foreach(config ${CUDA_configuration_types})
    string(TOUPPER ${config} config_upper)
    # CMAKE_FLAGS are strings and not lists.  They are written unquoted into
    # the generated set() call, which turns them into lists (like we want).
    set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
    if(CMAKE_COMPILER_IS_GNUCC)
      # nvcc chokes on -g3, so replace it with -g
      string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${_cuda_C_FLAGS}")
    endif()
    set(CUDA_HOST_FLAGS "${CUDA_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})")
    # CUDA_NVCC_FLAGS_<CONFIG> is a list, hence the escaped quotes around the
    # value; drop them if it should ever become a plain string.
    set(CUDA_NVCC_FLAGS_CONFIG "${CUDA_NVCC_FLAGS_CONFIG}\nset(CUDA_NVCC_FLAGS_${config_upper} \"${CUDA_NVCC_FLAGS_${config_upper}};${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}}\")")
  endforeach()

  # Forward the COMPILE_DEFINITIONS of the directory as -D flags.
  get_directory_property(CUDA_NVCC_DEFINITIONS COMPILE_DEFINITIONS)
  if(CUDA_NVCC_DEFINITIONS)
    foreach(_definition ${CUDA_NVCC_DEFINITIONS})
      list(APPEND nvcc_flags "-D${_definition}")
    endforeach()
  endif()

  if(BUILD_SHARED_LIBS)
    list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS")
  endif()

  # Determine output directory
  if(NOT CUDA_GENERATED_OUTPUT_DIR)
    set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}")
  else()
    set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}")
  endif()

  # Reset the output variable
  set(_cuda_wrap_generated_files "")

  # Walk over all macro arguments and create a custom command for each .cu
  # file among them.
  foreach(file ${ARGN})
    # Ignore any file marked as a HEADER_FILE_ONLY
    get_source_file_property(_is_header ${file} HEADER_FILE_ONLY)
    if(${file} MATCHES ".*\\.cu$" AND NOT _is_header)

      # Name and location of the generated file ############################
      get_filename_component( basename ${file} NAME )
      if( compile_to_ptx )
        set(generated_file_path "${cuda_compile_output_dir}")
        set(generated_file_basename "${cuda_target}_generated_${basename}.ptx")
        set(format_flag "-ptx")
        file(MAKE_DIRECTORY "${cuda_compile_output_dir}")
      else()
        set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}")
        set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}")
        set(format_flag "-c")
      endif()

      # All auxiliary files live next to the generated file (local change:
      # the cu.o.* files were moved into the generated folder).
      # NOTE(review): generated_file_path contains ${CMAKE_CFG_INTDIR} in OBJ
      # mode; the original comment asked that such names be passed on the
      # command line so the value is expanded at build time - confirm this
      # works for multi-configuration generators.
      set(generated_file "${generated_file_path}/${generated_file_basename}")
      set(cmake_dependency_file "${generated_file_path}/${generated_file_basename}.depend")
      set(NVCC_generated_dependency_file "${generated_file_path}/${generated_file_basename}.NVCC-depend")
      set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt")
      set(custom_target_script "${generated_file_path}/${generated_file_basename}.cmake")

      # An object file is only to be linked, never compiled.
      if( NOT compile_to_ptx )
        set_source_files_properties("${generated_file}"
          PROPERTIES
            EXTERNAL_OBJECT true
          )
      endif()

      # Prefix CMAKE_CURRENT_SOURCE_DIR unless the path is already absolute.
      get_filename_component(file_path "${file}" PATH)
      if(IS_ABSOLUTE "${file_path}")
        set(source_file "${file}")
      else()
        set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
      endif()

      # Bring in the dependencies.  Creates a variable CUDA_NVCC_DEPEND #####
      cuda_include_nvcc_dependencies(${cmake_dependency_file})

      # Convenience string for output #######################################
      if(CUDA_BUILD_EMULATION)
        set(cuda_build_type "Emulation")
      else()
        set(cuda_build_type "Device")
      endif()

      # A cubin is requested only for device builds that produce objects.
      set(build_cubin OFF)
      if( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN AND NOT compile_to_ptx )
        set( build_cubin ON )
      endif()

      # Configure the build script
      configure_file("${CUDA_run_nvcc}" "${custom_target_script}" @ONLY)

      # Listing the same cuda file more than once can do bad things to the
      # dependencies, so attaching the rule to the source file is optional.
      if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE)
        set(main_dep MAIN_DEPENDENCY ${source_file})
      else()
        set(main_dep DEPENDS ${source_file})
      endif()

      # Verbosity: forced on, taken from make's VERBOSE, or off.
      if(CUDA_VERBOSE_BUILD)
        set(verbose_output ON)
      elseif(CMAKE_GENERATOR MATCHES "Makefiles")
        set(verbose_output "$(VERBOSE)")
      else()
        set(verbose_output OFF)
      endif()

      # Comment string shown while building
      file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
      if(compile_to_ptx)
        set(cuda_build_comment_string "Building NVCC ptx file ${generated_file_relative_path}")
      else()
        set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}")
      endif()

      # Build the generated file and dependency file ########################
      # The output depends on the source file, on the contents of the
      # dependency file and on the configured script itself.
      add_custom_command(
        OUTPUT ${generated_file}
        ${main_dep}
        DEPENDS ${CUDA_NVCC_DEPEND}
        DEPENDS ${custom_target_script}
        COMMAND ${CMAKE_COMMAND} ARGS
          -D verbose:BOOL=${verbose_output}
          ${ccbin_flags}
          -D build_configuration:STRING=${CUDA_build_configuration}
          -D "generated_file:STRING=${generated_file}"
          -D "generated_cubin_file:STRING=${generated_cubin_file}"
          -P "${custom_target_script}"
        COMMENT "${cuda_build_comment_string}"
        )

      # Make sure the build system knows the file is generated.
      set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE)

      # With Visual Studio and the build rule attached to the cuda file, VS
      # adds the object file to the linker on its own, so it must not be
      # listed again.  Visual Studio 8 is the exception: it crashes when the
      # solution is closed unless the object file is added.
      set(cuda_add_generated_file TRUE)
      if(NOT compile_to_ptx AND CMAKE_GENERATOR MATCHES "Visual Studio" AND CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE)
        if(NOT CMAKE_GENERATOR MATCHES "Visual Studio 8")
          set(cuda_add_generated_file FALSE)
        endif()
      endif()

      if(cuda_add_generated_file)
        list(APPEND _cuda_wrap_generated_files ${generated_file})
      endif()

      # Remember the dependency file so that it can be cleaned up later ####
      list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}")
      list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES)
      set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")

    endif()
  endforeach()

  # Set the return parameter
  set(${generated_files} ${_cuda_wrap_generated_files})
endmacro()

1106

1107

1108

###############################################################################

1109

###############################################################################

1110

# ADD LIBRARY

1111

###############################################################################

1112

###############################################################################

1113

# CUDA_ADD_LIBRARY(cuda_target file1 ... [OPTIONS ...])
#
# Wraps the given sources with CUDA_WRAP_SRCS in OBJ mode and creates a
# library target from the generated files plus the original sources, linked
# against CUDA_LIBRARIES.  On WIN32 the library is always STATIC (local
# change); everywhere else the library type is left to add_library().
macro(CUDA_ADD_LIBRARY cuda_target)

  CUDA_ADD_CUDA_INCLUDE_ONCE()

  # Split the arguments into sources and nvcc options, then create the
  # custom commands for each file.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _options ${ARGN})
  CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} OPTIONS ${_options} )

  # Add the library.
  if(NOT WIN32)
    add_library(${cuda_target} ${_generated_files} ${_sources})
  else()
    add_library(${cuda_target} STATIC ${_generated_files} ${_sources})
  endif()

  target_link_libraries(${cuda_target} ${CUDA_LIBRARIES})

  # The linker language follows the language of the generated files.
  # CUDA_C_OR_CXX is set by CUDA_WRAP_SRCS from CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target} PROPERTIES LINKER_LANGUAGE ${CUDA_C_OR_CXX})

endmacro()

1147

1148

1149

###############################################################################

1150

###############################################################################

1151

# ADD EXECUTABLE

1152

###############################################################################

1153

###############################################################################

1154

# CUDA_ADD_EXECUTABLE(cuda_target file1 ... [OPTIONS ...])
#
# Wraps the given sources with CUDA_WRAP_SRCS in OBJ mode and creates an
# executable target from the generated files plus the original sources,
# linked against CUDA_LIBRARIES.
macro(CUDA_ADD_EXECUTABLE cuda_target)

  CUDA_ADD_CUDA_INCLUDE_ONCE()

  # Split the arguments into sources and nvcc options, then create the
  # custom commands for each file.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _options ${ARGN})
  CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} OPTIONS ${_options} )

  # Add the executable.
  add_executable(${cuda_target} ${_generated_files} ${_sources})

  target_link_libraries(${cuda_target} ${CUDA_LIBRARIES})

  # The linker language follows the language of the generated files.
  # CUDA_C_OR_CXX is set by CUDA_WRAP_SRCS from CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target} PROPERTIES LINKER_LANGUAGE ${CUDA_C_OR_CXX})

endmacro()

1181

1182

1183

###############################################################################

1184

###############################################################################

1185

# CUDA COMPILE

1186

###############################################################################

1187

###############################################################################

1188

# CUDA_COMPILE(<generated_files_var> file1 ... [OPTIONS ...])
#
# Sets up the custom commands that compile the given sources in OBJ mode,
# using "cuda_compile" as the target name passed to CUDA_WRAP_SRCS, and
# stores the list of generated files in <generated_files_var>.
macro(CUDA_COMPILE generated_files)

  # Split the arguments into sources and nvcc options.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _options ${ARGN})

  # Create the custom commands for each file.
  CUDA_WRAP_SRCS( cuda_compile OBJ _generated_files ${_sources} OPTIONS ${_options} )

  # Hand the result back under the name chosen by the caller.
  set( ${generated_files} ${_generated_files})

endmacro()

1198

1199

1200

###############################################################################

1201

###############################################################################

1202

# CUDA COMPILE PTX

1203

###############################################################################

1204

###############################################################################

1205

# CUDA_COMPILE_PTX(<generated_files_var> file1 ... [OPTIONS ...])
#
# Sets up the custom commands that compile the given sources in PTX mode,
# using "cuda_compile_ptx" as the target name passed to CUDA_WRAP_SRCS, and
# stores the list of generated files in <generated_files_var>.
macro(CUDA_COMPILE_PTX generated_files)

  # Split the arguments into sources and nvcc options.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _options ${ARGN})

  # Create the custom commands for each file.
  CUDA_WRAP_SRCS( cuda_compile_ptx PTX _generated_files ${_sources} OPTIONS ${_options} )

  # Hand the result back under the name chosen by the caller.
  set( ${generated_files} ${_generated_files})

endmacro()

1215

1216

###############################################################################

1217

###############################################################################

1218

# CUDA ADD CUFFT TO TARGET

1219

###############################################################################

1220

###############################################################################

1221

# CUDA_ADD_CUFFT_TO_TARGET(target)
#
# Link <target> against the CUFFT library: CUDA_cufft_LIBRARY for device
# builds, CUDA_cufftemu_LIBRARY when CUDA_BUILD_EMULATION is on.
macro(CUDA_ADD_CUFFT_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_cufft_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY})
  endif()
endmacro()

1228

1229

###############################################################################

1230

###############################################################################

1231

# CUDA ADD CUBLAS TO TARGET

1232

###############################################################################

1233

###############################################################################

1234

# CUDA_ADD_CUBLAS_TO_TARGET(target)
#
# Link <target> against the CUBLAS library: CUDA_cublas_LIBRARY for device
# builds, CUDA_cublasemu_LIBRARY when CUDA_BUILD_EMULATION is on.
macro(CUDA_ADD_CUBLAS_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_cublas_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY})
  endif()
endmacro()

1241

1242

###############################################################################

1243

###############################################################################

1244

# CUDA ADD NPP TO TARGET

1245

###############################################################################

1246

###############################################################################

1247

# CUDA_ADD_NPP_TO_TARGET(target)
#
# Link <target> against the NPP library: CUDA_npp_LIBRARY for device builds,
# CUDA_nppemu_LIBRARY when CUDA_BUILD_EMULATION is on.
macro(CUDA_ADD_NPP_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_npp_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_nppemu_LIBRARY})
  endif()
endmacro()

1254

1255

###############################################################################

1256

###############################################################################

1257

# CUDA BUILD CLEAN TARGET

1258

###############################################################################

1259

###############################################################################

1260

# CUDA_BUILD_CLEAN_TARGET()
#
# Call this after all CUDA targets have been added.  It creates a convenience
# target (clean_cuda_depends, spelled in upper case for Visual Studio
# generators) that removes the files collected in
# CUDA_ADDITIONAL_CLEAN_FILES.  Run a regular clean after building this
# target to get the build system to generate all the code again.
macro(CUDA_BUILD_CLEAN_TARGET)

  if(CMAKE_GENERATOR MATCHES "Visual Studio")
    set(cuda_clean_target_name CLEAN_CUDA_DEPENDS)
  else()
    set(cuda_clean_target_name clean_cuda_depends)
  endif()

  add_custom_target(${cuda_clean_target_name}
    COMMAND ${CMAKE_COMMAND} -E remove ${CUDA_ADDITIONAL_CLEAN_FILES}
    )

  # Empty the cached list again so that the next configure run starts from
  # scratch and files of targets that were removed in the meantime do not
  # linger in it.
  set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")
endmacro()