1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
# ---------------------------------------------------------------------------
# Directory-wide build switches for the hst_cuda library.
# ---------------------------------------------------------------------------

# Libraries declared without an explicit STATIC/SHARED type are built shared.
set(BUILD_SHARED_LIBS ON)

# Put the current source and binary directories on the include path.
set(CMAKE_INCLUDE_CURRENT_DIR ON)

# FindCUDA: attach the nvcc custom build rule to the .cu source file
# (relevant for Visual Studio generators).
set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON)

# FindCUDA: directory that receives the files generated by nvcc.
# It is rooted at the top-level source directory; the previously considered
# per-directory alternative was ${CMAKE_CURRENT_SOURCE_DIR}/build.
set(CUDA_GENERATED_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/build)
# Compile definitions for everything in this directory.
# PYHST_DEFINITIONS is not set in this file -- presumably provided by the
# enclosing project; verify against the top-level CMakeLists.txt.
add_definitions(${PYHST_DEFINITIONS})

# Include directories passed to nvcc (FindCUDA keeps its own list, separate
# from the host compiler's include path).
cuda_include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_SOURCE_DIR}
)

# Include directories for the targets of this directory.
# These stay directory-scoped because the FindCUDA macros are invoked before
# any target exists to attach them to.
include_directories(
    ${CUDA_CUT_INCLUDE_DIR}
    ${PYHST_INCLUDE_DIRS}
)
# Header listed both in the library sources and in the hst_cuda_test sources.
set(HEADERS hst_cuda.h)

# Files handed to the CUDA library target: the single .cu translation unit,
# the two kernel headers, and finally the shared header list.
set(hst_cuda_SRCS
    hst_cuda.cu
    hst_cuda_kernels.h
    hst_cuda_int_kernels.h
)
list(APPEND hst_cuda_SRCS ${HEADERS})
#\ --maxrregcount=64
# Select the nvcc option list for the targeted GPU generation, then create the
# hst_cuda library exactly once with that list.
#
#   IGNORE_OLD_HARDWARE false                        -> -arch sm_11
#   IGNORE_OLD_HARDWARE true, and
#     ENABLE_KEPLER_INSTRUCTIONS GREATER 4           -> sm_52 + sm_61, ftz/fast-math
#     ENABLE_KEPLER_INSTRUCTIONS true (otherwise)    -> sm_30 + sm_35, ftz/fast-math
#     ENABLE_KEPLER_INSTRUCTIONS false               -> -arch sm_21
if(NOT IGNORE_OLD_HARDWARE)
    set(hst_cuda_nvcc_options -lineinfo --ptxas-options=-v -arch sm_11)
elseif(ENABLE_KEPLER_INSTRUCTIONS GREATER 4)
    set(NVCODES
        -gencode arch=compute_52,code=sm_52
        -gencode arch=compute_61,code=sm_61
    )
    set(hst_cuda_nvcc_options
        ${NVCODES} -lineinfo --ptxas-options=-v,-abi=no --ftz=true --use_fast_math
    )
elseif(ENABLE_KEPLER_INSTRUCTIONS)
    set(NVCODES
        -gencode arch=compute_30,code=sm_30
        -gencode arch=compute_35,code=sm_35
    )
    # Declares PTX generation rules for the sources; the resulting file list is
    # stored in `ptx`, which nothing else in this file consumes.
    cuda_compile_ptx(ptx ${hst_cuda_SRCS} OPTIONS -lineinfo --ptxas-options=-v -arch=sm_30)
    set(hst_cuda_nvcc_options
        ${NVCODES} -lineinfo --ptxas-options=-v,-abi=no --ftz=true --use_fast_math
    )
else()
    set(hst_cuda_nvcc_options -arch sm_21 -lineinfo --ptxas-options=-v,-abi=no)
endif()

cuda_add_library(hst_cuda ${hst_cuda_SRCS} OPTIONS ${hst_cuda_nvcc_options})
# Link-time dependencies of the hst_cuda library.

# cuFFT is always linked.
cuda_add_cufft_to_target(hst_cuda)

# NPP is linked only when the CUDA major version is greater than 3.
if(CUDA_VERSION_MAJOR GREATER 3)
    cuda_add_npp_to_target(hst_cuda)
endif()

# Project libraries plus GSL and its CBLAS.
# NOTE(review): the keyword-less signature is kept on purpose -- the FindCUDA
# helpers link with the plain signature unless CUDA_LINK_LIBRARIES_KEYWORD is
# set, and CMake rejects mixing plain and PRIVATE/PUBLIC forms on one target.
target_link_libraries(hst_cuda
    ${PYHST_LIBRARIES}
    -lgsl
    -lgslcblas
)
#add_custom_target(tests
# Test driver for the library. EXCLUDE_FROM_ALL keeps it out of the default
# build, so it is only compiled when the hst_cuda_test target is requested.
add_executable(hst_cuda_test EXCLUDE_FROM_ALL hst_cuda_test.cpp ${HEADERS})
target_link_libraries(hst_cuda_test hst_cuda)
# Cleaning of generated files.
# The dedicated FindCUDA clean target (CUDA_BUILD_CLEAN_TARGET()) is not
# enabled here, and no glob over extra build files is used; instead the single
# leftover file hst_cuda.linkinfo is registered for removal by `make clean`.
set_property(DIRECTORY PROPERTY ADDITIONAL_MAKE_CLEAN_FILES hst_cuda.linkinfo)
# Post-build step for hst_cuda: symlink the built shared object(s) into the
# parent directory.
#
# The command line is a shell pipeline (backticks and pipes), so it needs a
# shell-based generator; the *_EXECUTABLE variables are not set in this file and
# are presumably located by the enclosing project.
#   1. ls -qd <top-level source>/hst_cuda/lib* | head -n 1 | sed 's|.*/||'
#        -> name of the first entry matching lib*, directory part stripped
#   2. find ../hst_cuda/<that name> -name *.so
#        -> shared objects at or below that entry
#   3. cut -d '/' -f 2-
#        -> drop the leading "../" path component
#   4. xargs ln -t .. -sf
#        -> create or replace symlinks in the parent directory
# NOTE(review): `*.so` is not quoted, so the shell may expand it before find
# sees it; left unchanged here to keep the command byte-identical.
#
# POST_BUILD is spelled out explicitly: the TARGET form previously relied on the
# implicit default, which newer CMake (policy CMP0175) no longer accepts silently.
add_custom_command(
    TARGET hst_cuda
    POST_BUILD
    COMMAND ${FIND_EXECUTABLE}
    ARGS ../hst_cuda/`${LS_EXECUTABLE} -qd ${CMAKE_SOURCE_DIR}/hst_cuda/lib* | ${HEAD_EXECUTABLE} -n 1 | ${SED_EXECUTABLE} -e 's|.*/||'` -name *.so | ${CUT_EXECUTABLE} -d '/' -f 2- | ${XARGS_EXECUTABLE} ${LN_EXECUTABLE} -t .. -sf
)
|