1
################################################################################
3
# Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
7
# This source code is subject to NVIDIA ownership rights under U.S. and
8
# international Copyright laws.
10
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
11
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
12
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
13
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
14
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
15
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
16
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
17
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
18
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
19
# OR PERFORMANCE OF THIS SOURCE CODE.
21
# U.S. Government End Users. This source code is a "commercial item" as
22
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
23
# "commercial computer software" and "commercial computer software
24
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
25
# and is provided to the U.S. Government only as a commercial end item.
26
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
27
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
28
# source code with only those rights set forth herein.
30
################################################################################
34
################################################################################
36
# Add the CUDA source, object and cubin suffixes below to make's list of
# known suffixes.
# NOTE(review): the pattern rules visible in this file produce .c_o/.cpp_o/.cu_o
# objects, not the _dbg_o/_rel_o names listed here — confirm they are still needed.
.SUFFIXES : .cu .cu_dbg_o .c_dbg_o .cpp_dbg_o .cu_rel_o .c_rel_o .cpp_rel_o .cubin
38
# Add new SM Versions here as devices with new Compute Capability are released
39
SM_VERSIONS := sm_10 sm_11 sm_12 sm_13
41
# Default CUDA toolkit location; ?= leaves a value that is already set
# (environment or command line) untouched.
CUDA_INSTALL_PATH ?= /opt/cuda
44
# Let a non-empty `cuda-install=<dir>` setting override the toolkit location.
# When cuda-install is unset or empty, keep the current CUDA_INSTALL_PATH
# instead of clobbering it with an empty string (which would turn NVCC into
# /bin/nvcc).
CUDA_INSTALL_PATH := $(if $(cuda-install),$(cuda-install),$(CUDA_INSTALL_PATH))
48
# Upper-cased kernel name as reported by `uname -s`. Assigned with := so the
# shell runs once rather than on every reference; the tr character classes
# are quoted so the shell cannot glob-expand them against file names in the
# current directory.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
49
# Lower-cased kernel name as reported by `uname -s`; used to pick the per-OS
# library directory. Assigned with := so the shell runs once, and the tr
# character classes are quoted to protect them from shell globbing.
OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')
50
# 'linux' is output for Linux system, 'darwin' for OS X
51
# Non-empty (the literal DARWIN) only when the upper-cased `uname -s` output
# contains DARWIN. Assigned with := so the value is computed once here instead
# of each time a conditional tests it.
DARWIN := $(strip $(findstring DARWIN, $(OSUPPER)))
53
# Basic directory setup for SDK
54
# (override directories only if they are not already defined)
60
# Library directory: added to the linker search path and used as the output
# directory for static-library targets.
# NOTE(review): := assigns unconditionally, so a LIBDIR/COMMONDIR value from
# the environment or an earlier assignment is replaced, despite the
# "only if not already defined" remark above — confirm this is intended.
LIBDIR := /opt/cuda/sdk/lib
61
# Root of the SDK common tree; its inc/ and lib/<os>/ subdirectories are
# referenced by INCLUDES and LIB.
COMMONDIR := /opt/cuda/sdk/common
64
# CUDA compiler driver taken from the selected toolkit directory.
NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc
67
# Command used for the final link; pinned to one specific g++ version.
LINK := g++-4.2.4 -fPIC
70
# Header search path: current directory, CUDA toolkit headers, SDK common headers.
INCLUDES += -I. -I$(CUDA_INSTALL_PATH)/include -I$(COMMONDIR)/inc
72
# architecture flag for cubin build
73
CUBIN_ARCH_FLAG := -m32
88
-Wno-unused-function \
91
CWARN_FLAGS := $(CXXWARN_FLAGS) \
93
-Wmissing-prototypes \
94
-Wmissing-declarations \
98
# Compiler-specific flags
100
# Seed the C++ and C flag sets with their respective warning flags.
CXXFLAGS := $(CXXWARN_FLAGS)
101
CFLAGS := $(CWARN_FLAGS)
104
# Flags shared by all compilers; appended to NVCCFLAGS, CXXFLAGS and CFLAGS
# further down.
COMMONFLAGS += $(INCLUDES) -DUNIX
106
# Debug/release configuration
109
# NOTE(review): no surrounding ifeq/else/endif is visible around these lines;
# presumably -D_DEBUG belongs to a debug-only branch — confirm against the
# complete file.
NVCCFLAGS += -D_DEBUG
116
# Disable strict-aliasing assumptions everywhere: via --compiler-options for
# nvcc, and directly for the C++ and C compilers.
NVCCFLAGS += --compiler-options -fno-strict-aliasing
117
CXXFLAGS += -fno-strict-aliasing
118
CFLAGS += -fno-strict-aliasing
121
# append optional arch/SM version flags (such as -arch sm_11)
122
#NVCCFLAGS += $(SMVERSIONFLAGS)
124
# architecture flag for cubin build
125
CUBIN_ARCH_FLAG := -m32
127
# detect if 32 bit or 64 bit system
128
# Non-empty when the machine name printed by `uname -m` contains "64".
# Assigned with := so the shell pipeline runs once; the value is tested by
# several conditionals and a recursive definition would re-run it each time.
HP_64 := $(shell uname -m | grep 64)
130
# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
134
# NOTE(review): the conditionals that choose between the assignments below
# are not visible here; the per-platform descriptions are inferred from the
# values themselves — confirm against the complete file.
# OpenGL framework libraries plus the static GLEW shipped under COMMONDIR.
OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
136
# Plain system libGL/libGLU.
OPENGLLIB := -lGL -lGLU
138
# True when HP_64 is empty, i.e. `uname -m` did not contain "64".
ifeq "$(strip $(HP_64))" ""
141
OPENGLLIB += -lGLEW_x86_64
145
# Build cubins as 64-bit.
CUBIN_ARCH_FLAG := -m64
150
OPENGLLIB += -framework GLUT
156
# Optional helper libraries, each enabled by setting its USE* switch to 1.
# LIBSUFFIX is appended to the library name.
ifeq ($(USEPARAMGL),1)
157
PARAMGLLIB := -lparamgl$(LIBSUFFIX)
160
ifeq ($(USERENDERCHECKGL),1)
161
RENDERCHECKGLLIB := -lrendercheckgl$(LIBSUFFIX)
164
# CUDPP library selection, active when USECUDPP is 1.
ifeq ($(USECUDPP), 1)
165
# True when HP_64 is empty, i.e. `uname -m` did not contain "64".
ifeq "$(strip $(HP_64))" ""
168
CUDPPLIB := -lcudpp64
171
# Append LIBSUFFIX to the library name.
CUDPPLIB := $(CUDPPLIB)$(LIBSUFFIX)
174
# NOTE(review): presumably applies to device-emulation builds only; the
# guarding conditional is not visible here — confirm.
CUDPPLIB := $(CUDPPLIB)_emu
179
# Library search path: CUDA toolkit libs, SDK lib dir, per-OS SDK common libs.
LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
180
# USEDRVAPI=1 links the driver API library (-lcuda); the other assignment
# links the runtime library (-lcudart). Both add the optional GL, paramgl,
# rendercheckgl and CUDPP libraries.
# NOTE(review): LIB is simply expanded, so the trailing ${LIB} just re-appends
# the -L options it already holds; looks redundant — confirm.
ifeq ($(USEDRVAPI),1)
181
LIB += -lcuda ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
183
LIB += -lcudart ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
194
ifeq ($(USECUBLAS),1)
202
# Lib/exe configuration
203
# A non-empty STATIC_LIB selects a static-library build that is written into
# LIBDIR.
ifneq ($(STATIC_LIB),)
204
TARGETDIR := $(LIBDIR)
205
# Insert LIBSUFFIX in front of the .a extension of the archive name.
# NOTE(review): subst rewrites every ".a" in the whole path, including any
# inside LIBDIR — confirm that cannot occur.
TARGET := $(subst .a,$(LIBSUFFIX).a,$(LIBDIR)/$(STATIC_LIB))
206
# Recursively expanded, so TARGET and OBJS are read when the recipe runs.
# `ar q` appends the objects to the archive.
LINKLINE = ar qv $(TARGET) $(OBJS)
208
# Link against cutil; the library name carries LIBSUFFIX.
LIB += -lcutil$(LIBSUFFIX)
209
# Device emulation configuration
211
# Ask nvcc for a device-emulation build.
NVCCFLAGS += -deviceemu
213
# Prefix the binary/object subdirectory name with "emu".
BINSUBDIR := emu$(BINSUBDIR)
214
# Define __DEVICE_EMULATION__ for the host C/C++ compilers as well, for
# consistency; makes developing easier.
215
CXXFLAGS += -D__DEVICE_EMULATION__
216
CFLAGS += -D__DEVICE_EMULATION__
218
# Executable build: the binary is written to $(BINDIR)/$(BINSUBDIR).
TARGETDIR := $(BINDIR)/$(BINSUBDIR)
219
TARGET := $(TARGETDIR)/$(EXECUTABLE)
220
# Recursively expanded, so OBJS and LIB are read when the recipe runs.
LINKLINE = $(LINK) -o $(TARGET) $(OBJS) $(LIB)
230
################################################################################
231
# Check for input flags and set compiler flags appropriately
232
################################################################################
233
# fastmath=1 adds nvcc's -use_fast_math option.
ifeq ($(fastmath), 1)
234
NVCCFLAGS += -use_fast_math
239
# Glob patterns for nvcc intermediate files; a later `rm -f` recipe line
# deletes whatever they match.
# NOTE(review): the conditional guarding this assignment is not visible here.
NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
243
# Pass the register limit from the `maxregisters` variable to nvcc.
NVCCFLAGS += -maxrregcount $(maxregisters)
247
# Extra nvcc flags supplied through CUDACCFLAGS.
NVCCFLAGS += $(CUDACCFLAGS)
249
# workaround for mac os x cuda 1.1 compiler issues
251
NVCCFLAGS += --host-compilation=C
255
# Add the shared flags (include paths, -DUNIX) to every compiler's flag set.
NVCCFLAGS += $(COMMONFLAGS)
256
CXXFLAGS += $(COMMONFLAGS)
257
CFLAGS += $(COMMONFLAGS)
259
# nvcc_warn_verbose=1 forwards each C++ warning flag through nvcc, one
# --compiler-options per flag.
ifeq ($(nvcc_warn_verbose),1)
260
NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS))
261
NVCCFLAGS += --compiler-options -fno-strict-aliasing
264
################################################################################
265
# Set up object files
266
################################################################################
267
# Objects are placed under $(ROOTOBJDIR)/$(BINSUBDIR).
OBJDIR := $(ROOTOBJDIR)/$(BINSUBDIR)
268
# Map each listed source (directory part stripped) to an object in OBJDIR.
# The source extension is kept in the object suffix (.cpp_o, .c_o, .cu_o),
# matching the per-language pattern rules.
OBJS += $(patsubst %.cpp,$(OBJDIR)/%.cpp_o,$(notdir $(CCFILES)))
269
OBJS += $(patsubst %.c,$(OBJDIR)/%.c_o,$(notdir $(CFILES)))
270
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_o,$(notdir $(CUFILES)))
272
################################################################################
274
################################################################################
275
# Directory that receives generated .cubin files.
# NOTE(review): there is no "/" between $(SRCDIR) and "data", so SRCDIR is
# presumably empty or ends with a slash — confirm.
CUBINDIR := $(SRCDIR)data
276
# One .cubin in CUBINDIR per source listed in CUBINFILES (directory part
# stripped).
CUBINS += $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))
278
################################################################################
280
################################################################################
281
# Compile a C source from SRCDIR into OBJDIR with $(CC); every file in
# C_DEPS is a prerequisite too.
$(OBJDIR)/%.c_o : $(SRCDIR)%.c $(C_DEPS)
282
$(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $<
284
# Compile a C++ source from SRCDIR into OBJDIR with $(CXX); every file in
# C_DEPS is a prerequisite too.
$(OBJDIR)/%.cpp_o : $(SRCDIR)%.cpp $(C_DEPS)
285
$(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $<
287
# Compile a CUDA source from SRCDIR into OBJDIR with nvcc, adding any
# SMVERSIONFLAGS; every file in CU_DEPS is a prerequisite too.
$(OBJDIR)/%.cu_o : $(SRCDIR)%.cu $(CU_DEPS)
288
$(VERBOSE)$(NVCC) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<
290
# Build a .cubin from a CUDA source with `nvcc -cubin`, using the
# architecture flag in CUBIN_ARCH_FLAG. Requires the `cubindirectory`
# target first.
$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu cubindirectory
291
$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<
294
# The following definition is a template that gets instantiated for each SM
295
# version (sm_10, sm_13, etc.) stored in SM_VERSIONS. It does 2 things:
296
# 1. It adds to OBJS a .cu_sm_XX_o for each .cu file it finds in CUFILES_sm_XX.
297
# 2. It generates a rule for building .cu_sm_XX_o files from the corresponding .cu file.
300
# The intended use for this is to allow Makefiles that use common.mk to compile
301
# files to different Compute Capability targets (aka SM arch version). To do
302
# so, in the Makefile, list files for each SM arch separately, like so:
304
# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
305
# CUFILES_sm_12 := anothercudakernel_sm12.cu
307
define SMVERSION_template
308
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1)_o,$(notdir $(CUFILES_$(1))))
309
$(OBJDIR)/%.cu_$(1)_o : $(SRCDIR)%.cu $(CU_DEPS)
310
$(VERBOSE)$(NVCC) -o $$@ -c $$< $(NVCCFLAGS) -arch $(1)
313
# This line invokes the above template for each arch version stored in
314
# SM_VERSIONS. The call function invokes the template, and the eval
315
# function interprets it as make commands.
316
$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))
318
# Final step: run LINKLINE (link or archive) once the directories, objects
# and cubins are up to date; the Makefile itself is a prerequisite as well.
# NOTE(review): if `makedirectories` never exists as a file, this normal
# prerequisite makes the target look out of date on every run — confirm.
$(TARGET): makedirectories $(OBJS) $(CUBINS) Makefile
319
$(VERBOSE)$(LINKLINE)
322
$(VERBOSE)mkdir -p $(CUBINDIR)
325
$(VERBOSE)mkdir -p $(LIBDIR)
326
$(VERBOSE)mkdir -p $(OBJDIR)
327
$(VERBOSE)mkdir -p $(TARGETDIR)
331
# Remove every path under the current directory whose name contains '#'.
# find's own -path/-exec replaces the egrep | xargs pipeline so that names
# containing whitespace or quotes reach rm intact.
$(VERBOSE)find . -path '*#*' -exec rm -f {} +
332
# Remove every path under the current directory whose name contains '~'.
# find's own -path/-exec replaces the egrep | xargs pipeline so that names
# containing whitespace or quotes reach rm intact.
$(VERBOSE)find . -path '*~*' -exec rm -f {} +
335
$(VERBOSE)rm -f $(OBJS)
336
$(VERBOSE)rm -f $(CUBINS)
337
$(VERBOSE)rm -f $(TARGET)
338
$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)
341
$(VERBOSE)rm -rf $(ROOTOBJDIR)