CUDA on Mac OS X

Hi,
I am trying to get CUDA set up on a late-2009 Mac mini (Mac OS X 10.6.2, Xcode 3.2, cudadriver 2.3.1a, cudatoolkit 2.3a, gpucomputingtoolkit 2.3a).
When I build the deviceQuery sample, the link step fails with:
ld: warning: in /usr/local/cuda/lib/libcudart.dylib, file is not of required architecture
Undefined symbols:
  "_cudaDriverGetVersion", referenced from:
      _main in deviceQuery.cpp.o
  "_cudaRuntimeGetVersion", referenced from:
      _main in deviceQuery.cpp.o
  "_cudaGetDeviceProperties", referenced from:
      _main in deviceQuery.cpp.o
  "_cudaGetDeviceCount", referenced from:
      _main in deviceQuery.cpp.o
ld: symbol(s) not found
collect2: ld returned 1 exit status
make: *** [../../bin/darwin/release/deviceQuery] Error 1

Checking the obj/ directory, I see an i386 subdirectory, so the objects appear to be built for i386. Is that expected?

$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2009 NVIDIA Corporation
Built on Thu_Sep__3_00:38:40_PDT_2009
Cuda compilation tools, release 2.3, V0.2.1221

$ /usr/bin/gcc --help
Usage: i686-apple-darwin10-gcc-4.2.1 [options] file...

$ cat common.mk
################################################################################
#
# Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
#
# NVIDIA Corporation and its licensors retain all intellectual property and
# proprietary rights in and to this software and related documentation.
# Any use, reproduction, disclosure, or distribution of this software
# and related documentation without an express license agreement from
# NVIDIA Corporation is strictly prohibited.
#
# Please refer to the applicable NVIDIA end user license agreement (EULA)
# associated with this source code for terms and conditions that govern
# your use of this NVIDIA software.
#
################################################################################
#
# Common build script for CUDA source projects for Linux and Mac platforms
#
################################################################################

.SUFFIXES : .cu .cu_dbg.o .c_dbg.o .cpp_dbg.o .cu_rel.o .c_rel.o .cpp_rel.o .cubin .ptx

# Add new SM Versions here as devices with new Compute Capability are released
SM_VERSIONS := sm_10 sm_11 sm_12 sm_13

# Default toolkit location; only used when the caller/environment has not set it.
CUDA_INSTALL_PATH ?= /usr/local/cuda

# "make cuda-install=/path" overrides the toolkit location unconditionally.
ifdef cuda-install
    CUDA_INSTALL_PATH := $(cuda-install)
endif

# detect OS
OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:])
OSLOWER = $(shell uname -s 2>/dev/null | tr [:upper:] [:lower:])

# 'linux' is output for Linux system, 'darwin' for OS X
DARWIN = $(strip $(findstring DARWIN, $(OSUPPER)))
ifneq ($(DARWIN),)
    # Non-empty when SystemVersion.plist contains a line matching "10.6".
    SNOWLEOPARD = $(strip $(findstring 10.6, $(shell egrep "10.6" /System/Library/CoreServices/SystemVersion.plist)))
endif

# detect 32-bit or 64-bit platform
# HP_64 is non-empty when "uname -m" contains "64".
HP_64 = $(shell uname -m | grep 64)
OSARCH= $(shell uname -m)

# Basic directory setup for SDK
# (override directories only if they are not already defined)
SRCDIR     ?=
ROOTDIR    ?= ..
ROOTBINDIR ?= $(ROOTDIR)/../bin
BINDIR     ?= $(ROOTBINDIR)/$(OSLOWER)
ROOTOBJDIR ?= obj
LIBDIR     := $(ROOTDIR)/../lib
COMMONDIR  := $(ROOTDIR)/../common

# Compilers
NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc
CXX  := g++
CC   := gcc
LINK := g++ -fPIC

# Includes
INCLUDES += -I. -I$(CUDA_INSTALL_PATH)/include -I$(COMMONDIR)/inc

# Warning flags
CXXWARN_FLAGS := \
    -W -Wall \
    -Wimplicit \
    -Wswitch \
    -Wformat \
    -Wchar-subscripts \
    -Wparentheses \
    -Wmultichar \
    -Wtrigraphs \
    -Wpointer-arith \
    -Wcast-align \
    -Wreturn-type \
    -Wno-unused-function \
    $(SPACE)

# C builds use the C++ warning set plus the C-only prototype/declaration checks.
CWARN_FLAGS := $(CXXWARN_FLAGS) \
    -Wstrict-prototypes \
    -Wmissing-prototypes \
    -Wmissing-declarations \
    -Wnested-externs \
    -Wmain \

# architecture flag for nvcc and gcc compilers build
CUBIN_ARCH_FLAG :=
CXX_ARCH_FLAGS  :=
NVCCFLAGS       :=
LIB_ARCH        := $(OSARCH)

# Determining the necessary Cross-Compilation Flags
# 32-bit OS, but we target 64-bit cross compilation
ifeq ($(x86_64),1)
    NVCCFLAGS += -m64
    LIB_ARCH = x86_64

    ifneq ($(DARWIN),)
        CXX_ARCH_FLAGS += -arch x86_64
    else
        CXX_ARCH_FLAGS += -m64
    endif
else
# 64-bit OS, and we target 32-bit cross compilation
    ifeq ($(i386),1)
        NVCCFLAGS += -m32
        LIB_ARCH = i386
        ifneq ($(DARWIN),)
            CXX_ARCH_FLAGS += -arch i386
        else
            CXX_ARCH_FLAGS += -m32
        endif
    else
        # Neither x86_64=1 nor i386=1 was given: on Snow Leopard the build
        # is forced to 32-bit (nvcc -m32, gcc -arch i386, LIB_ARCH i386).
        ifneq ($(SNOWLEOPARD),)
            NVCCFLAGS += -m32
            CXX_ARCH_FLAGS += -arch i386 -m32
            LIB_ARCH = i386
        endif
    endif
endif

# Compiler-specific flags
CXXFLAGS := $(CXXWARN_FLAGS) $(CXX_ARCH_FLAGS)
CFLAGS   := $(CWARN_FLAGS) $(CXX_ARCH_FLAGS)
LINK     += $(CXX_ARCH_FLAGS)

# This option for Mac allows CUDA applications to work without requiring to set DYLD_LIBRARY_PATH
ifneq ($(DARWIN),)
    LINK += -Xlinker -rpath $(CUDA_INSTALL_PATH)/lib
endif

# Common flags
COMMONFLAGS += $(INCLUDES) -DUNIX

# Debug/release configuration
# "make dbg=1" selects the debug branch; anything else builds release.
ifeq ($(dbg),1)
    COMMONFLAGS += -g
    NVCCFLAGS   += -D_DEBUG
    CXXFLAGS    += -D_DEBUG
    CFLAGS      += -D_DEBUG
    BINSUBDIR   := debug
    LIBSUFFIX   := D
else
    COMMONFLAGS += -O2
    BINSUBDIR   := release
    LIBSUFFIX   :=
    NVCCFLAGS   += --compiler-options -fno-strict-aliasing
    CXXFLAGS    += -fno-strict-aliasing
    CFLAGS      += -fno-strict-aliasing
endif

# architecture flag for cubin build
CUBIN_ARCH_FLAG :=

# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
ifeq ($(USEGLLIB),1)
    ifneq ($(DARWIN),)
        OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries
        OPENGLLIB += -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
    else
# this case for linux platforms
        OPENGLLIB := -lGL -lGLU -lX11 -lXi -lXmu

# check if x86_64 flag has been set, otherwise, check HP_64 is i386/x86_64
        ifeq ($(x86_64),1)
            OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
        else
            ifeq ($(i386),)
                ifeq "$(strip $(HP_64))" ""
                    OPENGLLIB += -lGLEW -L/usr/X11R6/lib
                else
                    OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
                endif
            endif
        endif

# check if i386 flag has been set, otherwise check HP_64 is i386/x86_64
        ifeq ($(i386),1)
            OPENGLLIB += -lGLEW -L/usr/X11R6/lib
        else
            ifeq ($(x86_64),)
                ifeq "$(strip $(HP_64))" ""
                    OPENGLLIB += -lGLEW -L/usr/X11R6/lib
                else
                    OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
                endif
            endif
        endif
    endif
endif

# GLUT: a framework on OS X; on Linux -lglut with an arch-specific search path
# when x86_64=1 or i386=1 is given, plain -lglut when neither is set.
ifeq ($(USEGLUT),1)
    ifneq ($(DARWIN),)
        OPENGLLIB += -framework GLUT
    else
        ifeq ($(x86_64),1)
            OPENGLLIB += -lglut -L/usr/lib64
        endif
        ifeq ($(i386),1)
            OPENGLLIB += -lglut -L/usr/lib
        endif

        ifeq ($(x86_64),)
            ifeq ($(i386),)
                OPENGLLIB += -lglut
            endif
        endif
    endif
endif

# Optional SDK helper libraries; names carry the target arch and debug suffix.
ifeq ($(USEPARAMGL),1)
    PARAMGLLIB := -lparamgl_$(LIB_ARCH)$(LIBSUFFIX)
endif

ifeq ($(USERENDERCHECKGL),1)
    RENDERCHECKGLLIB := -lrendercheckgl_$(LIB_ARCH)$(LIBSUFFIX)
endif

# CUDPP: pick the 32-bit (-lcudpp) or 64-bit (-lcudpp64) library, then append
# the debug suffix and, for emulation builds, "_emu".
ifeq ($(USECUDPP), 1)
    ifeq ($(x86_64),1)
        CUDPPLIB := -lcudpp64
    else
        ifneq ($(SNOWLEOPARD),)
            CUDPPLIB := -lcudpp
        else
            ifeq "$(strip $(HP_64))" ""
                CUDPPLIB := -lcudpp
            else
                CUDPPLIB := -lcudpp64
            endif
        endif
    endif

    # This second selection runs unconditionally and overwrites the value
    # chosen above.
    ifeq ($(i386),1)
        CUDPPLIB := -lcudpp
    else
        ifneq ($(SNOWLEOPARD),)
            CUDPPLIB := -lcudpp
        else
            ifeq "$(strip $(HP_64))" ""
                CUDPPLIB := -lcudpp
            else
                CUDPPLIB := -lcudpp64
            endif
        endif
    endif

    CUDPPLIB := $(CUDPPLIB)$(LIBSUFFIX)
    ifeq ($(emu), 1)
        CUDPPLIB := $(CUDPPLIB)_emu
    endif

endif

# NVCUVID is only linked on OS X, from the SDK's common/lib/darwin directory.
ifeq ($(USENVCUVID), 1)
    ifneq ($(DARWIN),)
        NVCUVIDLIB := -L../../common/lib/darwin -lnvcuvid
    endif
endif

# Libs
# OS X always uses $(CUDA_INSTALL_PATH)/lib. Elsewhere lib vs. lib64 follows
# the host (HP_64) unless x86_64=1 / i386=1 requests the other word size.
ifneq ($(DARWIN),)
    LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) $(NVCUVIDLIB)
else
    ifeq "$(strip $(HP_64))" ""
        ifeq ($(x86_64),1)
            LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
        else
            LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
        endif
    else
        ifeq ($(i386),1)
            LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
        else
            LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
        endif
    endif
endif

# If dynamically linking to CUDA and CUDART, we exclude the libraries from the LIB
ifeq ($(USECUDADYNLIB),1)
    LIB += ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB} -ldl -rdynamic
else
# static linking, we will statically link against CUDA and CUDART
    ifeq ($(USEDRVAPI),1)
        LIB += -lcuda ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
    else
        LIB += -lcudart ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
    endif
endif

# CUFFT / CUBLAS: emulation builds (emu=1) link the *emu variants.
ifeq ($(USECUFFT),1)
    ifeq ($(emu),1)
        LIB += -lcufftemu
    else
        LIB += -lcufft
    endif
endif

ifeq ($(USECUBLAS),1)
    ifeq ($(emu),1)
        LIB += -lcublasemu
    else
        LIB += -lcublas
    endif
endif

# Lib/exe configuration
ifneq ($(STATIC_LIB),)
    # Static library: archive name gets _<arch><suffix> inserted before ".a".
    TARGETDIR := $(LIBDIR)
    TARGET    := $(subst .a,_$(LIB_ARCH)$(LIBSUFFIX).a,$(LIBDIR)/$(STATIC_LIB))
    LINKLINE   = ar rucv $(TARGET) $(OBJS)
else
    ifneq ($(OMIT_CUTIL_LIB),1)
        LIB += -lcutil_$(LIB_ARCH)$(LIBSUFFIX)
    endif
    # Device emulation configuration
    ifeq ($(emu), 1)
        NVCCFLAGS   += -deviceemu
        CUDACCFLAGS +=
        BINSUBDIR   := emu$(BINSUBDIR)
        # consistency, makes developing easier
        CXXFLAGS    += -D__DEVICE_EMULATION__
        CFLAGS      += -D__DEVICE_EMULATION__
    endif
    TARGETDIR := $(BINDIR)/$(BINSUBDIR)
    TARGET    := $(TARGETDIR)/$(EXECUTABLE)
    LINKLINE   = $(LINK) -o $(TARGET) $(OBJS) $(LIB)
endif

# check if verbose
# VERBOSE prefixes every recipe line: "@" hides the command, empty echoes it.
ifeq ($(verbose), 1)
    VERBOSE :=
else
    VERBOSE := @
endif

################################################################################
# Check for input flags and set compiler flags appropriately
################################################################################
ifeq ($(fastmath), 1)
    NVCCFLAGS += -use_fast_math
endif

ifeq ($(keep), 1)
    NVCCFLAGS += -keep
    # Intermediate files left behind by "nvcc -keep"; removed by "make clean".
    NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
endif

ifdef maxregisters
    NVCCFLAGS += -maxrregcount $(maxregisters)
endif

# Add cudacc flags
NVCCFLAGS += $(CUDACCFLAGS)

# Add common flags
NVCCFLAGS += $(COMMONFLAGS)
CXXFLAGS  += $(COMMONFLAGS)
CFLAGS    += $(COMMONFLAGS)

# Forward each host-compiler warning flag through nvcc individually.
ifeq ($(nvcc_warn_verbose),1)
    NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS))
    NVCCFLAGS += --compiler-options -fno-strict-aliasing
endif

################################################################################
# Set up object files
################################################################################
OBJDIR := $(ROOTOBJDIR)/$(LIB_ARCH)/$(BINSUBDIR)
OBJS += $(patsubst %.cpp,$(OBJDIR)/%.cpp.o,$(notdir $(CCFILES)))
OBJS += $(patsubst %.c,$(OBJDIR)/%.c.o,$(notdir $(CFILES)))
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu.o,$(notdir $(CUFILES)))

################################################################################
# Set up cubin output files
################################################################################
CUBINDIR := $(SRCDIR)data
CUBINS += $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))

################################################################################
# Set up PTX output files
################################################################################
PTXDIR := $(SRCDIR)data
PTXBINS += $(patsubst %.cu,$(PTXDIR)/%.ptx,$(notdir $(PTXFILES)))

################################################################################
# Rules
################################################################################
# Each recipe compiles the first prerequisite ($<) into the rule's target ($@).
$(OBJDIR)/%.c.o : $(SRCDIR)%.c $(C_DEPS)
	$(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $<

$(OBJDIR)/%.cpp.o : $(SRCDIR)%.cpp $(C_DEPS)
	$(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $<

$(OBJDIR)/%.cu.o : $(SRCDIR)%.cu $(CU_DEPS)
	$(VERBOSE)$(NVCC) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<

$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu cubindirectory
	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<

$(PTXDIR)/%.ptx : $(SRCDIR)%.cu ptxdirectory
	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -ptx $<

#
# The following definition is a template that gets instantiated for each SM
# version (sm_10, sm_13, etc.) stored in SM_VERSIONS. It does 2 things:
# 1. It adds to OBJS a .cu_sm_XX.o for each .cu file it finds in CUFILES_sm_XX.
# 2. It generates a rule for building .cu_sm_XX.o files from the corresponding
#    .cu file.
#
# The intended use for this is to allow Makefiles that use common.mk to compile
# files to different Compute Capability targets (aka SM arch version). To do
# so, in the Makefile, list files for each SM arch separately, like so:
#
# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
# CUFILES_sm_12 := anothercudakernel_sm12.cu
#
# $$@ and $$< are doubled so they survive the $(call ...) expansion and are
# only expanded when the generated recipe runs.
define SMVERSION_template
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1).o,$(notdir $(CUFILES_$(1))))
$(OBJDIR)/%.cu_$(1).o : $(SRCDIR)%.cu $(CU_DEPS)
	$(VERBOSE)$(NVCC) -o $$@ -c $$< $(NVCCFLAGS) -arch $(1)
endef

# This line invokes the above template for each arch version stored in
# SM_VERSIONS. The call function invokes the template, and the eval
# function interprets it as make commands.
$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))

# Link (or archive) the final target once directories and all outputs exist.
$(TARGET): makedirectories $(OBJS) $(CUBINS) $(PTXBINS) Makefile
	$(VERBOSE)$(LINKLINE)

cubindirectory:
	$(VERBOSE)mkdir -p $(CUBINDIR)

ptxdirectory:
	$(VERBOSE)mkdir -p $(PTXDIR)

makedirectories:
	$(VERBOSE)mkdir -p $(LIBDIR)
	$(VERBOSE)mkdir -p $(OBJDIR)
	$(VERBOSE)mkdir -p $(TARGETDIR)

# Remove files under the current directory whose path contains "#" or "~".
tidy :
	$(VERBOSE)find . | egrep "#" | xargs rm -f
	$(VERBOSE)find . | egrep "~" | xargs rm -f

# Remove build outputs plus image/binary files in the bin directory.
clean : tidy
	$(VERBOSE)rm -f $(OBJS)
	$(VERBOSE)rm -f $(CUBINS)
	$(VERBOSE)rm -f $(PTXBINS)
	$(VERBOSE)rm -f $(TARGET)
	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.ppm
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.pgm
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.bin
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.bmp

# clean, then delete the whole object tree.
clobber : clean
	$(VERBOSE)rm -rf $(ROOTOBJDIR)