__host__ doesn't work

I have a linking problem with nvcc on Linux. I have 2 source files, main.c and sum.cu. In main.c I call sum_cuda(…). sum_cuda is properly declared in sum.h and defined in sum.cu with the `__host__` decoration. Both sources compile fine with “nvcc -c”. In the linking phase, “nvcc main.o sum.o -lcudart” gives an “unresolved external” in main.c for <sum_cuda>. When I run “nm main.o” and “nm sum.o”, I can clearly see that the symbol generated for “sum_cuda” is not the same: in main.o it is unresolved, “U sum_cuda”, while in sum.o it is defined as “T _Z8sum_cudaPfS_S_i”. It is as if `__host__` wasn’t recognized and/or was ignored when compiling main.c. This is with CUDA dev kit 11.2. ----- You have everything below to reproduce the problem.

sum.h ----->
// Host-callable entry point that sums two float arrays element-wise:
// ab[i] = a[i] + b[i] for i in [0, n).
__host__ void sum_cuda
(
float * ab, // OUT: element-wise sum a + b
float * a, // IN: a array
float * b, // IN: b array
int n // IN: number of elements in a, b and ab
);
<------ sum.h ends
main.c ---->
#include <stdio.h>
#include <cuda_runtime.h>
#include "sum.h"

/*
 * Minimal driver for sum_cuda(): builds two 10-element input arrays, asks
 * sum_cuda() for their element-wise sum and exits with status 0.
 *
 * argc/argv are accepted for the conventional signature but not used.
 */
int main( int argc, char * argv[] )
{
    float a[10];
    float b[10];
    float res[10];
    int i;

    (void)argc;
    (void)argv;

    /* Give both inputs defined values; they are read by sum_cuda(). */
    for( i = 0; i < 10; ++i )
    {
        a[i] = (float)i;
        b[i] = 2.0f * (float)i;
    }

    sum_cuda( res, a, b, 10 );

    return 0;
}
<------ main.c ends
sum.cu ----------->
#include <stdio.h>

#include <cuda_runtime.h>

#include "sum.h"

// Element-wise sum kernel: ab[i] = a[i] + b[i] for every i in [0, n).
//
// Launch layout: any 1-D grid of 1-D blocks. Each thread starts at its flat
// global index and then advances by the total number of launched threads
// (grid-stride loop), so all n elements are processed even when the grid has
// fewer threads than n. No shared memory is used.
//
// ab, a and b are dereferenced on the device and must each hold at least n
// floats.
__global__ void Sum_Kernel( float *ab, float *a, float *b, unsigned int n )
{
    // Index math is done in size_t so blockIdx.x * blockDim.x cannot wrap
    // around 32 bits on very large grids.
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for( size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride )
    {
        ab[i] = a[i] + b[i];
    }
}

// Host entry point: computes ab[i] = a[i] + b[i] for i in [0, n) on the GPU.
//
// The inputs are copied into temporary device buffers, Sum_Kernel is launched
// on the default stream with enough blocks to cover n elements, and the result
// is copied back into ab. All copies use cudaMemcpyDefault, so the runtime
// infers the direction from the pointer values (requires unified virtual
// addressing); callers may therefore pass either host or device pointers.
//
// Does nothing when n <= 0 or any pointer is NULL. CUDA failures are reported
// on stderr; the function returns void, so ab is left unspecified in that case.
__host__ void sum_cuda
(
float *ab, // OUT: element-wise sum a + b
float *a, // IN: a array
float *b, // IN: b array
int n // IN: number of elements in a, b and ab
)
{
    if( n <= 0 || ab == NULL || a == NULL || b == NULL )
    {
        return; // nothing to compute
    }

    const int threadsPerBlock = 256;
    // Ceil-division written so it cannot overflow int for large n (n >= 1 here).
    const int blocksPerGrid = ( n - 1 ) / threadsPerBlock + 1;
    const size_t bytes = (size_t)n * sizeof( float );

    float *d_ab = NULL;
    float *d_a = NULL;
    float *d_b = NULL;

    // Each step runs only if everything before it succeeded.
    cudaError_t err = cudaMalloc( (void **)&d_a, bytes );
    if( err == cudaSuccess ) err = cudaMalloc( (void **)&d_b, bytes );
    if( err == cudaSuccess ) err = cudaMalloc( (void **)&d_ab, bytes );
    if( err == cudaSuccess ) err = cudaMemcpy( d_a, a, bytes, cudaMemcpyDefault );
    if( err == cudaSuccess ) err = cudaMemcpy( d_b, b, bytes, cudaMemcpyDefault );

    if( err == cudaSuccess )
    {
        Sum_Kernel<<< blocksPerGrid, threadsPerBlock >>>( d_ab, d_a, d_b, n );

        // A launch does not return a status; bad configurations show up here.
        err = cudaGetLastError();
    }

    // Issued on the same (default) stream as the kernel, so the copy runs after
    // the kernel has finished; execution errors from the kernel surface here.
    if( err == cudaSuccess ) err = cudaMemcpy( ab, d_ab, bytes, cudaMemcpyDefault );

    if( err != cudaSuccess )
    {
        fprintf( stderr, "sum_cuda: CUDA error: %s\n", cudaGetErrorString( err ) );
    }

    // cudaFree( NULL ) is a no-op, so this is safe after a partial failure.
    cudaFree( d_ab );
    cudaFree( d_b );
    cudaFree( d_a );
}
<--------- sum.cu ends
------> Instructions to compile
nvcc -c main.c -I…/XAI_dl -I/usr/local/cuda/include
nvcc -c sum.cu -I…/XAI_dl -I/usr/local/cuda/include
nvcc main.o sum.o -lcudart -lc -L/usr/local/cuda/lib64
<--------- Instructions to compile ends.
Compiler version---->
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Nov_30_19:08:53_PST_2020
Cuda compilation tools, release 11.2, V11.2.67
Build cuda_11.2.r11.2/compiler.29373293_0
<----Compiler version ends.

nvcc compiles .cu files as C++, so the symbol for sum_cuda is name-mangled when compiling sum.cu. main.c, on the other hand, is compiled as C, so it references the unmangled name “sum_cuda”; the two symbol names therefore don’t match at link time.

To fix, either add an extern "C" block to the header file so the symbol isn’t mangled, or rename “main.c” to “main.cpp” so it’s compiled as C++.

% nvcc sum.cu main.c
/tmp/tmpxft_000121b1_00000000-12_main.o: In function `main':
main.c:(.text+0x3f): undefined reference to `sum_cuda'
collect2: error: ld returned 1 exit status
% vi sum.h
% cat sum.h
ifdef __cplusplus
extern “C” {
endif
host void sum_cuda
(
float * ab, // OUT: a * b
float * a, // IN: a array
float * b, // IN: b array
int n // IN: n size of a, b, a*b
);
ifdef __cplusplus
}
endif
% nvcc sum.cu main.c
%

That worked. Thanks.