System:
GeForce GTX 470
CUDA Toolkit 3.2 (January 2011)
nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2010 NVIDIA Corporation
Built on Wed_Nov__3_16:16:57_PDT_2010
Cuda compilation tools, release 3.2, V0.2.1221
Problem:
The following program is intended to use CUFFT on a GTX 470 and is meant to be built from several source files. It has been reduced to a minimal two-file form that still reproduces the error. When the two files are merged into one file, the program compiles and works. With two files, however, the linker does not find the called function:
nvcc -c -arch compute_20 x1.cu
nvcc -c x.c
nvcc x.o x1.o -lcufft
x.o: In function `main':
x.c:(.text+0x79): undefined reference to `mpmul'
collect2: ld returned 1 exit status
frank@pc101100643Xb:~/svn/clib/trunk/c_lib/mathlib/complex/fft>
cat x.c
#include <stdio.h>

/* Implemented in x1.cu.  w, u and v are byte buffers; n and m are the
   element counts belonging to u and v.  The caller passes each pointer
   shifted one element before its data (see the call in main), and
   mpmul reads from u+1 / v+1. */
void mpmul (unsigned char * w, unsigned char * u, unsigned char * v, int n, int m);

/* Minimal driver: hands two short byte arrays and a 1000-byte buffer to
   mpmul, then prints the first 16 bytes of that buffer. */
int main (void)
{
    unsigned char a [] = { 3, 1, 4, 1, 5, 9, };
    unsigned char b [] = { 2, 6, 5, 3, 5, 8, 9, 7, 9, 3, };
    unsigned char c [1000];
    int i;

    /* 6 and 10 are the element counts of a and b; the -1 offsets match
       the +1 applied inside mpmul. */
    mpmul (c-1, a-1, b-1, 6, 10);

    for (i = 0; i < 16; i++) printf (" %d", c [i]);
    return 0;
}
cat x1.cu
#include <stdio.h>
#include <math.h>
#include <cuda.h>
#include <cufft.h>
char * cufftErrStr (int i)
{
…
}
/* Kernel: stores 0 into a[0] .. a[n-1].
   Every thread begins at its own global index and then advances by the
   total number of threads in the launch, so the whole array is covered
   for any 1-D grid/block configuration.
     a  array to clear (dereferenced inside the kernel)
     n  number of elements to clear */
__global__ void cuSet0 (cufftDoubleReal * a, int n)
{
    int i;
    for
    ( i = blockIdx.x * blockDim.x + threadIdx.x;   // this thread's global index
      i < n;                                       // stop at the end of the array
      i += blockDim.x * gridDim.x                  // step by the total thread count
    )
    {
        a [i] = 0;
    }
}
/* Reduced form of mpmul as posted: it only allocates device buffers,
   uploads the two inputs and clears the real work array.
     w     not used in this reduced form
     u, v  host byte arrays; the data starts at u+1 / v+1 (the caller in
           x.c passes a-1 / b-1)
     n, m  number of bytes copied from u and v

   NOTE(review): the nm listing in the post shows this function as
   _Z5mpmulPhS_S_ii, while x.c refers to plain `mpmul`; giving the
   function C linkage (extern "C") would presumably resolve the
   undefined reference.
   NOTE(review): none of the CUDA return codes are checked and the four
   device buffers are not released in the code shown. */
void mpmul (unsigned char * w, unsigned char * u, unsigned char * v, int n, int m)
{
    int mn, nn;
    char * U, * V;
    cufftDoubleComplex * A;
    cufftDoubleReal * a;

    mn = m >= n ? m : n;

    /* nn = smallest power of two >= mn, then doubled once more */
    nn = 1; while (nn < mn) nn <<= 1; nn <<= 1;

    cudaMalloc ((void**) &U, sizeof (char) * n);
    cudaMalloc ((void**) &V, sizeof (char) * m);
    cudaMalloc ((void**) &a, sizeof (cufftDoubleReal) * nn * 2);
    cudaMalloc ((void**) &A, sizeof (cufftDoubleComplex) * nn * 2);

    /* skip the element in front of the data (see the parameter notes) */
    cudaMemcpy (U, u+1, n*sizeof(char), cudaMemcpyHostToDevice);
    cudaMemcpy (V, v+1, m*sizeof(char), cudaMemcpyHostToDevice);

    /* bs and gs are not declared in the posted excerpt.
       NOTE(review): presumably defined in the removed parts; the launch
       syntax takes the grid dimension first and the block dimension
       second, so confirm the order matches their meaning. */
    cuSet0 <<<bs, gs>>> (a, 2*nn);
}
Perhaps the following is helpful for the analysis: when the object file is listed with nm, the name mpmul does not appear there in plain form:
nm x1.o
0000000000000000 W _Z10cudaLaunchIcE9cudaErrorPT_
0000000000000000 T _Z11cufftErrStri
00000000000012cb T _Z26__device_stub__Z6cuSet0PdiPdi
0000000000000067 T _Z5mpmulPhS_S_ii
0000000000001349 T _Z6cuSet0Pdi
…
00000000000011ef W umin
I would be very grateful if anybody could kindly help me with the problem. Thank you.
(Edit: removed the unimportant parts of the question after having solved it.)