Why is this not working? I have tried many variations on the same theme!
a is a matrix of order 4. cudaMalloc works ok, but not cudaMemcpy.
[codebox]
#include <stdio.h>
#include <stdlib.h>
#include “cufft.h”
#include “cuda.h”
#include “cutil_inline.h”
extern “C” void testpassing_(cufftReal* a, int* kk)
{
int kk1=*kk+1;
size_t size = sizeof(cufftReal)*kk1*kk1*kk1*kk1;
cufftReal *h_r ;
cufftReal *d_r ;
h_r=a;
cutilSafeCall( cudaMalloc((void **)&h_r, size));
cutilSafeCall( cudaMalloc((void **)&d_r, size));
cutilSafeCall( cudaMemcpy(d_r,h_r, size , cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy(h_r,d_r, size , cudaMemcpyDeviceToHost));
}
[/codebox]
The make file
[codebox]
all:
gfortran -c -o test_passing_calling.o test_passing_calling.F
nvcc -arch=sm_11 -c test_passing.cu -I$(CUDA_HOME)/cuda_23/cuda/include -I$(CUDA_SDK_HOME)/C/common/inc
gfortran -o testpassing test_passing_calling.o test_passing.o -lgfortran -lstdc++ -L$(CUDA_HOME)/cuda_23/cuda/lib64 -lcufft -lcudart -L$(CUDA_SDK_HOME)/lib
[/codebox]
The fortran program
[codebox]
parameter (kk=2)
real*4 :: a(0:kk,0:kk,0:kk,0:kk)
do i=0,kk
do j=0,kk
do k=0,kk
do m=0,kk
a(i,j,k,m) = rand()
end do
end do
end do
end do
do i=0,kk
do j=0,kk
do k=0,kk
write( *,1000) , a(i,j,k,0), a(i,j,k,1), a(i,j,k,2)
end do
print *
end do
print *
print *
end do
call testpassing(a,kk)
do i=0,kk
do j=0,kk
do k=0,kk
write( *,1000) , a(i,j,k,0), a(i,j,k,1), a(i,j,k,2)
end do
print *
end do
print *
print *
end do
1000 format ( 3f10.5)
end
[/codebox]