I’m trying to use the CUBLAS Fortran wrapper (fortran.c). The code runs OK on my desktop (Intel Core2 Duo, GTX285, Ubuntu10.04 64bit). But the same code will return “CUBLAS_STATUS_MAPPING_ERROR” at the cublas_set_vector, if run on one node of a GPU cluster (6-core Intel Xeon, Tesla M2050, Linux x86-64).
I also test the C version of the code and it runs OK on both machines.
And it seems that there are some similar issues posted before but didn’t get solved:
Here is the Fortran code:
program main
implicit none
integer, parameter :: N=8
integer, parameter :: sizeof_real=4
real(kind=4), dimension(:),pointer :: h_p
integer :: d_p
integer stat
integer cublas_init, cublas_alloc, cublas_set_vector, cublas_free, cublas_shutdown
allocate(h_p(N))
h_p = 0.0d0
stat = cublas_init()
if (stat .NE. 0) then
write(*,*) "cublas_int failed"
write(*,*) "stat=",stat
stop
endif
stat = cublas_alloc(N, sizeof_real, d_p)
if (stat .NE. 0) then
write(*,*) " cublas_alloc failed"
write(*,*) "stat=",stat
stop
endif
write(*,*) "d_p =", d_p
stat = cublas_set_vector (N, sizeof_real, h_p, 1, d_p, 1)
if (stat .NE. 0) then
write(*,*) "cublas_set_vector failed"
write(*,*) "stat=",stat
stop
endif
deallocate(h_p)
stat = cublas_free (d_p)
stat = cublas_shutdown()
write(*,*) "test pass..."
stop
end
Here is C version code:
#include <stdio.h>
#include <stdint.h>
#include <cublas.h>
int main(int argc, char *argv[])
{
int N=8;
float* h_p;
float* d_p;
int devPtr;
cublasStatus stat;
h_p = (float *)malloc (N * sizeof(float));
for (int i = 0; i < N; i++) h_p[i] = 0;
stat = cublasInit();
if (stat != 0) {
printf ("cublasInit failed\n");
printf ("stat=%d\n",stat);
return 1;
}
stat = cublasAlloc (N, sizeof(float), (void**)&d_p);
if (stat != 0) {
printf ("cublasAlloc failed");
printf ("stat=%d\n",stat);
return 1;
}
devPtr = (int)(uintptr_t)d_p;
printf("devPtr=%d\n",devPtr);
stat = cublasSetVector (N, sizeof(float), h_p, 1, d_p, 1);
if (stat != 0) {
printf ("cublasSetVector failed");
printf ("stat=%d\n",stat);
return 1;
}
cublasFree (d_p);
cublasShutdown();
printf("test pass...\n");
return 0;
}
The output of the Fortran code on GTX 285:
d_p = 1048576
test pass...
The output of the C code on GTX 285:
devPtr=1048576
test pass...
The output of the Fortran code on M2050:
d_p = 0
cublas_set_vector failed
stat= 11
The output of the C code on M2050:
devPtr=0
test pass...
For all the cases, the compilers and options are:
gcc -O3
gfortran -cpp -O3 -ffree-form -ffree-line-length-0 -fno-second-underscore
nvcc -O3 -arch sm_13 -use_fast_math
Any clue?
Thanks,
Kong