Hi all,
I’ve written a simple matrix multiplication task using sgemm for profiling purposes. However, it sometimes returns incorrect results that look like pointers to me.
It's being run on a GTX260 with an Intel Core 2 (64-bit) host.
Has anyone seen this before? Do you know why? What can I do about this?
[codebox]
// g++-4.3 multiply_matrices_gpu.c -I/usr/local/cuda/include -I/home/dhjones/cuda_sdk/common/inc -L/usr/local/cuda/lib64 -lcublas
#include <stdlib.h>
#include <stdio.h>
#include <cublas.h>
int main () {
int i,j;
cublasInit();
for(j=20;j<15000;j+=200){
float A = (float)malloc(jjsizeof(float));
float *B = (float*)malloc(j*j*sizeof(float));
float *C = (float*)malloc(j*j*sizeof(float));
if(A == NULL || B == NULL || C == NULL) return 1;
for (i=0;i<j*j;i++){ A[i] = 0; B[i] = 0; C[i] = 0; }
float* AA; float* BB; float* CC;
cublasAlloc(j*j,sizeof(float),(void**)&AA);
cublasAlloc(j*j,sizeof(float),(void**)&BB);
cublasAlloc(j*j,sizeof(float),(void**)&CC);
cublasSetMatrix(j,j,sizeof(float),A,j,AA,j);
cublasSetMatrix(j,j,sizeof(float),B,j,BB,j);
cublasSgemm(‘N’,‘N’,j,j,j,1,A,j,B,j,1,C,j);
cublasGetMatrix(j,j,sizeof(float),CC,j,C,j);
int sum=0;
for (i=0;i<j*j;i++){
sum += C[i];
}
printf("Size: %d. Sum of elements: %d\n",j,sum);
free( A ); free( B ); free ( C );
cublasFree(AA); cublasFree(BB); cublasFree(CC);
}
return 0;
}
[/codebox]
Results:
[codebox]
…
Size: 2620. Sum of elements: 0
Size: 2820. Sum of elements: 0
Size: 3020. Sum of elements: 0
Size: 3220. Sum of elements: 0
Size: 3420. Sum of elements: 0
Size: 3620. Sum of elements: -2147483648
Size: 3820. Sum of elements: -2147483648
Size: 4020. Sum of elements: -2147483648
Size: 4220. Sum of elements: -2146263808
[/codebox]
Note that repeated runs produce similar results, although the incorrect values do not always start at the same matrix size.