Hi,
I was using a Quadro FX4800. Yesterday my "baby" reached my lab… Now I am starting to enjoy it.
It is the Tesla C2050.
However there is a strange behavior I don’t understand.
I can’t get anything other than 0 when I run my cuBLAS programs.
I notice that the program works well on the previous card.
The command cat /proc/driver/nvidia/version gives me:
Nvidia x86_64 kernel module 260.19.12
gcc version 4.1.2
I’m trying to figure out what is happening but can’t find anything.
Any help would be welcome.
The sample code of Hendrik Lensch…
#include<stdio.h>
#include<stdlib.h>
#include"cublas.h"
int main()
{
float *h_a, *h_b,*h_c;
float *d_a, *d_b,*d_c;
float alpha=1.0f, beta = 0.0f;
int N = 10, n2 = N*N;
int nBytes = n2*sizeof(float),i;
h_a = (float*)malloc(nBytes);
h_b = (float*)malloc(nBytes);
h_c = (float*)malloc(nBytes);
for(int i = 0; i < n2; i++){
h_a[i] = rand()/(float)RAND_MAX;
h_b[i] = rand()/(float)RAND_MAX;
}
cublasInit();
cublasAlloc(n2, sizeof(float),(void**)&d_a);
cublasAlloc(n2, sizeof(float),(void**)&d_b);
cublasAlloc(n2, sizeof(float),(void**)&d_c);
cublasSetVector(n2,sizeof(float),h_a,1, d_a,1);
cublasSetVector(n2,sizeof(float),h_b,1, d_b,1);
cublasSgemm('n','n',N,N,N,alpha,d_a,N,d_b,N,beta,d_c,N);
cublasGetVector(n2,sizeof(float),d_c,1,h_c,1);
for(int i = 0; i < 10; i++){
for(int j = 0; j < 10; j++){
printf("% lf",h_c[i + j * N]);
}
printf("\n");
}
cublasShutdown();
return 0;
}