Thanks for your reply.
I've tried using CUDA events and I still get the same strange behaviour: the message is printed for only some iterations of the loop.
The C++ function:
// Timed section: record a start event, upload the input data and constants,
// run NUM_ENTRADAS iterations of two kernel launches, copy the results back,
// record a stop event and print the elapsed time between the two events.
cudaEvent_t start,stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Mark the beginning of the timed region on stream 0.
cudaEventRecord(start,0);
unsigned int bytes = size * sizeof(Neurona);
// Upload h_idata to d_idata (host -> device), with error checking via the cutil macro.
cutilSafeCallNoSync( cudaMemcpy(d_idata, h_idata, bytes, cudaMemcpyHostToDevice) );
float COEF_GANADORA=(float)0.1;
float COEF_VECINAS=(float)0.01;
// UMBRAL_DATOS is not used in the visible part of this snippet.
int UMBRAL_DATOS=127;
// Copy the two coefficients to the device symbols of the same name.
// NOTE(review): unlike the cudaMemcpy above, the return values of these calls
// are discarded, so a failure here would go unnoticed.
cudaMemcpyToSymbol("COEF_GANADORA", &COEF_GANADORA, sizeof(float));
cudaMemcpyToSymbol("COEF_VECINAS", &COEF_VECINAS, sizeof(float));
int aux;
// The device symbol "size" receives size itself when isPow2(size) holds,
// otherwise the value returned by nextPow2(size).
if( isPow2(size) )
{
cudaMemcpyToSymbol("size", &size, sizeof(size));
}else
{
aux = nextPow2(size);
cudaMemcpyToSymbol("size", &aux, sizeof(aux));
}
....
....
// Main loop: each iteration picks one random point and launches both kernels with it.
for (entrada=0;entrada<NUM_ENTRADAS;entrada++)
{
// Random index in [0, NUM_PUNTOS); the point's three components are stored
// consecutively in nube_puntos starting at al*3.
al=rand()%(NUM_PUNTOS);
x=nube_puntos[al*3];
y=nube_puntos[al*3+1];
z=nube_puntos[al*3+2];
// NOTE(review): both launch configurations show a single argument here; a
// launch normally takes at least grid and block dimensions — part of the
// configuration may have been lost when the snippet was pasted. Confirm.
// NOTE(review): no cudaGetLastError() follows the launches, so a failed
// launch would not be reported by this code.
reduceMinNeurona3_Min2<<<dimGrid>>>(d_idata, d_odata, x, y, z);
ajustarPesosGanadora<<<1>>>(d_matVecinas, d_idata, d_odata, numBlocks,x,y, z);
}
// Copy the results back to the host (device -> host).
// NOTE(review): the first copy's return value is not checked; the second is
// wrapped in cutilSafeCall.
cudaMemcpy(h_matVecinas, d_matVecinas, sizeof(auxVECINA)*((size*100)+size), cudaMemcpyDeviceToHost);
cutilSafeCall( cudaMemcpy(h_idata, d_idata, bytes, cudaMemcpyDeviceToHost) );
float elapsedTime;
// Mark the end of the timed region, wait for the stop event to complete,
// then read the time elapsed between the start and stop events.
cudaEventRecord( stop,0 );
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsedTime, start,stop);
// Report the measured time; the message labels the value as milliseconds.
printf("Time with %d neurons %3.1f ms \n",size, elapsedTime);
This function is executed in a loop about M times, but the message is only printed some of the time, and which iterations print varies from run to run. It is very strange. I am compiling with these commands:
pgcpp -Mcudax86 -m64 -o ../../bin//release/cuGNG_base obj/x86_64/release/tmapas.cpp.o obj/x86_64/release/cuGNG3D.cu.o -L../../lib -L../../common/lib/ -L../../shared/lib -lcutil_x86_64 -lshrutil_x86_64
Many thanks in advance, Sergio