Hi,
I am trying to implement a neural network trained by a genetic algorithm on CUDA. When I debug the kernel in emulation mode, I can see the value changing. But when kernel execution finishes and I copy the value back to the host and display it, I see that it never changes. The code related to the problem is shown below.
In the kernel code, the fitnessVector values change when I step through it in the debugger, but when I write those values to a file on the host side, the same values are written every time. I could not figure out why. Thanks for reading and for any answers.
Kernel Code
// GA evolution step: one thread per individual in the population.
// Launch layout assumed: gridDim.x * blockDim.x == MAX_POPULATION (host launches
// <<<MAX_POPULATION / BLOCK_SIZE, BLOCK_SIZE>>>). There is NO bounds check on
// `index`, so MAX_POPULATION must be an exact multiple of BLOCK_SIZE —
// TODO confirm, otherwise trailing threads write out of bounds.
// NOTE(review): if fitnessVector appears unchanged on the host afterwards, the
// most likely cause is a silently failed launch — check cudaGetLastError()
// after the launch and the return value of every cudaMemcpy; a failed kernel
// leaves device memory untouched, so the host re-reads the old values.
__global__ void evolvePopulation(float individuals[CONNECTION_NUM * MAX_POPULATION], PGAConfig gaConfig, ANNDATA *training_data, int trainDataSize, Rand48 *random, float fitnessVector[MAX_POPULATION], Chrosomome res, float* debugFitness){
// Flat 1-D global thread index; doubles as this thread's individual index.
int blockId = blockIdx.x;
int threadId = threadIdx.x;
int index = blockId * blockDim.x + threadId;
//some code
// `err` and `crossVec` come from the elided code above — presumably the
// candidate's training error and its crossover chromosome; verify in full source.
// Greedy replacement: keep the candidate only if it lowers this slot's fitness.
if(err < fitnessVector[index]){
fitnessVector[index] = err;
// Copy the accepted chromosome into this individual's row of the population.
for(int indx = 0 ; indx < CONNECTION_NUM; indx++){
individuals[index * CONNECTION_NUM + indx] = crossVec[indx];
}
}
// Barrier before the (currently disabled) reduction. Safe placement: it sits
// outside the divergent branch above, so every thread in the block reaches it.
// It only synchronizes within a block, not across the whole grid.
__syncthreads();
// findMinFitness(fitnessVector,debugFitness);
}
Host part
float* d_fitnessArry;
cudaMalloc ((void**)&d_fitnessArry,h_gaConfig->maxPopulation * sizeof(float));
calculateFirstFitness<<<MAX_POPULATION / BLOCK_SIZE, BLOCK_SIZE>>>(d_individuals,d_training_data,trainDataSize,d_fitnessArry);
float* hostFitness = new float[h_gaConfig->maxPopulation];
for(int cycle = 0; cycle < 100; cycle++){
evolvePopulation<<<MAX_POPULATION / BLOCK_SIZE, BLOCK_SIZE>>>(d_individuals,d_ga_config,d_training_data,trainDataSize,
d_random,d_fitnessArry, d_result,d_best);
cudaMemcpy (hostFitness,d_fitnessArry,sizeof(float) * h_gaConfig->maxPopulation, cudaMemcpyDeviceToHost);
for(int fit = 0; fit < h_gaConfig->maxPopulation; fit++){
out<<hostFitness[fit]<<std::endl;
}