Hello,
I’m trying to write a little program that sort a population (a vector) of people. The people are defined by a custom structure containing a string (name) and an integer (fitness).
My problem here is in the device function (global sort_Device). Each thread executes a loop that compare the fitness of one person to all the others one, and increment a comparison value integer if it’s higher.
The problem comes from this loop. When the population_size is small (100 or 200), there is no problem. But if I set the population_size at a high value (100000 for example), the program freeze and sometimes my computer crashes and give me a blue screen of the death. The blue screen reports an error due to the graphic driver.
Here is my code :
#include <iostream>
#include <windows.h>
#include <math.h>
#include <string>
#include <vector>
#include <algorithm>
#include <time.h>
#include <cutil_inline.h>
#include <cuda.h>
using namespace std;
#define BLOCK_SIZE 256
#define PRGM_POPULATION_SIZE 100000
//Custom structure definition
struct prgm_structure
{
string name;
unsigned int fitness;
};
typedef vector<prgm_structure> prgm_vector;
//INITIALISATION OF THE POPULATION
void initialisation_population(prgm_vector &population)
{
int string_size = 3;
for (int i = 0; i < PRGM_POPULATION_SIZE; i++)
{
prgm_structure someone;
someone.name.erase();
for (int j = 0; j < string_size; j++)
{
someone.name += (rand() % 90) + 32; //Generate a char, between the char 32 to 122 of the ASCII CODE
}
someone.fitness = (rand() % 100); //Generate a integer between 0 to 100
population.push_back(someone);
}
}
//SORTING FUNCTION
__global__ void sort_Device(prgm_structure* population, unsigned int* comp)
{
int bID = blockIdx.x;
int tID = threadIdx.x;
int thread_number = bID * blockDim.x + tID;
//Control if the thread is usefull
if (thread_number >= PRGM_POPULATION_SIZE)
{
return;
}
//Initialisation of the comparaison value
comp[thread_number] = 0;
//Comparison of the current fitness to all the others
//If the current fitness is higher than another, it increments the comparison value
for (int i=0; i<PRGM_POPULATION_SIZE; i++)
{
if (comp[thread_number] >= population[i].fitness)
{
comp[thread_number] = comp[thread_number] + 1;
}
}
}
void sort_Host(prgm_vector &population)
{
unsigned int size;
//Load population on the device
prgm_structure* population_Device;
size = PRGM_POPULATION_SIZE * sizeof(prgm_structure);
cudaMalloc((void**)&population_Device, size);
cudaMemcpy(population_Device, &population[0], size, cudaMemcpyHostToDevice);
//Allocation of a a array of integers on the device
unsigned int* comp_Device;
size = PRGM_POPULATION_SIZE * sizeof(unsigned int);
cudaMalloc((void**)&comp_Device, size);
//Call of the GPU sorting funtion
dim3 dimGrid( (PRGM_POPULATION_SIZE/BLOCK_SIZE) +1 );
dim3 dimBlock(BLOCK_SIZE);
sort_Device<<<dimGrid, dimBlock>>>(population_Device, comp_Device);
cudaThreadSynchronize();
}
//MAIN FUNTION
void main()
{
//Declaration of vector
prgm_vector population_a;
//Initialisation of vector
initialisation_population(population_a);
//Declaration of pointers to the vector
prgm_vector *population;
population = &population_a;
//Call of the sorting funtion
sort_Host(*population);
/*for (int i=0; i<PRGM_POPULATION_SIZE; i++)
cout << "People : " << population_a[i].name << " Fitness : " << population_a[i].fitness << flush << endl;*/
system("pause");
}
This is the part of the code that creates the problem :
for (int i=0 ; i<PRGM_POPULATION_SIZE ; i++)
{
if (comp[thread_number] >= population[i].fitness)
{
<b>comp[thread_number] = comp[thread_number] + 1;</b>
}
}
Can you please have a look at this code and say me if you see what is the problem with it?
Can you test it on your computer and say me if the same crash occurs?
Note that if you comment the line in red and bold, the problem disappears.
Thank you very much External Image
++