Code works perfectly fine with 1 block, but not working for more blocks.......

What could have caused the code to work only on one block but not for more than one?

Device Code:

/* ALG XIV.H: For each of the process’s people, each process spawns 
         *  threads to do the following */
__global__ void cuda_susceptible(char *states_dev, int *x_locations_dev, 
	int *y_locations_dev, int *infected_x_locations_dev, 
	int *infected_y_locations_dev, int *num_infected_dev, 
	int *num_susceptible_dev, int *num_infection_attempts_dev,
	int *num_infections_dev, curandState *cuda_states, int global_num_infected, 
	int infection_radius, int contagiousness_factor, char SUSCEPTIBLE, char INFECTED, 
	int numThread)
{
	// set up shared memory
	int *num_infected = (int*)array; 
	int *num_susceptible = (int*)&num_infected[numThread];
	int *num_infection_attempts = (int*)&num_susceptible[numThread];
	int *num_infections = (int*)&num_infection_attempts[numThread];

	int id = threadIdx.x + blockIdx.x * blockDim.x;
	int i, num_infected_nearby;

	// clear the shared memory
	num_infected[id] = 0;
	num_susceptible[id] = 0;
	num_infection_attempts[id] = 0;
	num_infections[id] = 0;

	/* ALG XIV.H.1: If the person is susceptible, then */
	if(states_dev[id] == SUSCEPTIBLE)
	{
		/* ALG XIV.H.1.a: For each of the infected people (received
                 *  earlier from all processes) or until the number of infected 
                 *  people nearby is 1, the thread does the following */
		num_infected_nearby = 0;
		for(i=0; i<=global_num_infected-1 && num_infected_nearby<1; i++)
		{
			/* ALG XIV.H.1.a.i: If person 1 is within the infection 
                     *  radius, then */
			if( (x_locations_dev[id] > infected_x_locations_dev[i] - infection_radius) && 
				(x_locations_dev[id] < infected_x_locations_dev[i] + infection_radius) && 
				(y_locations_dev[id] > infected_y_locations_dev[i] - infection_radius) &&
				(y_locations_dev[id] < infected_y_locations_dev[i] + infection_radius) )
			{
				/* ALG XIV.H.1.a.i.1: The thread increments the number 
                         *  of infected people nearby */
				num_infected_nearby ++;
			}
		}

		if(num_infected_nearby >= 1){
			num_infection_attempts[threadIdx.x]++;
		}
		
		/* ALG XIV.H.1.b: If there is at least one infected person 
                 *  nearby, and a random number less than 100 is less than or
                 *  equal to the contagiousness factor, then */
		// generate a random number between 0 and 100
		int rand_num = (int)(curand_uniform(&cuda_states[id])*100);

		if(num_infected_nearby >= 1 && rand_num <= contagiousness_factor)
		{
			/* ALG XIV.H.1.b.i: The thread changes person1’s state to 
                     *  infected */
			states_dev[id] = INFECTED;

			/* ALG XIV.H.1.b.ii: The thread updates the counters */
			num_infected[threadIdx.x]++;
			num_susceptible[threadIdx.x]--;
			num_infections[threadIdx.x]++;
		}
	}

	__syncthreads();

	// use atomicAdd function to add up results
	if( threadIdx.x == 0 ) {
		int num_infected_sum = 0;
		int num_susceptible_sum = 0;
		int num_infection_attempts_sum = 0;
		int num_infections_sum = 0;
		for(i=0; i<numThread; i++){
			num_infected_sum += num_infected[i];
			num_susceptible_sum += num_susceptible[i];
			num_infection_attempts_sum += num_infection_attempts[i];
			num_infections_sum += num_infections[i];
		}
		atomicAdd(num_infected_dev, num_infected_sum);
		atomicAdd(num_susceptible_dev, num_susceptible_sum);
		atomicAdd(num_infection_attempts_dev, num_infection_attempts_sum);
		atomicAdd(num_infections_dev, num_infections_sum);
	}
}

Host Code

void cuda_run(struct global_t *global, struct stats_t *stats, 
	struct const_t *constant, struct cuda_t *cuda)
{
	// copy host info to device
	cudaMemcpy(cuda->infected_x_locations_dev, global->infected_x_locations, cuda->people_size, cudaMemcpyHostToDevice);
	cudaMemcpy(cuda->infected_y_locations_dev, global->infected_y_locations, cuda->people_size, cudaMemcpyHostToDevice);

	if(global->current_day == 0){
		// cast stats data to int
		cuda->num_infections_int = (int)stats->num_infections;
		cuda->num_infection_attempts_int = (int)stats->num_infection_attempts;
		cuda->num_deaths_int = (int)stats->num_deaths;
		cuda->num_recovery_attempts_int = (int)stats->num_recovery_attempts;

		cudaMemcpy(cuda->num_infections_dev, &cuda->num_infections_int, sizeof(int), cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->num_infection_attempts_dev, &cuda->num_infection_attempts_int, sizeof(int), cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->num_deaths_dev, &cuda->num_deaths_int, sizeof(int), cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->num_recovery_attempts_dev, &cuda->num_recovery_attempts_int, sizeof(int), cudaMemcpyHostToDevice);

		cudaMemcpy(cuda->x_locations_dev, global->x_locations, cuda->people_size, cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->y_locations_dev, global->y_locations, cuda->people_size, cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->states_dev, global->states, cuda->states_size, cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->num_days_infected_dev, global->num_days_infected, cuda->people_size, cudaMemcpyHostToDevice);

		cudaMemcpy(cuda->num_susceptible_dev, &global->num_susceptible, sizeof(int), cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->num_immune_dev, &global->num_immune, sizeof(int), cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->num_dead_dev, &global->num_dead, sizeof(int), cudaMemcpyHostToDevice);
		cudaMemcpy(cuda->num_infected_dev, &global->num_infected, sizeof(int), cudaMemcpyHostToDevice);
	}
	
	/* set up cuda Random Number Generator */
	// set up curand states
	curandState *cuda_states;	
	cudaMalloc(&cuda_states, cuda->numThread * cuda->numBlock);
	// obatin current time as seed
	time_t current_time;
	time(&current_time);
	// initialize rand_kernel function
	rand_kernel<<<cuda->numBlock, cuda->numThread>>>(cuda_states, (unsigned long)current_time);

	// execute device code on susceptible people
	int infection_radius = constant->infection_radius;
	int contagiousness_factor = constant->contagiousness_factor;
	int num_infected = global->num_infected;
	cuda_susceptible<<<cuda->numBlock, cuda->numThread, 4*cuda->numThread*sizeof(int)>>>(
		cuda->states_dev, cuda->x_locations_dev, cuda->y_locations_dev, 
		cuda->infected_x_locations_dev, cuda->infected_y_locations_dev, 
		cuda->num_infected_dev, cuda->num_susceptible_dev, 
		cuda->num_infection_attempts_dev, cuda->num_infections_dev, 
		cuda_states, num_infected, infection_radius, 
		contagiousness_factor, SUSCEPTIBLE, INFECTED, cuda->numThread);
	// Sync
	cudaThreadSynchronize();

	cudaMemcpy(global->x_locations, cuda->x_locations_dev, cuda->people_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(global->y_locations, cuda->y_locations_dev, cuda->people_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(global->states, cuda->states_dev, cuda->states_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(&global->num_infected, cuda->num_infected_dev, sizeof(int), cudaMemcpyDeviceToHost);

	cudaFree(cuda_states);

	printf("x location %d y location %d state %c \n", global->x_locations[10], 
		global->y_locations[10], global->states[10]);

	/*int i;
	for(i=0; i<global->number_of_people; i++){
		printf("x location %d y location %d \n", global->x_locations[i], global->y_locations[i]);
	}*/

	printf("infected number %d \n", global->num_infected);

	if(global->current_day == constant->total_number_of_days){
		cudaMemcpy(&global->num_susceptible, cuda->num_susceptible_dev, sizeof(int), cudaMemcpyDeviceToHost);
		cudaMemcpy(&global->num_immune, cuda->num_immune_dev, sizeof(int), cudaMemcpyDeviceToHost);
		cudaMemcpy(&global->num_dead, cuda->num_dead_dev, sizeof(int), cudaMemcpyDeviceToHost);
	
		cudaMemcpy(&cuda->num_infections_int, cuda->num_infections_dev, sizeof(int), cudaMemcpyDeviceToHost);
		cudaMemcpy(&cuda->num_infection_attempts_int, cuda->num_infection_attempts_dev, sizeof(int), cudaMemcpyDeviceToHost);
		cudaMemcpy(&cuda->num_deaths_int, cuda->num_deaths_dev, sizeof(int), cudaMemcpyDeviceToHost);
		cudaMemcpy(&cuda->num_recovery_attempts_int, cuda->num_recovery_attempts_dev, sizeof(int), cudaMemcpyDeviceToHost);

		stats->num_infections = (double)cuda->num_infections_int;
		stats->num_infection_attempts = (double)cuda->num_infection_attempts_int;
		stats->num_deaths = (double)cuda->num_deaths_int;
		stats->num_recovery_attempts = (double)cuda->num_recovery_attempts_int;
	}
}

interesting. Didn’t think a CUDA card would be used to simulate infectious diseases ;)

Hi, cbuchner1

Can you explain a little bit more on why cuda wouldn’t work for infectious simulation?

Thanks

I found the problem.

when I clear the memory, I should use

num_infected[threadIdx.x] = 0;
num_susceptible[threadIdx.x] = 0;
num_infection_attempts[threadIdx.x] = 0;
num_infections[threadIdx.x] = 0;

instead, i wrongly put

num_infected[id] = 0;
num_susceptible[id] = 0;
num_infection_attempts[id] = 0;
num_infections[id] = 0;

this is perfectly fine for the first block, because for the first block id and threadIdx.x are the same but for the second kernel, everything just went wrong…

Problem SOLVED…