A problem when a new hand in cuda programing

excuse me ,i’m a new hand in cuda programing ,i have some problem ,in the image follow ,i define the data struct
and define the “test” is 32768 ,this will work very well and the result will be same if i use CPU ,but when i make the “test” be 327680 ,the gpu will run crash ,i don’t know why,maybe memory is not enough?
ps:link one is my program of define and data struct,link two is my gpu with cuda z
http://imgur.com/QN9rGwo,gI65BIy
http://imgur.com/QN9rGwo,gI65BIy#1
if you need some information to solve this problem ,please mail “810473@gmail.com
thank you very much for your help!!

You haven’t shown enough of your code to identify the issue. You can paste code directly into these forums, you don’t need to put pictures in.

I would suggest to add return starus checks for all CUDA API calls and error checks for all kernel launches. It is exceedingly likely that the problem will become obvious at that point.

this is the code i use

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include<windows.h>

#define test 327680
#define require 118
#define BLOCK_NUM   1
#define THREAD_NUM   1024
#define everythread_datanum test/(BLOCK_NUM*THREAD_NUM)

struct data_inf{
	int coverage;
	int coverage_copy;
	bool data[require];
};

__device__ static void choose_update(struct data_inf*gpu_data, int *max_coverage, int *max_coverage_num, int *count_coverage, int *count_RS, int *RS)
{
	for (int i = 0; i<test; i++){
		if (gpu_data[i].coverage>*max_coverage || (gpu_data[i].coverage == *max_coverage && (gpu_data[i].coverage_copy >= gpu_data[*max_coverage_num].coverage_copy))){
			*max_coverage = gpu_data[i].coverage;
			*max_coverage_num = i;
		}
	}
	RS[*count_RS] = *max_coverage_num;
	for (int j = 0; j<require; j++){
		if (gpu_data[*max_coverage_num].data[j] == 1){
			for (int k = 0; k<test; k++){                                //remove this test data from request
				gpu_data[k].data[j] = 0;
			}
		}
	}
	if (gpu_data[*max_coverage_num].coverage == 0){
		printf("can not cover all require\n");
	}
	else{
		//update coverage
		*count_coverage += gpu_data[*max_coverage_num].coverage;
		//get coverage of the testing + get the max coverage of testing + number the testing
		printf("choose %d\n", RS[*count_RS]);
		printf("now coverage = %d\n", *count_coverage);
		*count_RS = *count_RS + 1;
	}
	*max_coverage_num = *max_coverage = 0;
}

__global__ static void copy_and_init(struct data_inf*gpu_data, int *max_coverage, int *max_coverage_num, int *count_coverage, int *count_RS)
{
	const int tid = threadIdx.x;
	const int bid = blockIdx.x;
	for (int x = 0; x<everythread_datanum; x++){
		int sum = 0;
		int num = bid * THREAD_NUM * everythread_datanum + tid * everythread_datanum + x;
		for (int i = 0; i < require; i++) {
			sum += gpu_data[num].data[i];
		}
		gpu_data[num].coverage_copy = gpu_data[num].coverage = sum;
	}
	if (tid == 0 && bid == 0){
		*max_coverage = *max_coverage_num = *count_coverage = *count_RS = 0;
	}
}

__global__ static void greedy(struct data_inf*gpu_data, int *max_coverage, int *max_coverage_num, int *count_coverage, int *count_RS, int *RS)
{
	const int tid = threadIdx.x;
	const int bid = blockIdx.x;
	while ((*count_coverage<require)&&(gpu_data[*max_coverage_num].coverage != 0)){
		__syncthreads();
		for (int x = 0; x<everythread_datanum; x++){
			int sum = 0;
			int num = bid * THREAD_NUM * everythread_datanum + tid * everythread_datanum + x;
			for (int i = 0; i < require; i++) {
				sum += gpu_data[num].data[i];
			}
			gpu_data[num].coverage = sum;
		}
		__syncthreads();
		if (tid == 0 && bid == 0){
			choose_update(gpu_data, max_coverage, max_coverage_num, count_coverage, count_RS, RS);
		}
	}
}

struct data_inf Data[test];
int count_RS_temp, RS_temp[test];

int main()
{
	FILE *fp;
	fp = fopen("greedy1.txt", "r");
	assert(fp != NULL);
	for (int i = 0; i<test; i++){
		for (int j = 0; j<require; j++){
			fscanf(fp, "%d", &Data[i].data[j]);
		}
	}
	fclose(fp);
	LARGE_INTEGER startTime, endTime, fre;
	double times;
	QueryPerformanceFrequency(&fre); //取得CPU頻率
	QueryPerformanceCounter(&startTime); //取得開機到現在經過幾個CPU Cycle
	//doing something
	//-------------------------------
	//greedy
	struct data_inf* gpu_data;
	cudaMalloc((void**)&gpu_data, sizeof(struct data_inf) * test);
	cudaMemcpy(gpu_data, Data, sizeof(struct data_inf) * test,
		cudaMemcpyHostToDevice);

	int *max_coverage, *max_coverage_num, *count_coverage, *count_RS, *RS;
	cudaMalloc((void**)&max_coverage, sizeof(int));
	cudaMalloc((void**)&max_coverage_num, sizeof(int));
	cudaMalloc((void**)&count_coverage, sizeof(int));
	cudaMalloc((void**)&count_RS, sizeof(int));
	cudaMalloc((void**)&RS, sizeof(int)*test);

	copy_and_init << <BLOCK_NUM, THREAD_NUM, 0 >> >(gpu_data, max_coverage, max_coverage_num, count_coverage, count_RS);

	greedy << <BLOCK_NUM, THREAD_NUM, 0 >> >(gpu_data, max_coverage, max_coverage_num, count_coverage, count_RS, RS);

	cudaMemcpy(&count_RS_temp, count_RS, sizeof(int),
		cudaMemcpyDeviceToHost);
	cudaMemcpy(&RS_temp, RS, sizeof(int)*count_RS_temp,
		cudaMemcpyDeviceToHost);
	for (int i = 0; i<count_RS_temp; i++){
		printf("%d\n", RS_temp[i]);
	}

	cudaFree(gpu_data);
	cudaFree(max_coverage);
	cudaFree(max_coverage_num);
	cudaFree(count_coverage);
	cudaFree(count_RS);
	cudaFree(RS);
	//Greedy end
	//-------------------------------------------
	//doing something
	QueryPerformanceCounter(&endTime); //取得開機到程式執行完成經過幾個CPU Cycle
	times = ((double)endTime.QuadPart - (double)startTime.QuadPart) / fre.QuadPart;
	printf("%lf s\n", times);
	system("pause");
	return 0;
}

As I expected, there is no error checking for the CUDA API calls and the kernel launches. Add that first. You can use the macros below, or anything equivalent. Wrap each CUDA API call in CUDA_SAFE_CALL(), e.g.

CUDA_SAFE_CALL (cudaMalloc((void**)&RS, sizeof(int)*test));

Insert a call to CHECK_LAUNCH_ERROR() after each kernel launch.

// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    cudaError_t err = call;                                           \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)

// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR()                                          \
do {                                                                  \
    /* Check synchronous errors, i.e. pre-launch */                   \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
    err = cudaThreadSynchronize();                                    \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString( err) );      \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)

I would also suggest reviewing this sticky forum thread:

https://devtalk.nvidia.com/default/topic/459869/cuda-programming-and-performance/-quot-display-driver-stopped-responding-and-has-recovered-quot-wddm-timeout-detection-and-recovery-/

The error-checking suggested by njuffa would confirm also if this is occurring.

Yes the problem is happen like the above website,i will try that website’s solution ,and the code i will also insert to try,thank you very much for yours help!!