Problem with cudaEventQuery

I have a problem with the following code producing loads of cudaError_enum errors: Microsoft C++ exception: cudaError_enum at memory location 0x1c20fce4…

It only occurs when this line is included while(cudaEventQuery(stop2) != cudaSuccess); Ultimately I want the CPU to be doing some work in here so I cant just use cudaEventSynchonize.

What is wrong? The program will run for many repetitions without error or problem without the while loop. Yet with it included it triggers loads of errors and eventually hangs the system.

Any help would be very much appreciated. Thanks.

cudaEvent_t start2, stop2;

			cutilSafeCall( cudaEventCreate(&start2) );

			cutilSafeCall( cudaEventCreate(&stop2)  );

			cudaEventRecord(start2, 0);

			printf("Launching threads\n");

			for (int i = 0; i < NUM_STREAMS; i++) {

				//grid,threads

				stringMatch<<< (STREAM_SIZE/128), 128, 0,stream[i]>>>( d_idata, dhash_idata, d_odata+i*STREAM_SIZE,i);

			}

			for (int i = 0; i < NUM_STREAMS; i++) {

				cudaMemcpyAsync(h_odata+i*STREAM_SIZE ,d_odata+i*STREAM_SIZE,STREAM_SIZE*sizeof(int),cudaMemcpyDeviceToHost, stream[i]);

			}

			cudaEventRecord(stop2, 0);

			while(cudaEventQuery(stop2) != cudaSuccess);