Hello
I want to print on screen the status progress of my kernel execution (something like “kernel is x% done”).
At the right moment, i’m trying to launch two concurrent kernels, accessing the same memory area, like this:
__device__ int d_percent;
__global__ void getPercent(int *out)
{
*out = d_percent;
}
__global__ void compute(int N)
{
d_percent = 0;
for (int i=0; i<N; i++) {
d_percent = i/(N/100);
}
d_percent = 100;
}
int main(int argc, char** argv) {
cudaSetDevice( cutGetMaxGflopsDeviceId() );
// setup execution parameters
dim3 grid( 1, 1, 1);
dim3 threads( 1, 1, 1);
cudaStream_t stream0;
cudaStream_t stream1;
cudaStreamCreate(&stream0);
cudaStreamCreate(&stream1);
int* h_out;
int* d_out;
cutilSafeCall( cudaMalloc((void**)&d_out, sizeof(int)));
cutilSafeCall( cudaMallocHost((void**)&h_out, sizeof(int)));
// execute the kernel
compute<<< grid, threads, 0, stream0>>>(100000000);
int last = 0;
while (cudaStreamQuery(stream0) == cudaErrorNotReady) {
getPercent<<< grid, threads, 0, stream1>>>(d_out);
cutilSafeCall(cudaMemcpyAsync( h_out, d_out, sizeof(int), cudaMemcpyDeviceToHost, stream1));
cudaStreamSynchronize(stream1);
if (last != *h_out) {
last = *h_out;
printf("%d\n", last);
}
//sleep(1);
}
printf("OUT\n");
// check if kernel execution generated and error
cutilCheckMsg("Kernel execution failed");
}
Unfortunatly, i cant overlap the kernels. The output is only "100\n’, without partial values.
I have tried without two streams, but i can’t obtain partial results also.
What am i missing? Is there any way of getting partial results from kernel to the CPU?
thanks in advance.