Hi,
I am a cuda beginner and am now following the built-in cuda samples to learn something basics about cuda. However, there are a sample about printf in global functions that keep me puzzled for a long time.
The script is simplePrintf:
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// System includes
#include <stdio.h>
#include <assert.h>
// CUDA runtime
#include <cuda_runtime.h>
// helper functions and utilities to work with CUDA
#include <helper_functions.h>
#include <helper_cuda.h>
#ifndef MAX
#define MAX(a,b) (a > b ? a : b)
#endif
__global__ void testKernel(int val)
{
printf("[%d, %d]:\t\tValue is:%d\n",\
blockIdx.y*gridDim.x+blockIdx.x,\
threadIdx.z*blockDim.x*blockDim.y+threadIdx.y*blockDim.x+threadIdx.x,\
val);
}
int main(int argc, char **argv)
{
int devID;
cudaDeviceProp props;
// This will pick the best possible CUDA capable device
devID = findCudaDevice(argc, (const char **)argv);
//Get GPU information
checkCudaErrors(cudaGetDevice(&devID));
checkCudaErrors(cudaGetDeviceProperties(&props, devID));
printf("Device %d: \"%s\" with Compute %d.%d capability\n",
devID, props.name, props.major, props.minor);
printf("printf() is called. Output:\n\n");
//Kernel configuration, where a two-dimensional grid and
//three-dimensional blocks are configured.
dim3 dimGrid(2, 2);
dim3 dimBlock(2, 2, 2);
testKernel<<<dimGrid, dimBlock>>>(10);
cudaDeviceSynchronize();
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
return EXIT_SUCCESS;
}
While I expected the result to be something highly random like:
[1,5]: Value is:10
[2,2]: Value is:10
[0,1]: Value is:10
........
The result would always be in complete sequence no matter how many times I run through it:
[0, 0]: Value is:10
[0, 1]: Value is:10
[0, 2]: Value is:10
[0, 3]: Value is:10
[0, 4]: Value is:10
[0, 5]: Value is:10
[0, 6]: Value is:10
[0, 7]: Value is:10
[1, 0]: Value is:10
[1, 1]: Value is:10
[1, 2]: Value is:10
[1, 3]: Value is:10
[1, 4]: Value is:10
[1, 5]: Value is:10
[1, 6]: Value is:10
[1, 7]: Value is:10
[2, 0]: Value is:10
[2, 1]: Value is:10
[2, 2]: Value is:10
[2, 3]: Value is:10
[2, 4]: Value is:10
[2, 5]: Value is:10
[2, 6]: Value is:10
[2, 7]: Value is:10
[3, 0]: Value is:10
[3, 1]: Value is:10
[3, 2]: Value is:10
[3, 3]: Value is:10
[3, 4]: Value is:10
[3, 5]: Value is:10
[3, 6]: Value is:10
[3, 7]: Value is:10
Could anyone tell me why this happens? Why do all the threads execute in order?