Hi,
Could anybody explain the following? When I compile (with nvcc -std=c++11) and run the following code:
// Small payload type used by the timing experiment: three ints followed by
// three pointers to TestClass.
class TestClass
{
public:
    // Device-only constructor: copies `a` into num1, num2 and num3.
    // While the initialiser line below stays commented out, ptr1, ptr2 and
    // ptr3 are left uninitialised; uncommenting it makes each of them hold
    // the address of the object being constructed (`this`).
    __device__ inline
    TestClass( int a )
        : num1( a ),
          num2( a ),
          num3( a )
//        , ptr1( this ), ptr2( this ), ptr3( this )
    {
    }

    // Integer members, each set to the constructor argument.
    int num1;
    int num2;
    int num3;

    // Pointer members, only assigned when the commented-out initialiser is enabled.
    TestClass* ptr1;
    TestClass* ptr2;
    TestClass* ptr3;
};
// Benchmark kernel: each thread that runs it constructs one local TestClass
// with the value 2 and does nothing else. It takes no arguments and writes
// no results.
//
// The execution-space qualifier must be spelled `__global__`; a bare `global`
// is not a CUDA keyword and does not compile.
//
// NOTE(review): `t` is never read, so the compiler may be free to discard the
// construction entirely - confirm against the generated PTX/SASS before
// drawing conclusions from the timings.
__global__ void testKernel()
{
    TestClass t( 2 );
}
// Host driver for the timing experiment.
//
// Launches testKernel 10000 times on a grid of 256 blocks with 256 threads
// each, waiting for the device after every launch, then prints the total
// wall-clock time in milliseconds.
//
// Parameters:
//   argc, argv - standard command-line arguments (unused).
// Returns:
//   EXIT_SUCCESS after printing the elapsed time, or EXIT_FAILURE if a kernel
//   launch or the following synchronisation reports a CUDA error.
int main( int argc, char* argv[] )
{
    const dim3 cudaBlockSize( 256 );
    const dim3 cudaGridSize( 256 );
    const int iterationCount = 10000;

    auto t_start = std::chrono::high_resolution_clock::now();
    for( int iteration = 0; iteration < iterationCount; iteration++ )
    {
        testKernel<<< cudaGridSize, cudaBlockSize >>>();

        // A kernel launch returns no status itself: configuration errors are
        // reported by cudaGetLastError(), execution errors by the next
        // synchronising call.
        cudaError_t status = cudaGetLastError();
        if( status == cudaSuccess )
        {
            // cudaDeviceSynchronize() replaces the deprecated
            // cudaThreadSynchronize(). The wait is kept inside the loop so
            // every iteration measures one complete launch-and-finish cycle.
            status = cudaDeviceSynchronize();
        }
        if( status != cudaSuccess )
        {
            // Without this check a failing kernel would still produce a
            // plausible-looking (and meaningless) elapsed time.
            std::cerr << "CUDA error: " << cudaGetErrorString( status ) << std::endl;
            return EXIT_FAILURE;
        }
    }
    auto t_stop = std::chrono::high_resolution_clock::now();

    std::cout << "Elapsed time = "
              << std::chrono::duration<double, std::milli>( t_stop - t_start ).count()
              << std::endl;
    return EXIT_SUCCESS;
}
I get a message like
Elapsed time = 204.765
When I uncomment the one commented-out line in the constructor (the initializer for ptr1, ptr2 and ptr3), I get
Elapsed time = 1218.97
though I would expect something like 400 ms, since initializing a pointer should take the same time as initializing an integer. I tested it with:
Cuda compilation tools, release 7.5, V7.5.17
on GeForce GT 430
and with g++ (Ubuntu 4.8.5-2ubuntu1~14.04.1) 4.8.5.
Thanks for any help.