Hello,
Paste the below code in template project in SDK samples and run it.
Basically the code is comparing the results of 1D and 2D call configs are same or not.
TestKernel1 uses 1D call config and TestKernel2 uses 2D call config.
But Both function results are NOT same.
[codebox]// // includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include <cutil_inline.h>
// includes, kernels
#include <template_kernel.cu>
global
void TestKernel1( int* array1, int limit )
{
int idx = __umul24(blockIdx.x,blockDim.x) + threadIdx.x;
if ( idx < limit )
array1[idx] = idx;
}
global
void TestKernel2( int* array2, int limit )
{
int idx = __umul24(blockIdx.x,blockDim.x) + threadIdx.x;
int idy = __umul24(blockIdx.y,blockDim.y) + threadIdx.y;
int index = __umul24(idx,idy);
if ( index < limit )
array2[index] = index;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main( int argc, char** argv)
{
int* dArray1=NULL;
int* dArray2=NULL;
int width = 300;
int height = 200;
cudaMalloc( (void**)&dArray1, sizeof(int)*width*height );
cudaMalloc( (void**)&dArray2, sizeof(int)*width*height );
cudaMemset( dArray1, 0, sizeof(int)*width*height );
cudaMemset( dArray2, 0, sizeof(int)*width*height );
dim3 grid1( ((width*height)+255)/256,1,1);
dim3 block1( 256,1,1);
TestKernel1<<<grid1,block1>>>(dArray1, width*height);
dim3 grid2( (width+15)/16, (height+15)/16, 1);
dim3 block2( 16,16,1);
TestKernel2<<<grid2,block2>>>(dArray2, width*height);
int* hArray1=NULL;
int* hArray2=NULL;
hArray1 = (int*)malloc(sizeof(int)*width*height);
hArray2 = (int*)malloc(sizeof(int)*width*height);
cudaMemcpy( hArray1, dArray1, sizeof(int)*width*height, cudaMemcpyDeviceToHost);
cudaMemcpy( hArray2, dArray2, sizeof(int)*width*height, cudaMemcpyDeviceToHost);
int isSame = true;
for( int y = 0; y < height; ++y )
{
for( int x = 0; x < width; ++x )
{
int index = y*width+x;
if( hArray1[index] != hArray2[index] )
isSame = false;
}
}
if ( isSame == true )
printf("\nBoth Kernels are same\n");
else
printf("\nBoth Kernels are NOT same\n");
free(hArray1);
free(hArray2);
cudaFree(dArray1);
cudaFree(dArray2);
cutilExit(argc, argv);
}
[/codebox]