Hi,
I am trying to use cudaFuncGetAttributes to get the information about memory usage of a function. However, it returns unexpected result.
Here is my kernel:
[codebox]global void compute_pixel_value(unsigned char* image, float* pixel_value, float* min_max, int x_size, int y_size)
{
int idx = blockIdx.x*blockDim.x+threadIdx.x;
int idy = blockIdx.y*blockDim.y+threadIdx.y;
//To compute laplacian of a pixel, it need 8 neighbors -> need to check range.
if( (idx>0 && idx<x_size-1) && (idy > 0 && idy < y_size-1) )
{
int index = idx + idy*x_size; // current pixel for this thread
int i,j;
int weight[3][3] = {{ 1, 1, 1 },
{ 1, -8, 1 },
{ 1, 1, 1 }};
for (j = - 1; j < 2; j++)
{
for (i = -1; i < 2; i++)
{
int index_t = (idy+j)*x_size + idx + i;
pixel_value[index] += weight[j + 1][i + 1] * image[index_t];
}
}
if (pixel_value[index] < min_max[0]) min_max[0] = pixel_value[index]; // min = min_max[0]
if (pixel_value[index] > min_max[1]) min_max[1] = pixel_value[index];
}
}
[/codebox]
and calling of cudaFuncGetAttributes:
[codebox]void print_func_attr(struct cudaFuncAttributes at)
{
printf("Constant memory in bytes: %lu\n", at.constSizeBytes);
printf("Local memory in bytes: %lu\n", at.localSizeBytes);
printf("Max Thread per Block: %d\n", at.maxThreadsPerBlock);
printf("Number of registers used: %d\n", at.numRegs);
printf("Shared memory in bytes: %lu\n", at.sharedSizeBytes);
}
int main()
{
struct cudaFuncAttributes attr;
const char* fname = “compute_pixel_value”;
cudaFuncGetAttributes(&attr, fname);
print_func_attr(attr);
return 0;
}[/codebox]
Here is the result:
[codebox]Constant memory in bytes: 4270390
Local memory in bytes: 238895010520
Max Thread per Block: -1637762112
Number of registers used: 55
Shared memory in bytes: 6395888
[/codebox]
I spent a couple of hours to debug but no progress. I really appreciate if anyone can point out the problem in my code.
Thanks a lot,
Huynh.