Hi all,
OpenACC does not offer access to GPU-specific features useful for debugging, optimization and other purposes.
For debugging I want to call device functions from within OpenACC kernels.
Here is what I have so far:
print.cu
#include <cstdio>
// Report the calling thread's CUDA threadIdx as an int3 (x, y, z).
// Declared extern "C" so the symbol name is not C++-mangled.
extern "C" __device__ int3 acc_get_thread_idx()
{
    // threadIdx components are unsigned; convert each one to int explicitly.
    const int3 idx = {
        static_cast<int>(threadIdx.x),
        static_cast<int>(threadIdx.y),
        static_cast<int>(threadIdx.z)
    };
    return idx;
}
// Report the calling thread's CUDA blockIdx as an int3 (x, y, z).
// Declared extern "C" so the symbol name is not C++-mangled.
extern "C" __device__ int3 acc_get_block_idx()
{
    // blockIdx components are unsigned; convert each one to int explicitly.
    const int3 idx = {
        static_cast<int>(blockIdx.x),
        static_cast<int>(blockIdx.y),
        static_cast<int>(blockIdx.z)
    };
    return idx;
}
// Device-side debug helper: emits one line with the given block index,
// thread index and loop counter i through device printf.
extern "C" __device__ void print(int3 thread, int3 block, int i)
{
    printf("block: (%d, %d, %d), thread: (%d, %d, %d) :: i = %d \n",
           block.x, block.y, block.z,
           thread.x, thread.y, thread.z,
           i);
}
main.c
// C-side stand-in for the int3 type that CUDA provides built-in but
// C/OpenACC does not: three int members named x, y and z.
typedef struct {
    int x;
    int y;
    int z;
} int3;
// Prototypes for the device helpers implemented in print.cu.
// Each helper is executed by one thread on its own from inside the parallel
// loop, so it is declared as a sequential routine: the routine directive is
// given an explicit level-of-parallelism clause (seq) instead of none.
#pragma acc routine seq
int3 acc_get_thread_idx();
#pragma acc routine seq
int3 acc_get_block_idx();
#pragma acc routine seq
void print(int3 thread, int3 block, int i);
// Host entry point: runs a 512-iteration OpenACC parallel loop in which each
// iteration queries its CUDA thread and block index and prints them together
// with the loop counter.
// Returns 0. main is declared int because void main() is not a standard C
// signature and leaves the process exit status unspecified.
int main(void)
{
#pragma acc parallel loop
    for (int i = 0; i < 512; i++)
    {
        int3 thread = acc_get_thread_idx();
        int3 block = acc_get_block_idx();
        // Print values from within the OpenACC parallel for loop.
        print(thread, block, i);
    }
    return 0;
}
How do I compile main.c together with print.cu using pgcc?
Should nvcc be used?
Note: I am using the PGI compilers on Windows.