Hi,
I would like to know if it’s possible to pass plain-old-data structs to device routines. (aggregation of builtin data types like int/floats etc… but without pointers, so theoretically can be contiguous in memory).
I made a silly example to exemplify this. (the error is at the end)
$ cat struct_test.c
#include <stdio.h>
#include <stdlib.h>
#define PITCH 32
#define DIM 24
// 3D structure:
// each line aligned to 128-byte (32 elements in this case)
typedef struct
{
int pitch;
int x;
int y;
int z;
} dim_t;
#pragma acc routine seq
inline
int IDX(int x, int y, int z, dim_t dim)
{
return (y * dim.x + x) * dim.pitch + z;
}
#pragma acc routine seq
inline
int compute(int* in, int x, int y, int z, dim_t dim)
{
return in[IDX(x,y,z,dim)];
}
int main(int argc, char* argv[])
{
int in[PITCH*DIM*DIM];
int out[PITCH*DIM*DIM];
dim_t dim = {PITCH,DIM,DIM,DIM};
#pragma acc kernels copyin(in[:PITCH*DIM*DIM]) copyout(out[:PITCH*DIM*DIM])
#pragma acc loop independent
for (int y = 0; y < dim.y; y++) {
#pragma acc loop independent
for (int x = 0; x < dim.x; x++) {
#pragma acc loop independent
for (int z = 0; z < dim.z; z++) {
out[IDX(x,y,z,dim)] = compute(in, x, y, z, dim);
}
}
}
return 0;
}
$ pgcc -acc -Minfo=accel,inline -Minline=IDX,compute struct_test.c
main:
38, Generating copy(dim)
Generating copyin(in[:])
Generating copyout(out[:])
40, Loop is parallelizable
42, Loop is parallelizable
45, IDX inlined, size=3, file struct_test.c (21)
44, Loop is parallelizable
Accelerator kernel generated
Generating Tesla code
40, #pragma acc loop gang /* blockIdx.y */
42, #pragma acc loop gang, vector(4) /* blockIdx.z threadIdx.y */
44, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */
45, compute inlined, size=4, file struct_test.c (28)
IDX:
21, Generating acc routine seq
nvvmCompileProgram error: 9.
Error: /scratch/tmp/pgaccQg7f_jnj6Dh7.gpu (183, 24): parse invalid cast opcode for cast from 'i8*' to '%struct.DT1_4178 = type { i32, i32, i32, i32 }'
PGC-S-0155-Compiler failed to translate accelerator region (see -Minfo messages): Device compiler exited with error status code (struct_test.c: 1)
PGC/x86-64 Linux 16.5-0: compilation completed with severe errors
$ pgcc --version
pgcc 16.5-0 64-bit target on x86-64 Linux -tp nehalem
The Portland Group - PGI Compilers and Tools
Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.