Hi Nvidia Team,
I am implementing a custom plugin for an Einsum op (which is not currently supported in TensorRT).
I have a question about the enqueue() function.
Below is the Einsum Node Information:
I have implemented the plugin, but I am unsure how to transpose the inputs inside enqueue().
Below is a snippet of the enqueue() function.
```
int EinsumPlugin::enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)
{
cublasHandle_t mCublas;
CHECK(cublasCreate(&mCublas));
float onef{1.0f}, zerof{0.0f};
float transpose_1[N][O][L];
float transpose[N][K][L];
cublasSetStream(mCublas, stream);
if(equation== 'nct,ncp->ntp')
{
//Transpose the matrix A
for(int k=0; k< N; ++k)
for (int i = 0; i < L; ++i)
for (int j = 0; j < K; ++j) {
transpose[k][j][i] = inputs[0][k][i][j];
}
cublasSgemmBatched(mCublas, CUBLAS_OP_N,CUBLAS_OP_N,
M, L, K, onef,
reinterpret_cast<const float*>(inputs[1]), M,
reinterpret_cast<const float*>(transpose), K,
zerof, reinterpret_cast<const float*>(outputs[0]), M, N)
}
else if(equation=='ntg, ncg -> nct')
{
//Transpose the matrix B
for(int a=0; a< N; ++a)
for (int b = 0; b < O; ++b)
for (int c = 0; c < M; ++c) {
transpose[a][c][b] = inputs[1][a][b][c];
}
cublasSgemmBatched(mCublas, CUBLAS_OP_N,CUBLAS_OP_N,
M, L, K, onef,
reinterpret_cast<const float*>(transpose), M,
reinterpret_cast<const float*>(inputs[0]), K,
zerof, reinterpret_cast<const float*>(outputs[0]), M, N)
//Transpose the output matrix
for(int e=0; e< N; ++e)
for (int f = 0; f < O; ++f)
for (int g = 0; g < M; ++g) {
transpose_1[e][g][f] = outputs[0][e][f][g];
}
//Transpose the matrix results
for(int e=0; e< N; ++e)
for (int f = 0; f < O; ++f)
for (int g = 0; g < M; ++g) {
outputs[0][e][f][g] = transpose_1[e][f][g];
}
}
return 0;
}
```
I am getting the compile error "expression must be a pointer to a complete object type" on the line `transpose[k][j][i] = inputs[0][k][i][j];`. What does this error mean, and how can I resolve it?
Thanks,
Darshan