I’m implementing the Axpy custom layer. However, my IExecutionContext automatically sets to zero all negative values that come from my custom IPlugin.
For debugging purposes, I copied the outputs to host memory so that I could check them inside my plugin's enqueue function:
virtual int enqueue(int batchSize, const void * const *inputs, void **outputs, void *workspace, cudaStream_t stream) override
{
// steps to perform Axpy operation
float* _out;
cudaMallocHost((void**)&_out, batchSize * _cpy_size[0] * sizeof(float));
cudaMemcpy(_out, output, batchSize * _cpy_size[0] * sizeof(float), cudaMemcpyDeviceToHost);
std::vector<float> _out_vec;
for(int x = 0; x < batchSize * _cpy_size[0]; ++x)
{
_out_vec.push_back(_out[x]);
}
// do other stuff
}
I used the same approach to check the values obtained at the point where I call context.enqueue, as follows:
if(!m_context->enqueue(batchSize, buffers, stream, nullptr))
return false;
cudaMallocHost((void**)&output, batchSize * outputDims.c() * outputDims.h() * outputDims.w() * sizeof(float));
cudaMemcpy(output, buffers[outputIndex], batchSize * outputDims.c() * outputDims.h() * outputDims.w() * sizeof(float), cudaMemcpyDeviceToHost);
std::vector<float> result_vals;
for(int x = 0; x < batchSize * outputDims.c() * outputDims.h() * outputDims.w(); ++x)
{
result_vals.push_back(output[x]);
}
However, I found that the negative values are automatically set to zero in my result_vals vector. For instance, part of my ‘_out_vec’ looks like this:
-1.0568
0.00726944
-0.115623
-0.273628
-0.310037
0.0518601
0.477593
0.467764
-0.108174
0.538383
But my ‘result_vals’ look like this:
0
0.00726944
0
0
0
0.0518601
0.477593
0.467764
0
0.538383
I guess there might be some parameter or setting that controls this (whether or not all negative values are set to zero)? Could someone help with this issue? Thank you.