Trouble with my custom layer implementation (ELU activation function)

Hi,

I am trying to implement an ELU layer and convert my Caffe network with TensorRT, but I get
“Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)”
The error happens when I try to execute my code.

The error occurs on this line:
ICudaEngine* engine = builder->buildCudaEngine(*network);

First of all, I tried to convert a standard U-Net model with some changes (such as the ELU layer), but with such a large structure it is really hard to find the mistake. After that, I simplified my network.

I tried different variations of the network structure:

  1. With a net like INP -> CONV -> ELU -> OUT, it works fine
  2. INP -> CONV -> ELU -> CONV -> OUT also works fine
  3. INP -> CONV -> ELU -> CONV -> ReLU -> OUT or INP -> CONV -> ReLU -> CONV -> ELU -> OUT works fine
  4. With INP -> CONV -> ELU -> CONV -> ELU -> OUT, I get the error

I have no idea why this happens…
Maybe the problem is in getNbOutputs or getOutputDimensions, but I can’t figure it out.

Also, I have converted u-net with ReLU without any trouble. It means that tensorRT was installed correctly and works fine.

I used samplePlugin, which is installed with TensorRT, as the basis for my code.
You can find it at /usr/src/tensorrt/samples/samplePlugin after installing TensorRT.

Some parameters of my PC:

  1. OS: Linux Mint 18.3 Sylvia
  2. Kernel version: 4.10.0-38-generic
  3. Cuda: release 9.0, V9.0.176
  4. Compiler: gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.9)
  5. GPU : GeForce GTX 1050, NVIDIA-SMI 384.111
  6. TensorRT: 4.1.0-1 + cuda 9.0

LINKS:

  1. My code: https://pastebin.com/QkzjX6qW
  2. Deploy file which gives the error: https://pastebin.com/ZDHmxr0Z
  3. About u-net: https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/

Thanks for your attention.

I’ve had a quick look through your code and you don’t seem to be doing any memcpy’ing or malloc’ing in your constructors. Don’t you need to move the ELU alpha parameter into the GPU?

I’ve included code for a PReLU plugin layer that I know works. I hope this helps you.

// Parser-time factory entry point: builds the PReLU plugin that backs the named layer
// from the weights handed over for that layer.
//
// layerName  Name of the layer to create; must satisfy isPlugin().
// weights    Weight array forwarded unchanged to the PReLULayer constructor.
// nbWeights  Number of entries in `weights`.
//
// Returns the newly created plugin (still owned by this factory), or nullptr when the
// name matches none of the known layers (after tripping assert(0) in debug builds).
nvinfer1::IPlugin* PluginFactory::createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights)
{
    assert(isPlugin(layerName));

    // Step 1: map the layer name onto the member that will own the plugin.
    std::unique_ptr<PReLULayer>* slot = nullptr;
    if (strcmp(layerName, "prelu1") == 0 || strcmp(layerName, "prelu") == 0)
    {
        slot = &mPReLULayer1;
    }
    else if (strcmp(layerName, "prelu2") == 0)
    {
        slot = &mPReLULayer2;
    }
    else if (strcmp(layerName, "prelu3") == 0)
    {
        slot = &mPReLULayer3;
    }
    else if (strcmp(layerName, "prelu4_1") == 0)
    {
        slot = &mPReLULayer4_1;
    }
    else if (strcmp(layerName, "prelu4") == 0)
    {
        slot = &mPReLULayer4;
    }
    else if (strcmp(layerName, "fc_prelu") == 0)
    {
        slot = &mPReLULayer5;
    }

    if (slot == nullptr)
    {
        // Unknown layer name.
        assert(0);
        return nullptr;
    }

    // Step 2: each slot may be populated only once between destroyPlugin() calls.
    assert(slot->get() == nullptr);
    *slot = std::unique_ptr<PReLULayer>(new PReLULayer(weights, nbWeights));
    return slot->get();
}

// Deserialization-time factory entry point: rebuilds the PReLU plugin that backs the
// named layer from its serialized bytes.
//
// layerName     Name of the layer to create; must satisfy isPlugin().
// serialData    Serialized plugin data forwarded unchanged to the PReLULayer constructor.
// serialLength  Size of `serialData` in bytes.
//
// Returns the newly created plugin (still owned by this factory), or nullptr when the
// name matches none of the known layers (after tripping assert(0) in debug builds).
IPlugin* PluginFactory::createPlugin(const char* layerName, const void* serialData, size_t serialLength)
{
    assert(isPlugin(layerName));

    // Step 1: map the layer name onto the member that will own the plugin.
    std::unique_ptr<PReLULayer>* slot = nullptr;
    if (strcmp(layerName, "prelu1") == 0 || strcmp(layerName, "prelu") == 0)
    {
        slot = &mPReLULayer1;
    }
    else if (strcmp(layerName, "prelu2") == 0)
    {
        slot = &mPReLULayer2;
    }
    else if (strcmp(layerName, "prelu3") == 0)
    {
        slot = &mPReLULayer3;
    }
    else if (strcmp(layerName, "prelu4_1") == 0)
    {
        slot = &mPReLULayer4_1;
    }
    else if (strcmp(layerName, "prelu4") == 0)
    {
        slot = &mPReLULayer4;
    }
    else if (strcmp(layerName, "fc_prelu") == 0)
    {
        slot = &mPReLULayer5;
    }

    if (slot == nullptr)
    {
        // Unknown layer name.
        assert(0);
        return nullptr;
    }

    // Step 2: each slot may be populated only once between destroyPlugin() calls.
    assert(slot->get() == nullptr);
    *slot = std::unique_ptr<PReLULayer>(new PReLULayer(serialData, serialLength));
    return slot->get();
}

// Reports whether `name` denotes a layer handled by this factory: any name whose first
// five characters are "prelu", or exactly "fc_prelu".
bool PluginFactory::isPlugin(const char* name)
{
    if (strncmp(name, "prelu", 5) == 0)
    {
        return true;
    }
    return strcmp(name, "fc_prelu") == 0;
}

// Destroys every plugin instance owned by this factory and leaves all slots empty so
// that createPlugin() can populate them again.
//
// reset() is used rather than release(): release() only gives up ownership and returns
// the raw pointer without deleting it, so the previous release()-then-assign-nullptr
// sequence leaked every PReLULayer that had been created.
void PluginFactory::destroyPlugin()
{
    mPReLULayer1.reset();
    mPReLULayer2.reset();
    mPReLULayer3.reset();
    mPReLULayer4.reset();
    mPReLULayer4_1.reset();
    mPReLULayer5.reset();
}


/******************************/
// PReLU Plugin Layer Start
/******************************/


// Parser-time constructor: takes a private host-side copy of the slope weights.
//
// weights    Weight array; exactly one entry (the slopes) is expected.
// nbWeights  Number of entries in `weights`; asserted to be 1.
PReLULayer::PReLULayer(const Weights *weights, int nbWeights)
{
    assert(nbWeights == 1);

    // Start from the descriptor (type, count, pointer) of the incoming weights ...
    mSlope = weights[0];
    assert(mSlope.type == DataType::kFLOAT || mSlope.type == DataType::kHALF);

    // ... then swap the borrowed pointer for a malloc'ed duplicate of the values.
    const size_t slopeBytes = mSlope.count*type2size(mSlope.type);
    void* ownedValues = malloc(slopeBytes);
    memcpy(ownedValues, weights[0].values, slopeBytes);
    mSlope.values = ownedValues;
}

// Deserialization constructor: restores the layer from the byte stream laid out by
// serialize() -- slope count, data type, C, H, W, element count, then the slope values.
//
// buffer  Start of the serialized plugin data.
// size    Length of the serialized data in bytes (not consulted here).
//
// Fixes relative to the previous version:
//   * mDeviceSlopeCount and mDeviceW were each cudaMalloc'ed twice, so the first
//     allocation of each was overwritten and leaked.
//   * Each scalar upload copied from `d` *after* read() had consumed the field, i.e. it
//     uploaded the bytes of the following field. The uploads now copy from the host
//     value that was just read.
// NOTE(review): assumes read() advances `d` past the field it consumed (mirroring
// write() in serialize(), whose final assert relies on that) and that mC/mH/mW are
// `int`, matching the sizeof(int) device allocations -- confirm against the class.
PReLULayer::PReLULayer(const void* buffer, size_t size)
{
    const char* d = static_cast<const char*>(buffer);

    read(d, mSlope.count);
    // The device-side slot is allocated as a single int, so narrow the count first.
    const int slopeCount = static_cast<int>(mSlope.count);
    CHECK(cudaMalloc(&mDeviceSlopeCount, sizeof(int)));
    CHECK(cudaMemcpy(mDeviceSlopeCount, &slopeCount, sizeof(int), cudaMemcpyHostToDevice));

    // Kept on the host only; no dedicated device member is visible for the data type.
    read(d, mDataType);

    read(d, mC);
    CHECK(cudaMalloc(&mDeviceC, sizeof(int)));
    CHECK(cudaMemcpy(mDeviceC, &mC, sizeof(int), cudaMemcpyHostToDevice));

    read(d, mH);
    CHECK(cudaMalloc(&mDeviceH, sizeof(int)));
    CHECK(cudaMemcpy(mDeviceH, &mH, sizeof(int), cudaMemcpyHostToDevice));

    read(d, mW);
    CHECK(cudaMalloc(&mDeviceW, sizeof(int)));
    CHECK(cudaMemcpy(mDeviceW, &mW, sizeof(int), cudaMemcpyHostToDevice));
    mDim = mW*mH;

    // Kept on the host only; no dedicated device member is visible for the element count.
    read(d, mCount);

    // No host copy of the slopes is kept after deserialization; `d` now points at the
    // slope values, which are uploaded straight to the device.
    mSlope.values = nullptr;
    const size_t slopeBytes = mSlope.count*type2size(mDataType);
    CHECK(cudaMalloc(&mDeviceSlope, slopeBytes));
    CHECK(cudaMemcpy(mDeviceSlope, d, slopeBytes, cudaMemcpyHostToDevice));
}

// Writes the layer state into `buffer` in this order: slope count, data type, C, H, W,
// element count, then the slope values (converted to mDataType where necessary).
//
// buffer  Destination; exactly getSerializationSize() bytes are expected to be written,
//         which the trailing assert verifies.
void PReLULayer::serialize(void* buffer)
{
    char* const start = static_cast<char*>(buffer);
    char* cursor = start;

    // Fixed-size header fields.
    write(cursor, mSlope.count);
    write(cursor, mDataType);
    write(cursor, mC);
    write(cursor, mH);
    write(cursor, mW);
    write(cursor, mCount);

    // Variable-size payload.
    convertAndCopyToBuffer(cursor, mSlope);

    assert(cursor == start + getSerializationSize());
}

// Records the dimensions of the first input (d[0], d[1], d[2] into mC, mH, mW) together
// with their product, and reports an output of the same three extents.
//
// index        Output index (not consulted).
// inputDims    Input dimension array; only the first entry is read.
// nbInputDims  Number of entries in `inputDims` (not consulted).
Dims PReLULayer::getOutputDimensions(int index, const Dims* inputDims, int nbInputDims)
{
    const Dims& firstInput = inputDims[0];

    mC = firstInput.d[0];
    mH = firstInput.d[1];
    mW = firstInput.d[2];
    mCount = mC*mH*mW;

    return DimsCHW(firstInput.d[0], firstInput.d[1], firstInput.d[2]);
}

// Performs no setup work; always returns 0.
int PReLULayer::initialize()
{
    const int status = 0;
    return status;
}
void PReLULayer::convertAndCopyToBuffer(char*& buffer, const Weights& weights)
{
    if (weights.type != mDataType)
        for (int64_t v = 0; v < weights.count; ++v)
            if (mDataType == DataType::kFLOAT)
                reinterpret_cast<float*>(buffer)[v] = fp16::__half2float(static_cast<const __half*>(weights.values)[v]);
            else
                reinterpret_cast<__half*>(buffer)[v] = fp16::__float2half(static_cast<const float*>(weights.values)[v]);
    else
        memcpy(buffer, weights.values, weights.count * type2size(mDataType));
    buffer += weights.count * type2size(mDataType);
}
 

// Hands one batch to PReLUForward on the given stream.
//
// batchSize  Number of samples in the batch; multiplied by mCount to form the first
//            argument passed to PReLUForward.
// inputs     Input pointer array, forwarded unchanged.
// outputs    Output pointer array, forwarded unchanged.
// (unnamed)  Fourth parameter is not used.
// stream     CUDA stream forwarded to PReLUForward.
//
// Always returns 0.
int PReLULayer::enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream)
{
    const auto batchElementCount = batchSize*mCount;

    PReLUForward(batchElementCount, mC, mDim, inputs, outputs, mDeviceSlope, stream);

    return 0;
}
 

// Number of bytes serialize() emits: the slope count, the data type, four fields the
// size of mW, and one mDataType-sized element per slope.
size_t PReLULayer::getSerializationSize()
{
    const size_t headerBytes = sizeof(mSlope.count) + 4*sizeof(mW) + sizeof(mDataType);
    const size_t slopeBytes = (mSlope.count) * type2size(mDataType);
    return headerBytes + slopeBytes;
}
// Configuration hook; intentionally a no-op -- none of the arguments are inspected.
void PReLULayer::configure(const Dims* inputs, int nbInputs, const Dims* outputs, int nbOutputs, int)
{
    // Nothing to configure.
}
 
// Size in bytes of one element of `type`: sizeof(float) for DataType::kFLOAT,
// sizeof(__half) for every other value.
size_t PReLULayer::type2size(DataType type)
{
    if (type == DataType::kFLOAT)
    {
        return sizeof(float);
    }
    return sizeof(__half);
}

Thanks davidsh3zdh for your advice.

Problem solution
The problem was that I created only one object of type ELU_Plugin in “PluginFactory”, but my net has more than one ELU layer.

P.S it’s strange but I don’t see the davidsh3zdh’s comment
P.P.S comment has appeared