TensorRT-3 IShuffleLayer permutation error

I’m trying to implement inference for my DNN model with TensorRT 3. I want to convert the input data from HWC format to CHW, but when I use the transpose operation of IShuffleLayer, it seems that I can’t permute the channel dimension with the spatial dimensions. Here’s my code for testing the permutation:

/**
 * Builds a single-layer TensorRT network that applies an IShuffleLayer
 * transpose with permutation {1, 2, 0} to a {4, 4, 3} float input, runs it
 * once with batch size 1, and copies the result back to the host.
 *
 * NOTE: the accompanying report states that running this code aborts with the
 * `isIndexedCHW` assertion, whereas the permutation {0, 2, 1} is accepted.
 *
 * @param in  Host buffer the input tensor is read from; must hold at least
 *            4 * 4 * 3 floats.
 * @param out Host buffer the output tensor is written to; must hold at least
 *            4 * 4 * 3 floats.
 */
void test(float* in, float* out)
{
    // Element count of the {4, 4, 3} tensor declared below. With a batch size
    // of 1 this is exactly what each binding holds, so the device buffers and
    // the host<->device copies are sized from it instead of an unrelated
    // literal that could over-read `in` or over-write `out`.
    const int kTensorVolume = 4 * 4 * 3;
    const auto kTensorBytes = kTensorVolume * sizeof(float);

    Logger gLogger;
    nvinfer1::IBuilder* builder = createInferBuilder(gLogger);
    nvinfer1::INetworkDefinition* network = builder->createNetwork();

    // Create input
    auto data = network->addInput("data", nvinfer1::DataType::kFLOAT, nvinfer1::DimsCHW{4, 4, 3});
    assert(data != nullptr);

    // Permute
    auto ps = network->addShuffle(*data);
    assert(ps != nullptr);

    std::cout << "ps transpose" << std::endl;
    ps->setFirstTranspose(nvinfer1::Permutation{1, 2, 0});

    // Set output layer
    ps->getOutput(0)->setName("out");
    network->markOutput(*ps->getOutput(0));

    // Build the engine
    builder->setMaxBatchSize(1);
    builder->setMaxWorkspaceSize(1 << 20);

    nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
    // The network definition is no longer used once the build call returns.
    network->destroy();
    assert(engine != nullptr);
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);

    // One binding for the input and one for the output.
    assert(engine->getNbBindings() == 2);
    const int inputIndex = engine->getBindingIndex("data");
    const int outputIndex = engine->getBindingIndex("out");

    void* buffers[2];
    CHECK(cudaMalloc(&buffers[inputIndex], kTensorBytes));
    CHECK(cudaMalloc(&buffers[outputIndex], kTensorBytes));
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // Upload, run and download are all queued on the same stream.
    CHECK(cudaMemcpyAsync(buffers[inputIndex], in, kTensorBytes, cudaMemcpyHostToDevice, stream));
    if (context->enqueue(1, buffers, stream, nullptr))
    {
        CHECK(cudaMemcpyAsync(out, buffers[outputIndex], kTensorBytes, cudaMemcpyDeviceToHost, stream));
    }
    else
    {
        // Do not copy back an output that was never produced.
        std::cerr << "enqueue failed; output buffer left untouched" << std::endl;
    }
    // Wait for the queued work so `out` is complete before returning.
    CHECK(cudaStreamSynchronize(stream));

    // Release the stream and the buffers
    CHECK(cudaStreamDestroy(stream));
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));

    // Destroy the TensorRT objects in reverse order of creation; the
    // execution context was previously leaked.
    context->destroy();
    engine->destroy();
    builder->destroy();
}

The input data is a float buffer, and when I execute this code I get the following error:

Enhance: helpers.cpp:39: nvinfer1::DimsCHW nvinfer1::getCHW(const nvinfer1::Dims&): Assertion `isIndexedCHW(d)’ failed.
The program has unexpectedly finished.

It seems to work when I set the permutation to [0, 2, 1], which keeps the channel dimension in place and only transposes the spatial dimensions.

Any suggestion?
Thank you very much in advance!

Maybe it will be easier to do the conversion outside TensorRT.

It was. I’m just confused about how to use the shuffle layer if it can’t do such a permutation. For more information, you can follow my other post: https://devtalk.nvidia.com/default/topic/1027394/tensorrt-3-0-ishufflelayer-cannot-transpose-tensor-from-chw-format-to-hwc/