Custom plugin issue (split plugin outputs are wrong)

I implemented a custom layer in C++ and CUDA by inheriting from PluginV2 and creating a PluginCreator. Since I generate my engine in Python, I used pybind11 to bind the constructors of the layer and the creator. In Python, I use network.add_plugin_v2() to add the layers to my network. I replace this node with the following configuration:

# Plugin node that replaces the TensorFlow split op. The keyword arguments
# describe a 128 x 60 x 92 (C x H W) input that is split into 4 parts along
# axis 1. It must be created before the map below refers to it.
split10 = gs.create_plugin_node("Split_10",
    inputChannels=128,
    inputHeight=60,
    inputWidth=92,
    axis=1,
    num_of_split=4)

# Every listed name is mapped onto the same plugin node.
# NOTE(review): the ":1", ":2" and ":3" keys look like TensorFlow output-tensor
# names rather than node/namespace names - confirm that the converter keeps
# the output index when it rewires the consumers of the split to the plugin.
namespace_plugin_map = {
    "FeatureExtractor/detnat_59/fpn/top_down/smoothing_1/BA_GConv2d_3_3/split": split10,
    "FeatureExtractor/detnat_59/fpn/top_down/smoothing_1/BA_GConv2d_3_3/split:1": split10,
    "FeatureExtractor/detnat_59/fpn/top_down/smoothing_1/BA_GConv2d_3_3/split:2": split10,
    "FeatureExtractor/detnat_59/fpn/top_down/smoothing_1/BA_GConv2d_3_3/split:3": split10,
}


def preprocess(dynamic_graph):
    """Collapse the namespaces listed in namespace_plugin_map into plugin nodes.

    The graph is modified through its collapse_namespaces() method; nothing
    is returned.
    """
    # Now create a new graph by collapsing namespaces
    dynamic_graph.collapse_namespaces(namespace_plugin_map)

Then there are 4 outputs. The split plugin code is below:

// Reports how many output tensors the plugin produces: one per split part.
    int getNbOutputs() const override
    {
        return mParams.num_of_split;
    }

    // Returns the dimensions of one output tensor as CHW (three dimensions,
    // no batch dimension).
    //
    // Every output has the same shape: the configured input shape with the
    // split axis divided by num_of_split (axis 1 = channels, 2 = height,
    // 3 = width).
    //
    // index       - output index, expected in [0, num_of_split)
    // inputs      - input dimensions; inputs[0] must match the configured shape
    // nbInputDims - number of entries in inputs; at least one is required
    nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) override
    {
        assert(inputs != nullptr && nbInputDims >= 1);
        assert(mParams.num_of_split > 0);
        assert(index >= 0 && index < mParams.num_of_split);
        assert(inputs[0].nbDims == 3);
        assert(mParams.inputChannels == inputs[0].d[0] &&
               mParams.inputHeight   == inputs[0].d[1] &&
               mParams.inputWidth    == inputs[0].d[2]);
        // These parameters are only read by assert(), which is compiled out
        // when NDEBUG is defined.
        UNUSED(index);
        UNUSED(nbInputDims);

        int channels = mParams.inputChannels;
        int height   = mParams.inputHeight;
        int width    = mParams.inputWidth;

        switch (mParams.axis) {
        case 1:
            assert(channels % mParams.num_of_split == 0);
            channels /= mParams.num_of_split;
            break;
        case 2:
            assert(height % mParams.num_of_split == 0);
            height /= mParams.num_of_split;
            break;
        case 3:
            assert(width % mParams.num_of_split == 0);
            width /= mParams.num_of_split;
            break;
        default:
            // Unsupported axis. Debug builds stop here; builds without
            // asserts fall through and report the unsplit input shape rather
            // than running off the end of the function without a return.
            assert(false);
            break;
        }

        return nvinfer1::DimsCHW(channels, height, width);
    }

    int enqueue(int batchSize, const void* const* inputs, void** outputs, void*, cudaStream_t stream) override
    {
        UNUSED(stream);
        int i, j;
        int split_channel;
        float* output;
        long copy_length, total_length;
        const float* input = reinterpret_cast<const float*>(inputs[0]);
        total_length = mParams.inputWidth * mParams.inputHeight * mParams.inputChannels;
        if (!mParams.isFp16) {
            if (mParams.axis == 1) {

                split_channel = mParams.inputChannels/mParams.num_of_split;
                copy_length = mParams.inputHeight * mParams.inputWidth * split_channel;

                for (j = 0; j < batchSize; j++) {
                    for (i = 0; i < mParams.num_of_split; i++) {
                        output = reinterpret_cast<float*>(outputs[i]);
                        CHECK(cudaMemcpy((void*)(output + j*copy_length), (void *)(input + i * split_channel * mParams.inputWidth * mParams.inputHeight + j*total_length) , copy_length * sizeof(float), cudaMemcpyDeviceToDevice));
                    }
                }

            } else {
                // TODO:
                assert(false);
            }
        } else {
            // TODO:
            assert(false);
        }

        return 0;
    }

For example, I have four outputs here that are the inputs of the following nodes, but testing shows that every output holds the value of output[0], instead of output[0], output[1], output[2] and output[3] respectively.

Please help me, thanks.

Can anybody help me?