TensorRT 2.1 Produces Wrong Output for ResNet

I tested TensorRT 2.1 on a ResNet with CUDA 7.5 + cuDNN 6.0 on an M40, using the Caffe parser. The output blob is an FC layer, but its output is wrong.

I inspected the output of each convolution layer and found that the first ResNet block (non-identity shortcut) produces correct output, while the second ResNet block, which uses an identity connection, produces wrong output.

The prototxt:

# Block resx1, main branch, step 1: 1x1 convolution reading blob "pool1"
# and producing 64 output channels (stride 1, no padding, bias enabled).
layer {
  name: "resx1_conv1"
  type: "Convolution"
  bottom: "pool1"
  top: "resx1_conv1"
  # Two param blocks with training-only multipliers; presumably weights first,
  # then bias (Caffe convention) -- the bias block has weight decay disabled.
  param {
    lr_mult:  1
    decay_mult: 1
  }
  param {
    lr_mult:  1
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    bias_term: true
    pad: 0
    kernel_size: 1
    stride: 1
    # Initializer used when weights are not loaded from a trained model.
    weight_filler {
      type: "xavier"
    }
	
  }
}
# In-place ReLU: bottom and top are the same blob ("resx1_conv1").
layer {
  name: "resx1_conv1_relu_"
  type: "ReLU"
  bottom: "resx1_conv1"
  top: "resx1_conv1"
}
# Block resx1, main branch, step 2: 3x3 convolution on "resx1_conv1"
# producing 64 output channels (stride 1, pad 1, bias enabled).
layer {
  name: "resx1_conv2"
  type: "Convolution"
  bottom: "resx1_conv1"
  top: "resx1_conv2"
  # Two param blocks with training-only multipliers; presumably weights first,
  # then bias (Caffe convention) -- the bias block has weight decay disabled.
  param {
    lr_mult:  1
    decay_mult: 1
  }
  param {
    lr_mult:  1
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    bias_term: true
    pad: 1
    kernel_size: 3
    # The group setting below is commented out, so no grouping is specified
    # for this layer.
    #group: 32
    stride: 1
    # Initializer used when weights are not loaded from a trained model.
    weight_filler {
      type: "xavier"
    }
	
  }
}
# In-place ReLU: bottom and top are the same blob ("resx1_conv2").
layer {
  name: "resx1_conv2_relu_"
  type: "ReLU"
  bottom: "resx1_conv2"
  top: "resx1_conv2"
}
# Block resx1, main branch, step 3: 1x1 convolution on "resx1_conv2"
# expanding to 128 output channels (stride 1, no padding, bias enabled).
# No ReLU follows directly; the output feeds the "resx1_elewise" sum.
layer {
  name: "resx1_conv3"
  type: "Convolution"
  bottom: "resx1_conv2"
  top: "resx1_conv3"
  # Two param blocks with training-only multipliers; presumably weights first,
  # then bias (Caffe convention) -- the bias block has weight decay disabled.
  param {
    lr_mult:  1
    decay_mult: 1
  }
  param {
    lr_mult:  1
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    bias_term: true
    pad: 0
    kernel_size: 1
    stride: 1
    # Initializer used when weights are not loaded from a trained model.
    weight_filler {
      type: "xavier"
    }
	
  }
}

# Block resx1, shortcut branch: 1x1 convolution applied directly to "pool1"
# producing 128 output channels, the same num_output as "resx1_conv3", so
# the two branches can be summed in "resx1_elewise".
layer {
  name: "resx1_match_conv"
  type: "Convolution"
  bottom: "pool1"
  top: "resx1_match_conv"
  # Two param blocks with training-only multipliers; presumably weights first,
  # then bias (Caffe convention) -- the bias block has weight decay disabled.
  param {
    lr_mult:  1
    decay_mult: 1
  }
  param {
    lr_mult:  1
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    bias_term: true
    pad: 0
    kernel_size: 1
    stride: 1
    # Initializer used when weights are not loaded from a trained model.
    weight_filler {
      type: "xavier"
    }
	
  }
}

# Block resx1 merge: element-wise SUM of the projected shortcut
# ("resx1_match_conv") and the main branch ("resx1_conv3").
layer {
  name: "resx1_elewise"
  type: "Eltwise"
  bottom: "resx1_match_conv"
  bottom: "resx1_conv3"
  top: "resx1_elewise"
  eltwise_param {
    operation: SUM
  }
}
# In-place ReLU on the block-1 sum: bottom and top are both "resx1_elewise".
# The blob "resx1_elewise" is consumed by two later layers: "resx2_conv1"
# (main branch) and "resx2_elewise" (identity shortcut).
# NOTE(review): presumably both consumers are meant to see the post-ReLU
# values, as with Caffe in-place layers -- confirm that the TensorRT Caffe
# parser resolves this in-place blob the same way for both consumers.
layer {
  name: "resx1_elewise_relu_"
  type: "ReLU"
  bottom: "resx1_elewise"
  top: "resx1_elewise"
}
# Block resx2, main branch, step 1: 1x1 convolution reading "resx1_elewise"
# and producing 64 output channels (stride 1, no padding, bias enabled).
layer {
  name: "resx2_conv1"
  type: "Convolution"
  bottom: "resx1_elewise"
  top: "resx2_conv1"
  # Two param blocks with training-only multipliers; presumably weights first,
  # then bias (Caffe convention) -- the bias block has weight decay disabled.
  param {
    lr_mult:  1
    decay_mult: 1
  }
  param {
    lr_mult:  1
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    bias_term: true
    pad: 0
    kernel_size: 1
    stride: 1
    # Initializer used when weights are not loaded from a trained model.
    weight_filler {
      type: "xavier"
    }
	
  }
}
# In-place ReLU: bottom and top are the same blob ("resx2_conv1").
layer {
  name: "resx2_conv1_relu_"
  type: "ReLU"
  bottom: "resx2_conv1"
  top: "resx2_conv1"
}
# Block resx2, main branch, step 2: 3x3 convolution on "resx2_conv1"
# producing 64 output channels (stride 1, pad 1, bias enabled).
layer {
  name: "resx2_conv2"
  type: "Convolution"
  bottom: "resx2_conv1"
  top: "resx2_conv2"
  # Two param blocks with training-only multipliers; presumably weights first,
  # then bias (Caffe convention) -- the bias block has weight decay disabled.
  param {
    lr_mult:  1
    decay_mult: 1
  }
  param {
    lr_mult:  1
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    bias_term: true
    pad: 1
    kernel_size: 3
    # The group setting below is commented out, so no grouping is specified
    # for this layer.
    #group: 32
    stride: 1
    # Initializer used when weights are not loaded from a trained model.
    weight_filler {
      type: "xavier"
    }
	
  }
}
# In-place ReLU: bottom and top are the same blob ("resx2_conv2").
layer {
  name: "resx2_conv2_relu_"
  type: "ReLU"
  bottom: "resx2_conv2"
  top: "resx2_conv2"
}
# Block resx2, main branch, step 3: 1x1 convolution on "resx2_conv2"
# expanding to 128 output channels (stride 1, no padding, bias enabled).
# No ReLU follows directly; the output feeds the "resx2_elewise" sum.
layer {
  name: "resx2_conv3"
  type: "Convolution"
  bottom: "resx2_conv2"
  top: "resx2_conv3"
  # Two param blocks with training-only multipliers; presumably weights first,
  # then bias (Caffe convention) -- the bias block has weight decay disabled.
  param {
    lr_mult:  1 
    decay_mult: 1
  }
  param {
    lr_mult:  1
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    bias_term: true
    pad: 0
    kernel_size: 1
    stride: 1
    # Initializer used when weights are not loaded from a trained model.
    weight_filler {
      type: "xavier"
    }
	
  }
}
# Block resx2 merge: element-wise SUM of the identity shortcut (blob
# "resx1_elewise", used directly with no matching convolution) and the
# main branch ("resx2_conv3").
# NOTE(review): the shortcut input is the blob that "resx1_elewise_relu_"
# rewrites in place. Since this identity-connected block is where the wrong
# output is reported, check whether the runtime feeds the pre-ReLU or the
# post-ReLU version of "resx1_elewise" into this sum.
layer {
  name: "resx2_elewise"
  type: "Eltwise"
  bottom: "resx1_elewise"
  bottom: "resx2_conv3"
  top: "resx2_elewise"
  eltwise_param {
    operation: SUM
  }
}
# In-place ReLU on the block-2 sum: bottom and top are both "resx2_elewise".
layer {
  name: "resx2_elewise_relu_"
  type: "ReLU"
  bottom: "resx2_elewise"
  top: "resx2_elewise"
}