NVCaffe's BatchNormLayer is incompatible with BVLC caffe

While loading the pretrained Yolo v2 model (based on gklz1982’s work(https://github.com/gklz1982/caffe-yolov2)) which has BatchNormLayer on it, I got the error message of incompatible blob size.

"ERROR: Check failed: target_blobs.size() == source_layer.blobs_size() (5 vs. 3) Incompatible number of blobs for layer bn1"

So, I thought there might be some difference between Nvidia Caffe and BVLC Caffe, especially for Batch Norm layer.

On BVLC Caffe (https://github.com/BVLC/caffe/blob/master/src/caffe/layers/batch_norm_layer.cpp), the batch-normalization layer only uses 3 blobs:

void BatchNormLayer::LayerSetUp(const vector<Blob>& bottom,
const vector<Blob
>& top) {
BatchNormParameter param = this->layer_param_.batch_norm_param();
moving_average_fraction_ = param.moving_average_fraction();
use_global_stats_ = this->phase_ == TEST;
if (param.has_use_global_stats())
use_global_stats_ = param.use_global_stats();
if (bottom[0]->num_axes() == 1)
channels_ = 1;
else
channels_ = bottom[0]->shape(1);
eps_ = param.eps();
if (this->blobs_.size() > 0) {
LOG(INFO) << “Skipping parameter initialization”;
} else {
this->blobs_.resize(3);
vector sz;
sz.push_back(channels_);
this->blobs_[0].reset(new Blob(sz));
this->blobs_[1].reset(new Blob(sz));
sz[0] = 1;
this->blobs_[2].reset(new Blob(sz));
for (int i = 0; i < 3; ++i) {
caffe_set(this->blobs_[i]->count(), Dtype(0),
this->blobs_[i]->mutable_cpu_data());
}
}
// Mask statistics from optimization by setting local learning rates
// for mean, variance, and the bias correction to zero.
for (int i = 0; i < this->blobs_.size(); ++i) {
if (this->layer_param_.param_size() == i) {
ParamSpec* fixed_param_spec = this->layer_param_.add_param();
fixed_param_spec->set_lr_mult(0.f);
} else {
CHECK_EQ(this->layer_param_.param(i).lr_mult(), 0.f)
<< "Cannot configure batch normalization statistics as layer "
<< “parameters.”;
}
}
}

However, Nvidia Caffe (https://github.com/NVIDIA/caffe/blob/caffe-0.16/src/caffe/layers/batch_norm_layer.cpp) allocates 5 blobs in this->blobs_ whenever scale_bias_ is enabled:

// NVCaffe (caffe-0.16) implementation of BatchNormLayer::LayerSetUp.
// (Restored from the mangled paste: straight quotes in the LOG string and
// the closing brace of the parameter-initialization else-branch were lost,
// leaving the quoted function with unbalanced braces.)
//
// Allocates THREE blobs (mean, variance, variance correction) by default,
// and FIVE when scale_bias_ is on (scale and bias are appended) -- this is
// the source of the "(5 vs. 3) Incompatible number of blobs" error when
// loading BVLC-trained weights.
template<typename Ftype, typename Btype>
void
BatchNormLayer<Ftype, Btype>::LayerSetUp(const vector<Blob*>& bottom, const vector<Blob*>& top) {
  BatchNormParameter param = this->layer_param_.batch_norm_param();
  moving_average_fraction_ = param.moving_average_fraction();

  clip_variance_ = false;
  //use_global_stats_ = false;
  use_global_stats_ = param.use_global_stats();

  // A 1-axis bottom blob is treated as a single channel.
  if (bottom[0]->num_axes() == 1)
    channels_ = 1;
  else
    channels_ = bottom[0]->shape(1);
  // NVCaffe clamps eps from below, unlike BVLC which uses it verbatim.
  eps_ = std::max(param.eps(), 0.00001f);

  // scale_bias_ is enabled either explicitly via the proto field, or
  // implicitly by supplying a scale/bias filler.
  scale_bias_ = false;
  scale_bias_ = param.scale_bias(); // by default = false;
  if (param.has_scale_filler() || param.has_bias_filler()) { // implicit set
    scale_bias_ = true;
  }

  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    // 5 blobs (mean, variance, correction, scale, bias) with scale_bias_;
    // otherwise the BVLC-compatible 3.
    if (scale_bias_)
      this->blobs_.resize(5);
    else
      this->blobs_.resize(3);

    const Type btype = blobs_type();
    const vector<int> shape { channels_ };
    const vector<int> shape1 { 1 };
    this->blobs_[0] = Blob::create(btype, btype);  // mean
    this->blobs_[0]->Reshape(shape);
    this->blobs_[0]->set_data(0.);
    this->blobs_[1] = Blob::create(btype, btype);  // variance1
    this->blobs_[1]->Reshape(shape);
    this->blobs_[1]->set_data(0.);
    this->blobs_[2] = Blob::create(btype, btype);  // variance correction
    this->blobs_[2]->Reshape(shape1);
    this->blobs_[2]->set_data(1.);
    if (scale_bias_) {
      this->blobs_[3] = Blob::create(btype, btype);  // scale
      this->blobs_[3]->Reshape(shape);
      this->blobs_[4] = Blob::create(btype, btype);  // bias
      this->blobs_[4]->Reshape(shape);
      if (param.has_scale_filler()) {
        // TODO
        if (btype == tp<Ftype>()) {
          shared_ptr<Filler<Ftype>> scale_filler(
              GetFiller<Ftype>(this->layer_param_.batch_norm_param().scale_filler()));
          scale_filler->Fill(this->blobs_[3].get());
        } else {
          shared_ptr<Filler<float>> scale_filler(
              GetFiller<float>(this->layer_param_.batch_norm_param().scale_filler()));
          scale_filler->Fill(this->blobs_[3].get());
        }
      } else {
        this->blobs_[3]->set_data(1.);  // identity scale
      }
      if (param.has_bias_filler()) {
        // TODO
        if (btype == tp<Ftype>()) {
          shared_ptr<Filler<Ftype>> bias_filler(
              GetFiller<Ftype>(this->layer_param_.batch_norm_param().bias_filler()));
          bias_filler->Fill(this->blobs_[4].get());
        } else {
          shared_ptr<Filler<float>> bias_filler(
              GetFiller<float>(this->layer_param_.batch_norm_param().bias_filler()));
          bias_filler->Fill(this->blobs_[4].get());
        }
      } else {
        this->blobs_[4]->set_data(0.);  // zero bias
      }
    }
  }  // NOTE: this closing brace was missing from the original paste.
  iter_ = 0;
}

I thought I could prevent NVCaffe from accessing the 4th and 5th elements of this->blobs_ by setting scale_bias to false. Also, the NVCaffe documentation (http://docs.nvidia.com/deeplearning/dgx/caffe-user-guide/index.html) states that there is a scale_bias setting under batch_norm_param.

However, I got an error message saying that there is no field named "scale_bias" in "caffe.BatchNormParameter".

-> Bad network: Not a valid NetParameter: 91:5 : Message type “caffe.BatchNormParameter” has no field named “scale_bias”.

Is there any way to disable scale_bias on NVCaffe??