While loading a pretrained YOLOv2 model (based on gklz1982’s work: https://github.com/gklz1982/caffe-yolov2), which contains BatchNorm layers, I got an error message about an incompatible blob size:
"ERROR: Check failed: target_blobs.size() == source_layer.blobs_size() (5 vs. 3) Incompatible number of blobs for layer bn1"
So I thought there might be some difference between NVIDIA Caffe and BVLC Caffe, especially in the BatchNorm layer.
In BVLC Caffe (https://github.com/BVLC/caffe/blob/master/src/caffe/layers/batch_norm_layer.cpp), the batch normalization layer uses only 3 blobs:
void BatchNormLayer::LayerSetUp(const vector<Blob>& bottom,
const vector<Blob>& top) {
BatchNormParameter param = this->layer_param_.batch_norm_param();
moving_average_fraction_ = param.moving_average_fraction();
use_global_stats_ = this->phase_ == TEST;
if (param.has_use_global_stats())
use_global_stats_ = param.use_global_stats();
if (bottom[0]->num_axes() == 1)
channels_ = 1;
else
channels_ = bottom[0]->shape(1);
eps_ = param.eps();
if (this->blobs_.size() > 0) {
LOG(INFO) << “Skipping parameter initialization”;
} else {
this->blobs_.resize(3);
vector sz;
sz.push_back(channels_);
this->blobs_[0].reset(new Blob(sz));
this->blobs_[1].reset(new Blob(sz));
sz[0] = 1;
this->blobs_[2].reset(new Blob(sz));
for (int i = 0; i < 3; ++i) {
caffe_set(this->blobs_[i]->count(), Dtype(0),
this->blobs_[i]->mutable_cpu_data());
}
}
// Mask statistics from optimization by setting local learning rates
// for mean, variance, and the bias correction to zero.
for (int i = 0; i < this->blobs_.size(); ++i) {
if (this->layer_param_.param_size() == i) {
ParamSpec* fixed_param_spec = this->layer_param_.add_param();
fixed_param_spec->set_lr_mult(0.f);
} else {
CHECK_EQ(this->layer_param_.param(i).lr_mult(), 0.f)
<< "Cannot configure batch normalization statistics as layer "
<< “parameters.”;
}
}
}
However, NVIDIA Caffe (https://github.com/NVIDIA/caffe/blob/caffe-0.16/src/caffe/layers/batch_norm_layer.cpp) expects this->blobs_ to have size 5 when scale/bias is enabled:
template<typename Ftype, typename Btype>
void
BatchNormLayer<Ftype, Btype>::LayerSetUp(const vector<Blob*>& bottom, const vector<Blob*>& top) {
BatchNormParameter param = this->layer_param_.batch_norm_param();
moving_average_fraction_ = param.moving_average_fraction();
clip_variance_ = false;
//use_global_stats_ = false;
use_global_stats_= param.use_global_stats();
if (bottom[0]->num_axes() == 1)
channels_ = 1;
else
channels_ = bottom[0]->shape(1);
eps_ = std::max(param.eps(), 0.00001f);
scale_bias_ = false;
scale_bias_ = param.scale_bias(); // by default = false;
if (param.has_scale_filler() || param.has_bias_filler()) { // implicit set
scale_bias_ = true;
}
if (this->blobs_.size() > 0) {
LOG(INFO) << “Skipping parameter initialization”;
} else {
if (scale_bias_)
this->blobs_.resize(5);
else
this->blobs_.resize(3);
const Type btype = blobs_type();
const vector<int> shape { channels_ };
const vector<int> shape1 { 1 };
this->blobs_[0] = Blob::create(btype, btype); // mean
this->blobs_[0]->Reshape(shape);
this->blobs_[0]->set_data(0.);
this->blobs_[1] = Blob::create(btype, btype); // variance1
this->blobs_[1]->Reshape(shape);
this->blobs_[1]->set_data(0.);
this->blobs_[2] = Blob::create(btype, btype); // variance correction
this->blobs_[2]->Reshape(shape1);
this->blobs_[2]->set_data(1.);
if (scale_bias_) {
this->blobs_[3] = Blob::create(btype, btype); // scale
this->blobs_[3]->Reshape(shape);
this->blobs_[4] = Blob::create(btype, btype); // bias
this->blobs_[4]->Reshape(shape);
if (param.has_scale_filler()) {
// TODO
if (btype == tp<Ftype>()) {
shared_ptr<Filler<Ftype>> scale_filler(
GetFiller<Ftype>(this->layer_param_.batch_norm_param().scale_filler()));
scale_filler->Fill(this->blobs_[3].get());
} else {
shared_ptr<Filler<float>> scale_filler(
GetFiller<float>(this->layer_param_.batch_norm_param().scale_filler()));
scale_filler->Fill(this->blobs_[3].get());
}
} else {
this->blobs_[3]->set_data(1.);
}
if (param.has_bias_filler()) {
// TODO
if (btype == tp<Ftype>()) {
shared_ptr<Filler<Ftype>> bias_filler(
GetFiller<Ftype>(this->layer_param_.batch_norm_param().bias_filler()));
bias_filler->Fill(this->blobs_[4].get());
} else {
shared_ptr<Filler<float>> bias_filler(
GetFiller<float>(this->layer_param_.batch_norm_param().bias_filler()));
bias_filler->Fill(this->blobs_[4].get());
}
} else {
this->blobs_[4]->set_data(0.);
}
}
iter_ = 0;
}
I thought I could prevent NVCaffe from accessing the 4th and 5th elements of this->blobs_ by setting scale_bias to false. The NVCaffe documentation (NVCaffe User Guide :: NVIDIA Deep Learning Frameworks Documentation) also states that there is a scale_bias setting under batch_norm_param.
However, I got an error message saying that there is no field named "scale_bias" in "caffe.BatchNormParameter":
→ Bad network: Not a valid NetParameter: 91:5 : Message type "caffe.BatchNormParameter" has no field named "scale_bias".
Is there any way to disable scale_bias in NVCaffe?