The previous section covered solver initialization; during that process the Init function in net.cpp is called. Let's now look at how it does its work.
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
//in_param is the NetParameter handed over from solver.cpp
CHECK(Caffe::root_solver() || root_net_)
<< "root_net_ needs to be set for all non-root solvers";
// Set phase from the state.
phase_ = in_param.state().phase();
//phase_ = caffe::TRAIN
// Filter layers based on their include/exclude rules and
// the current NetState.
NetParameter filtered_param;
FilterNet(in_param, &filtered_param);
//This function checks in_param: layers whose include/exclude rules match the current
//NetState are copied into filtered_param, the others are not. In other words, it removes
//the excluded layers from in_param and copies the remaining ones into filtered_param
//(the decision is driven mainly by the include and exclude rules)
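//Illustration (not part of Init), assuming the standard LeNet prototxt: a rule such as
//  layer { name: "mnist" type: "Data" ... include { phase: TRAIN } }
//keeps the layer only when the NetState phase is TRAIN, while a layer whose exclude
//rule matches the current state is dropped by FilterNet.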
LOG_IF(INFO, Caffe::root_solver())
<< "Initializing net from parameters: " << std::endl
<< filtered_param.DebugString();
// Create a copy of filtered_param with splits added where necessary.
NetParameter param;
InsertSplits(filtered_param, ¶m);
//builds the new network description param from filtered_param, with split layers inserted
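//Illustration: if one blob feeds several layers (e.g. "label" consumed by both the loss
//and the accuracy layer), InsertSplits adds a Split layer whose tops are named like
//"label_mnist_1_split_0" and "label_mnist_1_split_1", so each consumer gets its own
//copy and gradients can be accumulated correctly during backward.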
// Basically, build all the layers and set up their connections.
name_ = param.name();
map<string, int> blob_name_to_idx;
set<string> available_blobs;
//for the std::set container, see http://blog.csdn.net/wangran51/article/details/8836160
//blob_name_to_idx maps a blob name to its index in blobs_; available_blobs holds the
//names of blobs that have been produced as tops but not yet consumed as bottoms
memory_used_ = 0;
// For each layer, set up its input and output
bottom_vecs_.resize(param.layer_size());//resize bottom_vecs_; below is a before/after comparison
// bottom_vecs_ = std::vector of length 0, capacity 0
// bottom_vecs_ = std::vector of length 9, capacity 9 = {
// std::vector of length 0, capacity 0, std::vector of length 0, capacity 0,
// std::vector of length 0, capacity 0, std::vector of length 0, capacity 0,
// std::vector of length 0, capacity 0, std::vector of length 0, capacity 0,
// std::vector of length 0, capacity 0, std::vector of length 0, capacity 0,
// std::vector of length 0, capacity 0}
//the nine elements correspond to the nine layers of the train net, one slot per layer
top_vecs_.resize(param.layer_size());
bottom_id_vecs_.resize(param.layer_size());
param_id_vecs_.resize(param.layer_size());
top_id_vecs_.resize(param.layer_size());
bottom_need_backward_.resize(param.layer_size());
//by Caffe convention, names ending in '_' are class data members of Net, i.e. the state being built up here
for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
//process each layer in turn
// For non-root solvers, whether this layer is shared from root_net_.
bool share_from_root = !Caffe::root_solver()
&& root_net_->layers_[layer_id]->ShareInParallel();
// Inherit phase from net if unset.
if (!param.layer(layer_id).has_phase()) {
param.mutable_layer(layer_id)->set_phase(phase_);
}
// Setup layer.
const LayerParameter& layer_param = param.layer(layer_id);//go read the LayerParameter message in caffe.proto
if (layer_param.propagate_down_size() > 0) {
//propagate_down:Specifies on which bottoms the backpropagation should
//be skipped. The size must be either 0 or equal to the number of bottoms.
CHECK_EQ(layer_param.propagate_down_size(),
layer_param.bottom_size())
<< "propagate_down param must be specified "
<< "either 0 or bottom_size times ";
}
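//Illustration: in the prototxt this looks like
//  layer { ... bottom: "data" bottom: "label"
//          propagate_down: true propagate_down: false }
//i.e. either no propagate_down entries at all, or exactly one per bottom.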
if (share_from_root) {
LOG(INFO) << "Sharing layer " << layer_param.name() << " from root net";
layers_.push_back(root_net_->layers_[layer_id]);
layers_[layer_id]->SetShared(true);
} else {
layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
//create the layer from layer_param via the layer registry and push it into layers_ (details in the next post)
}
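//CreateLayer looks up layer_param.type() (e.g. "Convolution") in the layer registry and
//invokes the registered creator, returning a shared_ptr<Layer<Dtype> >.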
layer_names_.push_back(layer_param.name());
LOG_IF(INFO, Caffe::root_solver())
<< "Creating Layer " << layer_param.name();
bool need_backward = false;
// Figure out this layer's input and output
for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
++bottom_id)
//the layer has been created above; now wire up its bottom/top blobs
{
const int blob_id = AppendBottom(param, layer_id, bottom_id,
&available_blobs, &blob_name_to_idx);
//see Appendix 1
// If a blob needs backward, this layer should provide it.
need_backward |= blob_need_backward_[blob_id];
}
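//Roughly, AppendBottom resolves the bottom name through blob_name_to_idx, erases it from
//available_blobs, pushes the blob pointer into bottom_vecs_[layer_id] and its index into
//bottom_id_vecs_[layer_id], and returns blob_id so blob_need_backward_[blob_id] can be
//checked above.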
int num_top = layer_param.top_size();
for (int top_id = 0; top_id < num_top; ++top_id) {
AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx);
//see Appendix 2
// Collect Input layer tops as Net inputs.
if (layer_param.type() == "Input") {
const int blob_id = blobs_.size() - 1;
net_input_blob_indices_.push_back(blob_id);
net_input_blobs_.push_back(blobs_[blob_id].get());
}
}
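//Roughly, AppendTop either reuses the bottom blob of the same name (in-place computation)
//or creates a new Blob<Dtype>, registers it in blobs_, blob_names_, blob_name_to_idx and
//available_blobs, and pushes the pointer into top_vecs_[layer_id].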
// If the layer specifies that AutoTopBlobs() -> true and the LayerParameter
// specified fewer than the required number (as specified by
// ExactNumTopBlobs() or MinTopBlobs()), allocate them here.
Layer<Dtype>* layer = layers_[layer_id].get();
//vector<shared_ptr<Layer<Dtype> > > layers_;
if (layer->AutoTopBlobs()) {
const int needed_num_top =
std::max(layer->MinTopBlobs(), layer->ExactNumTopBlobs());
for (; num_top < needed_num_top; ++num_top) {
// Add "anonymous" top blobs -- do not modify available_blobs or
// blob_name_to_idx as we don't want these blobs to be usable as input
// to other layers.
AppendTop(param, layer_id, num_top, NULL, NULL);
}
}
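//Example: LossLayer returns AutoTopBlobs() == true and ExactNumTopBlobs() == 1, so a loss
//layer declared without any top in the prototxt still gets an anonymous top blob here to
//hold the loss value.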
// After this layer is connected, set it up.
if (share_from_root) {
// Set up size of top blobs using root_net_
const vector<Blob<Dtype>*>& base_top = root_net_->top_vecs_[layer_id];
const vector<Blob<Dtype>*>& this_top = this->top_vecs_[layer_id];
for (int top_id = 0; top_id < base_top.size(); ++top_id) {
this_top[top_id]->ReshapeLike(*base_top[top_id]);
LOG(INFO) << "Created top blob " << top_id << " (shape: "
<< this_top[top_id]->shape_string() << ") for shared layer "
<< layer_param.name();
}
} else {
layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
//the SetUp call is covered in the next post, otherwise this one would get too long
}
LOG_IF(INFO, Caffe::root_solver())
<< "Setting up " << layer_names_[layer_id];
//update the blob_loss_weights_ vector
for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {
blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
//grow blob_loss_weights_ so that index top_id_vecs_[layer_id][top_id] is valid
}
blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
//loss() returns the loss weight: Layer::SetUp calls SetLossWeights, which stores the
//loss_weight values from the prototxt in the private member loss_
LOG_IF(INFO, Caffe::root_solver())
<< "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();
// top_vecs_[0][0]->shape_string() = "64 1 28 28 (50176)"
if (layer->loss(top_id)) {
LOG_IF(INFO, Caffe::root_solver())
<< " with loss weight " << layer->loss(top_id);
}
memory_used_ += top_vecs_[layer_id][top_id]->count();
}
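//Example: a SoftmaxWithLoss layer has a default loss_weight of 1 for its top blob, so
//blob_loss_weights_ becomes 1 at that top's global blob index; for ordinary layers
//loss(top_id) is 0 and the entry stays 0.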
LOG_IF(INFO, Caffe::root_solver())
<< "Memory required for data: " << memory_used_ * sizeof(Dtype);
const int param_size = layer_param.param_size();
const int num_param_blobs = layers_[layer_id]->blobs().size();
//param_size is the number of ParamSpec param entries in the LayerParameter layer_param;
//num_param_blobs is the number of learnable parameter blobs in this Layer; param_size <= num_param_blobs must hold
CHECK_LE(param_size, num_param_blobs)
<< "Too many params specified for layer " << layer_param.name();
ParamSpec default_param_spec;
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
const ParamSpec* param_spec = (param_id < param_size) ?
&layer_param.param(param_id) : &default_param_spec;
const bool param_need_backward = param_spec->lr_mult() != 0;
//whether this parameter blob gets gradients is decided by its learning-rate multiplier: if lr_mult is 0, it is not updated
need_backward |= param_need_backward;
//need_backward is OR-ed with param_need_backward, so once any iteration sets it to true
//it stays true after this for loop ends
layers_[layer_id]->set_param_propagate_down(param_id,
param_need_backward);
}
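//Illustration: a convolution layer typically declares
//  param { lr_mult: 1 }  // weights
//  param { lr_mult: 2 }  // bias
//Setting lr_mult: 0 freezes that blob: param_need_backward is false and
//set_param_propagate_down marks it as not needing gradients.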
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
AppendParam(param, layer_id, param_id);//see Appendix 3
}
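//Roughly, AppendParam records this parameter blob in params_; owned (non-shared) blobs
//also go into learnable_params_ with their lr/weight-decay multipliers, and blobs whose
//ParamSpec gives the same param name are shared between layers.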
// Finally, set the backward flag
layer_need_backward_.push_back(need_backward);
if (need_backward) {