net.hpp/cpp中主要含有:前向后向传播函数,网络IO函数,每层的参数检测和读取函数,建立和维护每层参数的函数以及vector容器。
caffe支持的网络是有向无环图结构。网络中每一层都是一个节点,网络含有起点和终点,并且起点和终点不一定只有一个。信息在前向传递时,网络中每个节点至少会经过一次,并且不一定只经过一次;但当网络只有一个起点时,每个节点只会经过一次。后向传播时同理。
下面主要用在代码中添加注释的方法来说明。
首先来看net中维护的各种vector以及变量
/// @brief The network name
string name_;
/// @brief The phase: TRAIN or TEST
Phase phase_;
/// @brief Individual layers in the net; each element holds one layer instance.
vector<shared_ptr<Layer<Dtype> > > layers_;
/// @brief The name of each layer, indexed by layer id.
vector<string> layer_names_;
/// @brief Map from layer name to layer id.
map<string, int> layer_names_index_;
/// @brief Whether each layer needs backward computation, indexed by layer id.
vector<bool> layer_need_backward_;
/// @brief The blobs storing intermediate results between the layers.
vector<shared_ptr<Blob<Dtype> > > blobs_;
/// @brief The name of each blob, indexed by blob id.
vector<string> blob_names_;
/// @brief Map from blob name to blob id.
map<string, int> blob_names_index_;
/// @brief Whether each blob needs backward computation, indexed by blob id.
vector<bool> blob_need_backward_;
/// bottom_vecs stores the vectors containing the input for each layer.
/// They don't actually host the blobs (blobs_ does), so we simply store
/// pointers.
vector<vector<Blob<Dtype>*> > bottom_vecs_;
vector<vector<int> > bottom_id_vecs_;
vector<vector<bool> > bottom_need_backward_;
/// top_vecs stores the vectors containing the output for each layer
vector<vector<Blob<Dtype>*> > top_vecs_;
vector<vector<int> > top_id_vecs_;
/// Vector of weight in the loss (or objective) function of each net blob,
/// indexed by blob_id.
vector<Dtype> blob_loss_weights_;
/// @brief Per-layer vectors of param ids; needed because Caffe supports
/// parameter sharing between layers.
vector<vector<int> > param_id_vecs_;
/// @brief For each param, an index identifying its owner — presumably the
/// owner's param id, with a sentinel for self-owned params; TODO confirm
/// against Net::Init / AppendParam.
vector<int> param_owners_;
/// @brief Display name of each parameter. The split algorithm (and other
/// transformations) may cause these names to differ from the ones specified
/// in the prototxt.
vector<string> param_display_names_;
/// @brief (layer id, param index within that layer) pair for each parameter.
vector<pair<int, int> > param_layer_indices_;
/// @brief Map from param name to param id.
map<string, int> param_names_index_;
/// blob indices for the input and the output of the net
vector<int> net_input_blob_indices_;
vector<int> net_output_blob_indices_;
vector<Blob<Dtype>*> net_input_blobs_;
vector<Blob<Dtype>*> net_output_blobs_;
/// The parameters in the network.
vector<shared_ptr<Blob<Dtype> > > params_;
vector<Blob<Dtype>*> learnable_params_;
/**
 * The mapping from params_ -> learnable_params_: we have
 * learnable_param_ids_.size() == params_.size(),
 * and learnable_params_[learnable_param_ids_[i]] == params_[i].get()
 * if and only if params_[i] is an "owner"; otherwise, params_[i] is a sharer
 * and learnable_params_[learnable_param_ids_[i]] gives its owner.
 */
vector<int> learnable_param_ids_;
/// the learning rate multipliers for learnable_params_
vector<float> params_lr_;
vector<bool> has_params_lr_;
/// the weight decay multipliers for learnable_params_
vector<float> params_weight_decay_;
vector<bool> has_params_decay_;
/// The bytes of memory used by this net
size_t memory_used_;
下面是构造net相关的函数
// Constructors of Net: either an already-parsed NetParameter or the name of a
// parameter file may be supplied.
// NetParameter is a message class describing the network structure; sometimes
// it also carries the weights of the net. It is not a set-up net — only its
// parameters. The class is generated by Google's protobuf compiler, and its
// definition can be found in caffe.pb.h.
// root_net_ is used for multi-GPU computing: if root_net is not NULL, weights
// will be shared between GPUs.
template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param, const Net* root_net)
: root_net_(root_net){
Init(param);
}
/// @brief Construct a Net from a prototxt file on disk.
/// @param param_file path of the .prototxt describing the network structure
/// @param phase      TRAIN or TEST; overrides whatever phase the file carries
/// @param root_net   non-NULL for non-root solvers in multi-GPU training so
///                   this net can share weights with the root net
template <typename Dtype>
Net<Dtype>::Net(const string& param_file, Phase phase, const Net* root_net)
    : root_net_(root_net) {
  NetParameter param;
  // Fixed mojibake: the source text contained "¶m", the HTML-mangled form
  // of "&param". The parser needs the address of the local message to fill in.
  ReadNetParamsFromTextFileOrDie(param_file, &param);
  // Force the requested phase into the net state before initialization.
  param.mutable_state()->set_phase(phase);
  Init(param);
}
/// @brief Initialize a network with a NetParameter.
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
//if has root_solver, root_net is needed
CHECK(Caffe::root_solver() || root_net_)
<< "root_net_ needs to be set for all non-root solvers";
// Set phase from the state. phase is train or test
phase_ = in_param.state().phase();
// Filter layers based on their include/exclude rules and
// the current NetState.
NetParameter filtered_param;
// this function is used to check whether the parameter of a layer is legal
// and then find whether the layer is needed in this phase
FilterNet(in_param, &filtered_param);
LOG_IF(INFO, Caffe::root_solver())
<< "Initializing net from parameters: " << std::endl
<< filtered_param.DebugString();
// Create a copy of filtered_param with splits added where necessary.
NetParameter param;
// this function is used when a blob act as the bottom of two or more layers. it will duplicate a bolb and share weights betwwen duplicates.
// it is needed because caffe assume all blobs can be used only once.
InsertSplits(filtered_param, ¶m);
// Basically, build all the layers and set up their connections.
name_ = param.name();
map<string, int> blob_name_to_idx;
set<string> available_blobs;
memory_used_ = 0;
// For each layer, set up its input and output
bottom_vecs_.resize(param.layer_size());
top_vecs_.resize(param.layer_size());
bottom_id_vecs_.resize(param.layer_size());
param_id_vecs_.resize(param.layer_size());
top_id_vecs_.resize(param.layer_size());
bottom_need_backward_.resize(param.layer_size());
// each layer will be setup in following
for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
// For non-root solvers, whether this layer is shared from root_net_.
bool share_from_root = !Caffe::root_solver()
&& root_net_->layers_[layer_id]->ShareInParallel();
// Inherit phase from net if unset.
if (!param.layer(layer_id).has_phase()) {
param.mutable_layer(layer_id)->set_phase(phase_);
}
// Setup layer.
const LayerParameter& layer_param = param.layer(layer_id);
if (layer_param.propagate_down_size() > 0) {
CHECK_EQ(layer_param.propagate_down_size(),
layer_