
OpenPose training code (part 1)

OpenPose training code (part 1): http://blog.csdn.net/u011956147/article/details/79292026
OpenPose training code (part 2): http://blog.csdn.net/u011956147/article/details/79292734


OpenPose itself is quite sprawling: it covers body pose estimation, hand pose estimation, facial keypoint extraction, and even 3D pose, all built as a wrapper on top of Caffe. If we actually want to use it, though, a lot of it is unnecessary, such as the multithreading and the GUI inside OpenPose; we only need to focus on the core pieces.
Here we only care about the body keypoint estimation in OpenPose. As the previous post roughly showed, Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields is essentially the CVPR 2016 CPM plus PAFs. Inference is intuitive: extract the keypoints, compute the PAF line integrals, and then assign the keypoints to groups (that is, decide which ones belong to the same person) to complete multi-person pose estimation.
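
To make the PAF grouping step concrete, here is a minimal NumPy sketch of the line-integral score between two candidate keypoints. The function name paf_score, the separate paf_x/paf_y field arrays, and the sample count are my own illustrative choices, not OpenPose's actual implementation:

import numpy as np

def paf_score(paf_x, paf_y, p1, p2, num_samples=10):
    # Score a candidate limb (p1 -> p2) by integrating the part affinity
    # field along the segment connecting the two keypoints.
    p1, p2 = np.asarray(p1, dtype=float), np.asarray(p2, dtype=float)
    vec = p2 - p1
    norm = np.linalg.norm(vec)
    if norm < 1e-8:
        return 0.0
    unit = vec / norm
    score = 0.0
    for t in np.linspace(0.0, 1.0, num_samples):
        x, y = (p1 + t * vec).astype(int)  # sample point on the segment
        # Dot product of the field vector with the limb direction.
        score += paf_x[y, x] * unit[0] + paf_y[y, x] * unit[1]
    return score / num_samples

Keypoint pairs whose field vectors align well with the limb direction on average get a high score and are grouped into the same person.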


For the training code, the main things to read are the data preparation and the data loading, which involve a few files.
Data loading files:

cpm_data_layer.cpp
cpm_data_transformer.cpp

Data preparation files:

genCOCOMask.m
genJSON.m
genLMDB.py
getANNO.m

cpm_data_layer and cpm_data_transformer are both implemented inside Caffe. To work through these two files, we first need to see how the data preparation is done. Here we only look at how the LMDB file is generated, since everything else is fairly simple and can be looked up on your own (generating the LMDB is fairly simple too, but the author's code for this part is a bit messy and takes some patience to untangle).
In genLMDB.py, the preprocessed data is all written into LMDB. The function writeLMDB does the writing, line by line and page by page (a "page" here can be understood as a channel, because reading it back works by moving a pointer):

import os
import json
import struct
import lmdb
import numpy as np
import cv2
import caffe

def float2bytes(floats):
    # Helper defined in genLMDB.py: pack one float or a list of floats
    # into a raw byte string (4 bytes per float).
    if type(floats) is float:
        floats = [floats]
    return struct.pack('%df' % len(floats), *floats)

def writeLMDB(datasets, lmdb_path, validation):
    # An empty directory must be created beforehand to hold the LMDB; it takes roughly 140 GB.
    env = lmdb.open(lmdb_path, map_size=int(1e12))
    txn = env.begin(write=True)
    data = []
    numSample = 0

    for d in range(len(datasets)):
        if datasets[d] == "MPI":
            print datasets[d]
            with open('MPI.json') as data_file:
                data_this = json.load(data_file)
                data_this = data_this['root']
                data = data + data_this
            numSample = len(data)
            print numSample
        elif datasets[d] == "COCO":  # read the JSON annotation file
            print datasets[d]
            with open('dataset/COCO/json/COCO.json') as data_file:
                data_this = json.load(data_file)
                data_this = data_this['root']
                data = data + data_this
            numSample = len(data)
            print numSample

    random_order = np.random.permutation(numSample).tolist()

    isValidationArray = [data[i]['isValidation'] for i in range(numSample)]
    if validation == 1:
        totalWriteCount = isValidationArray.count(0.0)
    else:
        totalWriteCount = len(data)
    print totalWriteCount
    writeCount = 0

    for count in range(numSample):
        idx = random_order[count]  # visit the samples in shuffled order
        if data[idx]['isValidation'] != 0 and validation == 1:
            print '%d/%d skipped' % (count, idx)
            continue

        if "MPI" in data[idx]['dataset']:
            path_header = 'dataset/MPI/images/'
        elif "COCO" in data[idx]['dataset']:
            path_header = '/proj/Sunjiarui/fcm_pose_train/training/dataset/COCO/images/'

        print os.path.join(path_header, data[idx]['img_paths'])
        img = cv2.imread(os.path.join(path_header, data[idx]['img_paths']))
        # e.g. '000000000036.' -- the slice keeps the trailing dot, so +'png' forms '.png'
        img_idx = data[idx]['img_paths'][-16:-3]
        # Load mask_all and mask_miss: some people are small and left unannotated
        # although they are present in the image, which is why these masks exist.
        if "COCO_val" in data[idx]['dataset']:
            mask_all = cv2.imread(path_header + 'mask2014/val2014_mask_all_' + img_idx + 'png', 0)
            mask_miss = cv2.imread(path_header + 'mask2014/val2014_mask_miss_' + img_idx + 'png', 0)
        elif "COCO" in data[idx]['dataset']:
            mask_all = cv2.imread(path_header + 'mask2014/train2014_mask_all_' + img_idx + 'png', 0)
            mask_miss = cv2.imread(path_header + 'mask2014/train2014_mask_miss_' + img_idx + 'png', 0)
        elif "MPI" in data[idx]['dataset']:
            img_idx = data[idx]['img_paths'][-13:-3]
            mask_miss = cv2.imread('dataset/MPI/masks/mask_' + img_idx + 'jpg', 0)

        height = img.shape[0]
        width = img.shape[1]
        if width < 64:
            # Pad instead of resizing so the image content is preserved.
            img = cv2.copyMakeBorder(img, 0, 0, 0, 64 - width, cv2.BORDER_CONSTANT, value=(128, 128, 128))
            print 'saving padded image'
            cv2.imwrite('padded_img.jpg', img)
            width = 64
        # One extra channel holds the metadata, encoded row by row as raw bytes.
        meta_data = np.zeros(shape=(height, width, 1), dtype=np.uint8)
        clidx = 0  # current line (row) index into the metadata channel
        # dataset name (string)
        for i in range(len(data[idx]['dataset'])):
            meta_data[clidx][i] = ord(data[idx]['dataset'][i])
        clidx = clidx + 1
        # image height (float, bytes 0-3), image width (float, bytes 4-7)
        height_binary = float2bytes(data[idx]['img_height'])
        for i in range(len(height_binary)):
            meta_data[clidx][i] = ord(height_binary[i])
        width_binary = float2bytes(data[idx]['img_width'])
        for i in range(len(width_binary)):
            meta_data[clidx][4 + i] = ord(width_binary[i])
        clidx = clidx + 1
        # (a) isValidation (uint8), numOtherPeople (uint8), people_index (uint8),
        #     annolist_index (float), writeCount (float), totalWriteCount (float)
        meta_data[clidx][0] = data[idx]['isValidation']
        meta_data[clidx][1] = data[idx]['numOtherPeople']
        meta_data[clidx][2] = data[idx]['people_index']
        annolist_index_binary = float2bytes(data[idx]['annolist_index'])
        for i in range(len(annolist_index_binary)):  # bytes 3,4,5,6
            meta_data[clidx][3 + i] = ord(annolist_index_binary[i])
        count_binary = float2bytes(float(writeCount))  # note it's writeCount instead of count!
        for i in range(len(count_binary)):
            meta_data[clidx][7 + i] = ord(count_binary[i])
        totalWriteCount_binary = float2bytes(float(totalWriteCount))
        for i in range(len(totalWriteCount_binary)):
            meta_data[clidx][11 + i] = ord(totalWriteCount_binary[i])
        nop = int(data[idx]['numOtherPeople'])
        clidx = clidx + 1
        # (b) objpos_x (float), objpos_y (float)
        objpos_binary = float2bytes(data[idx]['objpos'])
        for i in range(len(objpos_binary)):
            meta_data[clidx][i] = ord(objpos_binary[i])
        clidx = clidx + 1
        # (c) scale_provided (float)
        scale_provided_binary = float2bytes(data[idx]['scale_provided'])
        for i in range(len(scale_provided_binary)):
            meta_data[clidx][i] = ord(scale_provided_binary[i])
        clidx = clidx + 1
        # (d) joint_self (3*16) (float) (3 lines)
        joints = np.asarray(data[idx]['joint_self']).T.tolist()  # transpose to 3*16
        for i in range(len(joints)):
            row_binary = float2bytes(joints[i])
            for j in range(len(row_binary)):
                meta_data[clidx][j] = ord(row_binary[j])
            clidx = clidx + 1
        # (e) check nop, prepare arrays
        if nop != 0:
            if nop == 1:
                joint_other = [data[idx]['joint_others']]
                objpos_other = [data[idx]['objpos_other']]
                scale_provided_other = [data[idx]['scale_provided_other']]
            else:
                joint_other = data[idx]['joint_others']
                objpos_other = data[idx]['objpos_other']
                scale_provided_other = data[idx]['scale_provided_other']
            # (f) objpos_other_x (float), objpos_other_y (float) (nop lines)
            for i in range(nop):
                objpos_binary = float2bytes(objpos_other[i])
                for j in range(len(objpos_binary)):
                    meta_data[clidx][j] = ord(objpos_binary[j])
                clidx = clidx + 1
            # (g) scale_provided_other (nop floats in 1 line)
            scale_provided_other_binary = float2bytes(scale_provided_other)
            for j in range(len(scale_provided_other_binary)):
                meta_data[clidx][j] = ord(scale_provided_other_binary[j])
            clidx = clidx + 1
            # (h) joint_others (3*16) (float) (nop*3 lines)
            for n in range(nop):
                joints = np.asarray(joint_other[n]).T.tolist()  # transpose to 3*16
                for i in range(len(joints)):
                    row_binary = float2bytes(joints[i])
                    for j in range(len(row_binary)):
                        meta_data[clidx][j] = ord(row_binary[j])
                    clidx = clidx + 1

        # The metadata occupies 7 + 4*nop rows in total.
        # Keep the channel order of the LMDB firmly in mind: it matters when the
        # data is read back, and in the C++ code it corresponds to pointer offsets.
        if "COCO" in data[idx]['dataset']:
            img4ch = np.concatenate((img, meta_data, mask_miss[..., None], mask_all[..., None]), axis=2)
        elif "MPI" in data[idx]['dataset']:
            img4ch = np.concatenate((img, meta_data, mask_miss[..., None]), axis=2)

        img4ch = np.transpose(img4ch, (2, 0, 1))  # HWC -> CHW for the Datum
        print img4ch.shape

        datum = caffe.io.array_to_datum(img4ch, label=0)
        key = '%07d' % writeCount
        txn.put(key, datum.SerializeToString())
        if writeCount % 1000 == 0:
            txn.commit()  # commit periodically so the write buffer stays small
            txn = env.begin(write=True)
        print '%d/%d/%d/%d' % (count, writeCount, idx, numSample)
        writeCount = writeCount + 1

    txn.commit()
    env.close()

Once this Python script has run, it produces the LMDB file needed for training. To actually use it, Caffe's data layer has to be rewritten; for background on Caffe data layers, see my earlier post: http://mp.blog.csdn.net/mdeditor/77987504
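
As a quick sanity check on that layout, here is a minimal read-back sketch (my own addition, not part of the original post); the key format and the channel order follow the write code above, and 'lmdb_path' is a placeholder:

import struct
import lmdb
import caffe

env = lmdb.open('lmdb_path', readonly=True)
with env.begin() as txn:
    raw = txn.get('%07d' % 0)  # keys were written as zero-padded counters
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(raw)
arr = caffe.io.datum_to_array(datum)  # (C, H, W): BGR + meta + mask channels

meta = arr[3]  # channel 3 is the metadata plane written above
# Row 0: dataset name, one character per byte (zero padded).
print ''.join(chr(c) for c in meta[0] if c != 0)
# Row 1: img_height in bytes 0-3 and img_width in bytes 4-7, as floats.
print struct.unpack('f', meta[1, 0:4].tobytes())[0]
print struct.unpack('f', meta[1, 4:8].tobytes())[0]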


Next come cpm_data_layer and cpm_data_transformer. cpm_data_layer mainly handles setting up the layer; the bulk of the data transformation is done in cpm_data_transformer.
First look at cpm_data_layer's setup function (I may have changed a few small details in the code):

template <typename Dtype>
void CPMDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  cpm_data_transformer_.reset(
     new CPMDataTransformer<Dtype>(cpm_transform_param_, this->phase_));
  cpm_data_transformer_->InitRand();


  // Read a data point, and use it to initialize the top blob.
  Datum& datum = *(reader_.full().peek());
  LOG(INFO) << datum.height() << " " << datum.width() << " " << datum.channels();

  bool force_color = this->layer_param_.data_param().force_encoded_color();
  if ((force_color && DecodeDatum(&datum, true)) ||
      DecodeDatumNative(&datum)) {
    LOG(INFO) << "Decoding Datum";
  }

  // image
  const int crop_size = this->layer_param_.cpm_transform_param().crop_size();
  const int batch_size = this->layer_param_.data_param().batch_size();
  if (crop_size > 0) {
    // top[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);
    // for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    //   this->prefetch_[i].data_.Reshape(batch_size, datum.channels(), crop_size, crop_size);
    // }
    // //this->transformed_data_.Reshape(1, 4, crop_size, crop_size);
    // this->transformed_data_.Reshape(1, 6, crop_size, crop_size);
  } 
  else {
    const int height = this->phase_ != TRAIN ? datum.height() :
      this->layer_param_.cpm_transform_param().crop_size_y();
    const int width = this->phase_ != TRAIN ? datum.width() :
      this->layer_param_.cpm_transform_param().crop_size_x();
    LOG(INFO) << "PREFETCH_COUNT is " << this->PREFETCH_COUNT;  // prefetching runs asynchronously when data goes to GPU memory
    top[0]->Reshape(batch_size, datum.channels(), height, width);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].data_.Reshape(batch_size, datum.channels(), height, width);   // 10,6,368,368
    }
    //this->transformed_data_.Reshape(1, 4, height, width);
    this->transformed_data_.Reshape(1, datum.channels(), height, width);  // 1,6,368,368
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","              
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();   // 10,6,368,368

  // label
  if (this->output_labels_) {
    const int stride = this->layer_param_.cpm_transform_param().stride();  // 8, important
    const int height = this->phase_ != TRAIN ? datum.height() :
      this->layer_param_.cpm_transform_param().crop_size_y();
    const int width = this->phase_ != TRAIN ? datum.width() :
      this->layer_param_.cpm_transform_param().crop_size_x();

    int num_parts = this->layer_param_.cpm_transform_param().num_parts();  // 56
    top[1]->Reshape(batch_size, 2*(num_parts+1), height/stride, width/stride);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].label_.Reshape(batch_size, 2*(num_parts+1), height/stride, width/stride);  // 10,114,46,46
    }
    this->transformed_label_.Reshape(1, 2*(num_parts+1), height/stride, width/stride);  // 1,114,46,46
  }
}

This function mainly reads some hyperparameters and fixes the format of the output data.
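
For intuition about the numbers in the comments: num_parts = 56 is 18 keypoint heatmaps plus 38 PAF channels; adding background gives 57 maps, and doubling to 2*(num_parts+1) = 114 leaves room for a per-pixel loss mask paired with each map, at 46x46 resolution (368/8). As a rough illustration of what one of those label channels holds, here is a minimal sketch of rendering a keypoint Gaussian on the stride-8 grid; the function name and sigma are assumptions for illustration, not the transformer's actual code:

import numpy as np

def put_gaussian(grid_y, grid_x, stride, center, sigma=7.0):
    # Render one keypoint heatmap on the downsampled label grid.
    # Map each label cell back to image coordinates (cell centers).
    xs = np.arange(grid_x) * stride + stride / 2.0
    ys = np.arange(grid_y) * stride + stride / 2.0
    xx, yy = np.meshgrid(xs, ys)
    d2 = (xx - center[0]) ** 2 + (yy - center[1]) ** 2
    heat = np.exp(-d2 / (2.0 * sigma ** 2))
    heat[heat < np.exp(-4.6052)] = 0.0  # truncate far-away responses
    return heat

hm = put_gaussian(46, 46, 8, center=(184.0, 184.0))  # one 46x46 label channel
print hm.shape, hm.max()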
The key function is load_batch; here is an excerpt:

    // Apply data transformations (mirror, scale, crop...)
    timer.Start();
    const int offset_data = batch->data_.offset(item_id);
    const int offset_label = batch->label_.offset(item_id);
    this->transformed_data_.set_cpu_data(top_data + offset_data);
    this->transformed_label_.set_cpu_data(top_label + offset_label);
    if (datum.encoded()) {
      this->cpm_data_transformer_->Transform(cv_img, &(this->transformed_data_));
    } else {
      this->cpm_data_transformer_->Transform_nv(datum, 
        &(this->transformed_data_),
        &(this->transformed_label_), cnt);
      ++cnt;
    }
    // if (this->output_labels_) {
    //   top_label[item_id] = datum.label();
    // }
    trans_time += timer.MicroSeconds();
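
One detail worth noting in this excerpt: Blob::offset(item_id) is simply the flat index of sample item_id in the NCHW buffer, so set_cpu_data turns transformed_data_ into a view onto the right slice of the prefetch memory rather than copying it. The same arithmetic in a tiny Python sketch (illustrative only):

# Flat offset of element (n, c, h, w) in an NCHW buffer,
# matching what Caffe's Blob::offset() computes.
def offset(n, c, h, w, C, H, W):
    return ((n * C + c) * H + h) * W + w

print offset(3, 0, 0, 0, C=6, H=368, W=368)  # start of sample 3 in the data blob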

The calls to Transform and Transform_nv here lead into the cpm_data_transformer file.
The next post covers cpm_data_transformer.
Original post: http://blog.csdn.net/u011956147/article/details/79292026
