Bootstrap

9月5日关键点检测学习笔记——人体骨骼点检测:自顶向下


前言

本文为9月5日关键点检测学习笔记——人体骨骼点检测:自顶向下,分为三个章节:

  • 常用数据集;
  • 评价体系;
  • Top-down 自顶向下。

  • 自顶向下:先找人,再找点;
  • 自底向上:先找点,后归纳。

一、常用数据集

1


二、评价体系

1、Bounding box IoU

2

2、Mask IoU

3

3、Object keypoint similarity(OKS)

4
4

  • 真实关节点的格式: $[x_1, y_1, v_1, \dots, x_k, y_k, v_k]$.
    • 坐标: $[x, y]$
    • 可见性: $[v]$.

$$OKS = \frac{\sum_{i} e^{-\frac{d_i^2}{2 s^2 k_i^2}} \, \delta(v_i > 0)}{\sum_{i} \delta(v_i > 0)}$$
其中:

  1. $d_i$ 是每个 GT 和检测到的 keypoint 的欧氏距离;
  2. $v_i$ 是 GT 的可见度标识:
    • $v = 0$:未标注点;
    • $v = 1$:已标注但不可见;
    • $v = 2$:已标注且图像可见。
  3. $s \cdot k_i$ (scale * keypoint constant)是该高斯分布的标准差,使每个 keypoint 的权重相等。
  4. 完美预测时 $OKS = 1$.
  • Precision: 预测的准确度;
    $Precision = \frac{TP}{TP + FP}$
    其中, $TP$ 是 true positive, $FP$ 是 false positive。

  • Recall: 找到多少真值。
    $Recall = \frac{TP}{TP + FN}$
    其中, $FN$ 是 false negative。

4

  • Keypoints Evaluation Metric:

5


三、Top-down 自顶向下

1、Mask RCNN

  • 与 Faster RCNN 的区别:

6

  • Feature Pyramid Network:

7
8

  • Anchors:
    • Ratio:width / height = [0.5, 1, 2]
    • Scales:[32, 64, 256] pixels.
    • 坐标: $[x_1, y_1, x_2, y_2]$ / $[x_0, y_0, w, h]$.

9

  • Proposal Layer:
    10
    11

  • RoI Aligned Layer:
    12
    13

  • Mask Branch:
    14

main.py 代码如下:

import os
import sys
import random
import argparse
import numpy as np
import cv2 as cv

import coco
import utils
import model as modellib


class InferenceConfig(coco.CocoConfig):
    """Overrides of ``coco.CocoConfig`` used when running the demo."""
    # NOTE(review): presumably one GPU times one image per GPU yields a batch
    # of a single frame, matching the one-frame list passed to the detector
    # in main() -- confirm against the base config class.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # NOTE(review): looks like the pooled RoI size for the keypoint head --
    # confirm against model.py.
    KEYPOINT_MASK_POOL_SIZE = 7


def _load_colors(path):
    """Read the overlay palette from *path*.

    Each non-blank line must hold at least three whitespace-separated
    numbers; the first three become one color.

    Returns:
        A list of float ``np.ndarray`` objects of length 3.

    Raises:
        ValueError: if the file contains no colors (or a value is not a
            number).
        IndexError: if a non-blank line has fewer than three values.
    """
    colors = []
    with open(path, 'rt') as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Tolerate blank or trailing empty lines.
                continue
            colors.append(np.array([float(parts[0]),
                                    float(parts[1]),
                                    float(parts[2])]))
    if not colors:
        raise ValueError("no colors found in " + path)
    return colors


def _overlay_masks(frame, masks, colors, threshold):
    """Tint every instance mask into *frame* (in place) and outline it.

    Args:
        frame: image array that is modified in place.
        masks: array indexed as ``masks[:, :, i]`` for instance ``i``.
        colors: non-empty list of 3-component float arrays.
        threshold: mask values strictly above this count as foreground.
    """
    for i in range(masks.shape[2]):
        mask = masks[:, :, i] > threshold
        color = random.choice(colors)
        # 30% color blended with 70% of the original pixels under the mask.
        frame[mask] = (0.3 * color + 0.7 * frame[mask]).astype(np.uint8)
        # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
        # (contours, hierarchy); the last two items are the same in both.
        contours, hierarchy = cv.findContours(mask.astype(np.uint8),
                                              cv.RETR_TREE,
                                              cv.CHAIN_APPROX_SIMPLE)[-2:]
        # Pass the color as a plain tuple of Python floats.
        cv.drawContours(frame, contours, -1, tuple(color.tolist()), 3,
                        cv.LINE_8, hierarchy, 100)


def _draw_skeletons(frame, keypoints):
    """Draw the limb lines of every detected person onto *frame* in place.

    Args:
        frame: image array that is modified in place.
        keypoints: integer array indexed as ``keypoints[person, joint]``
            giving ``(x, y, flag)``; a flag of 0 means the joint is skipped.
            NOTE(review): joint indices presumably follow the COCO
            17-keypoint order -- confirm against the model output.
    """
    # Eleven (start, end) joint-index pairs; -1 stands for the synthetic
    # "neck" joint computed below as the midpoint of joints 5 and 6.
    skeleton = np.reshape([0, -1, -1, 5, -1, 6, 5, 7, 6, 8, 7, 9,
                           8, 10, 11, 13, 12, 14, 13, 15, 14, 16], (-1, 2))
    # One color per limb, in the same order as ``skeleton``.
    limb_colors = [[0, 0, 255], [0, 170, 255], [0, 255, 170],
                   [0, 255, 0], [170, 255, 0], [255, 170, 0],
                   [255, 0, 0], [255, 0, 170], [170, 0, 255],
                   [170, 170, 0], [170, 0, 170]]
    for person in keypoints:
        joint_a, joint_b = person[5], person[6]
        if joint_a[2] == 0 or joint_b[2] == 0:
            # Without both joints there is no usable neck.
            neck = np.zeros(3, dtype=int)
        else:
            neck = ((joint_a + joint_b) / 2).astype(int)
        for limb_color, (start_index, end_index) in zip(limb_colors,
                                                        skeleton):
            joint_start = neck if start_index == -1 else person[start_index]
            joint_end = neck if end_index == -1 else person[end_index]
            if joint_start[2] != 0 and joint_end[2] != 0:
                # Plain Python ints keep the point tuples portable across
                # OpenCV versions.
                cv.line(frame,
                        (int(joint_start[0]), int(joint_start[1])),
                        (int(joint_end[0]), int(joint_end[1])),
                        limb_color, 5)


def main():
    """Run Mask R-CNN keypoint detection on an image, a video or the camera.

    Command line:
        --image PATH  process a single image and write ``*_mask_rcnn_out_py.jpg``
        --video PATH  process a video and write ``*_mask_rcnn_out_py.avi``
        (neither)     read from camera 0 and write
                      ``webcam_mask_rcnn_out_py.avi``

    Raises:
        AssertionError: if the pre-trained weights file is missing.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument("--image", type=str)
    parse.add_argument('--video', type=str)
    args = parse.parse_args()

    ROOT_DIR = os.getcwd()

    MODEL_DIR = os.path.join(ROOT_DIR, "logs")
    # Change this to wherever the downloaded pre-trained weights are stored.
    # No trailing slash: this is a file, and a trailing separator makes the
    # existence check fail on POSIX.
    COCO_MODEL_PATH = "./model/mask_rcnn_coco.h5"

    if not os.path.exists(COCO_MODEL_PATH):
        raise AssertionError('please download the pre-trained model')

    # Palette used to tint the instance masks.
    colors = _load_colors("colors.txt")

    inference_config = InferenceConfig()

    # Build the model in inference mode, then load the weights.
    model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                              config=inference_config)
    model.load_weights(COCO_MODEL_PATH, by_name=True)

    if args.image:  # single image
        if not os.path.isfile(args.image):
            print("Input image file ", args.image, " doesn't exist")
            sys.exit(1)
        cap = cv.VideoCapture(args.image)
        # splitext handles extensions of any length (e.g. ".jpeg").
        outputFile = os.path.splitext(args.image)[0] + '_mask_rcnn_out_py.jpg'
    elif args.video:  # video file
        if not os.path.isfile(args.video):
            print("Input video file ", args.video, " doesn't exist")
            sys.exit(1)
        cap = cv.VideoCapture(args.video)
        outputFile = os.path.splitext(args.video)[0] + '_mask_rcnn_out_py.avi'
    else:  # camera
        cap = cv.VideoCapture(0)
        # The writer below needs a file name in camera mode as well.
        outputFile = 'webcam_mask_rcnn_out_py.avi'

    if not cap.isOpened():
        print("Could not open the input source")
        sys.exit(1)

    vid_writer = None
    if not args.image:
        vid_writer = cv.VideoWriter(outputFile,
                                    cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                    30,
                                    (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
                                     round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

    maskThreshold = 0.3  # mask threshold
    try:
        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print("Done processing !!!")
                print("Output file is stored as ", outputFile)
                cv.waitKey(3000)
                break

            print("frame shape:", frame.shape)
            results = model.detect_keypoint([frame], verbose=1)
            r = results[0]
            if r['masks'].shape[0]:
                _overlay_masks(frame, r['masks'], colors, maskThreshold)
                _draw_skeletons(frame, np.array(r['keypoints']).astype(int))

            if args.image:
                cv.imwrite(outputFile, frame.astype(np.uint8))
            else:
                vid_writer.write(frame.astype(np.uint8))
    finally:
        # Release even on Ctrl-C so the output video is finalized.
        cap.release()
        if vid_writer is not None:
            vid_writer.release()


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

;