文章目录
前言
本文为9月5日关键点检测学习笔记——人体骨骼点检测:自顶向下,分为三个章节:
- 常用数据集;
- 评价体系;
- Top-down 自顶向下。
- 自顶向下:先找人,再找点;
- 自底向上:先找点,后归纳。
一、常用数据集
二、评价体系
1、Bounding box IoU
2、Mask IoU
3、Object keypoint similarity(OKS)
- 真实关节点的格式:$[x_1, y_1, v_1, \dots, x_k, y_k, v_k]$.
- 坐标:$[x, y]$;
- 可见性:$[v]$.
$$
OKS = \frac{\sum_{i} e^{-\frac{d_i^2}{2 s^2 k_i^2}} \, \delta(v_i > 0)}{\sum_{i} \delta(v_i > 0)}
$$
其中:
- $d_i$ 是每个 GT 和检测到的 keypoint 的欧氏距离;
- $v_i$ 是 GT 的可见度标识:
- $v = 0$:未标注点;
- $v = 1$:已标注但不可见;
- $v = 2$:已标注且图像可见。
- $s \cdot k_i$(scale * keypoint constant)是该高斯分布的标准差,使每个 keypoint 的权重相等。
- 完美预测:1.
- Precision: 预测的准确度;
$Precision = \frac{TP}{TP + FP}$
其中,$TP$ 是 true positive,$FP$ 是 false positive。
- Recall: 找到多少真值。
$Recall = \frac{TP}{TP + FN}$
其中,$FN$ 是 false negative。
- Keypoints Evaluation Metric:
三、Top-down 自顶向下
1、Mask RCNN
- 与 Faster RCNN 的区别:
- Feature Pyramid Network:
- Anchors:
- Ratio:width / height = [0.5, 1, 2]
- Scales:[32, 64, 256] pixels.
- 坐标:$[x_1, y_1, x_2, y_2]$ / $[x_0, y_0, w, h]$.
- Proposal Layer:
- RoI Aligned Layer:
- Mask Branch:
main.py 代码如下:
import os
import sys
import random
import argparse
import numpy as np
import cv2 as cv
import coco
import utils
import model as modellib
class InferenceConfig(coco.CocoConfig):
    # Configuration overrides used by this script when running the model in
    # "inference" mode; every other setting is inherited from coco.CocoConfig.

    # Pool size setting for the keypoint mask branch.
    # NOTE(review): exact semantics are defined by the model code, which is
    # not visible here -- confirm before changing.
    KEYPOINT_MASK_POOL_SIZE = 7

    # One image on one GPU: the script passes a single frame per
    # detect_keypoint() call.
    # NOTE(review): presumably the effective batch size is
    # GPU_COUNT * IMAGES_PER_GPU -- confirm against the base config.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
# Limbs to draw, as (start, end) keypoint indices. -1 stands for the synthetic
# "neck" point computed as the midpoint of keypoints 5 and 6.
# NOTE(review): 5/6 are presumably the shoulders in COCO keypoint order --
# confirm against the model's keypoint layout.
_SKELETON = (
    (0, -1), (-1, 5), (-1, 6), (5, 7), (6, 8), (7, 9),
    (8, 10), (11, 13), (12, 14), (13, 15), (14, 16),
)

# One drawing color per entry of _SKELETON (11 limbs).
_LIMB_COLORS = (
    (0, 0, 255), (0, 170, 255), (0, 255, 170),
    (0, 255, 0), (170, 255, 0), (255, 170, 0),
    (255, 0, 0), (255, 0, 170), (170, 0, 255),
    (170, 170, 0), (170, 0, 170),
)


def _load_colors(path):
    """Read a palette file with one color per line ("c0 c1 c2").

    Blank lines are skipped. Returns a list of 3-element float arrays.

    Raises:
        ValueError: if the file contains no colors, or a component is not
            a number.
    """
    colors = []
    with open(path, 'rt') as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            colors.append(np.array([float(parts[0]),
                                    float(parts[1]),
                                    float(parts[2])]))
    if not colors:
        raise ValueError("no colors found in " + path)
    return colors


def _overlay_masks(frame, masks, colors, threshold):
    """Blend each instance mask into ``frame`` in place and outline it.

    ``masks`` is indexed as ``masks[:, :, i]`` for instance ``i``; pixels whose
    mask value exceeds ``threshold`` are tinted (30% color, 70% image) with a
    randomly chosen palette color, then the mask contour is drawn.
    """
    for idx in range(masks.shape[2]):
        mask = masks[:, :, idx] > threshold
        roi = frame[mask]
        color = random.choice(colors)
        frame[mask] = (0.3 * color + 0.7 * roi).astype(np.uint8)

        # findContours returns (contours, hierarchy) on OpenCV 4.x but
        # (image, contours, hierarchy) on 3.x; the last two items are the
        # same in both.
        contours, hierarchy = cv.findContours(mask.astype(np.uint8),
                                              cv.RETR_TREE,
                                              cv.CHAIN_APPROX_SIMPLE)[-2:]
        cv.drawContours(frame, contours, -1, color, 3,
                        cv.LINE_8, hierarchy, 100)  # draw the outline


def _draw_skeletons(frame, keypoints):
    """Draw the limbs of every detected person onto ``frame`` in place.

    ``keypoints`` is converted to an integer array and indexed as
    ``keypoints[person][joint] -> (x, y, v)``; a limb is drawn only when the
    third component of both of its end points is non-zero.
    """
    keypoints = np.array(keypoints).astype(int)  # read the keypoint results
    for person in keypoints:
        # Synthetic neck: midpoint of keypoints 5 and 6, dropped (all zeros)
        # when either of them is flagged 0.
        neck = ((person[5, :] + person[6, :]) / 2).astype(int)
        if person[5, 2] == 0 or person[6, 2] == 0:
            neck = [0, 0, 0]

        for (start_index, end_index), limb_color in zip(_SKELETON,
                                                        _LIMB_COLORS):
            joint_start = neck if start_index == -1 else person[start_index]
            joint_end = neck if end_index == -1 else person[end_index]
            if joint_start[2] != 0 and joint_end[2] != 0:
                # Plain Python ints: some OpenCV builds reject numpy integer
                # scalars as point coordinates.
                cv.line(frame,
                        (int(joint_start[0]), int(joint_start[1])),
                        (int(joint_end[0]), int(joint_end[1])),
                        limb_color, 5)


def main():
    """Run keypoint/mask detection on an image, a video file, or the webcam.

    Command line:
        --image PATH   process a single image; result is written next to it
                       as ``<name>_mask_rcnn_out_py.jpg``.
        --video PATH   process a video; result is written next to it as
                       ``<name>_mask_rcnn_out_py.avi``.
        (neither)      read from capture device 0 and write
                       ``webcam_mask_rcnn_out_py.avi`` in the working
                       directory.

    Raises:
        AssertionError: if the pre-trained weights file is missing.
        SystemExit: if the given image/video path is not a file.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument("--image", type=str)
    parse.add_argument('--video', type=str)
    args = parse.parse_args()

    ROOT_DIR = os.getcwd()
    MODEL_DIR = os.path.join(ROOT_DIR, "logs")
    # Change this to wherever you saved the downloaded model weights.
    # No trailing slash: this is a file, and "file.h5/" never exists on POSIX.
    COCO_MODEL_PATH = "./model/mask_rcnn_coco.h5"
    if not os.path.exists(COCO_MODEL_PATH):
        raise AssertionError('please download the pre-trained model')

    colorsFile = "colors.txt"  # palette used to tint the instance masks
    colors = _load_colors(colorsFile)

    inference_config = InferenceConfig()
    # Build the model graph, then load the weights into it.
    model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                              config=inference_config)
    model.load_weights(COCO_MODEL_PATH, by_name=True)

    if args.image:  # single image
        if not os.path.isfile(args.image):
            print("Input image file ", args.image, " doesn't exist")
            sys.exit(1)
        cap = cv.VideoCapture(args.image)
        # splitext instead of [:-4] so extensions like ".jpeg" work too.
        outputFile = os.path.splitext(args.image)[0] + '_mask_rcnn_out_py.jpg'
    elif args.video:  # video file
        if not os.path.isfile(args.video):
            print("Input video file ", args.video, " doesn't exist")
            sys.exit(1)
        cap = cv.VideoCapture(args.video)
        outputFile = os.path.splitext(args.video)[0] + '_mask_rcnn_out_py.avi'
    else:  # webcam
        cap = cv.VideoCapture(0)
        # Previously left undefined here, which crashed the VideoWriter setup.
        outputFile = 'webcam_mask_rcnn_out_py.avi'

    vid_writer = None
    if not args.image:
        vid_writer = cv.VideoWriter(
            outputFile,
            cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
            30,
            (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
             round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

    maskThreshold = 0.3  # mask threshold

    try:
        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print("Done processing !!!")
                print("Output file is stored as ", outputFile)
                cv.waitKey(3000)
                break

            print("frame shape:", frame.shape)
            results = model.detect_keypoint([frame], verbose=1)
            r = results[0]
            # NOTE(review): the keypoint drawing is assumed to belong under
            # this guard together with the mask overlay -- confirm.
            if r['masks'].shape[0]:
                _overlay_masks(frame, r['masks'], colors, maskThreshold)
                _draw_skeletons(frame, r['keypoints'])

            if args.image:
                cv.imwrite(outputFile, frame.astype(np.uint8))
            else:
                vid_writer.write(frame.astype(np.uint8))
    finally:
        # Release the capture and flush/finalise the output video even if
        # detection raises part-way through.
        cap.release()
        if vid_writer is not None:
            vid_writer.release()


if __name__ == "__main__":
    main()