Bootstrap

facenet_pytorch简介

介绍

facenet-pytorch库里面包含了两个重要功能:人脸检测和人脸识别,其中人脸检测部分使用mtcnn算法,人脸识别部分使用Facenet算法。利用这个库,可以轻松实现人脸检测和人脸向量映射操作。

安装

pip install facenet-pytorch

人脸检测

from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import pandas as pd
import os

# DataLoader worker processes: none on Windows (os.name == 'nt'), four elsewhere.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

# MTCNN face detector; image_size sets the side length of the cropped face
# tensors it returns.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)


这个库的MTCNN模型对外主要提供forward和detect两个方法。其中forward的输出是裁剪后的人脸张量,形状为3*image_size*image_size;如果参数keep_all=True,则会增加一个维度,返回所有检测到的人脸,即N*3*image_size*image_size的张量。另外,参数return_prob决定是否同时返回检测到人脸的概率值。而detect方法返回的是检测到的人脸框的位置坐标和对应的概率。典型用法如下。

因此,forward的输出是(N)CHW格式的人脸张量,用作后面识别模型的输入来计算特征向量;而detect的输出是人脸框的位置坐标。如果两个结果都需要,按照下面的例子就要分别调用一次forward和一次detect,相当于进行了两次推理运算。

# Illustrative usage; `img` is an input image loaded elsewhere.
mtcnn = MTCNN()
# Forward pass: returns the cropped face tensor (3 x image_size x image_size;
# an extra leading dimension N is added only when keep_all=True) and, because
# return_prob=True, the detection probability. save_path writes the crop to disk.
face_tensor, prob = mtcnn(img, return_prob=True, save_path='face.png')
# detect(): returns the face bounding-box coordinates and their probabilities.
boxes, prob = mtcnn.detect(img)

# Build the Inception-ResNet v1 module.
# For classification, classify=True would have to be set; here we want face
# embedding vectors instead, and the model must be switched to eval() mode.
resnet = InceptionResnetV1(pretrained='vggface2')
resnet.eval()
resnet = resnet.to(device)
#定义一个数据路径
#增加一个idx_to_class属性记录每个名字的索引值
def collate_fn(x):
    """Return the first element of the batch ``x`` as-is.

    Passed to ``DataLoader`` so that each iteration yields the single sample
    itself rather than a collated batch.
    """
    first_sample = x[0]
    return first_sample

dataset = datasets.ImageFolder('../data/test_images')
# Inverse of class_to_idx, so a numeric label can be mapped back to its class name.
dataset.idx_to_class = {index: label for label, index in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=workers)

aligned = []  # cropped face tensors, one per image in which a face was found
names = []    # class name for each entry of `aligned`
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is None:
        # No face was returned for this image; nothing to record.
        continue
    print('Face detected with probability: {:8f}'.format(prob))
    aligned.append(x_aligned)
    names.append(dataset.idx_to_class[y])
Face detected with probability: 0.999983
Face detected with probability: 0.999934
Face detected with probability: 0.999733
Face detected with probability: 0.999876
Face detected with probability: 0.999992
# Stack the per-image face tensors into one batch and move it to the model's device.
aligned = torch.stack(aligned)
aligned = aligned.to(device)
# Compute the embeddings, then detach them and bring them back to the CPU.
embeddings = resnet(aligned)
embeddings = embeddings.detach().cpu()
# Pairwise distance (norm of the difference) between every two embeddings.
dists = [[(row - col).norm().item() for col in embeddings] for row in embeddings]
distance_table = pd.DataFrame(dists, columns=names, index=names)
print(distance_table)
                angelina_jolie  bradley_cooper  kate_siegel  paul_rudd  \
angelina_jolie        0.000000        1.447480     0.887728   1.434376   
bradley_cooper        1.447480        0.000000     1.313749   1.011981   
kate_siegel           0.887728        1.313749     0.000000   1.388993   
paul_rudd             1.434376        1.011981     1.388993   0.000000   
shea_whigham          1.399073        1.038684     1.379655   1.104096   

                shea_whigham  
angelina_jolie      1.399073  
bradley_cooper      1.038684  
kate_siegel         1.379655  
paul_rudd           1.104096  
shea_whigham        0.000000  

人脸识别

# -*- coding: utf-8 -*-
# @Time    : 2021/6/17 14:39
# @Author  : Johnson
#制作人脸特征向量的数据库,最后会保存两个文件,分别是数据库中的人脸特征向量和对应的名字,当然也可以一起保存
from facenet_pytorch import MTCNN,InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import pandas as pd
import os
from PIL import Image, ImageDraw, ImageFont

# DataLoader worker processes: none on Windows (os.name == "nt"), four elsewhere.
if os.name == "nt":
    workers = 0
else:
    workers = 4

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
print("Running on device:{}".format(device))

# MTCNN face detector; image_size sets the side length of the cropped faces.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)


# InceptionResnetV1 offers two sets of pretrained weights, trained on the
# VGGFace and CASIA datasets respectively.
# Downloading them automatically can be very slow; they can instead be fetched
# manually from the cloud link given by the library author and placed in:
#   Windows: C:\Users\<your user name>\.cache\torch\checkpoints
#   Linux:   /home/<your user name>/.cache/torch/checkpoints
# NOTE: this text used to live in a non-raw triple-quoted string, where the
# "\U" of "C:\Users" is an invalid unicode escape (a SyntaxError); keeping it
# as comments avoids that.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

def collate_fn(x):
    """Unwrap a batch by returning its first element unchanged.

    Used as the ``DataLoader`` collate function so that iterating the loader
    yields the sample directly instead of a collated batch.
    """
    sample = x[0]
    return sample

# Put each person's single-face photos in that person's own folder; the folder
# name is the person's name. The layout looks like this:
'''
--origin
  |--zhangsan
     |--1.jpg
     |--2.jpg
  |--lisi
     |--1.jpg
     |--2.jpg
'''

dataset = datasets.ImageFolder("./database/origin")  # load the photo database
# Inverse of class_to_idx, so a numeric label can be mapped back to the person's name.
dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=workers)
aligned = []  # faces cropped from the images, sized by the image_size given to MTCNN
names = []    # person name for each entry of `aligned`
# Running file number per person, so every class folder is numbered from 1
# regardless of whether the folder already existed before this run.
images_seen = {}
for x, y in loader:
    person = dataset.idx_to_class[y]
    # Folder the cropped faces of this person are written to.
    path = os.path.join('./database/aligned', person)
    # makedirs also creates the missing './database/aligned' parent and is a
    # no-op when the folder is already there.
    os.makedirs(path, exist_ok=True)
    i = images_seen.get(person, 0) + 1
    images_seen[person] = i
    # save_path tells MTCNN where to write the detected face; pass None to skip saving.
    x_aligned, prob = mtcnn(
        x, return_prob=True, save_path=os.path.join(path, '{}.jpg'.format(i))
    )
    if x_aligned is not None:
        print('Face detected with probability: {:8f}'.format(prob))
        aligned.append(x_aligned)
        names.append(person)


aligned = torch.stack(aligned).to(device)
# Extract the feature vector of every face (each vector has length 512).
embeddings = resnet(aligned).detach().cpu()
# Pairwise distance matrix between all embeddings (norm of the difference).
dists = [[(e1 - e2).norm().item() for e2 in embeddings] for e1 in embeddings]
print(names)
print(pd.DataFrame(dists, columns=names, index=names))
# Persist the database under ./database, which is where the recognition step
# loads it from ("./database/database.pt" and "./database/names.pt").
# The two objects could also be stored together in a single file.
torch.save(embeddings, './database/database.pt')
torch.save(names, './database/names.pt')

'''
有了上述的数据库,就能通过距离识别人脸。这个库的mtcnn模型,只有一个forward和detect方法,其中forward的输出是人脸的3*image_size*image_size的张量,如果参数keep_all = True,则会增加一个维度,返回所有检测到的人脸张量,即N*3*image_size*image_size的张量,另外参数return_prob决定是否返回概率值,即检测到人脸的概率,典型用法如下。而detect方法则返回了检测的人脸框的位置坐标和概率。
因此,forward的输出是NCHW的张量,用于后面的模型输入计算特征向量,而detect的输出是人脸的位置坐标,如果两个结果都需要的话,就得根据例子进行两次运算,相当于进行了两次推理运算。
'''
# Recognise faces in new photos.
# The MTCNN network does the face detection; keep_all=True makes it return
# every detected face.
mtcnn = MTCNN(keep_all=True, device=device)
# Use the `device` chosen earlier instead of a hard-coded 'cuda', so the
# script also runs on machines without a GPU.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Saved face database: person names and their embedding vectors.
names = torch.load("./database/names.pt")
embeddings = torch.load("./database/database.pt").to(device)

def detect_frame(img):
    """Detect the faces in ``img`` and label each with its closest database entry.

    Relies on the module-level ``mtcnn``, ``resnet``, ``embeddings`` and
    ``names``. Assumes ``mtcnn`` was created with ``keep_all=True`` so that its
    forward pass yields one cropped face per detected box.

    Args:
        img: Image to analyse; it must support ``.copy()`` and be drawable via
            ``ImageDraw.Draw`` (presumably a PIL image).

    Returns:
        A copy of ``img`` with a red rectangle around every detected face and
        the matched name drawn at the box's top-left corner. When no face is
        detected the copy is returned without annotations.
    """
    frame_draw = img.copy()

    # Locate the faces first; detect() gives None instead of an array when
    # nothing is found, in which case there is nothing to crop or recognise.
    boxes, _ = mtcnn.detect(img)
    if boxes is None:
        print("检测人脸数目:", 0)
        return frame_draw

    # Second inference pass to obtain the cropped face tensors themselves
    # (detect() only returns coordinates).
    faces = mtcnn(img)
    if faces is None:
        print("检测人脸数目:", 0)
        return frame_draw

    fontStyle = ImageFont.truetype("LiberationSans-Regular.ttf", 25,encoding="utf-8")
    draw = ImageDraw.Draw(frame_draw)
    print("检测人脸数目:",len(boxes))

    # Run on whatever device the database embeddings live on, rather than a
    # hard-coded 'cuda'.
    target_device = embeddings.device
    with torch.no_grad():  # inference only; no autograd graph needed
        face_embeddings = resnet(faces.to(target_device))

    for box, face_embedding in zip(boxes, face_embeddings):
        draw.rectangle(box.tolist(), outline=(255, 0, 0))  # draw the face box
        # Distance from this face to every database embedding.
        distances = (embeddings - face_embedding).norm(dim=1).tolist()
        if not distances:
            # Empty database: the face can be boxed but not named.
            continue
        # The nearest entry is taken as the most likely identity. This can be
        # wrong; the database may hold several views/poses of one person and
        # another scheme (e.g. voting) could decide the final match.
        index = distances.index(min(distances))
        name = names[index]
        draw.text( (int(box[0]),int(box[1])), str(name), fill=(255,0,0),font=fontStyle)
    return frame_draw

人脸追踪

from facenet_pytorch import MTCNN
import torch
import numpy as np
import mmcv, cv2
from PIL import Image, ImageDraw
from IPython import display
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

# Face detector; keep_all=True keeps every detected face.
mtcnn = MTCNN(device=device, keep_all=True)


# Read the input video and convert every frame from BGR to an RGB PIL image.
video = mmcv.VideoReader('video.mp4')
frames = []
for bgr_frame in video:
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    frames.append(Image.fromarray(rgb_frame))

# Builds an IPython Video object for the source clip (presumably meant to be
# rendered as notebook cell output).
display.Video('video.mp4', width=640)
# Run the detector on every frame and keep an annotated, down-scaled copy.
frames_tracked = []
for i, frame in enumerate(frames):
    print('\rTracking frame: {}'.format(i + 1), end='')

    # Detect faces
    boxes, _ = mtcnn.detect(frame)

    # Draw faces
    frame_draw = frame.copy()
    draw = ImageDraw.Draw(frame_draw)
    # detect() gives None instead of an array when the frame has no faces;
    # such frames are kept unannotated rather than crashing the loop.
    if boxes is not None:
        for box in boxes:
            draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)

    # Add to frame list
    frames_tracked.append(frame_draw.resize((640, 360), Image.BILINEAR))
print('\nDone')
# Show the first tracked frame, then keep replacing it with the following
# frames in an endless loop until the user interrupts (Ctrl+C / kernel stop).
d = display.display(frames_tracked[0], display_id=True)
total_frames = len(frames_tracked)
i = 1
try:
    while True:
        next_frame = frames_tracked[i % total_frames]  # wrap around at the end
        d.update(next_frame)
        i += 1
except KeyboardInterrupt:
    pass
# Write the annotated frames to a new video file.
# `dim` and `fourcc` were never defined before being used; derive the frame
# size from the tracked frames (PIL's .size is (width, height)) and pick the
# 'mp4v' codec for the .mp4 container.
dim = frames_tracked[0].size
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video_tracked = cv2.VideoWriter('video_tracked.mp4', fourcc, 25.0, dim)
try:
    for frame in frames_tracked:
        # The frames are RGB PIL images; convert back to BGR arrays for the writer.
        video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
finally:
    # Always close the file, even if writing a frame fails.
    video_tracked.release()