YOLOv8 Model Pruning in Practice
Reference article on pruning:
https://blog.csdn.net/qq_33596242/article/details/133774348?spm=1001.2014.3001.5502
Reference tutorial on basic torch-pruning usage:
https://blog.csdn.net/magic_ll/article/details/134441473
I. Pruning
1. Install the dependency
pip install torch-pruning
Supported YOLOv8 version:
https://pypi.org/project/ultralytics/8.0.132/#files
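The pruning script below imports from the ultralytics.yolo.* package layout, which later ultralytics releases reorganized, so it is safest to pin the release linked above; for example:
pip install ultralytics==8.0.132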
2. Copy https://github.com/VainF/Torch-Pruning/blob/master/examples/yolov8/yolov8_pruning.py into the root directory of your YOLOv8 repository.
# This code is adapted from Issue [#147](https://github.com/VainF/Torch-Pruning/issues/147), implemented by @Hyunseok-Kim0.
import argparse
import math
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import List, Union
import numpy as np
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from ultralytics import YOLO, __version__
from ultralytics.nn.modules import Detect, C2f, Conv, Bottleneck
from ultralytics.nn.tasks import attempt_load_one_weight
from ultralytics.yolo.engine.model import TASK_MAP
from ultralytics.yolo.engine.trainer import BaseTrainer
from ultralytics.yolo.utils import yaml_load, LOGGER, RANK, DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS
from ultralytics.yolo.utils.checks import check_yaml
from ultralytics.yolo.utils.torch_utils import initialize_weights, de_parallel
import torch_pruning as tp
def save_pruning_performance_graph(x, y1, y2, y3):
"""
Draw performance change graph
Parameters
----------
x : List
Parameter numbers of all pruning steps
y1 : List
mAPs after fine-tuning of all pruning steps
y2 : List
MACs of all pruning steps
y3 : List
mAPs after pruning (not fine-tuned) of all pruning steps
Returns
-------
"""
try:
plt.style.use("ggplot")
except:
pass
x, y1, y2, y3 = np.array(x), np.array(y1), np.array(y2), np.array(y3)
y2_ratio = y2 / y2[0]
# create the figure and the axis object
fig, ax = plt.subplots(figsize=(8, 6))
# plot the pruned mAP and recovered mAP
ax.set_xlabel('Pruning Ratio')
ax.set_ylabel('mAP')
ax.plot(x, y1, label='recovered mAP')
ax.scatter(x, y1)
ax.plot(x, y3, color='tab:gray', label='pruned mAP')
ax.scatter(x, y3, color='tab:gray')
# create a second axis that shares the same x-axis
ax2 = ax.twinx()
# plot the second set of data
ax2.set_ylabel('MACs')
ax2.plot(x, y2_ratio, color='tab:orange', label='MACs')
ax2.scatter(x, y2_ratio, color='tab:orange')
# add a legend
lines, labels = ax.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2, loc='best')
ax.set_xlim(105, -5)
ax.set_ylim(0, max(y1) + 0.05)
ax2.set_ylim(0.05, 1.05)
# calculate the highest and lowest points for each set of data
max_y1_idx = np.argmax(y1)
min_y1_idx = np.argmin(y1)
max_y2_idx = np.argmax(y2)
min_y2_idx = np.argmin(y2)
max_y1 = y1[max_y1_idx]
min_y1 = y1[min_y1_idx]
max_y2 = y2_ratio[max_y2_idx]
min_y2 = y2_ratio[min_y2_idx]
# add text for the highest and lowest values near the points
ax.text(x[max_y1_idx], max_y1 - 0.05, f'max mAP = {max_y1:.2f}', fontsize=10)
ax.text(x[min_y1_idx], min_y1 + 0.02, f'min mAP = {min_y1:.2f}', fontsize=10)
ax2.text(x[max_y2_idx], max_y2 - 0.05, f'max MACs = {max_y2 * y2[0] / 1e9:.2f}G', fontsize=10)
ax2.text(x[min_y2_idx], min_y2 + 0.02, f'min MACs = {min_y2 * y2[0] / 1e9:.2f}G', fontsize=10)
plt.title('Comparison of mAP and MACs with Pruning Ratio')
plt.savefig('pruning_perf_change.png')
def infer_shortcut(bottleneck):
c1 = bottleneck.cv1.conv.in_channels
c2 = bottleneck.cv2.conv.out_channels
return c1 == c2 and hasattr(bottleneck, 'add') and bottleneck.add
class C2f_v2(nn.Module):
# CSP Bottleneck with 2 convolutions
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
self.c = int(c2 * e) # hidden channels
self.cv0 = Conv(c1, self.c, 1, 1)
self.cv1 = Conv(c1, self.c, 1, 1)
self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2)
self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
def forward(self, x):
# y = list(self.cv1(x).chunk(2, 1))
y = [self.cv0(x), self.cv1(x)]
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))
def transfer_weights(c2f, c2f_v2):
c2f_v2.cv2 = c2f.cv2
c2f_v2.m = c2f.m
state_dict = c2f.state_dict()
state_dict_v2 = c2f_v2.state_dict()
# Transfer cv1 weights from C2f to cv0 and cv1 in C2f_v2
old_weight = state_dict['cv1.conv.weight']
half_channels = old_weight.shape[0] // 2
state_dict_v2['cv0.conv.weight'] = old_weight[:half_channels]
state_dict_v2['cv1.conv.weight'] = old_weight[half_channels:]
# Transfer cv1 batchnorm weights and buffers from C2f to cv0 and cv1 in C2f_v2
for bn_key in ['weight', 'bias', 'running_mean', 'running_var']:
old_bn = state_dict[f'cv1.bn.{bn_key}']
state_dict_v2[f'cv0.bn.{bn_key}'] = old_bn[:half_channels]
state_dict_v2[f'cv1.bn.{bn_key}'] = old_bn[half_channels:]
# Transfer remaining weights and buffers
for key in state_dict:
if not key.startswith('cv1.'):
state_dict_v2[key] = state_dict[key]
# Transfer all non-method attributes
for attr_name in dir(c2f):
attr_value = getattr(c2f, attr_name)
if not callable(attr_value) and '_' not in attr_name:
setattr(c2f_v2, attr_name, attr_value)
c2f_v2.load_state_dict(state_dict_v2)
def replace_c2f_with_c2f_v2(module):
for name, child_module in module.named_children():
if isinstance(child_module, C2f):
# Replace C2f with C2f_v2 while preserving its parameters
shortcut = infer_shortcut(child_module.m[0])
c2f_v2 = C2f_v2(child_module.cv1.conv.in_channels, child_module.cv2.conv.out_channels,
n=len(child_module.m), shortcut=shortcut,
g=child_module.m[0].cv2.conv.groups,
e=child_module.c / child_module.cv2.conv.out_channels)
transfer_weights(child_module, c2f_v2)
setattr(module, name, c2f_v2)
else:
replace_c2f_with_c2f_v2(child_module)
def save_model_v2(self: BaseTrainer):
"""
Disabled half precision saving. originated from ultralytics/yolo/engine/trainer.py
"""
ckpt = {
'epoch': self.epoch,
'best_fitness': self.best_fitness,
'model': deepcopy(de_parallel(self.model)),
'ema': deepcopy(self.ema.ema),
'updates': self.ema.updates,
'optimizer': self.optimizer.state_dict(),
'train_args': vars(self.args), # save as dict
'date': datetime.now().isoformat(),
'version': __version__}
# Save last, best and delete
torch.save(ckpt, self.last)
if self.best_fitness == self.fitness:
torch.save(ckpt, self.best)
if (self.epoch > 0) and (self.save_period > 0) and (self.epoch % self.save_period == 0):
torch.save(ckpt, self.wdir / f'epoch{self.epoch}.pt')
del ckpt
def final_eval_v2(self: BaseTrainer):
"""
originated from ultralytics/yolo/engine/trainer.py
"""
for f in self.last, self.best:
if f.exists():
strip_optimizer_v2(f) # strip optimizers
if f is self.best:
LOGGER.info(f'\nValidating {f}...')
self.metrics = self.validator(model=f)
self.metrics.pop('fitness', None)
self.run_callbacks('on_fit_epoch_end')
def strip_optimizer_v2(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
"""
Disabled half precision saving. originated from ultralytics/yolo/utils/torch_utils.py
"""
x = torch.load(f, map_location=torch.device('cpu'))
args = {**DEFAULT_CFG_DICT, **x['train_args']} # combine model args with default args, preferring model args
if x.get('ema'):
x['model'] = x['ema'] # replace model with ema
for k in 'optimizer', 'ema', 'updates': # keys
x[k] = None
for p in x['model'].parameters():
p.requires_grad = False
x['train_args'] = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS} # strip non-default keys
# x['model'].args = x['train_args']
torch.save(x, s or f)
mb = os.path.getsize(s or f) / 1E6 # filesize
LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
def train_v2(self: YOLO, pruner=None, pruning=False, **kwargs):
"""
Disabled loading new model when pruning flag is set. originated from ultralytics/yolo/engine/model.py
"""
self._check_is_pytorch_model()
if self.session: # Ultralytics HUB session
if any(kwargs):
LOGGER.warning('WARNING ⚠️ using HUB training arguments, ignoring local training arguments.')
kwargs = self.session.train_args
overrides = self.overrides.copy()
overrides.update(kwargs)
if kwargs.get('cfg'):
LOGGER.info(f"cfg file passed. Overriding default params with {kwargs['cfg']}.")
overrides = yaml_load(check_yaml(kwargs['cfg']))
overrides['mode'] = 'train'
if not overrides.get('data'):
raise AttributeError("Dataset required but missing, i.e. pass 'data=coco128.yaml'")
if overrides.get('resume'):
overrides['resume'] = self.ckpt_path
self.task = overrides.get('task') or self.task
self.trainer = TASK_MAP[self.task][1](overrides=overrides, _callbacks=self.callbacks)
if not pruning:
if not overrides.get('resume'): # manually set model only if not resuming
self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
self.model = self.trainer.model
else:
# pruning mode
self.trainer.pruning = True
self.trainer.model = self.model
# replace some functions to disable half precision saving
self.trainer.save_model = save_model_v2.__get__(self.trainer)
self.trainer.final_eval = final_eval_v2.__get__(self.trainer)
self.trainer.hub_session = self.session # attach optional HUB session
if pruner is None:
self.trainer.train()
else:
self.trainer.train(pruner)
# Update model and cfg after training
if RANK in (-1, 0):
self.model, _ = attempt_load_one_weight(str(self.trainer.best))
self.overrides = self.model.args
self.metrics = getattr(self.trainer.validator, 'metrics', None)
def prune(args):
# load trained yolov8 model
base_name = 'prune/' + str(datetime.now()) + '/'
model = YOLO(args.model)
model.__setattr__("train_v2", train_v2.__get__(model))
pruning_cfg = yaml_load(check_yaml(args.cfg))
batch_size = pruning_cfg['batch']
is_regularize = args.is_regularize
sparse_train_epoch = args.sparse_train_epoch
finetune_epoch = args.finetune_epoch
# device = pruning_cfg['device']
    # fine-tune on your own dataset for finetune_epoch epochs after each pruning iteration
    # this part is only sample code; the number of epochs should be included in the config file
    pruning_cfg['data'] = "/yolov8/ultralytics/yolo/data/datasets/your_data.yaml"
pruning_cfg['epochs'] = finetune_epoch
model.model.train()
replace_c2f_with_c2f_v2(model.model)
initialize_weights(model.model) # set BN.eps, momentum, ReLU.inplace
for name, param in model.model.named_parameters():
param.requires_grad = True
example_inputs = torch.randn(1, 3, pruning_cfg["imgsz"], pruning_cfg["imgsz"]).to(model.device)
macs_list, nparams_list, map_list, pruned_map_list = [], [], [], []
base_macs, base_nparams = tp.utils.count_ops_and_params(model.model, example_inputs)
    # validate before pruning: measure baseline accuracy and FLOPs
    pruning_cfg['name'] = base_name + "baseline_val"
    pruning_cfg['batch'] = 64
validation_model = deepcopy(model)
metric = validation_model.val(**pruning_cfg)
init_map = metric.box.map
macs_list.append(base_macs)
nparams_list.append(100)
map_list.append(init_map)
pruned_map_list.append(init_map)
print(f"Before Pruning: MACs={base_macs / 1e9: .5f} G, #Params={base_nparams / 1e6: .5f} M, mAP={init_map: .5f}")
# prune same ratio of filter based on initial size
ch_sparsity = 1 - math.pow((1 - args.target_prune_rate), 1 / args.iterative_steps)
for i in range(args.iterative_steps):
model.model.train()
for name, param in model.model.named_parameters():
param.requires_grad = True
ignored_layers = []
unwrapped_parameters = []
for m in model.model.modules():
if isinstance(m, (Detect,)):
ignored_layers.append(m)
example_inputs = example_inputs.to(model.device)
pruner = tp.pruner.GroupNormPruner(
model.model,
example_inputs,
importance=tp.importance.GroupNormImportance(), # L2 norm pruning,
iterative_steps=1,
ch_sparsity=ch_sparsity,
ignored_layers=ignored_layers,
unwrapped_parameters=unwrapped_parameters
)
        # sparse (regularized) training, performed only once
if is_regularize:
sparse_pruner = tp.pruner.GroupNormPruner(
model.model,
example_inputs,
importance=tp.importance.GroupNormImportance(), # L2 norm pruning,
iterative_steps=args.iterative_steps,
ch_sparsity=args.target_prune_rate,
ignored_layers=ignored_layers,
unwrapped_parameters=unwrapped_parameters
)
pruning_cfg['epochs'] = sparse_train_epoch
pruning_cfg['name'] = base_name+"sparse_training"
pruning_cfg['batch'] = batch_size # restore batch size
model.train_v2(sparse_pruner, pruning=True, **pruning_cfg)
        # prune
pruner.step()
pruning_cfg['epochs'] = finetune_epoch
        # pre-fine-tuning validation: compute mAP right after pruning
        pruning_cfg['name'] = base_name + f"step_{i}_pre_val"
        pruning_cfg['batch'] = 64
validation_model.model = deepcopy(model.model)
metric = validation_model.val(**pruning_cfg)
pruned_map = metric.box.map
if is_regularize:
is_regularize = False
pruned_macs, pruned_nparams = tp.utils.count_ops_and_params(pruner.model, example_inputs.cuda()) # to(model.device)
else:
pruned_macs, pruned_nparams = tp.utils.count_ops_and_params(pruner.model, example_inputs.to(model.device))
current_speed_up = float(macs_list[0]) / pruned_macs
print(f"After pruning iter {i + 1}: MACs={pruned_macs / 1e9} G, #Params={pruned_nparams / 1e6} M, "
f"mAP={pruned_map}, speed up={current_speed_up}")
        # fine-tuning
for name, param in model.model.named_parameters():
param.requires_grad = True
pruning_cfg['name'] = base_name+f"step_{i}_finetune"
pruning_cfg['batch'] = batch_size # restore batch size
model.train_v2(pruning=True, **pruning_cfg)
        # post-fine-tuning validation: compute mAP after fine-tuning
        pruning_cfg['name'] = base_name + f"step_{i}_post_val"
        pruning_cfg['batch'] = 64
validation_model = YOLO(model.trainer.best)
metric = validation_model.val(**pruning_cfg)
current_map = metric.box.map
print(f"After fine tuning mAP={current_map}")
macs_list.append(pruned_macs)
nparams_list.append(pruned_nparams / base_nparams * 100)
pruned_map_list.append(pruned_map)
map_list.append(current_map)
# remove pruner after single iteration
del pruner
model.model.zero_grad() # Remove gradients
        save_path = 'runs/detect/' + base_name + f"step_{i}_pruned_model.pth"
        torch.save(model.model, save_path)  # save the whole module, not just the state_dict
        print('pruned model saved in', save_path)
# model = torch.load('model.pth') # load the pruned model
save_pruning_performance_graph(nparams_list, map_list, macs_list, pruned_map_list)
# if init_map - current_map > args.max_map_drop:
# print("Pruning early stop")
# break
model.export(format='onnx')
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='/yolov8/runs/detect/train/weights/last.pt', help='Pretrained pruning target model file')
parser.add_argument('--cfg', default='default.yaml',
help='Pruning config file.'
' This file should have same format with ultralytics/yolo/cfg/default.yaml')
    parser.add_argument('--is_regularize', action='store_true', help='Enable sparse (regularized) training')  # type=bool is unreliable with argparse
parser.add_argument('--max-map-drop', default=0.2, type=float, help='Allowed maximum map drop after fine-tuning')
parser.add_argument('--target-prune-rate', default=0.5, type=float, help='Target pruning rate')
parser.add_argument('--iterative-steps', default=4, type=int, help='Total pruning iteration step')
    parser.add_argument('--sparse_train_epoch', default=10, type=int)  # number of sparse-training epochs
    parser.add_argument('--finetune_epoch', default=10, type=int)  # number of fine-tuning epochs
args = parser.parse_args()
prune(args)
target-prune-rate: the overall pruning ratio.
iterative-steps: the number of pruning iterations; the target pruning rate is reached gradually across the iterations.
finetune_epoch: the number of fine-tuning epochs run after each pruning step.
The per-step channel sparsity follows directly from the first two values, as the sketch below shows.
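Because every one of the iterative-steps iterations prunes the same fraction of the remaining channels, the script derives the per-step sparsity as ch_sparsity = 1 - (1 - target_prune_rate) ** (1 / iterative_steps), which compounds exactly to the target rate. A minimal sketch to verify the compounding, using the default values from the argument parser above:

import math

target_prune_rate = 0.5  # overall fraction of channels to remove
iterative_steps = 4      # number of pruning iterations

# per-step sparsity, same formula as in prune()
ch_sparsity = 1 - math.pow(1 - target_prune_rate, 1 / iterative_steps)

remaining = 1.0
for i in range(iterative_steps):
    remaining *= 1 - ch_sparsity
    print(f'after step {i + 1}: {1 - remaining:.4f} of the channels pruned')
# the last line prints 0.5000, i.e. exactly the target pruning rate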
II. Exporting to ONNX and evaluating images and COCO metrics
class Detect(nn.Module):
"""YOLOv8 Detect head for detection models."""
dynamic = False # force grid reconstruction
export = False # export mode
shape = None
anchors = torch.empty(0) # init
strides = torch.empty(0) # init
def __init__(self, nc=80, ch=()): # detection layer
super().__init__()
self.nc = nc # number of classes
self.nl = len(ch) # number of detection layers
self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
self.no = nc + self.reg_max * 4 # number of outputs per anchor
self.stride = torch.zeros(self.nl) # strides computed during build
c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], min(self.nc, 100)) # channels
self.cv2 = nn.ModuleList(
nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
def forward(self, x):
"""Concatenates and returns predicted bounding boxes and class probabilities."""
shape = x[0].shape # BCHW
for i in range(self.nl):
x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
        output_0 = x[0]  # raw per-scale head outputs (debugging aid, unused)
        output_1 = x[1]
        output_2 = x[2]
if self.training:
return x
elif self.dynamic or self.shape != shape:
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
self.shape = shape
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'): # avoid TF FlexSplitV ops
box = x_cat[:, :self.reg_max * 4]
cls = x_cat[:, self.reg_max * 4:]
else:
box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
y = torch.cat((dbox, cls.sigmoid()), 1)
        # modified for ONNX evaluation: export mode now also returns (y, x)
        return (y, x)
In the Detect head, change the output returned in ONNX export mode to (y, x), as shown above. The pruned model can then be exported, e.g. with the sketch below.
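The pruning script already calls model.export(format='onnx') on the final model; if you instead want to export one of the intermediate step_*.pth checkpoints, here is a minimal sketch with plain torch.onnx.export (the checkpoint path and the 224 input size are assumptions matching this tutorial's setup):

import torch

# the pruning script saved the whole nn.Module, not a state_dict
pruned = torch.load('runs/detect/prune/step_0_pruned_model.pth', map_location='cpu')
pruned.float().eval()  # inference mode, so the modified Detect head returns (y, x)

dummy = torch.randn(1, 3, 224, 224)  # assumed input size; match your imgsz
torch.onnx.export(pruned, dummy, 'best.onnx', opset_version=12, input_names=['images'])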
Test script: save annotated result images and evaluate COCO metrics
import json
import os
import time

import cv2
import numpy as np
import onnxruntime as rt
import torch
import torchvision
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from ultralytics.nn.autobackend import AutoBackend  # only needed for the commented-out .pt model path below
from ultralytics.yolo.data.augment import LetterBox
from ultralytics.yolo.utils import LOGGER, ops
from ultralytics.yolo.utils.metrics import box_iou
from ultralytics.yolo.utils.torch_utils import select_device
def xywh2xyxy(x):
    # convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2)
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y
def non_max_suppression(
prediction,
conf_thres=0.25,
iou_thres=0.45,
classes=None,
agnostic=False,
multi_label=False,
labels=(),
max_det=300,
nm=0, # number of masks
):
# Checks
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
prediction = prediction[0] # select only inference output
    mps = 'mps' in device.type  # Apple MPS; relies on the global 'device' set later in the script
if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
prediction = prediction.cpu()
bs = prediction.shape[0] # batch size
nc = prediction.shape[1] - nm - 4 # number of classes
mi = 4 + nc # mask start index
xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
# Settings
# min_wh = 2 # (pixels) minimum box width and height
max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
    # time_limit = 0.5 + 0.05 * bs  # seconds to quit after
    time_limit = 300  # relaxed time limit (seconds) so NMS is not cut short
redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
t = time.time()
output = [torch.zeros((0, 6 + nm))] * bs
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
# x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
x = x.transpose(0, -1)[xc[xi]] # confidence
# Cat apriori labels if autolabelling
if labels and len(labels[xi]):
lb = labels[xi]
v = torch.zeros((len(lb), nc + nm + 5))
v[:, :4] = lb[:, 1:5] # box
v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
x = torch.cat((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Detections matrix nx6 (xyxy, conf, cls)
box, cls, mask = x.split((4, nc, nm), 1)
box = xywh2xyxy(box) # center_x, center_y, width, height) to (x1, y1, x2, y2)
if multi_label:
i, j = (cls > conf_thres).nonzero(as_tuple=False).T
x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
else: # best class only
conf, j = cls.max(1, keepdim=True)
x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
# Apply finite constraint
# if not torch.isfinite(x).all():
# x = x[torch.isfinite(x).all(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
i = i[:max_det] # limit detections
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
weights = iou * scores[None] # box weights
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
if redundant:
i = i[iou.sum(1) > 1] # require redundancy
output[xi] = x[i]
if mps:
output[xi] = output[xi].to(device)
if (time.time() - t) > time_limit:
LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
break # time limit exceeded
return output
def onnx_inf(onnxModulePath, data):
# import pdb
# pdb.set_trace()
is_cuda_available = rt.get_device() == 'GPU'
if is_cuda_available:
providers = ['CUDAExecutionProvider']
options = [{'device_id': 0}]
else:
providers = ['CPUExecutionProvider']
options = [{}]
sess = rt.InferenceSession(onnxModulePath, providers=providers, provider_options=options)
input_name = sess.get_inputs()[0].name
output_names = [_.name for _ in sess.get_outputs()]
pred_onnx = sess.run(output_names, {input_name: data})
return pred_onnx
# ground-truth annotations in COCO json format
anno_json = r'/yolov8/data/your_annotations.json'
# build a dict mapping image file names to image ids in the annotation file
def get_name2id_map(anno_json):
with open(anno_json, 'r') as fr:
anno_dict = json.load(fr)
image_dict = anno_dict['images']
    # build the file-name -> image-id mapping
name2id_dict = {}
for image in image_dict:
file_name = image['file_name']
id = image['id']
name2id_dict[file_name] = id
return name2id_dict
# detection class names
names=['1','2','3','4']
color=[(255,192,203),(128,0,128),(248,248,255),(0,0,0),(255,0,0),(0,255,0),(0,0,255),(173,216,230),(64,224,208),
(50,205,50),(34,139,34),(255,255,0),(0,255,255),(255,0,255),(250,240,230),(240,128,128),(192,192,192)]
img_path_dir = "path/to/test/images"  # directory of test images
img_save_path_dir = "path/to/save/results"  # directory for annotated output images
# font for drawing labels
font = cv2.FONT_HERSHEY_SIMPLEX
# load the v8 detection model and select the GPU
device = select_device('7')
# model = '/home/yuanzhengqian/yolov8/runs/detect/hubei_jijian_v1/weights/best.pt'
# model = AutoBackend(model, device=device)
# stride, pt = model.stride, model.pt
# model.eval()
jdict=[]
fps_total=0
img_total=0
for img in os.listdir(img_path_dir):
    # read each image
    img_path = img_path_dir + '/' + img
    frame_ori = cv2.imread(img_path)
    # first resize to a shape that LetterBox can pad to the required size:
    # e.g. (640, 480) -> (224, 192) works, but (1080, 720) -> (224, 160), and 160 does not satisfy 192, which raises an error
    frame = cv2.resize(frame_ori, (640, 480))
    # image preprocessing
im = LetterBox(224,True)(image=frame)
im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
im = np.ascontiguousarray(im) # contiguous
im = torch.from_numpy(im).to(device)
im = im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
    im = im.cpu().numpy()  # onnxruntime expects a numpy input
print(im.shape)
t1 = time.time()
    # run ONNX inference
preds = onnx_inf('yolov8/runs/detect/prune/step_0_finetune/weights/best.onnx', im)
print('preds',preds[0].shape)
preds = torch.from_numpy(preds[0])
fps = 1./(time.time()-t1)
# print("fps= %.2f"%(fps))
#nms
preds = non_max_suppression(preds,
0.3,
0.1,
agnostic=False,
max_det=300)
print('preds',preds)
for i, pred in enumerate(preds):
shape = frame_ori.shape
pred[:, :4] = ops.scale_boxes(im.shape[2:], pred[:, :4], shape).round()
print('pred',pred)
    fps_total = fps_total + fps
    img_total = img_total + 1
    # collect results in COCO json format
box = ops.xyxy2xywh(preds[0][:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
score = preds[0][:, 4]
category_id = preds[0][:, -1]
name2id_dict = get_name2id_map(anno_json)
    try:
        id = name2id_dict[img]
    except KeyError:  # skip images that are not in the annotation file
        continue
for box, src, cls in zip(box, score, category_id):
print('box',box)
        # append one detection record
jdict.append(
{
'image_id': id,
'category_id': int(cls),
'bbox': box.tolist(),
'score': float(src)
}
)
    # draw and save the result image
    box = preds[0][:, :4]  # xyxy, already rescaled to the original image
    score = preds[0][:, 4]
    category_id = preds[0][:, -1]
    box = box.tolist()
    score = score.tolist()
    category_id = category_id.tolist()
    for i in range(len(box)):
        cv2.rectangle(frame_ori, (int(box[i][0]), int(box[i][1])), (int(box[i][2]), int(box[i][3])), color[int(category_id[i])], 2)
        cv2.putText(frame_ori, 'pred_{}_{:.3f} '.format(names[int(category_id[i])], score[i]), (int(box[i][2]), int(box[i][3])), font, 0.5, color[int(category_id[i])], 2)
    cv2.imwrite(img_save_path_dir + '/{}'.format(img), frame_ori)  # boxes are in original-image coordinates, so draw on frame_ori, not the resized frame
print("img Detection Done!")
fps_mean = fps_total / img_total
print('mean fps:', fps_mean)
# save the prediction results as json
pred_json = r'yolov8/json/pred.json'
with open(pred_json, 'w') as fw:
    json.dump(jdict, fw, indent=4, ensure_ascii=False)
# COCO evaluation
anno = COCO(anno_json) # init annotations api
pred = anno.loadRes(pred_json) # init predictions api
coco_eval = COCOeval(anno, pred, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
for i in range(4):
    coco_eval.params.catIds = [i]  # per-category evaluation; add or remove category ids as needed
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()