0. 前言
虽然v8支持直接训练“无标签的负样本”(v8中叫做background),但默认方式也只是混进正样本中一起练,这会导致新的误检图得不到充分训练
使用本文的“负样本马赛克”功能,可以保证每一轮、每一张图里全都有误检图。能够在较少的训练轮次中,解决误检问题
使用例:
比如说你发现你模型把摩托车识别成人,那就把那几张摩托车的图放进neg_dir,练个30轮
出来就不会有摩托车误识别的问题了
【但要注意,放进neg_dir里的图 绝 对 不 能 有正样本(例如把摩托车误检成人的图里不能出现真的人)。不然模型会学到错误的负样本,反而会出大问题】
1. 加入新超参
在ultralytics/cfg/default.yaml(默认超参文件)中加入新超参"neg_dir"。
v8的结构是直接读取所有超参数,解析交给后面的函数
neg_dir: ' ' # 负样本文件夹(str),其中只有负样本图片,没有标签(默认为空)
2. 读取新超参
超参在ultralytics/engine/trainer.py中的BaseTrainer()初始化,
在ultralytics/engine/trainer.py的108行左右,即BaseTrainer的__init__()方法中,加入下述内容。
目的是读取args里的neg_dir(108行左右改为下述内容),之后超参数会随着trainer的属性hyp一层一层的传下去
# 负样本文件夹——初始化
self.neg_dir = self.args.neg_dir
3. 识别超参
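在ultralytics/data/augment.py的v8_transforms()中,找到Mosaic()的调用,
为其增加一个输入neg_dir = hyp.neg_dir。(注意:下方代码已同时包含neg_num参数,该超参在“EX 1”中才会加入default.yaml;若尚未完成EX 1,hyp中还不存在neg_num这一项,请先去掉代码里与neg_num相关的内容。)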
def v8_transforms(dataset, imgsz, hyp, stretch=False):
    """
    Convert images to a size suitable for YOLOv8 training.

    Only the opening of the function is shown here: it builds the ``pre_transform`` pipeline,
    whose Mosaic stage additionally receives ``hyp.neg_dir`` and ``hyp.neg_num``.
    """
    pre_transform = Compose([
        # Mosaic is handed the negative-image folder and the per-mosaic negative count from hyp.
        Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic, neg_dir=hyp.neg_dir, neg_num = hyp.neg_num),
        CopyPaste(p=hyp.copy_paste),
        RandomPerspective(
            degrees=hyp.degrees,
            translate=hyp.translate,
            scale=hyp.scale,
            shear=hyp.shear,
            perspective=hyp.perspective,
            # No letterboxing before the perspective step when stretch is requested.
            pre_transform=None if stretch else LetterBox(new_shape=(imgsz, imgsz)),
        )])
再到Mosaic()类中的__init__中,添加识别此参数的代码
def __init__(self, dataset, imgsz=640, p=1.0, n=4, neg_dir='', neg_num=''):
    """
    Initialize the mosaic transform together with an optional pool of negative images.

    Args:
        dataset: Dataset the mosaic tiles are drawn from.
        imgsz (int): Side length used for one tile; ``border`` is derived from it.
        p (float): Probability passed to the parent transform, must be in [0, 1].
        n (int): Grid size, either 4 or 9.
        neg_dir (str | None): Folder holding label-free negative images. An empty value, None or
            a path that is not a directory leaves the negative pool empty.
        neg_num (int | str | None): Number of negatives per mosaic. Positive means a fixed count,
            negative means a random count from 0 to ``abs(neg_num)``. An empty value or None
            falls back to 2.

    Raises:
        AssertionError: If ``p`` is outside [0, 1] or ``n`` is not 4 or 9.
        ValueError: If ``neg_num`` is non-empty but cannot be converted to int.
    """
    # Imported locally so the snippet works even if the host module does not import them.
    import logging
    import os

    assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.'
    assert n in (4, 9), 'grid must be equal to 4 or 9.'
    super().__init__(dataset=dataset, p=p)
    self.dataset = dataset
    self.imgsz = imgsz
    self.border = (-imgsz // 2, -imgsz // 2)  # width, height
    self.n = n

    # int('') raises ValueError, so an unset value has to be mapped to the default of 2 first.
    if neg_num is None or str(neg_num).strip() == '':
        self.neg_num = 2
    else:
        self.neg_num = int(neg_num)

    # Paths of the negative images; stays empty when no usable folder is given.
    self.img_neg_files = []
    if neg_dir and os.path.isdir(neg_dir):
        # Keep image files only: a stray non-image file or sub-folder would otherwise be picked
        # at random later and make cv2.imread return None in the middle of training.
        image_exts = ('.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.pfm', '.png', '.tif', '.tiff', '.webp')
        self.img_neg_files = sorted(
            os.path.join(neg_dir, name)
            for name in os.listdir(neg_dir)
            if name.lower().endswith(image_exts)
        )
        logging.info(
            colorstr("Negative dir: ")
            + f"'{neg_dir}', using {len(self.img_neg_files)} pictures from the dir as negative samples during training"
        )
#################################################################################
def get_indexes(self, buffer=True):
    """
    Pick the indexes of the extra images that fill the remaining mosaic tiles.

    Args:
        buffer (bool): When true, draw from ``self.dataset.buffer``; otherwise draw random
            positions from the whole dataset.

    Returns:
        (list): ``self.n - 1`` entries, possibly with repeats.
    """
    extra = self.n - 1  # the primary image already occupies one tile
    if not buffer:
        last = len(self.dataset) - 1
        return [random.randint(0, last) for _ in range(extra)]
    pool = list(self.dataset.buffer)
    return random.choices(pool, k=extra)
def _mix_transform(self, labels):
    """
    Build a mosaic from ``labels`` using the 2x2 or 3x3 layout selected by ``self.n``.

    Args:
        labels (dict): Labels of the primary image; must carry a non-empty ``mix_labels`` list and
            must not carry a ``rect_shape``.

    Returns:
        Whatever ``self._mosaic4`` (n == 4) or ``self._mosaic9`` (otherwise) returns.

    Raises:
        AssertionError: If ``rect_shape`` is set or ``mix_labels`` is missing or empty.
    """
    rect_shape = labels.get('rect_shape', None)
    assert rect_shape is None, 'rect and mosaic are mutually exclusive.'
    extra_images = labels.get('mix_labels', [])
    assert len(extra_images), 'There are no other images for mosaic augment.'
    if self.n == 4:
        return self._mosaic4(labels)
    return self._mosaic9(labels)
# 加入负样本的mosaic4
def _mosaic4(self, labels):
    """
    Create a 2x2 image mosaic, optionally replacing some tiles with label-free negative images.

    The number of negative tiles follows ``self.neg_num``: a positive value requests exactly that
    many, a negative value requests a random count between 0 and ``abs(self.neg_num)``. The request
    is capped at the 4 available tiles and at the number of files in ``self.img_neg_files``; with
    no negative files no tile is replaced.

    Args:
        labels (dict): Labels of the primary image. ``img`` and ``resized_shape`` are read from it
            and from the first three entries of ``labels['mix_labels']``.

    Returns:
        (dict): Result of ``self._cat_labels`` over the tiles that kept their own image, with
            ``img`` set to the mosaic canvas of size (2 * imgsz, 2 * imgsz).

    Raises:
        FileNotFoundError: If a selected negative image cannot be read by ``cv2.imread``.
    """
    mosaic_labels = []
    s = self.imgsz
    yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, y

    # random.sample raises ValueError when asked for more items than exist, so the requested
    # count is capped by both the 4 tiles and the number of negative files.
    limit = min(abs(self.neg_num), 4, len(self.img_neg_files))
    if limit and self.neg_num < 0:
        num_neg = random.randint(0, limit)  # random count in 0..limit
    else:
        num_neg = limit  # fixed count (0 when there are no negative files)

    # Map tile position -> negative image path for the tiles that get replaced.
    # NOTE(review): if every tile is replaced, mosaic_labels stays empty; confirm that
    # _cat_labels copes with an empty list before using neg_num >= 4.
    neg_by_tile = {}
    if num_neg:
        neg_files = random.sample(self.img_neg_files, num_neg)
        neg_tiles = random.sample(range(4), num_neg)
        neg_by_tile = dict(zip(neg_tiles, neg_files))

    for i in range(4):
        labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
        # Load image
        img = labels_patch['img']
        h, w = labels_patch.pop('resized_shape')

        neg_path = neg_by_tile.get(i)
        neg_flag = neg_path is not None  # whether this tile shows a negative image
        if neg_flag:
            img = cv2.imread(neg_path)  # BGR
            if img is None:
                raise FileNotFoundError(f'Image Not Found {neg_path}')
            # Scale the negative so that its longer side equals imgsz.
            h0, w0 = img.shape[:2]  # original height, width
            r = self.imgsz / max(h0, w0)  # scale factor
            if r != 1:  # longer side is not already imgsz
                img = cv2.resize(
                    img,
                    (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz)),
                    interpolation=cv2.INTER_LINEAR,
                )
            h, w = img.shape[:2]  # tile size now comes from the negative image

        # Place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # A negative tile contributes pixels only; its slot's original labels are dropped.
        if not neg_flag:
            labels_patch = self._update_labels(labels_patch, padw, padh)
            mosaic_labels.append(labels_patch)
    final_labels = self._cat_labels(mosaic_labels)
    final_labels['img'] = img4
    return final_labels
#######-- 负样本的mosaic4-----------------------
4. 更改_mosaic4()
更改ultralytics/data/augment.py里的Mosaic()里的_mosaic4()方法(和上文代码在同一文件),加入下文高亮内容。
加入负样本识别/处理代码,当放入负样本时,覆盖图片、标签、宽高为负样本的数据.
# 加入负样本的mosaic4
def _mosaic4(self, labels):
    """
    Create a 2x2 image mosaic in which up to two tiles are replaced by negative images.

    When ``self.img_neg_files`` is non-empty, ``neg_max_in`` (2) tiles are replaced by randomly
    chosen negative images, capped at the number of available negative files. Replaced tiles
    contribute pixels only, no labels.

    Args:
        labels (dict): Labels of the primary image. ``img`` and ``resized_shape`` are read from it
            and from the first three entries of ``labels['mix_labels']``.

    Returns:
        (dict): Result of ``self._cat_labels`` over the tiles that kept their own image, with
            ``img`` set to the mosaic canvas of size (2 * imgsz, 2 * imgsz).

    Raises:
        FileNotFoundError: If a selected negative image cannot be read by ``cv2.imread``.
    """
    mosaic_labels = []
    s = self.imgsz
    yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, y

    neg_max_in = 2  # negatives requested per mosaic
    # random.sample raises ValueError when asked for more items than exist, so a folder with a
    # single negative image must lower the count instead of crashing.
    num_neg = min(neg_max_in, len(self.img_neg_files))

    # Map tile position -> negative image path for the tiles that get replaced.
    neg_by_tile = {}
    if num_neg:
        neg_files = random.sample(self.img_neg_files, num_neg)
        neg_tiles = random.sample(range(4), num_neg)
        neg_by_tile = dict(zip(neg_tiles, neg_files))

    for i in range(4):
        labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
        # Load image
        img = labels_patch['img']
        h, w = labels_patch.pop('resized_shape')

        neg_path = neg_by_tile.get(i)
        neg_flag = neg_path is not None  # whether this tile shows a negative image
        if neg_flag:
            img = cv2.imread(neg_path)  # BGR
            if img is None:
                raise FileNotFoundError(f'Image Not Found {neg_path}')
            # Scale the negative so that its longer side equals imgsz.
            h0, w0 = img.shape[:2]  # original height, width
            r = self.imgsz / max(h0, w0)  # scale factor
            if r != 1:  # longer side is not already imgsz
                img = cv2.resize(
                    img,
                    (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz)),
                    interpolation=cv2.INTER_LINEAR,
                )
            h, w = img.shape[:2]  # tile size now comes from the negative image

        # Place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # A negative tile contributes pixels only; its slot's original labels are dropped.
        if not neg_flag:
            labels_patch = self._update_labels(labels_patch, padw, padh)
            mosaic_labels.append(labels_patch)
    final_labels = self._cat_labels(mosaic_labels)
    final_labels['img'] = img4
    return final_labels
5. 使用负样本马赛克功能进行训练
在工程中,新建一个train.py文件,以如下形式填入相关超参:
# Train with the negative-sample mosaic enabled.
# NOTE: YOLO must already be in scope (e.g. `from ultralytics import YOLO`) before this runs.
model = YOLO("weights/yolov8s.pt")
results = model.train(data="dataset/test_dataset.yaml", # dataset yaml
                      neg_dir="neg_dir/person_neg", # folder of label-free negative images
                      neg_num=-1, # negatives per mosaic: negative -> random 0..|n|, positive -> fixed n
                      )
也可以以命令行的形式调用,使用方式与其他超参类似。但命令行训练容易让训练的可复现性、可读性降低,个人不推荐
EX 1. 加入“负样本加入数”超参
简单来说,负样本加入数(即neg_num)是为了“在不同的训练轮数/策略下更改负样本加入的数量”
例如:
在出现误检模型的基础上,快速训练30轮压误检时:每轮都加负样本
在重新完整训练300/500轮时:随机不加/加点负样本,以保证模型正负样本都充分学习
而在上述情况下,推荐的neg_num设置为:
继续训练30轮————正数2
重新训练300轮————负数负1 (负样本很多时推荐为-1)
下为具体加入方法:
在ultralytics/cfg/default.yaml(默认超参文件)中加入新超参"neg_num"。
neg_num: -2 # 负样本加入个数(int),即每次马赛克中放入的负样本数量[负数时随机放入0~|neg_num|张负样本、正数时固定放入neg_num张负样本]
超参在ultralytics/engine/trainer.py中的class BaseTrainer初始化,
在ultralytics/engine/trainer.py的154行左右,即BaseTrainer的__init__()方法中,加入下述内容。
目的是用以读取args里的neg_num
# 负样本加入数——初始化
self.neg_num = self.args.neg_num
在ultralytics/data/augment.py里的v8_transforms()里的Mosaic()。
增加一个输入neg_num = hyp.neg_num。
def v8_transforms(dataset, imgsz, hyp, stretch=False):
"""Convert images to a size suitable for YOLOv8 training."""
pre_transform = Compose([
# 加入负样本文件夹地址输入
Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic, neg_dir=hyp.neg_dir, neg_num = hyp.neg_num),
再到Mosaic()类中的__init__中,添加识别此参数的代码
def __init__(self, dataset, imgsz=640, p=1.0, n=4, neg_dir='', neg_num=''):
    """
    Initialize the mosaic transform and read the negatives-per-mosaic setting.

    This excerpt covers only the start of the initializer; ``neg_dir`` is accepted but not
    processed in the lines shown.

    Args:
        dataset: Dataset the mosaic tiles are drawn from.
        imgsz (int): Side length used for one tile; ``border`` is derived from it.
        p (float): Probability passed to the parent transform, must be in [0, 1].
        n (int): Grid size, either 4 or 9.
        neg_dir (str): Folder holding label-free negative images.
        neg_num (int | str | None): Number of negatives per mosaic. An empty value or None falls
            back to 2.

    Raises:
        AssertionError: If ``p`` is outside [0, 1] or ``n`` is not 4 or 9.
        ValueError: If ``neg_num`` is non-empty but cannot be converted to int.
    """
    assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.'
    assert n in (4, 9), 'grid must be equal to 4 or 9.'
    super().__init__(dataset=dataset, p=p)
    self.dataset = dataset
    self.imgsz = imgsz
    self.border = (-imgsz // 2, -imgsz // 2)  # width, height
    self.n = n
    # int('') raises ValueError, so an unset value has to be mapped to the default of 2 first.
    if neg_num is None or str(neg_num).strip() == '':
        self.neg_num = 2
    else:
        self.neg_num = int(neg_num)
更改ultralytics/data/augment.py里的Mosaic()里的_mosaic4()方法(和上文代码在同一文件),注释加粗内容,加入高亮内容。
加入“负样本加入数”的控制。为正数时将放入固定neg_num张的负样本;为负数时放入随机0~neg_num张的负样本
# 加入负样本的mosaic4
def _mosaic4(self, labels):
"""Create a 2x2 image mosaic."""
mosaic_labels = []
s = self.imgsz
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border) # mosaic center x, y
# 如果有负样本的话,从负样本数据集中随机抽取0~2张图
# neg_max_in = 2
# 若负样文件夹长度为0,则放入数为0,放入列表也为0
# num_neg = random.randint(0, neg_max_in) if len(self.img_neg_files) else 0 # 随机放入图(当前为0 ~ neg_max_in个)
# num_neg = neg_max_in if len(self.img_neg_files) else 0 # 固定放入num_neg个
# 负样方案:替换放入mosaic的图、宽高、标签
# 增加修改放入数量判断、neg_num为负数时放入0~neg_num、正数时放入neg_num固定个
if self.neg_num < 0:
# 若负样文件夹长度为0,则放入数为0,放入列表也为0
num_neg = random.randint(0, abs(self.neg_num)) if len(self.img_neg_files) else 0 # 放入随机数量(0 ~ neg_num)
else:
num_neg = self.neg_num if len(self.img_neg_files) else 0 # 放入固定数量num_neg个