Bootstrap

YOLO v8 在马赛克增强中加入负样本,解决误检问题

0. 前言
虽然v8支持直接训练“无标签的负样本”(v8中叫做background),但默认方式也只是混进正样本中一起练,这会导致新的误检图得不到充分训练
使用本文的“负样本马赛克”功能,可以保证每一轮、每一张图里全都有误检图,从而在较少的训练轮次中解决误检问题

使用例:
比如说你发现你模型把摩托车识别成人,那就把那几张摩托车的图放进neg_dir,练个30轮
出来就不会有摩托车误识别的问题了

【但要注意,放进neg_dir里的图 绝 对 不 能 有正样本(例如把摩托车误检成人的图里不能出现真的人)。不然模型会学到错误的负样本,反而会出大问题】

1. 加入新超参
在ultralytics/cfg/default.yaml(默认超参文件)中加入新超参"neg_dir"。
v8的结构是直接读取所有超参数,解析交给后面的函数

neg_dir: ''  # 负样本文件夹(str),其中只有负样本图片,没有标签(默认为空字符串,表示不启用负样本)

2. 读取新超参
超参在ultralytics/engine/trainer.py中的BaseTrainer类里初始化。
在该文件的108行左右,即BaseTrainer的__init__()方法中,加入下述内容,
目的是读取args里的neg_dir,之后超参数会随着trainer的属性hyp一层一层地传下去

 # Negative-sample directory: copy the `neg_dir` hyperparameter from the parsed args onto the trainer.
 self.neg_dir = self.args.neg_dir

3. 识别超参

在ultralytics/data/augment.py里的v8_transforms()里的Mosaic()。
增加一个输入neg_dir = hyp.neg_dir。

def v8_transforms(dataset, imgsz, hyp, stretch=False):
    """Convert images to a size suitable for YOLOv8 training."""
    # NOTE: excerpt only; the statements that follow the Compose([...]) below are not shown here.
    pre_transform = Compose([
        # Mosaic additionally receives the negative-sample directory and count read from hyp.
        Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic, neg_dir=hyp.neg_dir, neg_num = hyp.neg_num),
        CopyPaste(p=hyp.copy_paste),
        RandomPerspective(
            degrees=hyp.degrees,
            translate=hyp.translate,
            scale=hyp.scale,
            shear=hyp.shear,
            perspective=hyp.perspective,
            # Skip the letterbox pre-transform when stretch is requested.
            pre_transform=None if stretch else LetterBox(new_shape=(imgsz, imgsz)),
        )])

再到Mosaic()类中的__init__中,添加识别此参数的代码

    def __init__(self, dataset, imgsz=640, p=1.0, n=4, neg_dir='', neg_num=''):
        """
        Initialize the mosaic transform, optionally with a pool of label-free negative images.

        Args:
            dataset: Dataset the mosaic tiles are drawn from.
            imgsz (int): Side length of one tile; the border is derived as -imgsz // 2.
            p (float): Probability of applying the transform, must lie in [0, 1].
            n (int): Grid size, either 4 or 9.
            neg_dir (str | None): Directory that contains only negative images (no labels). An empty
                value, None, or a path that is not a directory disables negative sampling.
            neg_num (int | str | None): Negative images per mosaic. A non-negative value inserts a fixed
                count, a negative value inserts a random count in [0, abs(neg_num)]. An empty string or
                None falls back to 2.

        Raises:
            ValueError: If neg_num is neither empty nor convertible to int.
        """
        assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.'
        assert n in (4, 9), 'grid must be equal to 4 or 9.'
        super().__init__(dataset=dataset, p=p)
        self.dataset = dataset
        self.imgsz = imgsz
        self.border = (-imgsz // 2, -imgsz // 2)  # width, height
        self.n = n

        # int('') raises ValueError, so an unset value must map to the default of 2 explicitly.
        self.neg_num = 2 if neg_num is None or neg_num == '' else int(neg_num)
        self.img_neg_files = []  # paths of the negative images; stays empty when the feature is off

        if neg_dir and os.path.isdir(neg_dir):
            # Keep regular image files only: sub-directories or stray files (e.g. Thumbs.db) would make
            # cv2.imread return None later and abort training in the middle of an epoch.
            image_exts = ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff', '.webp')
            self.img_neg_files = sorted(
                path
                for path in (os.path.join(neg_dir, name) for name in os.listdir(neg_dir))
                if path.lower().endswith(image_exts) and os.path.isfile(path)
            )
            logging.info(
                colorstr("Negative dir: ")
                + f"'{neg_dir}', using {len(self.img_neg_files)} pictures from the dir as negative samples during training"
            )
        elif isinstance(neg_dir, str) and neg_dir.strip():
            # A configured but missing directory is most likely a typo; say so instead of silently
            # training without negatives.
            logging.warning(f"Negative dir '{neg_dir}' is not a directory, negative-sample mosaic is disabled")

#################################################################################
    def get_indexes(self, buffer=True):
        """
        Pick the indexes of the n - 1 extra images that complete the mosaic.

        Args:
            buffer (bool): When True, draw (with replacement) from the dataset buffer; otherwise draw
                uniformly from the whole dataset index range.

        Returns:
            (list): n - 1 randomly chosen indexes.
        """
        extra = self.n - 1
        if not buffer:
            # Any image of the dataset may be selected.
            upper = len(self.dataset) - 1
            return [random.randint(0, upper) for _ in range(extra)]
        # Restrict the choice to images currently held in the buffer.
        candidates = list(self.dataset.buffer)
        return random.choices(candidates, k=extra)

    def _mix_transform(self, labels):
        """Apply mixup transformation to the input image and labels."""
        assert labels.get('rect_shape', None) is None, 'rect and mosaic are mutually exclusive.'
        assert len(labels.get('mix_labels', [])), 'There are no other images for mosaic augment.'
        return self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels)

    # 加入负样本的mosaic4
    def _mosaic4(self, labels):
        """Create a 2x2 image mosaic."""
        mosaic_labels = []
        s = self.imgsz
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, y
        # 如果有负样本的话,从负样本数据集中随机抽取0~2张图
        neg_max_in = 2
        # 若负样文件夹长度为0,则放入数为0,放入列表也为0
        # num_neg = random.randint(0, neg_max_in) if len(self.img_neg_files) else 0  # 随机放入图(当前为0 ~ neg_max_in个)
        #num_neg = neg_max_in if len(self.img_neg_files) else 0  # 固定放入num_neg个

        # 负样方案:替换放入mosaic的图、宽高、标签
        # 增加修改放入数量判断、neg_num为负数时放入0~neg_num、正数时放入neg_num固定个
        if self.neg_num < 0:
            # 若负样文件夹长度为0,则放入数为0,放入列表也为0
            num_neg = random.randint(0, abs(self.neg_num)) if len(self.img_neg_files) else 0  # 放入随机数量(0 ~ neg_num)
        else:
            num_neg = self.neg_num if len(self.img_neg_files) else 0  # 放入固定数量num_neg个


        neg_img_url_list, neg_in_num = [], []
        if num_neg != 0:
            #print("放入这次mosaic的负样本个数", num_neg)
            neg_img_url_list = random.sample(range(len(self.img_neg_files)), num_neg)  # 从所有负样本图里,随机取出num_neg个
            neg_in_num = random.sample(range(0, 4), num_neg)  # 也许应考虑neg_in_num=0时,覆盖原图的情况
            # 方案:替换放入mosaic的0~2张图、宽高、标签

        for i in range(4):
            labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
            # Load image
            img = labels_patch['img']
            h, w = labels_patch.pop('resized_shape')
            neg_flag = num_neg != 0 and i in neg_in_num  # bool,判断是否放入负样本
            if neg_flag:
                img_url = self.img_neg_files[neg_img_url_list.pop()]  # 取出待输入的负样图片地址
                img = cv2.imread(img_url)  # BGR
                if img is None:
                    raise FileNotFoundError(f'Image Not Found {img_url}')
                # 负样本图片处理(主要是缩放)
                h0, w0 = img.shape[:2]  # 原始宽高
                r = self.imgsz / max(h0, w0)  # 宽高比
                if r != 1:  # 如果宽高不相等
                    interp = cv2.INTER_LINEAR  # if (self.augment or r > 1) else cv2.INTER_AREA
                    img = cv2.resize(img, (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz)),
                                     interpolation=interp)  # 覆盖图片
                h, w = img.shape[:2]  # 覆盖宽高

            # Place img in img4
            if i == 0:  # top left
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            if not neg_flag:  # 覆盖标签,为负样本时不进行标签合并
                labels_patch = self._update_labels(labels_patch, padw, padh)
                mosaic_labels.append(labels_patch)
        final_labels = self._cat_labels(mosaic_labels)
        final_labels['img'] = img4

        return final_labels

    #######-- 负样本的mosaic4-----------------------

4. 更改_mosaic4()

更改ultralytics/data/augment.py里的Mosaic()里的_mosaic4()方法(和上文代码在同一文件),加入下文高亮内容。
加入负样本识别/处理代码,当放入负样本时,覆盖图片、标签、宽高为负样本的数据.

# 加入负样本的mosaic4
def _mosaic4(self, labels):
    """Create a 2x2 image mosaic."""
    mosaic_labels = []
    s = self.imgsz
    yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, y
    # 如果有负样本的话,从负样本数据集中随机抽取0~2张图
    neg_max_in = 2
    # 若负样文件夹长度为0,则放入数为0,放入列表也为0
    # num_neg = random.randint(0, neg_max_in) if len(self.img_neg_files) else 0  # 随机放入图(当前为0 ~ neg_max_in个)
    num_neg = neg_max_in if len(self.img_neg_files) else 0  # 固定放入num_neg个
    
    neg_img_url_list, neg_in_num = [], []
    if num_neg != 0:
        # print("放入这次mosaic的负样本个数", num_neg)
        neg_img_url_list = random.sample(range(len(self.img_neg_files)), num_neg)  # 从所有负样本图里,随机取出num_neg个
        neg_in_num = random.sample(range(0, 4), num_neg)  # 也许应考虑neg_in_num=0时,覆盖原图的情况
        # 方案:替换放入mosaic的0~2张图、宽高、标签

    for i in range(4):
        labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
        # Load image
        img = labels_patch['img']
        h, w = labels_patch.pop('resized_shape')
        neg_flag = num_neg != 0 and i in neg_in_num  # bool,判断是否放入负样本
        if neg_flag:
            img_url = self.img_neg_files[neg_img_url_list.pop()]  # 取出待输入的负样图片地址
            img = cv2.imread(img_url)  # BGR
            if img is None:
                raise FileNotFoundError(f'Image Not Found {img_url}')
            # 负样本图片处理(主要是缩放)
            h0, w0 = img.shape[:2]  # 原始宽高
            r = self.imgsz / max(h0, w0)  # 宽高比
            if r != 1:  # 如果宽高不相等
                interp = cv2.INTER_LINEAR  # if (self.augment or r > 1) else cv2.INTER_AREA
                img = cv2.resize(img, (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz)),
                                 interpolation=interp)  # 覆盖图片
            h, w = img.shape[:2]  # 覆盖宽高

        # Place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        if not neg_flag:  # 覆盖标签,为负样本时不进行标签合并
            labels_patch = self._update_labels(labels_patch, padw, padh)
            mosaic_labels.append(labels_patch)
    final_labels = self._cat_labels(mosaic_labels)
    final_labels['img'] = img4
    
    return final_labels

5. 使用负样本马赛克功能进行训练

在工程中,新建一个train.py文件,以如下形式填入相关超参:

from ultralytics import YOLO

# Guard the entry point: on spawn-based platforms (e.g. Windows) worker processes re-import this
# module, and unguarded training code would then run again inside every worker.
if __name__ == "__main__":
    # Start from pretrained weights and train with the negative-sample mosaic enabled.
    model = YOLO("weights/yolov8s.pt")
    results = model.train(
        data="dataset/test_dataset.yaml",  # dataset yaml
        neg_dir="neg_dir/person_neg",  # directory with label-free negative images
        neg_num=-1,  # negatives per mosaic; negative value => random count in [0, abs(neg_num)]
    )

也可以以命令行的形式调用,使用方式与其他超参类似。但命令行训练容易让训练的可复现性、可读性降低,个人不推荐

EX 1. 加入“负样本加入数”超参
简单来说,负样本加入数(即neg_num)是为了“在不同的训练轮数/策略下更改负样本加入的数量”
例如:
在出现误检模型的基础上,快速训练30轮压误检时:每轮都加负样本
在重新完整训练300/500轮时:随机不加/加点负样本,以保证模型正负样本都充分学习

而在上述情况下,推荐的neg_num设置为:
继续训练30轮————正数2(每次马赛克固定放入2张负样本)
重新训练300轮————负数,例如-1(每次马赛克随机放入0~1张负样本;负样本很多时推荐为-1)

下为具体加入方法:
在ultralytics/cfg/default.yaml(默认超参文件)中加入新超参"neg_num"。

neg_num: -2  # 负样本加入个数(int),设置一次马赛克中加入的负样本数量[负数时随机放入0~|neg_num|张负样本、正数时固定放入neg_num张负样本]

超参在ultralytics/engine/trainer.py中的class BaseTrainer初始化,
在ultralytics/engine/trainer.py的154行左右,即BaseTrainer的__init__()方法中,加入下述内容。
目的是用以读取args里的neg_num
 

# Negative-sample count: copy the `neg_num` hyperparameter from the parsed args onto the trainer.
self.neg_num = self.args.neg_num
在ultralytics/data/augment.py里的v8_transforms()里的Mosaic()。
增加一个输入neg_num = hyp.neg_num。
def v8_transforms(dataset, imgsz, hyp, stretch=False):
    """Convert images to a size suitable for YOLOv8 training."""
    pre_transform = Compose([
        # 加入负样本文件夹地址输入
        Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic, neg_dir=hyp.neg_dir, neg_num = hyp.neg_num),

再到Mosaic()类中的__init__中,添加识别此参数的代码

def __init__(self, dataset, imgsz=640, p=1.0, n=4, neg_dir='', neg_num=''):
    """
    Initialize the mosaic transform and read the negative-sample count.

    Args:
        dataset: Dataset the mosaic tiles are drawn from.
        imgsz (int): Side length of one tile; the border is derived as -imgsz // 2.
        p (float): Probability of applying the transform, must lie in [0, 1].
        n (int): Grid size, either 4 or 9.
        neg_dir (str): Negative-sample directory; not used in this excerpt.
        neg_num (int | str | None): Negative images per mosaic. A non-negative value inserts a fixed
            count, a negative value inserts a random count in [0, abs(neg_num)]. An empty string or
            None falls back to 2.

    Raises:
        ValueError: If neg_num is neither empty nor convertible to int.
    """
    assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.'
    assert n in (4, 9), 'grid must be equal to 4 or 9.'
    super().__init__(dataset=dataset, p=p)
    self.dataset = dataset
    self.imgsz = imgsz
    self.border = (-imgsz // 2, -imgsz // 2)  # width, height
    self.n = n
    # int('') raises ValueError, so an unset value must map to the default of 2 explicitly.
    self.neg_num = 2 if neg_num is None or neg_num == '' else int(neg_num)

更改ultralytics/data/augment.py里的Mosaic()里的_mosaic4()方法(和上文代码在同一文件),注释加粗内容,加入高亮内容。
加入“负样本加入数”的控制。为正数时将放入固定neg_num张负样本;为负数时随机放入0~|neg_num|张负样本

# 加入负样本的mosaic4
def _mosaic4(self, labels):
    """Create a 2x2 image mosaic (excerpt: only the negative-sample count selection is shown)."""
    mosaic_labels = []
    s = self.imgsz
    yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, y

    # Previous logic, now commented out: take a hard-coded 0~2 images from the negative set.
    # neg_max_in = 2
    # If the negative list is empty, the count is 0.
    # num_neg = random.randint(0, neg_max_in) if len(self.img_neg_files) else 0  # random count (0 ~ neg_max_in)
    # num_neg = neg_max_in if len(self.img_neg_files) else 0  # fixed count

    # New logic: the count is driven by self.neg_num.
    # A negative neg_num requests a random count in [0, abs(neg_num)]; otherwise exactly neg_num.
    if self.neg_num < 0:
        # With no negative images available the count is 0.
        num_neg = random.randint(0, abs(self.neg_num)) if len(self.img_neg_files) else 0  # random count (0 ~ abs(neg_num))
    else:
        num_neg = self.neg_num if len(self.img_neg_files) else 0  # fixed count of neg_num

YOLO v8 在马赛克增强中加入负样本,解决误检问题_yolov8 负样本-CSDN博客

;