目录
前言
在目标检测任务中,模型的训练依赖于大量高质量的标注数据。然而,获取足够多的标注数据集往往代价高昂,并且某些情况下,数据集中的样本分布不均衡,这会导致模型的泛化能力不足。为此,数据增强成为提升模型性能的常用方法之一。
在数据增强的各种方法中,裁剪是一种高效且常用的技术。通过对图片进行随机或特定区域的裁剪,能够生成更多的训练样本,提高模型对不同尺度和位置目标的鲁棒性。此外,裁剪还可以有效解决原始图片尺寸过大的问题,避免对训练性能造成影响。大尺寸图片会增加模型的计算开销,导致训练速度变慢、内存消耗增大。而通过裁剪,图片尺寸得以缩小,训练效率随之提升。
然而,裁剪不仅仅是简单地对图片进行操作,目标检测任务中,每个目标的边界框(标签)必须与图片裁剪过程同步更新,否则裁剪后的标签将失去准确性,影响模型的训练效果。本篇文章将重点介绍如何在进行目标检测数据集图片裁剪时,同时对目标的标签进行精确调整,确保裁剪后的图片和标签保持一致,从而构建高质量的增强数据集,助力模型的准确性与泛化能力提升。
具体方法
本文采用的裁剪方式是:将图片沿宽度和高度方向分别从中间切开,把一张图片分为左上、右上、左下、右下四个部分;只保留含有标签的部分,不含标签的部分则丢弃。
def crop_image(image, save_dir, name, suf, boxes):
    """Save the flagged quadrants of ``image`` as separate files.

    The image is cut once at ``H // 2`` and once at ``W // 2``. Quadrant
    ``k`` (1 = top-left, 2 = top-right, 3 = bottom-left, 4 = bottom-right)
    is written to ``<save_dir>/<name>_<k><suf>`` when ``boxes[k - 1] == 1``.

    Args:
        image: Array indexed as ``[row, col]`` or ``[row, col, channel]``.
        save_dir: Directory the crops are written to.
        name: File name stem used for every crop.
        suf: File extension including the leading dot, e.g. ``".jpg"``.
        boxes: Four flags in the quadrant order above; ``1`` means "save".
    """
    # Only the first two dimensions are needed; reading them with a slice
    # keeps single-channel (2-D) images from failing a 3-way unpack.
    H, W = image.shape[:2]
    mid_y, mid_x = H // 2, W // 2
    quadrants = (
        (slice(0, mid_y), slice(0, mid_x)),   # top-left
        (slice(0, mid_y), slice(mid_x, W)),   # top-right
        (slice(mid_y, H), slice(0, mid_x)),   # bottom-left
        (slice(mid_y, H), slice(mid_x, W)),   # bottom-right
    )
    for index, (rows, cols) in enumerate(quadrants):
        if boxes[index] != 1:
            continue
        save_path = os.path.join(save_dir, f"{name}_{index + 1}{suf}")
        # cv.imwrite reports failure through its boolean return value;
        # surface it instead of silently losing the crop.
        if not cv.imwrite(save_path, image[rows, cols]):
            print(f"Failed to write {save_path}")
这部分是裁剪图片的函数定义,可以看到函数只会保存含有标签的那部分图像。
def split_box(box, shape):
    """Split one pixel-space box across the four image quadrants.

    The cut lines are ``W // 2`` and ``H // 2``, the same integer positions
    ``crop_image`` slices at, so the labels stay aligned with the saved
    crops even when the image width or height is odd.

    Args:
        box: ``(class_id, xmin, ymin, xmax, ymax)`` in pixels of the full
            image.
        shape: Image shape; ``shape[0]`` is the height and ``shape[1]`` the
            width.

    Returns:
        A list of ``(class_id, x_center, y_center, width, height, tag)``
        tuples, one per quadrant the box overlaps with non-zero area. The
        four numbers are normalised by that quadrant's own size and ``tag``
        is ``'left_top'``, ``'right_top'``, ``'left_bottom'`` or
        ``'right_bottom'``.
    """
    H, W = shape[0], shape[1]
    n, xmin, ymin, xmax, ymax = box
    split_x, split_y = W // 2, H // 2
    # (tag, x0, y0, x1, y1): pixel extent of every quadrant in the full image.
    quadrants = (
        ('left_top', 0, 0, split_x, split_y),
        ('right_top', split_x, 0, W, split_y),
        ('left_bottom', 0, split_y, split_x, H),
        ('right_bottom', split_x, split_y, W, H),
    )
    normalized_regions = []
    for tag, x0, y0, x1, y1 in quadrants:
        # Intersect the box with the quadrant.
        left, top = max(x0, xmin), max(y0, ymin)
        right, bottom = min(x1, xmax), min(y1, ymax)
        if right <= left or bottom <= top:
            continue  # no overlap (or a zero-area sliver): nothing to label
        # Shift into the quadrant's own coordinate frame before normalising.
        local_box = (left - x0, top - y0, right - x0, bottom - y0)
        normalized_box = xyxy2xywhn((x1 - x0, y1 - y0), local_box)
        normalized_regions.append((n, *normalized_box, tag))
    return normalized_regions
这部分函数用于裁剪标签。这里我将每个标签按所在区域完整地分割开来,避免位于图片中间交界处的标签丢失;同时为每个分割后的标签附加一个区域标志,函数运行结束后连同标志一并返回。这些标志经过处理后传入图片裁剪函数,就可以知道哪些部分的图片需要保留,也避免了图片的重复写入。
使用介绍
完整代码如下,修改其中的路径部分即可使用。需要注意,本文仅支持 YOLO 格式的 txt 标签文件;如果对格式转换不熟悉,可以参考我下面这篇文章,里面有相关的介绍及方法。
完整代码
# 作者:CSDN-笑脸惹桃花 https://blog.csdn.net/qq_67105081?type=blog
# github:peng-xiaobai https://github.com/peng-xiaobai/Data-Augmentor
import os
import numpy as np
import cv2 as cv
def GetFileList(dir):
    """Return the paths of the image files directly inside ``dir``.

    A file counts as an image when its name ends with ``.png``, ``.jpg``,
    ``.jpeg``, ``.bmp`` or ``.tif``; the comparison ignores letter case so
    names such as ``photo.JPG`` are included. Sub-directories are not
    descended into.

    Args:
        dir: Directory to list.

    Returns:
        A list of ``os.path.join(dir, file_name)`` strings, sorted by file
        name so the processing order is the same on every run.
    """
    extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif')
    return [
        os.path.join(dir, entry)
        for entry in sorted(os.listdir(dir))
        if entry.lower().endswith(extensions)
    ]
def xywhn2xyxy(x, w, h):
    """Convert a normalised centre/size box to pixel corner coordinates.

    Args:
        x: Indexable whose first four items are the normalised centre x,
            centre y, box width and box height. Any further items are
            carried over unchanged.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        A new container whose first four items are ``xmin, ymin, xmax,
        ymax`` in pixels: an ndarray for ndarray input, ``x.copy()`` for
        inputs that offer ``copy``, otherwise a list. ``x`` itself is never
        modified.
    """
    if isinstance(x, np.ndarray):
        y = np.copy(x)
    elif hasattr(x, 'copy'):
        y = x.copy()
    else:
        # Tuples and other copy-less sequences: write into a fresh list
        # rather than aliasing the input, which would either fail on item
        # assignment or overwrite values that are still needed below.
        y = list(x)
    y[0] = w * (x[0] - 0.5 * x[2])
    y[1] = h * (x[1] - 0.5 * x[3])
    y[2] = w * (x[0] + 0.5 * x[2])
    y[3] = h * (x[1] + 0.5 * x[3])
    return y
def xyxy2xywhn(size, box):
    """Convert a corner-format box to a normalised centre/size box.

    Args:
        size: ``(width, height)`` the coordinates are divided by.
        box: ``(xmin, ymin, xmax, ymax)``.

    Returns:
        The tuple ``(x_center, y_center, width, height)``, with the x values
        divided by ``size[0]`` and the y values by ``size[1]``.
    """
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    mid_x = (left + right) / 2.0
    mid_y = (top + bottom) / 2.0
    span_x = right - left
    span_y = bottom - top
    full_w, full_h = size[0], size[1]
    return mid_x / full_w, mid_y / full_h, span_x / full_w, span_y / full_h
def crop_image(image, save_dir, name, suf, boxes):
    """Save the flagged quadrants of ``image`` as separate files.

    The image is cut once at ``H // 2`` and once at ``W // 2``. Quadrant
    ``k`` (1 = top-left, 2 = top-right, 3 = bottom-left, 4 = bottom-right)
    is written to ``<save_dir>/<name>_<k><suf>`` when ``boxes[k - 1] == 1``.

    Args:
        image: Array indexed as ``[row, col]`` or ``[row, col, channel]``.
        save_dir: Directory the crops are written to.
        name: File name stem used for every crop.
        suf: File extension including the leading dot, e.g. ``".jpg"``.
        boxes: Four flags in the quadrant order above; ``1`` means "save".
    """
    # Only the first two dimensions are needed; reading them with a slice
    # keeps single-channel (2-D) images from failing a 3-way unpack.
    H, W = image.shape[:2]
    mid_y, mid_x = H // 2, W // 2
    quadrants = (
        (slice(0, mid_y), slice(0, mid_x)),   # top-left
        (slice(0, mid_y), slice(mid_x, W)),   # top-right
        (slice(mid_y, H), slice(0, mid_x)),   # bottom-left
        (slice(mid_y, H), slice(mid_x, W)),   # bottom-right
    )
    for index, (rows, cols) in enumerate(quadrants):
        if boxes[index] != 1:
            continue
        save_path = os.path.join(save_dir, f"{name}_{index + 1}{suf}")
        # cv.imwrite reports failure through its boolean return value;
        # surface it instead of silently losing the crop.
        if not cv.imwrite(save_path, image[rows, cols]):
            print(f"Failed to write {save_path}")
def split_box(box, shape):
    """Split one pixel-space box across the four image quadrants.

    The cut lines are ``W // 2`` and ``H // 2``, the same integer positions
    ``crop_image`` slices at, so the labels stay aligned with the saved
    crops even when the image width or height is odd.

    Args:
        box: ``(class_id, xmin, ymin, xmax, ymax)`` in pixels of the full
            image.
        shape: Image shape; ``shape[0]`` is the height and ``shape[1]`` the
            width.

    Returns:
        A list of ``(class_id, x_center, y_center, width, height, tag)``
        tuples, one per quadrant the box overlaps with non-zero area. The
        four numbers are normalised by that quadrant's own size and ``tag``
        is ``'left_top'``, ``'right_top'``, ``'left_bottom'`` or
        ``'right_bottom'``.
    """
    H, W = shape[0], shape[1]
    n, xmin, ymin, xmax, ymax = box
    split_x, split_y = W // 2, H // 2
    # (tag, x0, y0, x1, y1): pixel extent of every quadrant in the full image.
    quadrants = (
        ('left_top', 0, 0, split_x, split_y),
        ('right_top', split_x, 0, W, split_y),
        ('left_bottom', 0, split_y, split_x, H),
        ('right_bottom', split_x, split_y, W, H),
    )
    normalized_regions = []
    for tag, x0, y0, x1, y1 in quadrants:
        # Intersect the box with the quadrant.
        left, top = max(x0, xmin), max(y0, ymin)
        right, bottom = min(x1, xmax), min(y1, ymax)
        if right <= left or bottom <= top:
            continue  # no overlap (or a zero-area sliver): nothing to label
        # Shift into the quadrant's own coordinate frame before normalising.
        local_box = (left - x0, top - y0, right - x0, bottom - y0)
        normalized_box = xyxy2xywhn((x1 - x0, y1 - y0), local_box)
        normalized_regions.append((n, *normalized_box, tag))
    return normalized_regions
def save_boxes(label_path, boxes):
    """Append every box to the label file of the quadrant it is tagged with.

    The quadrant file name is ``label_path`` with ``_1`` (left_top), ``_2``
    (right_top), ``_3`` (left_bottom) or ``_4`` (right_bottom) inserted
    before the extension. Boxes carrying any other tag are ignored.

    Args:
        label_path: Path of the un-split label file; used only to derive the
            per-quadrant file names.
        boxes: Iterable of ``(class_id, v1, v2, v3, v4, tag)`` entries.
    """
    stem, extension = os.path.splitext(label_path)
    suffix_by_region = {
        'left_top': '_1',
        'right_top': '_2',
        'left_bottom': '_3',
        'right_bottom': '_4',
    }
    for entry in boxes:
        suffix = suffix_by_region.get(entry[5])
        if suffix is None:
            # Unknown quadrant tag: nothing to write for this entry.
            continue
        target = f"{stem}{suffix}{extension}"
        # Append mode: several boxes (and several calls) share one file.
        with open(target, 'a') as out:
            values = " ".join(str(value) for value in entry[1:5])
            out.write(f"{int(entry[0])} " + values + '\n')
# ---- Configuration: edit these paths before running ----
fileDir = r"E:\peanut_data\j"  # directory holding the source images
label_path = r"E:\peanut_data\txt"  # directory holding the source label files
list1 = GetFileList(fileDir)
image_save_path_head = r"E:\peanut_data\j1"  # output directory for the cropped images
label_save_path_head = r"E:\peanut_data\txt1"  # output directory for the cropped labels
os.makedirs(image_save_path_head, exist_ok=True)
os.makedirs(label_save_path_head, exist_ok=True)

# Slot of each quadrant tag in the keep-flag list handed to crop_image.
region_slots = {'left_top': 0, 'right_top': 1, 'left_bottom': 2, 'right_bottom': 3}

for i in list1:
    img = cv.imread(i)
    if img is None:
        # cv.imread returns None when the file cannot be read or decoded.
        print(f"Skipping unreadable image: {i}")
        continue
    shape = img.shape
    name, suf = os.path.splitext(os.path.basename(i))
    labelname = os.path.join(label_path, name) + '.txt'  # label file paired with this image
    if not os.path.isfile(labelname):
        print(f"Skipping image without a label file: {i}")
        continue

    # Parse the label file: one "class x_center y_center width height" row
    # per line. Blank lines are skipped rather than ending the read, so
    # rows that follow a blank line are no longer lost.
    pos = []
    with open(labelname, 'r') as f1:
        for line in f1:
            fields = line.split()
            if not fields:
                continue
            if len(fields) != 5:
                print(f"Skipping malformed label row in {labelname}: {line.strip()}")
                continue
            pos.append([float(value) for value in fields])

    labelname_new = os.path.join(label_save_path_head, name) + '.txt'
    # save_boxes appends to "<name>_1.txt" .. "<name>_4.txt"; remove files
    # left by an earlier run so re-running does not duplicate every label.
    stale_base, stale_ext = os.path.splitext(labelname_new)
    for quadrant in range(1, 5):
        stale_file = f"{stale_base}_{quadrant}{stale_ext}"
        if os.path.exists(stale_file):
            os.remove(stale_file)

    l = [0, 0, 0, 0]  # keep-flags: left_top, right_top, left_bottom, right_bottom
    for k in np.array(pos):
        # Normalised centre/size -> pixel corners of the full image.
        k[1:] = xywhn2xyxy(k[1:], shape[1], shape[0])
        regions = split_box(k, shape)
        save_boxes(labelname_new, regions)
        for region in regions:
            region_name = region[-1]
            if region_name in region_slots:
                l[region_slots[region_name]] = 1
                print(f"The region is '{region_name}'.")
    # Write only the quadrants that received at least one label.
    crop_image(img, image_save_path_head, name, suf, l)