This post explains how to fine-tune SSD on your own dataset under Caffe.
Concretely, it shows how to take the ssd.caffemodel trained on the VOC0712 dataset and fine-tune it on a custom dataset, covering the NEU dataset, several Python helper scripts, the changes to the network definition, the training and testing workflow, and a Python script for batch-generating detection results on test images.
This post will not revisit path and file-name changes. For installing and compiling caffe-ssd, training and testing, and path/name modifications, please refer to my earlier post: 目标检测SSD网络在Caffe下的实现 (Implementing the SSD Object Detection Network in Caffe).
Dataset source: NEU surface defect database
Model: SSD
Platform: Linux (Ubuntu)
Introduction to the NEU Dataset
This steel surface defect dataset was built by Song Kechen's team at Northeastern University. It contains 1,800 images covering six defect types:
- crazing
- inclusion
- patches
- pitted_surface
- rolled-in_scale
- scratches
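For training, Caffe-SSD also needs a label map that assigns each class name an integer label, with label 0 reserved for the background class. Below is a minimal sketch that writes such a file for the six NEU classes in the same format as the labelmap_voc.prototxt shipped with Caffe-SSD; the output path and the file name labelmap_neu.prototxt are my own assumptions, so adjust them to your data directory.
# A minimal sketch: generate a Caffe-SSD label map for the six NEU defect classes.
classes = ['crazing', 'inclusion', 'patches', 'pitted_surface',
           'rolled-in_scale', 'scratches']
# the output path is an assumption; point it at your own data directory
with open('/home1/xxx/caffe_ssd/data/NEU/labelmap_neu.prototxt', 'w') as f:
    # label 0 is reserved for the background class in SSD
    f.write('item {\n  name: "none_of_the_above"\n  label: 0\n  display_name: "background"\n}\n')
    for label, cls in enumerate(classes, start=1):
        f.write('item {\n  name: "%s"\n  label: %d\n  display_name: "%s"\n}\n' % (cls, label, cls))
With six defect classes plus background, num_classes in the detection layers becomes 7 when the network definition is modified later.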
Creating the LMDB Dataset
Splitting the dataset into trainval and test
I wrote a Python script that randomly splits the 1,800 images and their labels into a trainval set and a test set at an 8:2 ratio:
- trainval: 1440
- test: 360
The code is as follows:
"""this code is to split randomly images and xml files to train and test file"""
import os
import cv2
#import string
import random
import numpy as np
import shutil
os.makedirs('/home1/xxx/caffe_ssd/data/NEU/neu/trainval/ANNOTATIONS')
os.makedirs('/home1/xxx/caffe_ssd/data/NEU/neu/trainval/IMAGES')
os.makedirs('/home1/xxx/caffe_ssd/data/NEU/neu/test/ANNOTATIONS')
os.makedirs('/home1/xxx/caffe_ssd/data/NEU/neu/test/IMAGES')
open_dir = "/home1/xxx/caffe_ssd/data/NEU/NEU-DET" #the file you want to split
save_dir = '/home1/xxx/caffe_ssd/data/NEU/neu' #the file you want to save
sum_samples = 300 #the sums of each class
img_resize = 300
sample_class=['crazing', 'inclusion', 'patches', 'pitted_surface', 'rolled-in_scale', 'scratches'] #samples class
def get_specific_suffix(dirname, suffix='.jpg'): #get specific suffix images and xml files
images_path = 'IMAGES' #the file name of images
annotations_path = 'ANNOTATIONS' #the file name of annotations
img_dir = os.path.join(dirname, images_path)
img_list = os.listdir(img_dir)
xml_dir = os.path.join(dirname, annotations_path)
xml_list = os.listdir(xml_dir)
img_list_suffix = []
for img_array in img_list:
if os.path.splitext(img_array)[1] == suffix:
img_list_suffix.append(img_array)
else:
continue
return img_list_suffix, xml_list #['crazing_1.jpg', 'crazing_10.jpg'] #return img list and xml list of content
def get_random_list(sum_samples, scale=0.8): #get random list to split train and test with scale
list_random = random.sample(range(1, sum_samples), int(sum_samples * scale)) #get random figures without repetition
list_sort = sorted(list_random)
return list_sort
#get random images and annotations, split them to train and test file
def get_random_img_anno(img_list_suffix, xml_list, sum_samples, img_anno_path='./data/', save_path='./'):
images_path = 'IMAGES/' #the file name of images
annotations_path = 'ANNOTATIONS/' #the file name of annotations
random_list = get_random_list(sum_samples) #get random list
#split images to train and test according sample class
for sam_class in sample_class:
for img_name in img_list_suffix:
count = 0
for i in random_list:
if img_name.find(sam_class) != -1:
if img_name.split('.')[0] == sam_class + '_' + str(i):
shutil.copy(os.path.join(img_anno_path, images_path, img_name),os.path.join(save_path, 'trainval/', images_path, img_name))
if img_name.split('.')[0] != sam_class + '_' + str(i):
count = count + 1
if count == len(random_list):
count = 0
shutil.copy(os.path.join(img_anno_path, images_path, img_name),os.path.join(save_path, 'test/', images_path, img_name))
#split annotations to train and test according sample class
for sam_class in sample_class:
#count_val = 0
for xml_name in xml_list:
count = 0
for i in random_list:
                if xml_name.find(sam_class) != -1: