Tips:COCO原数据集格式为.json格式,需要先转为VOC格式,再转为YOLO所需的.txt格式
一、数据集下载
百度网盘:https://pan.baidu.com/s/1YcxHxXfyoViTn2GH2xrPRA
提取码:38ts
二、COCO转VOC格式
##############(80类别都转化)##############
import argparse, json
import cytoolz
from lxml import etree, objectify
import os, re
def instance2xml_base(anno):
E = objectify.ElementMaker(annotate=False)
anno_tree = E.annotation(
E.folder('VOC2014_instance/{}'.format(anno['category_id'])),
E.filename(anno['file_name']),
E.source(
E.database('MS COCO 2014'),
E.annotation('MS COCO 2014'),
E.image('Flickr'),
E.url(anno['coco_url'])
),
E.size(
E.width(anno['width']),
E.height(anno['height']),
E.depth(3)
),
E.segmented(0),
)
return anno_tree
def instance2xml_bbox(anno, bbox_type='xyxy'):
"""bbox_type: xyxy (xmin, ymin, xmax, ymax); xywh (xmin, ymin, width, height)"""
assert bbox_type in ['xyxy', 'xywh']
if bbox_type == 'xyxy':
xmin, ymin, w, h = anno['bbox']
xmax = xmin+w
ymax = ymin+h
else:
xmin, ymin, xmax, ymax = anno['bbox']
E = objectify.ElementMaker(annotate=False)
anno_tree = E.object(
E.name(anno['category_id']),
E.bndbox(
E.xmin(xmin),
E.ymin(ymin),
E.xmax(xmax),
E.ymax(ymax)
),
E.difficult(anno['iscrowd'])
)
return anno_tree
def parse_instance(content, outdir):
categories = {d['id']: d['name'] for d in content['categories']}
# merge images and annotations: id in images vs image_id in annotations
merged_info_list = list(map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))
# convert category id to name
for instance in merged_info_list:
instance['category_id'] = categories[instance['category_id']]
# group by filename to pool all bbox in same file
for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
anno_tree = instance2xml_base(groups[0])
# if one file have multiple different objects, save it in each category sub-directory
filenames = []
for group in groups:
filenames.append(os.path.join(outdir, re.sub(" ", "_", group['category_id']),
os.path.splitext(name)[0] + ".xml"))
anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
for filename in filenames:
etree.ElementTree(anno_tree).write(filename, pretty_print=True)
print("Formating instance xml file {} done!".format(name))
def keypoints2xml_base(anno):
annotation = etree.Element("annotation")
etree.SubElement(annotation, "folder").text = "VOC2014_keypoints"
etree.SubElement(annotation, "filename").text = anno['file_name']
source = etree.SubElement(annotation, "source")
etree.SubElement(source, "database").text = "MS COCO 2014"
etree.SubElement(source, "annotation").text = "MS COCO 2014"
etree.SubElement(source, "image").text = "Flickr"
etree.SubElement(source, "url").text = anno['coco_url']
size = etree.SubElement(annotation, "size")
etree.SubElement(size, "width").text = str(anno["width"])
etree.SubElement(size, "height").text = str(anno["height"])
etree.SubElement(size, "depth").text = '3'
etree.SubElement(annotation, "segmented").text = '0'
return annotation
def keypoints2xml_object(anno, xmltree, keypoints_dict, bbox_type='xyxy'):
assert bbox_type in ['xyxy', 'xywh']
if bbox_type == 'xyxy':
xmin, ymin, w, h = anno['bbox']
xmax = xmin+w
ymax = ymin+h
else:
xmin, ymin, xmax, ymax = anno['bbox']
key_object = etree.SubElement(xmltree, "object")
etree.SubElement(key_object, "name").text = anno['category_id']
bndbox = etree.SubElement(key_object, "bndbox")
etree.SubElement(bndbox, "xmin").text = str(xmin)
etree.SubElement(bndbox, "ymin").text = str(ymin)
etree.SubElement(bndbox, "xmax").text = str(xmax)
etree.SubElement(bndbox, "ymax").text = str(ymax)
etree.SubElement(key_object, "difficult").text = '0'
keypoints = etree.SubElement(key_object, "keypoints")
for i in range(0, len(keypoints_dict)):
keypoint = etree.SubElement(keypoints, keypoints_dict[i+1])
etree.SubElement(keypoint, "x").text = str(anno['keypoints'][i*3])
etree.SubElement(keypoint, "y").text = str(anno['keypoints'][i*3+1])
etree.SubElement(keypoint, "v").text = str(anno['keypoints'][i*3+2])
return xmltree
def parse_keypoints(content, outdir):
keypoints = dict(zip(range(1, len(content['categories'][0]['keypoints'])+1), content['categories'][0]['keypoints']))
# merge images and annotations: id in images vs image_id in annotations
merged_info_list = map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations']))
# convert category name to person
for keypoint in merged_info_list:
keypoint['category_id'] = "person"
# group by filename to pool all bbox and keypoint in same file
for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
filename = os.path.join(outdir, os.path.splitext(name)[0]+".xml")
anno_tree = keypoints2xml_base(groups[0])
for group in groups:
anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
doc = etree.ElementTree(anno_tree)
doc.write(open(filename, "w"), pretty_print=True)
print("Formating keypoints xml file {} done!".format(name))
def main(args):
if not os.path.exists(args.output_dir):
os.makedirs(args.output_dir)
content = json.load(open(args.anno_file, 'r'))
if args.type == 'instance':
# make subdirectories
sub_dirs = [re.sub(" ", "_", cate['name']) for cate in content['categories']]
for sub_dir in sub_dirs:
sub_dir = os.path.join(args.output_dir, str(sub_dir))
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
parse_instance(content, args.output_dir)
elif args.type == 'keypoint':
parse_keypoints(content, args.output_dir)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--anno_file", default = '这里填写COCO的标签路径例如:/Path/to/instances_train2014.json', help="annotation file for object instance/keypoint")
parser.add_argument("--type", default = 'instance',type=str, help="object instance or keypoint", choices=['instance', 'keypoint'])
parser.add_argument("--output_dir", default = '这里填写生成VOC标签的保存路径', help="output directory for voc annotation xml file")
args = parser.parse_args()
main(args)
##############(选择某个类别转化,只需要修改64行代码)##############
import argparse, json
import cytoolz
from lxml import etree, objectify
import os, re
def instance2xml_base(anno):
E = objectify.ElementMaker(annotate=False)
anno_tree = E.annotation(
E.folder('VOC2014_instance/{}'.format(anno['category_id'])),
E.filename(anno['file_name']),
E.source(
E.database('MS COCO 2014'),
E.annotation('MS COCO 2014'),
E.image('Flickr'),
E.url(anno['coco_url'])
),
E.size(
E.width(anno['width']),
E.height(anno['height']),
E.depth(3)
),
E.segmented(0),
)
return anno_tree
def instance2xml_bbox(anno, bbox_type='xyxy'):
"""bbox_type: xyxy (xmin, ymin, xmax, ymax); xywh (xmin, ymin, width, height)"""
assert bbox_type in ['xyxy', 'xywh']
if bbox_type == 'xyxy':
xmin, ymin, w, h = anno['bbox']
xmax = xmin+w
ymax = ymin+h
else:
xmin, ymin, xmax, ymax = anno['bbox']
E = objectify.ElementMaker(annotate=False)
anno_tree = E.object(
E.name(anno['category_id']),
E.bndbox(
E.xmin(xmin),
E.ymin(ymin),
E.xmax(xmax),
E.ymax(ymax)
),
E.difficult(anno['iscrowd'])
)
return anno_tree
def parse_instance(content, outdir):
categories = {d['id']: d['name'] for d in content['categories']}
# merge images and annotations: id in images vs image_id in annotations
merged_info_list = list(map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))
# convert category id to name
for instance in merged_info_list:
instance['category_id'] = categories[instance['category_id']]
# group by filename to pool all bbox in same file
for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
anno_tree = instance2xml_base(groups[0])
# if one file have multiple different objects, save it in each category sub-directory
filenames = []
for group in groups:
if group['category_id']=='person': #这里可以修改为COCO的其他类别,就可以输出相应的VOC格式文件
filenames.append(os.path.join(outdir, re.sub(" ", "_", group['category_id']),
os.path.splitext(name)[0] + ".xml"))
anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
for filename in filenames:
etree.ElementTree(anno_tree).write(filename, pretty_print=True)
print("Formating instance xml file {} done!".format(name))
def keypoints2xml_base(anno):
annotation = etree.Element("annotation")
etree.SubElement(annotation, "folder").text = "VOC2014_keypoints"
etree.SubElement(annotation, "filename").text = anno['file_name']
source = etree.SubElement(annotation, "source")
etree.SubElement(source, "database").text = "MS COCO 2014"
etree.SubElement(source, "annotation").text = "MS COCO 2014"
etree.SubElement(source, "image").text = "Flickr"
etree.SubElement(source, "url").text = anno['coco_url']
size = etree.SubElement(annotation, "size")
etree.SubElement(size, "width").text = str(anno["width"])
etree.SubElement(size, "height").text = str(anno["height"])
etree.SubElement(size, "depth").text = '3'
etree.SubElement(annotation, "segmented").text = '0'
return annotation
def keypoints2xml_object(anno, xmltree, keypoints_dict, bbox_type='xyxy'):
assert bbox_type in ['xyxy', 'xywh']
if bbox_type == 'xyxy':
xmin, ymin, w, h = anno['bbox']
xmax = xmin+w
ymax = ymin+h
else:
xmin, ymin, xmax, ymax = anno['bbox']
key_object = etree.SubElement(xmltree, "object")
etree.SubElement(key_object, "name").text = anno['category_id']
bndbox = etree.SubElement(key_object, "bndbox")
etree.SubElement(bndbox, "xmin").text = str(xmin)
etree.SubElement(bndbox, "ymin").text = str(ymin)
etree.SubElement(bndbox, "xmax").text = str(xmax)
etree.SubElement(bndbox, "ymax").text = str(ymax)
etree.SubElement(key_object, "difficult").text = '0'
keypoints = etree.SubElement(key_object, "keypoints")
for i in range(0, len(keypoints_dict)):
keypoint = etree.SubElement(keypoints, keypoints_dict[i+1])
etree.SubElement(keypoint, "x").text = str(anno['keypoints'][i*3])
etree.SubElement(keypoint, "y").text = str(anno['keypoints'][i*3+1])
etree.SubElement(keypoint, "v").text = str(anno['keypoints'][i*3+2])
return xmltree
def parse_keypoints(content, outdir):
keypoints = dict(zip(range(1, len(content['categories'][0]['keypoints'])+1), content['categories'][0]['keypoints']))
# merge images and annotations: id in images vs image_id in annotations
merged_info_list = map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations']))
# convert category name to person
for keypoint in merged_info_list:
keypoint['category_id'] = "person"
# group by filename to pool all bbox and keypoint in same file
for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
filename = os.path.join(outdir, os.path.splitext(name)[0]+".xml")
anno_tree = keypoints2xml_base(groups[0])
for group in groups:
anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
doc = etree.ElementTree(anno_tree)
doc.write(open(filename, "w"), pretty_print=True)
print("Formating keypoints xml file {} done!".format(name))
def main(args):
if not os.path.exists(args.output_dir):
os.makedirs(args.output_dir)
content = json.load(open(args.anno_file, 'r'))
if args.type == 'instance':
# make subdirectories
sub_dirs = [re.sub(" ", "_", cate['name']) for cate in content['categories']]
for sub_dir in sub_dirs:
sub_dir = os.path.join(args.output_dir, str(sub_dir))
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
parse_instance(content, args.output_dir)
elif args.type == 'keypoint':
parse_keypoints(content, args.output_dir)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--anno_file", default = '这里填写COCO的标签路径例如:/Path/to/instances_train2014.json', help="annotation file for object instance/keypoint")
parser.add_argument("--type", default = 'instance',type=str, help="object instance or keypoint", choices=['instance', 'keypoint'])
parser.add_argument("--output_dir", default = '这里填写生成VOC标签的保存路径', help="output directory for voc annotation xml file")
args = parser.parse_args()
main(args)
三、VOC转YOLO .TXT
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
def convert(size, box):
# size=(width, height) b=(xmin, xmax, ymin, ymax)
# x_center = (xmax+xmin)/2 y_center = (ymax+ymin)/2
# x = x_center / width y = y_center / height
# w = (xmax-xmin) / width h = (ymax-ymin) / height
x_center = (box[0] + box[1]) / 2.0
y_center = (box[2] + box[3]) / 2.0
x = x_center / size[0]
y = y_center / size[1]
w = (box[1] - box[0]) / size[0]
h = (box[3] - box[2]) / size[1]
# print(x, y, w, h)
return (x, y, w, h)
def convert_annotation(xml_files_path, save_txt_files_path, classes):
xml_files = os.listdir(xml_files_path)
# print(xml_files)
for xml_name in xml_files:
print(xml_name)
xml_file = os.path.join(xml_files_path, xml_name)
out_txt_path = os.path.join(save_txt_files_path, xml_name.split('.')[0] + '.txt')
out_txt_f = open(out_txt_path, 'w')
tree = ET.parse(xml_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)
for obj in root.iter('object'):
difficult = obj.find('difficult').text
cls = obj.find('name').text
if cls not in classes or int(difficult) == 1:
continue
cls_id = classes.index(cls)
xmlbox = obj.find('bndbox')
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
float(xmlbox.find('ymax').text))
# b=(xmin, xmax, ymin, ymax)
# print(w, h, b)
bb = convert((w, h), b)
out_txt_f.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
if __name__ == "__main__":
# 把forklift_pallet的voc的xml标签文件转化为yolo的txt标签文件
# 1、需要转化的类别
# classes = ['car', 'bus', 'van', 'others'] # 注意:这里根据自己的类别名称及种类自行更改
classes = ['person'] # 注意:这里根据自己的类别名称及种类自行更改
# 2、voc格式的xml标签文件路径
xml_files1 = r'E:\project\py-project\network\dataset\COCO\voc\person'
# 3、转化为yolo格式的txt标签文件存储路径
save_txt_files1 = r'E:\project\py-project\network\dataset\COCO\yolo'
convert_annotation(xml_files1, save_txt_files1, classes)
四、Reference
1、将COCO数据集转成VOC数据集,还可以提取单独的类别分离出来,像行人检测数据集。
2、https://github.com/CasiaFan/Dataset_to_VOC_converter