
rock takes you through the CornerNet-Lite source code (Part 1)

Preface

The CornerNet-Lite family consists of three networks: CornerNet, CornerNet-Saccade and CornerNet-Squeeze, the latter two being improved versions of CornerNet. Although anchor-free detectors such as FCOS and CenterNet now outperform CornerNet, its source code is still well worth studying, and the coding style is refreshingly clean.
Paper: https://arxiv.org/abs/1904.08900
Code: https://github.com/princeton-vl/CornerNet-Lite

Project structure

CornerNet-Lite-master
-----configs   # dataset / training parameter configuration files (JSON)
     cornernet-saccade.json
-----core
------------dbs  # dataset interfaces and preprocessing, COCO / VOC format
------------external  # post-processing of detected boxes, NMS
------------models  # model architectures and losses; returns the network outputs
------------nnet  # py_factory.py, the model launcher, similar to a solver
------------sample  # encodes ground-truth images/boxes into training targets
------------test  # testing
------------utils # miscellaneous helper components
-----data  # create manually; put the VOC and COCO datasets here
-----demo.py
-----evaluate.py  # evaluation
-----train.py

Training

Training itself needs little special attention; just follow the README. One thing to note: with only one GPU, edit the JSON file under configs so that batch_size equals the single chunk_sizes entry, i.e. batch_size = chunk_sizes = X. Keep X modest; 15 is a typical choice. A small sketch of this relationship follows.
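
A minimal sketch of that relationship (my own illustration, not code from the repo): each entry of chunk_sizes is the number of images one GPU processes per iteration, so the entries must sum to batch_size.

# Illustration only: how batch_size and chunk_sizes relate.
single_gpu = {"batch_size": 15, "chunk_sizes": [15]}    # one GPU takes the whole batch
two_gpus   = {"batch_size": 15, "chunk_sizes": [8, 7]}  # the same batch split across two GPUs

for cfg in (single_gpu, two_gpus):
    assert cfg["batch_size"] == sum(cfg["chunk_sizes"])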

Config parameters explained:

Take cornernet-saccade.json as an example:

{
    "system": {
        "dataset": "VOC",  # dataset format
        "batch_size": 15,  # training batch size
        "sampling_function": "cornernet_saccade",

        "train_split": "trainval",  # split used for training
        "val_split": "minival",     # split used for validation

        "learning_rate": 0.00025,  # initial learning rate
        "decay_rate": 10,  # decay factor: the learning rate is divided by this as training progresses
                 # see train.py: learning_rate /= (decay_rate ** (start_iter // stepsize))
        "val_iter": 100,   # run validation every val_iter iterations

        "opt_algo": "adam",
        "prefetch_size": 5,   # capacity of the data prefetching queue filled by the worker processes

        "max_iter": 500000,
        "stepsize": 450000,
        "snapshot": 5000,

        "chunk_sizes": [15]   # number of images each GPU processes per iteration
    },

    "db": {
        "rand_scale_min": 0.5,  # data augmentation / preprocessing parameters
        "rand_scale_max": 1.1,
        "rand_scale_step": 0.1,
        "rand_scales": null,

        "rand_full_crop": true,
        "gaussian_bump": true,
        "gaussian_iou": 0.5,

        "min_scale": 16,
        "view_sizes": [],

        "height_mult": 31,  # not used in the code
        "width_mult": 31,

        "input_size": [255, 255],   # input image size after resizing
        "output_sizes": [[64, 64]], # output feature-map size

        "att_max_crops": 30,
        "att_scales": [[1, 2, 4]],
        "att_thresholds": [0.3],

        "top_k": 12,  # keep the top_k detected boxes (dets)
        "num_dets": 12,
        "categories": 1,  # number of object categories
        "ae_threshold": 0.3,
        "nms_threshold": 0.5,

        "max_per_image": 100
    }
}
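
How learning_rate, decay_rate and stepsize interact is easiest to see with a small worked example (my own illustration, mirroring the two places train.py uses them):

# Illustration only: the lr schedule implied by the config above.
learning_rate = 0.00025
decay_rate    = 10
stepsize      = 450000

# During training (train.py), every `stepsize` iterations the lr is divided by decay_rate,
# so with max_iter = 500000 this happens once, at iteration 450000: 0.00025 -> 0.000025.

# When resuming from a snapshot, train.py recomputes the lr the run had reached:
start_iter = 460000
resumed_lr = learning_rate / (decay_rate ** (start_iter // stepsize))
print(resumed_lr)  # 2.5e-05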

The core directory:

dbs:
dbs only ships the COCO data interface; I have put a VOC version on GitHub: https://github.com/huangzicheng/CornerNet-Lite. The COCO class mainly returns detections and eval_ids: detections holds each object's category and box, and eval_ids stores the id of every image. I will not walk through it line by line; comparing it with the VOC version linked above is a good way to see what needs to change and to deepen your understanding.

import os
import json
import numpy as np

from .detection import DETECTION
from ..paths import get_file_path

# COCO bounding boxes are 0-indexed

class COCO(DETECTION):
    def __init__(self, db_config, split=None, sys_config=None):
        assert split is None or sys_config is not None
        super(COCO, self).__init__(db_config)

        self._mean    = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32)
        self._std     = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32)
        self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32)
        self._eig_vec = np.array([
            [-0.58752847, -0.69563484, 0.41340352],
            [-0.5832747, 0.00994535, -0.81221408],
            [-0.56089297, 0.71832671, 0.41158938]
        ], dtype=np.float32)

        self._coco_cls_ids = [
            1,
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
            14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
            37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
            58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
            72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
            82, 84, 85, 86, 87, 88, 89, 90
        ]

        self._coco_cls_names = [
            'person',
            'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse','sheep', 'cow', 'elephant',
            'bear', 'zebra','giraffe', 'backpack', 'umbrella',
            'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
            'snowboard','sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
            'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
            'donut', 'cake', 'chair', 'couch', 'potted plant',
            'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
            'oven', 'toaster', 'sink', 'refrigerator', 'book',
            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
            'toothbrush'
        ]

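        # Maps between the contiguous training class ids (1..80), the original
        # (non-contiguous) COCO category ids, and the class names.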
        self._cls2coco  = {ind + 1: coco_id for ind, coco_id in enumerate(self._coco_cls_ids)}
        self._coco2cls  = {coco_id: cls_id for cls_id, coco_id in self._cls2coco.items()}
        self._coco2name = {cls_id: cls_name for cls_id, cls_name in zip(self._coco_cls_ids, self._coco_cls_names)}
        self._name2coco = {cls_name: cls_id for cls_name, cls_id in self._coco2name.items()}

        if split is not None:
            coco_dir = os.path.join(sys_config.data_dir, "coco")
            #coco_dir='/media/diskData/huanglong_data/coco'
            self._split     = {
                "trainval": "train2017",
                "minival":  "val2017",
                "testdev":  "val2017"
            }[split]
            self._data_dir  = os.path.join(coco_dir, self._split)
            self._anno_file = os.path.join(coco_dir, "annotations", "instances_{}.json".format(self._split))

            self._detections, self._eval_ids = self._load_coco_annos()
            self._image_ids = list(self._detections.keys())
            self._db_inds   = np.arange(len(self._image_ids))

    def _load_coco_annos(self):
        from pycocotools.coco import COCO

        coco = COCO(self._anno_file)
        self._coco = coco

        class_ids = coco.getCatIds()
        image_ids = coco.getImgIds()
        
        eval_ids   = {}
        detections = {}
        for image_id in image_ids:
            image = coco.loadImgs(image_id)[0]
            dets  = []
            
            eval_ids[image["file_name"]] = image_id
            for class_id in class_ids:
                annotation_ids = coco.getAnnIds(imgIds=image["id"], catIds=class_id)
                annotations    = coco.loadAnns(annotation_ids)
                category       = self._coco2cls[class_id]
                for annotation in annotations:
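                    # COCO stores boxes as [x, y, w, h]; convert to [x1, y1, x2, y2]
                    # and append the (contiguous) category id as the 5th entry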
                    det     = annotation["bbox"] + [category]
                    det[2] += det[0]
                    det[3] += det[1]
                    dets.append(det)

            file_name = image["file_name"]
            if len(dets) == 0:
                detections[file_name] = np.zeros((0, 5), dtype=np.float32)
            else:
                detections[file_name] = np.array(dets, dtype=np.float32)
        return detections, eval_ids

    def image_path(self, ind):
        if self._data_dir is None:
            raise ValueError("Data directory is not set")

        db_ind    = self._db_inds[ind]
        file_name = self._image_ids[db_ind]
        return os.path.join(self._data_dir, file_name)

    def detections(self, ind):
        db_ind    = self._db_inds[ind]
        file_name = self._image_ids[db_ind]
        return self._detections[file_name].copy()

    def cls2name(self, cls):
        coco = self._cls2coco[cls]
        return self._coco2name[coco]

    def _to_float(self, x):
        return float("{:.2f}".format(x))

    def convert_to_coco(self, all_bboxes):
        detections = []
        for image_id in all_bboxes:
            coco_id = self._eval_ids[image_id]
            for cls_ind in all_bboxes[image_id]:
                category_id = self._cls2coco[cls_ind]
                for bbox in all_bboxes[image_id][cls_ind]:
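                    # boxes come in as [x1, y1, x2, y2, score]; convert back to
                    # COCO's [x, y, w, h] before writing the result json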
                    bbox[2] -= bbox[0]
                    bbox[3] -= bbox[1]

                    score = bbox[4]
                    bbox  = list(map(self._to_float, bbox[0:4]))

                    detection = {
                        "image_id": coco_id,
                        "category_id": category_id,
                        "bbox": bbox,
                        "score": float("{:.2f}".format(score))
                    }

                    detections.append(detection)
        return detections

    def evaluate(self, result_json, cls_ids, image_ids):
        from pycocotools.cocoeval import COCOeval

        if self._split == "testdev":
            return None

        coco = self._coco

        eval_ids = [self._eval_ids[image_id] for image_id in image_ids]
        cat_ids  = [self._cls2coco[cls_id] for cls_id in cls_ids]

        coco_dets = coco.loadRes(result_json)
        coco_eval = COCOeval(coco, coco_dets, "bbox")
        coco_eval.params.imgIds = eval_ids
        coco_eval.params.catIds = cat_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        return coco_eval.stats[0], coco_eval.stats[12:]
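
To make the interface concrete, here is a small usage sketch (my own, not from the repo; the config file name and the ./data/coco layout are assumptions to adapt to your setup):

import json

from core.config import SystemConfig
from core.dbs import datasets

# Illustration only: build the db the same way train.py does.
with open("./configs/CornerNet_Squeeze.json", "r") as f:
    config = json.load(f)
sys_config = SystemConfig().update_config(config["system"])

db = datasets[sys_config.dataset](config["db"], split="trainval", sys_config=sys_config)

print(len(db.db_inds))         # number of images in the split
print(db.image_path(0))        # absolute path of the first image
print(db.detections(0).shape)  # (num_boxes, 5): x1, y1, x2, y2, category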

models, the core of the code:

The models directory holds the three CornerNet network definition files, and py_utils stores a collection of reusable modules. There are many models here and the components are intricate, so to make the model structure and data flow clear I start from training: by following the data through the pipeline we gradually work into every component of the network, which makes the whole project much easier to grasp.

Data flow during training:

See train.py. The command to train CornerNet_Squeeze:

python /homexxx/CornerNet-Lite-master/train.py CornerNet_Squeeze

The overall call flow is:

(Figure from the original post: overall call-flow diagram of train.py.)
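
In words, the flow is roughly the following (a condensed summary of the code below, not an exhaustive trace):

parse_args()
  -> main(gpu, ngpus_per_node, args)          # called directly, or via mp.spawn when --distributed
       -> load configs/<cfg_file>.json and build SystemConfig
       -> importlib.import_module("core.models.<cfg_file>").model()   # build the network
       -> build training_dbs / validation_db from core.dbs.datasets
       -> train(training_dbs, validation_db, system_config, model, args)
            -> NetworkFactory(...)            # wraps the model, loss and optimizer (core/nnet/py_factory.py)
            -> prefetching processes (data_sampling_func) + pin_memory threads
            -> training loop: nnet.train(**batch), periodic validation,
               snapshots every `snapshot` iterations, lr decay every `stepsize` iterations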

#!/usr/bin/env python
import os
import json
import torch
import numpy as np
import queue
import pprint
import random 
import argparse
import importlib
import threading
import traceback
import torch.distributed as dist
import torch.multiprocessing as mp

from tqdm import tqdm
from torch.multiprocessing import Process, Queue, Pool

from core.dbs import datasets
from core.utils import stdout_to_tqdm
from core.config import SystemConfig
from core.sample import data_sampling_func
from core.nnet.py_factory import NetworkFactory


torch.backends.cudnn.enabled   = True
torch.backends.cudnn.benchmark = True

def parse_args():
    parser = argparse.ArgumentParser(description="Training Script")
    parser.add_argument("cfg_file", help="config file", type=str)
    parser.add_argument("--iter", dest="start_iter",
                        help="train at iteration i",
                        default=0, type=int)
    parser.add_argument("--workers", default=4, type=int)
    parser.add_argument("--initialize", action="store_true")

    parser.add_argument("--distributed", action="store_true")   # action="store_true": the flag is True when --distributed is passed, False otherwise
    parser.add_argument("--world-size", default=-1, type=int,
                        help="number of nodes of distributed training")
    parser.add_argument("--rank", default=0, type=int,
                        help="node rank for distributed training")
    parser.add_argument("--dist-url", default=None, type=str,
                        help="url used to set up distributed training")
    parser.add_argument("--dist-backend", default="nccl", type=str)

    args = parser.parse_args()
    return args

def prefetch_data(system_config, db, queue, sample_data, data_aug):
    ind = 0
    print("start prefetching data...")
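    # seed each prefetching process with its own pid so the workers draw different random augmentations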
    np.random.seed(os.getpid())
    while True:
        try:
            data, ind = sample_data(system_config, db, ind, data_aug=data_aug)
            queue.put(data)
        except Exception as e:
            traceback.print_exc()
            raise e

def _pin_memory(ts):
    if type(ts) is list:
        return [t.pin_memory() for t in ts]
    return ts.pin_memory()

def pin_memory(data_queue, pinned_data_queue, sema):  #training_queue, pinned_training_queue, training_pin_semaphore
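    # runs in a daemon thread: copies each prefetched batch into pinned (page-locked)
    # memory so the subsequent host-to-GPU transfers are cheaper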
    while True:
        data = data_queue.get()

        data["xs"] = [_pin_memory(x) for x in data["xs"]]
        data["ys"] = [_pin_memory(y) for y in data["ys"]]

        pinned_data_queue.put(data)

        if sema.acquire(blocking=False):
            return

def init_parallel_jobs(system_config, dbs, queue, fn, data_aug):
    tasks = [Process(target=prefetch_data, args=(system_config, db, queue, fn, data_aug)) for db in dbs]
    for task in tasks:
        task.daemon = True
        task.start()
    return tasks

def terminate_tasks(tasks):
    for task in tasks:
        task.terminate()

def train(training_dbs, validation_db, system_config, model, args):
    # reading arguments from command
    start_iter  = args.start_iter
    distributed = args.distributed
    world_size  = args.world_size
    initialize  = args.initialize
    gpu         = args.gpu
    rank        = args.rank

    # reading arguments from json file
    batch_size       = system_config.batch_size
    learning_rate    = system_config.learning_rate
    max_iteration    = system_config.max_iter
    pretrained_model = system_config.pretrain
    stepsize         = system_config.stepsize
    snapshot         = system_config.snapshot
    val_iter         = system_config.val_iter
    display          = system_config.display
    decay_rate       = system_config.decay_rate
    stepsize         = system_config.stepsize

    print("Process {}: building model...".format(rank))
    nnet = NetworkFactory(system_config, model, distributed=distributed, gpu=gpu) # initialize the network
    if initialize:
        nnet.save_params(0)
        exit(0)

    # queues storing data for training
    training_queue   = Queue(system_config.prefetch_size)
    validation_queue = Queue(5)

    # queues storing pinned data for training
    pinned_training_queue   = queue.Queue(system_config.prefetch_size)
    pinned_validation_queue = queue.Queue(5)

    # allocating resources for parallel reading: the two blocks below spawn the prefetching processes and the memory-pinning threads
    training_tasks = init_parallel_jobs(system_config, training_dbs, training_queue, data_sampling_func, True)
    if val_iter:
        validation_tasks = init_parallel_jobs(system_config, [validation_db], validation_queue, data_sampling_func, False)

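    # the semaphores start out acquired; pin_memory() exits once it can acquire one,
    # and the release at the end of train() is how the pinning threads are told to stop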
    training_pin_semaphore   = threading.Semaphore()
    validation_pin_semaphore = threading.Semaphore()
    training_pin_semaphore.acquire()
    validation_pin_semaphore.acquire()

    training_pin_args   = (training_queue, pinned_training_queue, training_pin_semaphore)
    training_pin_thread = threading.Thread(target=pin_memory, args=training_pin_args)
    training_pin_thread.daemon = True
    training_pin_thread.start()

    validation_pin_args   = (validation_queue, pinned_validation_queue, validation_pin_semaphore)
    validation_pin_thread = threading.Thread(target=pin_memory, args=validation_pin_args)
    validation_pin_thread.daemon = True
    validation_pin_thread.start()

    if pretrained_model is not None:   # system_config.pretrain defaults to None (see core/config.py)
        if not os.path.exists(pretrained_model):
            raise ValueError("pretrained model does not exist")
        print("Process {}: loading from pretrained model".format(rank))
        nnet.load_pretrained_params(pretrained_model)

    if start_iter:
        nnet.load_params(start_iter)   # resume from the snapshot saved at iteration start_iter
        learning_rate /= (decay_rate ** (start_iter // stepsize))
        nnet.set_lr(learning_rate)   # set the network learning rate
        print("Process {}: training starts from iteration {} with learning_rate {}".format(rank, start_iter + 1, learning_rate))
    else:
        nnet.set_lr(learning_rate)

    if rank == 0:
        print("training start...")
    nnet.cuda()
    nnet.train_mode()
    with stdout_to_tqdm() as save_stdout:
        for iteration in tqdm(range(start_iter + 1, max_iteration + 1), file=save_stdout, ncols=80):
            training = pinned_training_queue.get(block=True)
            training_loss = nnet.train(**training)

            if display and iteration % display == 0:
                print("Process {}: training loss at iteration {}: {}".format(rank, iteration, training_loss.item()))
            del training_loss

            if val_iter and validation_db.db_inds.size and iteration % val_iter == 0:
                nnet.eval_mode()
                validation = pinned_validation_queue.get(block=True)
                validation_loss = nnet.validate(**validation)
                print("Process {}: validation loss at iteration {}: {}".format(rank, iteration, validation_loss.item()))
                nnet.train_mode()

            if iteration % snapshot == 0 and rank == 0:
                nnet.save_params(iteration)

            if iteration % stepsize == 0:
                learning_rate /= decay_rate
                nnet.set_lr(learning_rate)

    # sending signal to kill the thread
    training_pin_semaphore.release()
    validation_pin_semaphore.release()

    # terminating data fetching processes
    terminate_tasks(training_tasks)
    terminate_tasks(validation_tasks)

def main(gpu, ngpus_per_node, args):
    args.gpu = gpu           # gpu index when training on multiple GPUs; None in the single-GPU case
    if args.distributed:
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    rank = args.rank

    cfg_file = os.path.join("./configs", args.cfg_file + ".json")   # load the network / training parameters
    with open(cfg_file, "r") as f:
        config = json.load(f)

    config["system"]["snapshot_name"] = args.cfg_file     # model name, e.g. CornerNet_Squeeze
    system_config = SystemConfig().update_config(config["system"]) 

    model_file  = "core.models.{}".format(args.cfg_file)  # module path, e.g. core.models.CornerNet_Squeeze
    model_file  = importlib.import_module(model_file) # import_module loads CornerNet_Squeeze.py from that path
    model       = model_file.model() # instantiate the model class defined in CornerNet_Squeeze.py
    # print(model)
    # from thop import profile
    #input = torch.randn(1, 3, 511, 511)
    # flops, params = profile(model, inputs=(input, ))
    
    train_split = system_config.train_split   # dataset splits for training and validation
    val_split   = system_config.val_split

    print("Process {}: loading all datasets...".format(rank))
    dataset = system_config.dataset   # dataset name and its preprocessing configuration
    workers = args.workers  # number of data-loading worker processes
    print("Process {}: using {} workers".format(rank, workers))
    training_dbs = [datasets[dataset](config["db"], split=train_split, sys_config=system_config) for _ in range(workers)]  # one training db per worker
    validation_db = datasets[dataset](config["db"], split=val_split, sys_config=system_config)

    if rank == 0:
        print("system config...")
        pprint.pprint(system_config.full)

        print("db config...")
        pprint.pprint(training_dbs[0].configs)

        print("len of db: {}".format(len(training_dbs[0].db_inds)))
        print("distributed: {}".format(args.distributed))

    train(training_dbs, validation_db, system_config, model, args)

if __name__ == "__main__":
    args = parse_args()

    distributed = args.distributed  
    world_size  = args.world_size

    if distributed and world_size < 0:   # by default distributed is False and world_size is -1, so this check is skipped
        raise ValueError("world size must be greater than 0 in distributed training")

    ngpus_per_node  = torch.cuda.device_count()
    if distributed:  # not taken in the default single-process case
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:  # this branch runs
        main(None, ngpus_per_node, args)
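
The config-driven model loading inside main() can also be reproduced on its own; a minimal sketch (run from the repo root, treating it as an illustration rather than a drop-in script):

import importlib

# Illustration only: how train.py turns the config name into a network instance.
cfg_name = "CornerNet_Squeeze"        # the positional argument passed to train.py
module   = importlib.import_module("core.models.{}".format(cfg_name))
net      = module.model()             # every core/models/<name>.py exposes a `model` class

print(type(net))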
