Bootstrap

搭建深度学习框架+nn.Module

一、搭建项目框架(YOLO框架的简约版本)

最终成品的项目结构如下图所示。最终实现的效果是:通过自己编写的框架函数完成模型的定义与参数调配,并且在参数配置时可以选择是否加载已有的模型初始化参数。本项目适合作为认识YOLO框架的入门练习,理解它有助于更好地认识YOLO框架的组织方式。
在这里插入图片描述

二、Net模型的搭建-nn_net.py

网络模型通过继承nn.Module父类进行搭建。本文件位于文件夹net中。

import torch.nn as nn

class Mynet_model(nn.Module):
    """Fully connected classifier with six LeakyReLU hidden layers and a softmax output.

    Layer widths: input_size -> 128 -> 256 -> 512 -> 256 -> 128 -> 64 -> output_size.

    The attribute names (``hiden1`` ... ``hiden6`` and ``out``) are part of the
    state_dict keys, so they are kept as they are to remain compatible with
    weights that were saved earlier.

    Args:
        input_size: number of input features per sample.
        output_size: number of classes (width of the output layer).
    """

    def __init__(self,input_size,output_size):
        super(Mynet_model,self).__init__()

        self.hiden1 = nn.Sequential(nn.Linear(input_size,128),nn.LeakyReLU())
        self.hiden2 = nn.Sequential(nn.Linear(128,256),nn.LeakyReLU())
        self.hiden3 = nn.Sequential(nn.Linear(256,512),nn.LeakyReLU())
        self.hiden4 = nn.Sequential(nn.Linear(512,256),nn.LeakyReLU())
        self.hiden5 = nn.Sequential(nn.Linear(256,128),nn.LeakyReLU())
        self.hiden6 = nn.Sequential(nn.Linear(128,64),nn.LeakyReLU())
        # An explicit dim is required: nn.Softmax() without it emits the
        # "implicit dimension choice" deprecation warning. dim=-1 normalises
        # over the class axis for both (batch, classes) and (classes,) inputs.
        # NOTE(review): the training script uses nn.CrossEntropyLoss, which
        # applies log-softmax itself; returning raw logits here would avoid
        # the double softmax, but that changes the model output, so confirm
        # with the callers before doing it.
        self.out = nn.Sequential(nn.Linear(64,output_size),nn.Softmax(dim=-1))
        self._init_weight()

    def _hidden_layers(self):
        """Return the hidden blocks in forward order."""
        return (self.hiden1,self.hiden2,self.hiden3,
                self.hiden4,self.hiden5,self.hiden6)

    def forward(self,x):
        """Run x through all hidden blocks and return the class probabilities."""
        for layer in self._hidden_layers():
            x = layer(x)
        return self.out(x)

    def _init_weight(self):
        """Kaiming-uniform initialise the Linear weight of every hidden block.

        The output layer keeps PyTorch's default initialisation.
        """
        for layer in self._hidden_layers():
            # Index 0 of each Sequential is the nn.Linear module.
            nn.init.kaiming_uniform_(layer[0].weight,nonlinearity='leaky_relu')

三、优化器框架的搭建-optimizer.py

主要用于深度学习框架中,优化器的选择。本框架位于net文件夹中。

# 创建优化器,按照参数名称进行优化器匹配,只能在这个文件中进行模型的配置
import torch.optim as optim

def optimizer_parents(model,name,lr=1e-3,weight_decay=0.0,betas=(0.9,0.999),eps=1e-8):
    """Create the optimizer selected by ``name`` for the parameters of ``model``.

    Args:
        model: module whose ``parameters()`` are optimised.
        name: ``'SGD'`` or ``'Adam'``.
        lr: learning rate, used by both optimizers.
        weight_decay: L2 penalty, used by both optimizers.
        betas: Adam running-average coefficients (ignored for SGD).
        eps: Adam numerical-stability term (ignored for SGD).

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        NotImplementedError: if ``name`` is not a supported optimizer.
    """
    params = model.parameters()
    if name == 'SGD':
        # Forward the configured hyper-parameters; without them SGD silently
        # ran with library defaults regardless of the configuration.
        return optim.SGD(params,lr=lr,weight_decay=weight_decay)
    if name == 'Adam':
        return optim.Adam(params,lr=lr,weight_decay=weight_decay,betas=betas,eps=eps)
    # Report unsupported optimizer names explicitly.
    raise NotImplementedError('optimizer {} not implemented'.format(name))

if __name__ == '__main__':
    # Manual smoke test: build a small model and create an SGD optimizer for it.
    from nn_net import Mynet_model
    model = Mynet_model(input_size=20,output_size=10)
    # Pass every hyper-parameter explicitly (lr, weight_decay, betas, eps):
    # optimizer_parents declares six parameters, so the two-argument call
    # raised a TypeError.
    optimizer = optimizer_parents(model,'SGD',1e-3,0.0,(0.9,0.999),1e-8)

四、数据加载器-dataloder.py

本框架主要用于读取csv文件,对数据进行处理后返回TensorDataset类型的数据集,便于后续使用DataLoader进行批量划分与读取。本框架位于文件夹data中。

# 创建数据集加载的工具
import torch ,os
from torch.utils.data import TensorDataset,Dataset,DataLoader
import pandas as pd 
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.model_selection import train_test_split
import parameter 

class Mydatasets():
    """Load a CSV file and turn it into standardised train/test TensorDatasets.

    Args:
        path: CSV file to read; the last column is used as the label and all
            other columns as features.
        pt_path: directory in which the scaler statistics (``stander.pth``)
            are stored.
        device: device string handed to ``torch.device`` for the tensors.
    """

    def __init__(self,path,pt_path,device):
        self.data = pd.read_csv(path)
        self.pt_path = pt_path
        self.device = device

    def datasets(self):
        """Split, standardise and tensorise the data.

        Returns:
            ``(train, test)`` as two ``TensorDataset`` objects holding
            float32 features and int64 labels on the configured device.
        """
        values = np.array(self.data)
        features = values[:,:-1]
        labels = values[:,-1]

        # Fixed random_state keeps the split identical between the training
        # and the test script; stratify preserves the class proportions.
        x_train,x_test,y_train,y_test = train_test_split(
            features,labels,test_size=0.2,random_state=42,stratify=labels)

        # Fit the scaler on the training part only, then apply it to both parts.
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        # Persist the scaler statistics for later inference. They are stored
        # as plain Python lists: numpy arrays inside the file make torch.load
        # fail when it runs with weights_only=True (the default in recent
        # PyTorch releases). An existing file is left untouched.
        scaler_path = os.path.join(self.pt_path,"stander.pth")
        if not os.path.exists(scaler_path):
            scaler_stats = {
                "mean":scaler.mean_.tolist(),
                "std":scaler.scale_.tolist(),
            }
            torch.save(scaler_stats,scaler_path)

        device = torch.device(self.device)

        # Convert everything to tensors on the target device.
        x_train = torch.tensor(x_train,dtype=torch.float32).to(device)
        x_test = torch.tensor(x_test,dtype=torch.float32).to(device)
        y_train = torch.tensor(y_train,dtype=torch.int64).to(device)
        y_test = torch.tensor(y_test,dtype=torch.int64).to(device)

        train = TensorDataset(x_train,y_train)
        test = TensorDataset(x_test,y_test)

        return train,test

五、文件路径创建框架-creat_path.py

本框架主要用于在模型训练前创建文件夹路径,用于保存本次训练过程中的相关训练参数以及模型。每次训练时,会在文件夹pt中生成一个新的exp文件夹,并在该exp文件夹内保存模型权重(model_weight)、优化器参数以及其他参数等。

import os 

def _exp_indices(root_dir,name):
    """Return the sorted run numbers N of the ``<name>N`` sub-directories of root_dir."""
    if not os.path.isdir(root_dir):
        return []
    indices = []
    for entry in os.listdir(root_dir):
        suffix = entry[len(name):]
        if (entry.startswith(name) and suffix.isdecimal()
                and os.path.isdir(os.path.join(root_dir,entry))):
            indices.append(int(suffix))
    return sorted(indices)


def creat_path(root_dir="./Torch/mobile_pheno/pt",name='exp_'):
    """Create the next experiment directory ``<root_dir>/<name>N`` and return its path.

    N is one more than the highest existing run number (1 when there is none),
    so an existing run is never reused even if earlier runs were deleted.
    ``root_dir`` is created when it does not exist yet.

    Args:
        root_dir: directory that holds the experiment directories.
        name: prefix of the experiment directory names.

    Returns:
        The path of the newly created directory.
    """
    os.makedirs(root_dir,exist_ok=True)
    indices = _exp_indices(root_dir,name)
    next_index = indices[-1]+1 if indices else 1
    path = root_dir+'/'+f"{name}{next_index}"
    os.makedirs(path)
    return path


def read_path(root_dir="./Torch/mobile_pheno/pt",name='exp_'):
    """Return the path of the experiment directory with the highest run number.

    Args:
        root_dir: directory that holds the experiment directories.
        name: prefix of the experiment directory names.

    Raises:
        FileNotFoundError: if ``root_dir`` contains no experiment directory.
    """
    indices = _exp_indices(root_dir,name)
    if not indices:
        raise FileNotFoundError(
            f"no '{name}N' experiment directory found in {root_dir}")
    return root_dir+'/'+f"{name}{indices[-1]}"

if __name__ == '__main__':
    # Running this file directly creates one experiment directory.
    creat_path()

六、模型相关参数配置-parameter.py

# 模型的相关参数配置文件
import os 
import creat_path as cp

# 对训练文件模型中相关参数进行配置
def train_parameter():
    """Build the configuration dictionary used by the training script.

    Calling this function has a side effect: ``cp.creat_path()`` is invoked,
    and the path it returns is stored under ``"root_path"``.

    Returns:
        dict with the device, the optimizer settings, the number of epochs
        and the paths of the initial model/optimizer weight files.
    """
    return dict(
        root_path=cp.creat_path(),
        # Device the model runs on.
        device="cpu",
        # Optimizer name and its hyper-parameters.
        optimizer_name="Adam",
        learning_rate=1e-4,
        weight_decay=0.01,
        betas=(0.9, 0.999),
        eps=1e-8,
        # Number of training epochs.
        epochs=50,
        # Initial model / optimizer weights used to continue training.
        model_weight_file_path='./Torch/mobile_pheno/model/model_weight.yaml',
        optimizer_weight_file_path='./Torch/mobile_pheno/model/optimizer_weight.yaml',
    )

# 测试框架模型相关调优参数
def test_parameter():
    """Build the configuration dictionary used by the test and inference scripts.

    All weight-file paths point into the directory returned by
    ``cp.read_path()``.

    Returns:
        dict with the device, the run directory and the paths of the model,
        optimizer and auxiliary weight files inside that directory.
    """
    run_dir = cp.read_path()

    def in_run_dir(filename):
        # Every artefact of a run lives directly inside its directory.
        return os.path.join(run_dir, filename)

    return {
        "device": "cpu",
        "model_weight_file_path": in_run_dir("model_weight.yaml"),
        "root_path": run_dir,
        "optimizer_weight_file_path": in_run_dir("optimizer_weight.yaml"),
        "other_weight_file_path": in_run_dir("other_weight.yaml"),
    }

七、训练框架-train.py

from data.dataloder import Mydatasets
from net.nn_net import Mynet_model
from net.optimizer import optimizer_parents
import torch 
import torch.nn as nn
import os 
import time 
import parameter 
from torch.utils.data import DataLoader

def train(file_path):
    """Train ``Mynet_model`` on the CSV at ``file_path`` and save the results.

    The configuration comes from ``parameter.train_parameter()``. When the
    configured initial model/optimizer weight files exist they are loaded
    first, so training continues from them. After training, the model
    weights, the optimizer state and the auxiliary values (epochs,
    input_size, output_size) are written into the run directory
    (``root_path``).

    Args:
        file_path: path of the CSV data set.
    """
    # Configuration and data.
    argument = parameter.train_parameter()
    root_path = argument['root_path']
    device = torch.device(argument['device'])

    train_set,_ = Mydatasets(file_path,root_path,argument['device']).datasets()
    features,labels = train_set[:]
    input_size = features.shape[1]
    # One output unit per distinct label value in the training split.
    output_size = torch.unique(labels).shape[0]

    train_loader = DataLoader(train_set,batch_size=16,shuffle=True)

    model = Mynet_model(input_size,output_size).to(device)

    # Continue from existing model weights when they are available.
    # map_location lets weights saved on another device load on this one.
    if os.path.exists(argument['model_weight_file_path']):
        model_weight = torch.load(argument['model_weight_file_path'],map_location=device)
        model.load_state_dict(model_weight["model_weight"])

    loss_func = nn.CrossEntropyLoss()

    # Optimizer, optionally restored from a saved state.
    optimizer = optimizer_parents(model,argument['optimizer_name'],argument['learning_rate'],argument['weight_decay'],argument['betas'],argument['eps'])
    if os.path.exists(argument['optimizer_weight_file_path']):
        optimizer_weight = torch.load(argument['optimizer_weight_file_path'],map_location=device)
        optimizer.load_state_dict(optimizer_weight['optimizer_weight'])

    model.train()
    start_time = time.time()
    epochs = argument['epochs']
    for epoch in range(epochs):
        batch_count = 0
        loss_total = 0.0
        for x,y in train_loader:
            y_pred = model(x)
            loss_value = loss_func(y_pred,y)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            batch_count += 1
            # .item() stores a plain float; summing the loss tensors would
            # keep autograd objects alive and print a tensor repr.
            loss_total += loss_value.item()
        print(f"epoch:{epoch},loss:{loss_total/batch_count}")
    end_time = time.time()
    print(f"time:{end_time-start_time}")

    # Save the model weights of this run into the run directory.
    model_weight_parents = {
        "model_weight":model.state_dict(),
    }
    model_weight_parents_path = os.path.join(root_path,'model_weight.yaml')
    torch.save(model_weight_parents,model_weight_parents_path)

    # Save the optimizer state of this run.
    optimizer_weight_parents = {
        "optimizer_weight":optimizer.state_dict(),
    }
    optimizer_weight_parents_path = os.path.join(root_path,'optimizer_weight.yaml')
    torch.save(optimizer_weight_parents,optimizer_weight_parents_path)

    # Save the values needed to rebuild the model for testing/inference.
    other_parents = {
        "epoch":epochs,
        "input_size" : input_size,
        "output_size" : output_size,
    }
    other_parents_path = os.path.join(root_path,'other_weight.yaml')
    torch.save(other_parents,other_parents_path)

if __name__ == '__main__':
    # CSV data set used for training.
    path = "./Torch/mobile_pheno/data/手机价格预测.csv"
    train(path)

八、测试框架-test.py

import torch 
import torch.nn as nn
from net.nn_net import Mynet_model
from data.dataloder import Mydatasets
import creat_path
import parameter
from torch.utils.data import DataLoader

def test(path):
    """Evaluate the saved model on the test split and return its accuracy.

    The configuration comes from ``parameter.test_parameter()``; the model
    is rebuilt from the saved ``input_size``/``output_size`` values and the
    saved weights.

    Args:
        path: path of the CSV data set.

    Returns:
        The fraction of correctly classified test samples as a Python float.
    """
    argument = parameter.test_parameter()
    root_path = argument['root_path']
    device = argument['device']
    _,test_set = Mydatasets(path,root_path,device).datasets()

    number = len(test_set)
    test_loader = DataLoader(test_set,batch_size=8,shuffle=False)

    # Read the values needed to rebuild the network.
    other_weight = torch.load(argument['other_weight_file_path'],map_location=device)
    input_size,output_size = other_weight['input_size'],other_weight['output_size']

    model_weight = torch.load(argument['model_weight_file_path'],map_location=device)
    model = Mynet_model(input_size,output_size).to(device)
    model.load_state_dict(model_weight['model_weight'])

    # Evaluation mode and no gradient tracking: nothing is trained here.
    model.eval()
    correct = 0
    with torch.no_grad():
        for x,y in test_loader:
            predicted = torch.argmax(model(x),dim=1)
            correct += (predicted==y).sum().item()

    accuracy = correct/number
    print(f"正确率:{accuracy}")
    return accuracy

if __name__ == '__main__':
    # CSV data set used for evaluation.
    path = "./Torch/mobile_pheno/data/手机价格预测.csv"
    test(path)

九、推理框架

# 推理验证模型 
import torch,os
import torch.nn as nn
import pandas as pd 
from sklearn.preprocessing import StandardScaler
import parameter
from torch.utils.data import DataLoader,TensorDataset
from net.nn_net import Mynet_model
import numpy as np

def data_sets(path):
    """Load the inference CSV and standardise it with the saved training statistics.

    Args:
        path: path of the CSV file to run inference on.

    Returns:
        A float32 tensor on the configured device, one row per sample.

    Raises:
        ValueError: if the number of feature columns does not match the
            saved statistics.
    """
    argument = parameter.test_parameter()
    device = argument['device']

    data = pd.read_csv(path)
    # The first column is dropped — presumably an id column; TODO confirm
    # against the inference CSV.
    data = np.array(data)[:,1:]
    print(data.dtype)
    print(data.shape)
    print('**************************')

    # Load the statistics written by the data loader during training.
    stander_file_path = os.path.join(argument['root_path'],'stander.pth')
    # weights_only=False: the file may hold numpy arrays, which the
    # restricted unpickler refuses. The file is produced by this project;
    # do not point this at untrusted files (full unpickling can run code).
    stander_mean_std = torch.load(stander_file_path,weights_only=False)

    mean = np.asarray(stander_mean_std['mean'],dtype=np.float64)
    std = np.asarray(stander_mean_std['std'],dtype=np.float64)
    if data.shape[1] != mean.shape[0]:
        raise ValueError(
            f"expected {mean.shape[0]} feature columns, got {data.shape[1]}")

    # Same transform a fitted StandardScaler applies: (x - mean) / scale.
    # Doing it directly avoids relying on a scaler that was never fitted.
    data = (data.astype(np.float64) - mean) / std

    data = torch.tensor(data,dtype=torch.float32).to(device)
    return data

def detect(path):
    """Run the saved model on the CSV at ``path`` and print the predicted classes.

    Args:
        path: path of the CSV file to run inference on.

    Returns:
        A tensor holding the predicted class index of every row.
    """
    argument = parameter.test_parameter()
    data = data_sets(path)

    device = argument['device']
    model_weight = torch.load(argument['model_weight_file_path'],map_location=device)
    other_weight = torch.load(argument['other_weight_file_path'],map_location=device)

    model = Mynet_model(other_weight['input_size'],other_weight['output_size']).to(device)
    model.load_state_dict(model_weight['model_weight'])

    # Inference only: evaluation mode and no gradient tracking.
    model.eval()
    with torch.no_grad():
        y_pred = torch.argmax(model(data),dim=1)
    print(y_pred)
    return y_pred


if __name__ == "__main__":
    # CSV file with the samples to run inference on.
    path = './Torch/mobile_pheno/data/detect_test.csv'
    detect(path)

十、main函数-save_best_model.py

# 保存验证效果最好的模型数据参数
import train
import test
import torch

def bset_model():
    """Train once, evaluate, and store the trained weights as the best model.

    The weights are copied to ``./Torch/mobile_pheno/model/best_model.yaml``
    when the accuracy is above the threshold (currently 0).

    Returns:
        The accuracy reported by ``test.test``.
    """
    # Local imports: this script's import block does not provide them.
    import os
    import parameter

    best_so_far = 0
    path = "./Torch/mobile_pheno/data/手机价格预测.csv"
    train.train(path)
    result = test.test(path)
    print(result)
    if result > best_so_far:
        # train.train writes its weights into the run directory, not into
        # ./Torch/mobile_pheno/model, so take the weights that were just
        # evaluated from the test configuration.
        trained_weight_path = parameter.test_parameter()['model_weight_file_path']
        model_parents = torch.load(trained_weight_path)
        best_model_path = './Torch/mobile_pheno/model/best_model.yaml'
        os.makedirs(os.path.dirname(best_model_path),exist_ok=True)
        torch.save(model_parents,best_model_path)
    return result
    
if __name__ == '__main__':
    # Train, evaluate and store the best model in one go.
    bset_model()

十一、数据来源

本模型实验数据来源为https://tianchi.aliyun.com/dataset/157241,手机价格预测数据。

;