写在前面
学习是一个递进的过程:首先知道有这么一个东西,其次知道这个东西能做什么,然后知道这个东西怎么实现,再弄清这个东西的原理是什么,最后发现这个东西的改进点。
深度学习的认知
对深度学习的认知目前还比较肤浅。以计算机视觉为例,深度学习主要有三大任务:分类、检测、分割。下面简单描述这三大任务的要义:图像分类主要回答一张图片是什么的问题;目标检测不仅需要回答图像中有什么,还要给出这些物体在图像中的具体位置;图像分割则是在检测的基础上,将检测到的物体以像素级的精度分割开来。
可以发现,这三个任务是依次递进的,一些计算机视觉的项目就建立在这些基础上,同时引申出其他任务。对这三个任务进一步理解,其实就是把图片(数据集)送入到神经网络中,得到一些数字,再对这些数字进行后处理得到想要的结果。而图片本质上就是矩阵(张量),或者说是数字,那么可以进一步理解成,对数字进行加权加减(模型)得到新的数字。深度学习说到底就是这样的一件事。
深度学习的实现
首先,代码能力很重要,有好的idea,没有好的代码能力,那么这个idea就是一个无效的idea。如果有好的代码能力,就有了验证许多idea的可能。
那么怎么提高代码能力,以深度学习为例。首先有一套专门的模板,加载数据集、定义网络结构、定义损失优化器、定义迭代。每一个大步骤又分为很多小步骤。
需要有一个全局的视角,宏观把控整个工程,但面对实际的问题,同时要学会把问题细化、拆分。一篇论文可以分为好几个章节,每个章节先写上几句话,这样像搭积木一样,就可以逐渐把它拼接起来。代码也是如此,先把要写的类定义出来,然后一点点补充完善。想象力在编程中很重要,脑海中要有一个大概的轮廓:代码是怎么跑的,每个模块是负责什么的,都要有一个概念,而不是空空的。
在学习的过程中,要给自己布置任务,同时也要把握重点突破,先把握宏观,然后切分细节,然后重点突破。
Pytorch代码的实现
1.一个简单的线性模型实现
import numpy as np
import matplotlib.pyplot as plt
# A simple linear model: given inputs x and targets y, sweep candidate
# weights w and look for the one that yields the smallest loss.
# Dataset definition (each target below is twice its input).
x=[1,2,3]
y=[2,4,6]
# Forward pass and loss definitions.
def forward(x):
    """Return the linear model's prediction ``x * w``.

    The weight is not a parameter: it is read from the module-level name
    ``w``, which the sweep loop of this script rebinds for every candidate.
    """
    prediction = x * w
    return prediction
def loss(x,y):
    """Return the squared error between target ``y`` and ``forward(x)``."""
    residual = y - forward(x)
    return residual * residual
# Sweep candidate weights and record the mean squared error of each one.
w_list = []
mse_list = []
for w in np.arange(0, 4.1, 0.1):
    # forward() reads the module-level ``w`` that this loop binds.
    print("权重w : " + str(w))
    l_sum = sum(loss(x_val, y_val) for x_val, y_val in zip(x, y))
    print("损失loss : " + str(l_sum))
    w_list.append(w)
    # Divide by the actual sample count rather than a hard-coded 3 so the
    # mean stays correct if the dataset above is changed.
    mse_list.append(l_sum / len(x))

# Plot the mean squared error as a function of the weight.
plt.plot(w_list, mse_list)
plt.xlabel("w")
plt.ylabel("mse")
plt.show()
2.梯度下降,根据损失loss得到权重w
import numpy as np
import matplotlib.pyplot as plt
# Gradient descent: use the loss to update the weight w.
# define dataset
x_data =[1.0,2.0,3.0]
y_data =[2.0,4.0,6.0]
w=1.0  # initial weight
lr =0.01  # learning rate read by train()
# define forward
def forward(x):
    """Return the prediction ``x * w`` using the module-level weight ``w``."""
    y_hat = x * w
    return y_hat
# define loss
def loss(x,y):
    """Return the squared error of ``forward(x)`` against the target ``y``."""
    error = y - forward(x)
    return error * error
def train(x,y,w):
    """Return ``w`` after one gradient-descent step on the sample ``(x, y)``.

    The step ``lr * 2 * (y - x * w) * x`` is the negative derivative of the
    squared error ``(y - x * w) ** 2`` with respect to ``w``, scaled by the
    module-level learning rate ``lr``.
    """
    residual = y - x * w
    step = lr * 2 * residual * x
    return w + step
# Per-sample gradient descent; records the mean loss of every epoch.
cost_list = []
epoch_list = []
for epoch in range(30):
    print("epoch" + str(epoch))
    loss_sum = 0
    for x_val, y_val in zip(x_data, y_data):
        # The loss is measured with the weight from before this sample's update.
        loss_sum += loss(x_val, y_val)
        w = train(x_val, y_val, w)
        print(x_val, y_val, w, loss_sum)
    # Divide by the actual sample count rather than a hard-coded 3.
    cost_list.append(loss_sum / len(x_data))
    epoch_list.append(epoch)

# Plot the mean loss per epoch, save the figure and display it.
plt.plot(epoch_list, cost_list)
plt.xlabel("epoch")
plt.ylabel("mse")
plt.savefig("test01.png")
plt.show()
3.用torch 实现梯度下降
import torch
import matplotlib.pyplot as plt
# Gradient descent implemented with torch autograd.
x=[1.0,2.0,4.0]
y=[2.0,4.0,8.0]
w=torch.Tensor([1])  # one-element weight tensor, initial value 1
w.requires_grad=True  # have autograd compute gradients with respect to w
lr=0.03  # learning rate
def forward(x):
    """Return ``x * w`` where ``w`` is the module-level weight tensor."""
    scaled = x * w
    return scaled
def loss(x, y):
    """Return the squared error ``(forward(x) - y) ** 2``."""
    error = forward(x) - y
    return error ** 2
# Per-sample gradient descent driven by autograd.
for epoch in range(10):
    print("epoch: " + str(epoch))
    loss_sum = 0.0
    for x_val, y_val in zip(x, y):
        sample_loss = loss(x_val, y_val)
        sample_loss.backward()
        # Accumulate a plain float: adding the tensor itself would keep every
        # sample's autograd graph attached to the running total.
        loss_sum += sample_loss.item()
        # Apply the step outside of autograd, using the declared learning
        # rate ``lr`` (the update previously hard-coded 0.01 and ignored it).
        with torch.no_grad():
            w -= lr * w.grad
        # backward() accumulates into w.grad, so clear it after every step.
        w.grad.zero_()
    print(loss_sum)
    print(w)
# To log or plot a tensor value, convert it to a Python number with .item(),
# as done for the running loss above.
# Copy-pasting code teaches nothing and retyping it line by line is
# inefficient; only writing it yourself really helps.
4.用torch定义神经网络
import torch
import matplotlib.pyplot as plt
# Define a neural network with torch.
# Load the dataset: it needs both inputs and targets so the model can be trained.
x_data =torch.tensor([[1.0],[2.0],[3.0]])
y_data =torch.tensor([[2.0],[4.0],[6.0]])
# Model definition. Keep this layout in mind (subclass Module, build layers in
# __init__, compute in forward): the later models follow the same template.
class LinearModel(torch.nn.Module):
    """Linear regression model built from a single ``Linear(1, 1)`` layer."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        return self.linear(x)
# Build the model, the loss and the optimizer; all three are provided by torch.
model = LinearModel()
# reduction='sum' is the supported spelling of the deprecated
# size_average=False (the loss is still summed over the batch).
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# Collected for plotting.
loss_list = []
epoch_list = []
# Number of training iterations.
for epoch in range(30):
    # Forward pass: essentially y = f(x).
    y_pred = model(x_data)
    # The loss is what the backward pass differentiates.
    loss = criterion(y_pred, y_data)
    # Three steps whose order matters: zero the gradients, backpropagate,
    # let the optimizer update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Log the scalar value rather than the tensor repr.
    print("epoch: " + str(epoch) + "  loss: " + str(loss.item()))
    loss_list.append(loss.item())
    epoch_list.append(epoch)

plt.plot(epoch_list, loss_list)
plt.xlabel("epoch")
plt.ylabel("mse")
plt.show()
# Training finished: print the learned weight and bias.
print("w= " + str(model.linear.weight.item()))
print("b= " + str(model.linear.bias.item()))
# Predict for an unseen input (plays the role of a test set).
x_test = torch.tensor([[4.0]])
# Call the module itself instead of .forward() so registered hooks still run.
y_test = model(x_test)
print("y_test= ", y_test)
5.不是线性回归而是逻辑回归了
import torch
# Logistic regression instead of linear regression.
# torch.tensor != torch.Tensor: switching the two calls below to torch.tensor raises an error.
# A detailed explanation: https://blog.csdn.net/weixin_44912159/article/details/104776922?utm_medium=distribute.pc_relevant.none-task-blog-2~default~baidujs_baidulandingword~default-0.pc_relevant_default&spm=1001.2101.3001.4242.1&utm_relevant_index=3
# NOTE(review): per the PyTorch docs, torch.Tensor(...) always builds a tensor
# of the default float type, while torch.tensor(...) infers the dtype from the
# data. The integer labels in y_data would therefore become int64, which is
# presumably why the loss rejected them; writing them as 0.0/1.0 or passing
# dtype=torch.float32 should make torch.tensor work as well.
x_data = torch.Tensor([[1.0],[2.0],[3.0]])
y_data = torch.Tensor([[0],[0],[1]])
# Model definition.
class LogisticRegressionModel(torch.nn.Module):
    """Logistic regression: one ``Linear(1, 1)`` layer followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))``."""
        # Sigmoid is one kind of activation function; an activation makes the
        # otherwise linear model non-linear so that it can fit the data better.
        logits = self.linear(x)
        return torch.sigmoid(logits)
# Build the network, the loss and the optimizer.
model = LogisticRegressionModel()
# reduction='sum' is the supported spelling of the deprecated
# size_average=False (the loss is still summed over the batch).
criterion = torch.nn.BCELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.03)
# Training iterations.
for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    # Log the scalar value rather than the tensor repr.
    print("epoch: " + str(epoch) + "  loss: " + str(loss.item()))
    # Zero the gradients, backpropagate, update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Training finished: print the learned weight and bias.
print("w= " + str(model.linear.weight.item()))
print("b= " + str(model.linear.bias.item()))
# Predict for an unseen input (plays the role of a test set).
x_test = torch.tensor([[4.0]])
# Call the module itself instead of .forward() so registered hooks still run.
y_test = model(x_test)
print("y_test= " + str(y_test))
6.Logistic回归
import numpy as np
import torch
xy= np.loadtxt('diabetes.csv.gz',delimiter=',',dtype=np.float32) # delimiter: the field separator
x_data= torch.from_numpy(xy[:,:-1])  # inputs: every column except the last
y_data=torch.from_numpy(xy[:,[-1]])  # targets: the last column; the list index keeps it two-dimensional
# print(x_data)
# print(y_data)  # x_data: inputs, y_data: targets
class Model(torch.nn.Module):
    """Three-layer sigmoid network mapping 8 input features to 1 output."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Pass ``x`` through each linear layer, applying a sigmoid after every one."""
        for layer in (self.linear1, self.linear2, self.linear3):
            x = self.sigmoid(layer(x))
        return x
model = Model()
# reduction='mean' is the supported spelling of the deprecated
# size_average=True (the loss is still averaged over the batch).
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.03)
# Before/after comparison on one sample (disabled):
# x_test=torch.tensor([-0.2,0.4,-0.1,0.3,-0.1,0.02,-0.33,0])
# y_test1=model(x_test)
# Full-batch training: every step uses the whole dataset.
for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print("epoch: " + str(epoch), "loss: " + str(loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# y_test2=model(x_test)
# print(y_test1,y_test2)
7.处理多维特征的输入
import torch
import numpy as np
from torch.utils.data import DataLoader,Dataset
# NOTE: num_workers=2 raised an error on the author's machine, which is why
# the DataLoader below is created with num_workers=0.
class DiabetesDataset(Dataset):
    """Dataset over a comma-separated numeric file whose last column is the target."""

    def __init__(self,filepath):
        """Load ``filepath`` as float32 and split it into inputs and targets."""
        table = np.loadtxt(filepath, delimiter=",", dtype=np.float32)
        self.len = table.shape[0]
        # Every column except the last is an input feature.
        self.x_data = torch.from_numpy(table[:, :-1])
        # The list index keeps the target column two-dimensional.
        self.y_data = torch.from_numpy(table[:, [-1]])

    def __getitem__(self, item):
        """Return the ``(inputs, target)`` pair stored at index ``item``."""
        features = self.x_data[item]
        target = self.y_data[item]
        return features, target

    def __len__(self):
        """Return the number of rows loaded from the file."""
        return self.len
# Build the dataset from the compressed CSV and wrap it in a shuffling
# mini-batch loader (32 samples per batch, loaded in the main process).
dataset=DiabetesDataset("diabetes.csv.gz")
# NOTE(review): "tarin_loader" looks like a typo for "train_loader"; the name
# is kept because the training loop refers to it.
tarin_loader=DataLoader(dataset=dataset,batch_size=32,shuffle=True,num_workers=0)
class Model(torch.nn.Module):
    """Four-layer sigmoid network mapping 8 input features to 1 output."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 2)
        self.linear4 = torch.nn.Linear(2, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Pass ``x`` through each linear layer, applying a sigmoid after every one."""
        stack = (self.linear1, self.linear2, self.linear3, self.linear4)
        for layer in stack:
            x = self.sigmoid(layer(x))
        return x
model = Model()
# reduction='mean' is the supported spelling of the deprecated
# size_average=True (the loss is still averaged over the batch).
# NOTE(review): the full-batch script on the same data uses BCELoss, the usual
# choice for a sigmoid output with 0/1 labels — confirm MSELoss is intended.
criterion = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.03)
if __name__ == '__main__':
    for epoch in range(100):
        for i, (inputs, labels) in enumerate(tarin_loader):
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            print("epoch: " + str(epoch), "i: " + str(i), "loss: " + str(loss.item()))
            # Clear the gradients through the optimizer, which owns the model
            # parameters. The loss module has no parameters, so calling
            # zero_grad() on it left gradients accumulating across batches.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
8.多分类问题
import torch
from torchvision import transforms,datasets
from torch.utils.data import DataLoader,Dataset
# Multi-class classification.
batch_size = 64
# Bind the pipeline to its own name: assigning it to ``transforms`` replaced
# the imported torchvision module, so ``transforms.*`` stopped working after
# this statement.
transform = transforms.Compose([
    transforms.ToTensor(),
    # One-element tuples (note the trailing commas): one mean and one std for
    # the single image channel. ``(0.1307)`` is just a float in parentheses.
    # Presumably the commonly quoted MNIST mean/std — confirm.
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_dataset = datasets.MNIST(root='MNIST/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='MNIST/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
class Module(torch.nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(784, 512)
        self.linear2 = torch.nn.Linear(512, 256)
        self.linear3 = torch.nn.Linear(256, 128)
        self.linear4 = torch.nn.Linear(128, 64)
        self.linear5 = torch.nn.Linear(64, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` into rows of 784 values and return 10 raw scores per row."""
        hidden = x.view(-1, 784)
        for layer in (self.linear1, self.linear2, self.linear3, self.linear4):
            hidden = self.relu(layer(hidden))
        # The last layer is returned without an activation.
        return self.linear5(hidden)
# Instantiate the network, the cross-entropy loss and an SGD optimizer with
# momentum over the network's parameters.
module=Module()
criterion=torch.nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(module.parameters(),lr=0.03,momentum=0.5)
def train(epoch):
    """Run one pass over ``train_loader``, updating ``module`` batch by batch.

    The average loss of every 300 consecutive batches is printed; ``epoch``
    is only used in that log line.
    """
    loss_window = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        optimizer.zero_grad()
        batch_loss = criterion(module(inputs), target)
        batch_loss.backward()
        optimizer.step()
        loss_window += batch_loss.item()
        if (batch_idx + 1) % 300 != 0:
            continue
        print('[%d,%5d] loss: %.3f ' %(epoch+1,batch_idx+1,loss_window/300))
        loss_window = 0.0
def test():
    """Print the percentage of ``test_loader`` samples that ``module`` classifies correctly."""
    hits = 0
    seen = 0
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for inputs, target in test_loader:
            scores = module(inputs)
            # The predicted class is the index of the largest score in each row.
            predicted = torch.max(scores.data, dim=1).indices
            seen += target.size(0)
            hits += (predicted == target).sum().item()
    print('Accuary on test set : ',(100*hits/seen))
# Script entry point: train for 10 epochs and evaluate after each one.
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
9.卷积神经网络
import torch
from torchvision import transforms,datasets
from torch.utils.data import DataLoader,Dataset
batch_size = 64
# Convert images to tensors, then normalise the single channel with the given
# mean and standard deviation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_dataset = datasets.MNIST('MNIST/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
# train=False selects the held-out test split. With train=True the reported
# "test" accuracy was measured on the very data the model was trained on.
test_dataset = datasets.MNIST('MNIST/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
class Module(torch.nn.Module):
    """Small CNN: two conv -> max-pool -> ReLU stages followed by a linear classifier."""

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.relu = torch.nn.ReLU()
        # 320 input features: 20 channels of 4x4 for a 1x28x28 input.
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Return 10 raw class scores for every image in the batch ``x``."""
        n_samples = x.size(0)
        for conv in (self.conv1, self.conv2):
            x = self.relu(self.pooling(conv(x)))
        # Flatten each sample's feature maps into one row for the classifier.
        flat = x.view(n_samples, -1)
        return self.fc(flat)
# Instantiate the network and move it to the first CUDA device when one is
# available, otherwise keep it on the CPU.
model=Module()
device=torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
model.to(device)
# Cross-entropy loss and SGD with momentum over the model's parameters.
criterion=torch.nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(model.parameters(),lr=0.03,momentum=0.5)
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` batch by batch.

    Every batch is moved to ``device`` first. The average loss of every 300
    consecutive batches is printed; ``epoch`` is only used in that log line.
    """
    loss_window = 0.0
    for batch_index, (inputs, target) in enumerate(train_loader):
        inputs = inputs.to(device)
        target = target.to(device)
        prediction = model(inputs)
        optimizer.zero_grad()
        batch_loss = criterion(prediction, target)
        batch_loss.backward()
        optimizer.step()
        loss_window += batch_loss.item()
        if (batch_index + 1) % 300 != 0:
            continue
        print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_index + 1, loss_window / 300))
        loss_window = 0.0
def test():
    """Print the percentage of ``test_loader`` samples that ``model`` classifies correctly."""
    hits = 0
    seen = 0
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for inputs, target in test_loader:
            inputs = inputs.to(device)
            target = target.to(device)
            scores = model(inputs)
            # The predicted class is the index of the largest score in each row.
            prediction = torch.max(scores.data, dim=1).indices
            seen += target.size(0)
            hits += (target == prediction).sum().item()
    # Number of correct predictions divided by the total, as a percentage.
    print('Accuracy on test set:%d %%' % (100 * hits / seen))
# Script entry point: train for 10 epochs and evaluate after each one.
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
10.卷积神经网络2
import torch
from torch.utils.data import DataLoader,Dataset
from torchvision import transforms,datasets
# import torch.nn.functional as F
# Two bugs hit while writing this from scratch:
#   1. Leaving out `padding` on the convolutions, which raises a size-mismatch error.
#   2. (The original note is cut off here; presumably it concerns torch.nn
#      versus torch.nn.functional, which the explanation below covers.)
# Explanation found online: https://blog.csdn.net/weixin_44682222/article/details/108533798?utm_medium=distribute.pc_relevant.none-task-blog-2~default~baidujs_title~default-0.pc_relevant_default&spm=1001.2101.3001.4242.1&utm_relevant_index=3
# 1. In a layer class (a subclass of nn.Module), __init__ should create
#    torch.nn objects (e.g. torch.nn.ReLU, torch.nn.Dropout2d), while forward
#    may call torch.nn.functional (e.g. torch.nn.functional.relu). Layers with
#    learnable parameters must be torch.nn modules created in __init__ so that
#    their parameters are registered and trained; torch.nn.functional calls
#    hold no state, so any weights have to be passed in by hand.
# 2. torch.nn entries are classes: instantiate them in __init__, then call the
#    instance in forward. torch.nn.functional functions can be called directly
#    in forward. For example, torch.nn.ReLU requires
#    self.relu = torch.nn.ReLU() in __init__.
batch_size = 64
# Bind the pipeline to its own name: assigning it to ``transforms`` replaced
# the imported torchvision module, so ``transforms.*`` stopped working after
# this statement.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_dataset = datasets.MNIST('MNIST/', train=True, download=True, transform=transform)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST('MNIST/', train=False, download=True, transform=transform)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
class InceptionA(torch.nn.Module):
    """Inception-style block with four parallel branches joined along the channel axis.

    The branches produce 16, 24, 24 and 24 channels, so the output has 88
    channels. Every convolution is padded to keep the spatial size unchanged.
    """

    def __init__(self,in_channels):
        super().__init__()
        # 1x1 branch.
        self.branch1_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        # 1x1 followed by 5x5.
        self.branch5_5_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5_5_2 = torch.nn.Conv2d(16, 24, kernel_size=5, padding=2)
        # 1x1 followed by two 3x3 convolutions.
        self.branch3_3_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3_3_2 = torch.nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3_3_3 = torch.nn.Conv2d(24, 24, kernel_size=3, padding=1)
        # 1x1 convolution applied after the average pooling done in forward().
        self.pooling = torch.nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self,x):
        """Run all four branches on ``x`` and concatenate their outputs on dim 1."""
        out_1x1 = self.branch1_1(x)
        out_5x5 = self.branch5_5_2(self.branch5_5_1(x))
        out_3x3 = self.branch3_3_3(self.branch3_3_2(self.branch3_3_1(x)))
        pooled = torch.nn.functional.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        out_pool = self.pooling(pooled)
        return torch.cat([out_1x1, out_5x5, out_3x3, out_pool], dim=1)
class Net(torch.nn.Module):
    """CNN that follows each conv -> max-pool -> ReLU stage with an InceptionA block."""

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        # 88 input channels: the channel count of an InceptionA output.
        self.conv2 = torch.nn.Conv2d(88, 20, kernel_size=5)
        self.incep1 = InceptionA(in_channels=10)
        self.incep2 = InceptionA(in_channels=20)
        self.mp = torch.nn.MaxPool2d(2)
        # 1408 input features: 88 channels of 4x4 for a 1x28x28 input.
        self.fc = torch.nn.Linear(1408, 10)

    def forward(self,x):
        """Return 10 raw class scores for every image in the batch ``x``."""
        relu = torch.nn.functional.relu
        n_samples = x.size(0)
        x = self.incep1(relu(self.mp(self.conv1(x))))
        x = self.incep2(relu(self.mp(self.conv2(x))))
        # Flatten each sample's feature maps into one row for the classifier.
        flat = x.view(n_samples, -1)
        return self.fc(flat)
# Instantiate the network and move it to the first CUDA device when one is
# available, otherwise keep it on the CPU.
model=Net()
device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
# Cross-entropy loss and SGD with momentum over the model's parameters.
criterion=torch.nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(model.parameters(),lr=0.01,momentum=0.5)
def train(epoch):
    """Run one pass over ``train_dataloader``, updating ``model`` batch by batch.

    Every batch is moved to ``device`` first. The average loss of every 300
    consecutive batches is printed; ``epoch`` is only used in that log line.
    """
    loss_window = 0.0
    for batch_index, (inputs, target) in enumerate(train_dataloader):
        inputs = inputs.to(device)
        target = target.to(device)
        batch_loss = criterion(model(inputs), target)
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        loss_window += batch_loss.item()
        if (batch_index + 1) % 300 != 0:
            continue
        print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_index+ 1, loss_window / 300))
        loss_window = 0.0
def test():
    """Print the percentage of ``test_dataloader`` samples that ``model`` classifies correctly."""
    hits = 0
    seen = 0
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for images, label in test_dataloader:
            images = images.to(device)
            label = label.to(device)
            scores = model(images)
            # The predicted class is the index of the largest score in each row.
            predicted = torch.max(scores.data, dim=1).indices
            seen += label.size(0)
            hits += (predicted == label).sum().item()
    print('accuracy on test set: %d %%' %(100*hits/seen))
# Script entry point: train for 10 epochs and evaluate after each one.
if __name__ == "__main__":
    for epoch in range(10):
        train(epoch)
        test()
11.残差模块
import torch
from torch.utils.data import DataLoader,Dataset
from torchvision import transforms,datasets
import torch.nn.functional as F
# Once a network is assembled, you should also be able to work out the sizes
# of its parameters and feature maps.
class ResidualBlock(torch.nn.Module):
    """Residual block: two padded 3x3 convolutions plus a skip connection.

    The channel count and the spatial size are preserved, so the input can
    be added to the convolution output.
    """

    def __init__(self,channels):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = torch.nn.Conv2d(channels, channels, kernel_size=3, padding=1)

    def forward(self,x):
        """Return ``relu(x + conv2(relu(conv1(x))))``."""
        residual = self.conv2(F.relu(self.conv1(x)))
        # The skip connection is added before the final activation.
        return F.relu(x + residual)
class Net(torch.nn.Module):
    """CNN that follows each conv -> ReLU -> max-pool stage with a ResidualBlock."""

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.rblock1 = ResidualBlock(16)
        self.rblock2 = ResidualBlock(32)
        # The classifier must be a Linear layer mapping the 512 flattened
        # features (32 channels of 4x4 for a 1x28x28 input) to 10 scores.
        # torch.nn.Flatten(512, 10) only sets start/end dims for flattening:
        # it has no weights and cannot produce class scores.
        self.fc = torch.nn.Linear(512, 10)

    def forward(self,x):
        """Return 10 raw class scores for every image in the batch ``x``."""
        in_size = x.size(0)
        x = self.pooling(F.relu(self.conv1(x)))
        x = self.rblock1(x)
        x = self.pooling(F.relu(self.conv2(x)))
        x = self.rblock2(x)
        # Flatten each sample's feature maps into one row for the classifier.
        x = x.view(in_size, -1)
        return self.fc(x)