2024-11-21 学习人工智能的Day29 初见深度学习

深度学习前置

Pytorch

Pytorch是基于python的深度学习框架，被广泛应用在计算机视觉、自然语言处理、语音识别等领域。PyTorch提供了许多高级功能，如**自动微分（automatic differentiation）、自动求导（automatic gradients）**等，这些功能可以帮助我们更好地理解模型的训练过程，并提高模型训练效率。

除了Pytorch，还有其他很多的框架，比如耳熟能详的TensorFlow（由Google开发）和PaddlePaddle（由百度推出），这两个都是非常常用的开发框架。而这里选择使用Pytorch，是因为Pytorch在教育方面的使用非常广泛。

然后就是安装。安装方式会根据是否有NVIDIA显卡来区分：如果是A卡或者I卡，适配可能没有那么直接，需要使用其他方法才能够调用GPU；而N卡是直接适配Pytorch的，只需要下载对应cuda版本的torch和配套的工具包就行了。

网络上有很多的方法，例如从安装cuda到cudnn最后到pytorch，但实际现在主流的带有独立显卡的笔记本或者电脑都会安装这两个东西，可以在cmd控制台中通过

nvidia-smi

进行cuda版本的查看（nvidia-smi显示的是当前驱动支持的最高cuda版本），再去确认此cuda版本能够兼容的torch，最后在创建的虚拟环境中安装对应的torch。而众所周知，目前国内能用的安装源只有阿里的源了，所以贴出下面的链接，给未能成功下载GPU版本torch的同学指条路。

pytorch GPU国内镜像下载

这里唯一需要注意的是先确定自己的cuda版本，下载支持该版本的torch：在下载指令最后的cu118那里，将118改为自己支持的版本，例如cu121这种。

Tensor的概述

像之前学的OpenCV一样，torch也有指定的操作数据格式。之前在OpenCV中，大多数数据都是被转为ndarray来进行处理，而这里的torch则是会将数据封装成张量（Tensor）进行计算。所谓张量就是元素为相同类型的多维矩阵，张量是可以在GPU上加速运行的，说人话就是GPU运算比CPU快，这也是为什么要安装GPU版的原因。

Pytorch中有三种数据类型：浮点数、整数、布尔。其中，浮点数和整数又分为了8位、16位、32位、64位。

Tensor的创建

最基本的方式

import torch
import numpy as np

# torch.tensor
def test01():
    """Create tensors with ``torch.tensor`` from a scalar, a NumPy array and a list.

    Each tensor is printed together with its shape and its device or dtype.
    """
    # 1. A Python scalar produces a 0-dimensional tensor.
    scalar = torch.tensor(5)
    print(scalar, scalar.shape)

    # 2. From a NumPy array. ``np.random.randint(3, 4)`` without a size returns
    #    the single integer 3 instead of an array, so request a real 3x4 array.
    array = np.random.randint(0, 10, size=(3, 4))
    from_array = torch.tensor(array)
    print(from_array, from_array.shape, from_array.device)

    # 3. From a nested Python list.
    rows = [[1, 2, 3, 4], [5, 6, 7, 8]]
    from_list = torch.tensor(rows)
    print(from_list, from_list.shape, from_list.dtype)
	 

# torch.Tensor
def test002():
    """Build tensors through the ``torch.Tensor`` constructor and print them.

    The constructor is called once with sizes only and twice with explicit
    values; the value-based tensors are printed with their shape and dtype.
    """
    # Sizes only: a 2x3 tensor is allocated, no values are supplied.
    by_shape = torch.Tensor(2, 3)
    print(by_shape)

    # Explicit values: a nested list, then a single-element list.
    for values in ([[1, 2, 3], [4, 5, 6]], [10]):
        from_values = torch.Tensor(values)
        print(from_values, from_values.shape, from_values.dtype)
    
    
# torch.IntTensor
def test003():
    """Create tensors with the typed constructors and print their dtypes.

    Only sizes are passed, so each call allocates a tensor of that shape
    without supplying values.
    """
    # The int tensor is printed alone, without its dtype.
    int_tensor = torch.IntTensor(2, 3)
    print(int_tensor)

    # The remaining typed constructors all build a 3x3 tensor.
    typed_constructors = (
        torch.FloatTensor,
        torch.DoubleTensor,
        torch.LongTensor,
        torch.ShortTensor,
    )
    for make in typed_constructors:
        typed = make(3, 3)
        print(typed, typed.dtype)
    
    
if __name__ == "__main__":
    test01()
   #test002()
   #test003()

创建线性和随机张量

import torch
import numpy as np

# 不用科学计数法打印
torch.set_printoptions(sci_mode=False)


# 创建线性张量
def test004():
    """Print three evenly structured 1-D tensors: arange, linspace and logspace."""
    # Values from 0 up to (not including) 10 with a step of 2.
    stepped = torch.arange(0, 10, 2)
    # 10 points spread evenly over the interval from 3 to 10 (arithmetic).
    evenly_spaced = torch.linspace(3, 10, 10)
    # 5 points from 2**3 to 2**10 with evenly spaced exponents (geometric).
    powers_of_two = torch.logspace(3, 10, 5, base=2)

    for sequence in (stepped, evenly_spaced, powers_of_two):
        print(sequence)


# 随机种子
def test005():
    """Set the global random seed to 123 and print the seed torch reports."""
    torch.manual_seed(123)
    current_seed = torch.initial_seed()
    print(current_seed)
    
    
    
# 随机张量
def test006():
    """Seed the generator with 123 and print tensors drawn from three distributions."""
    # Fix the seed so the printed values are reproducible, then echo it.
    torch.manual_seed(123)
    print(torch.initial_seed())

    # 2x3 tensor from torch.rand.
    uniform = torch.rand(2, 3)
    print(uniform)

    # 2x3 tensor from torch.randn (standard normal).
    standard_normal = torch.randn(2, 3)
    print(standard_normal)

    # 1x4 tensor from a normal distribution with mean 2 and standard
    # deviation 3 (``std`` is the standard deviation, not the variance).
    custom_normal = torch.normal(mean=2, std=3, size=(1, 4))
    print(custom_normal)
    
if __name__ == "__main__":
    test004()
    #test005()
    #test006()

创建01张量

import torch
import numpy as np


# 创建全0张量
def test001():
    """Print an all-zero tensor built by shape and one built from a template."""
    # Directly by shape.
    zeros = torch.zeros(2, 3)
    print(zeros, zeros.dtype)

    # From a template: same shape and dtype as a tensor made from a NumPy array.
    template = np.ones((3, 4))
    print(template)
    zeros_from_template = torch.zeros_like(torch.tensor(template))
    print(zeros_from_template)


# 创建全1张量，其实就是将zeros改为ones
def test002():
    """Print an all-one tensor built by shape and one built from a template."""
    # Directly by shape.
    ones = torch.ones(2, 3)
    print(ones, ones.dtype)

    # From a template: same shape and dtype as a tensor made from a NumPy array.
    template = np.zeros((3, 4))
    print(template)
    ones_from_template = torch.ones_like(torch.tensor(template))
    print(ones_from_template)
    
    
# 比较实用的指定值张量
def test003():
    """Print tensors filled with a chosen constant, by shape and from a template."""
    # Directly by shape, filled with 666.0.
    filled = torch.full((2, 3), 666.0)
    print(filled, filled.dtype)

    # Same shape and dtype as a tensor made from a NumPy array, filled with 999.
    template = torch.tensor(np.zeros((3, 4)))
    filled_from_template = torch.full_like(template, 999)
    print(filled_from_template)
    
# 最后是单位矩阵张量
 def test004():
	data = torch.eye(4)
	print(data)
  
if __name__ == "__main__":
    test001()
    #test002()
    #test003()
    #test004()

张量的属性

import torch

# 获取属性
def test001():
    """Print the dtype, device and shape of a small integer tensor."""
    sample = torch.tensor([1, 2, 3])
    attributes = (sample.dtype, sample.device, sample.shape)
    print(*attributes)

# 切换设备
def test002():
    """Move a tensor to the GPU when CUDA is available and print its device."""
    sample = torch.tensor([1, 2, 3])
    print(sample.dtype, sample.device, sample.shape)

    # Fall back to the CPU when no CUDA device can be used.
    if torch.cuda.is_available():
        target = "cuda"
    else:
        target = "cpu"
    moved = sample.to(target)
    print(moved.device)

#类型转换
def test003():
    """Convert a tensor between dtypes and print the dtype after each step."""
    values = torch.tensor([1, 2, 3])
    print(values.dtype)  # torch.int64

    # 1. Conversion through ``type`` with an explicit target dtype.
    for target in (torch.float32, torch.float16):
        values = values.type(target)
        print(values.dtype)

    # 2. Conversion through the dedicated methods:
    #    float32 -> float16 -> float64 -> int64.
    conversions = (
        torch.Tensor.float,
        torch.Tensor.half,
        torch.Tensor.double,
        torch.Tensor.long,
    )
    for convert in conversions:
        values = convert(values)
        print(values.dtype)
    
    
    
if __name__ == "__main__":
    test001()
    #test002()
    #test003()

Tensor数据转换

tensor和numpy

import torch
import numpy as np

# 张量转numpy 浅拷贝
def test001():
    """Convert a tensor to NumPy with ``numpy()`` and show the memory is shared."""
    source = torch.tensor([[1, 2, 3], [4, 5, 6]])
    as_array = source.numpy()
    print(type(source), type(as_array))

    # Writing through the array is printed as a change in the tensor as well.
    as_array[0, 0] = 100
    print(source, as_array)


# 张量转numpy 深拷贝
def test002():
    """Convert a tensor to an independent NumPy array by copying it."""
    source = torch.tensor([[1, 2, 3], [4, 5, 6]])

    # ``copy()`` detaches the array from the tensor's storage.
    shared_view = source.numpy()
    independent = shared_view.copy()
    print(type(source), type(independent))

    # Only the copy changes; the tensor keeps its original values.
    independent[0, 0] = 100
    print(source, independent)
    
    
# numpy转张量 浅拷贝
def test003():
    """Wrap a NumPy array with ``torch.from_numpy`` and show the memory is shared."""
    source = np.array([[1, 2, 3], [4, 5, 6]])
    wrapped = torch.from_numpy(source)
    print(type(wrapped), type(source))

    # Writing through the tensor is printed as a change in the array as well.
    wrapped[0, 0] = 100
    print(wrapped, source)
    
    
# numpy转张量 深拷贝    
def test004():
    """Build a tensor from a NumPy array with ``torch.tensor``, which copies the data."""
    source = np.array([[1, 2, 3], [4, 5, 6]])
    copied = torch.tensor(source)
    print(type(copied), type(source))

    # Only the tensor changes; the array keeps its original values.
    copied[0, 0] = 100
    print(copied, source)
    
    
    
if __name__ == "__main__":
    test001()
    #test002()
    #test003()
   	#test004()

tensor和图像

import torch
from PIL import Image
from torchvision import transforms

# 图片转Tensor
def test001():
    """Load ``./105429.jpg`` and print it as a tensor."""
    source_path = r"./105429.jpg"
    picture = Image.open(source_path)

    # ToTensor turns the PIL image into a tensor.
    to_tensor = transforms.ToTensor()
    print(to_tensor(picture))


# Tensor转图片
def test002():
    """Turn a random 3x224x224 tensor into an image, display it and save it.

    The image is written to ``./test.jpg``.
    """
    # 1. Random data standing in for an image. ``torch.rand`` keeps every value
    #    in [0, 1); ``torch.randn`` also yields negative values and values
    #    above 1, which are not valid float pixel intensities for ToPILImage.
    img_tensor = torch.rand(3, 224, 224)
    # 2. Build the tensor -> PIL image transform.
    transform = transforms.ToPILImage()
    # 3. Convert to an image and open it in the default viewer.
    img = transform(img_tensor)
    img.show()
    # 4. Save the image.
    img.save("./test.jpg")
    
    
# 图像处理
def test003():
    """Brighten ``./105429.jpg`` by 0.2 per pixel and save it as ``./ok.png``.

    The arithmetic runs on the GPU when CUDA is available, otherwise on the CPU.
    """
    # Path of the image to read.
    imgpath = r"./105429.jpg"
    # Load the image.
    img = Image.open(imgpath)
    # Convert the image to a tensor.
    transform = transforms.ToTensor()
    img_tensor = transform(img)
    # Show the tensor's shape (nothing is removed from the tensor here).
    print(img_tensor.shape)
    # Move the tensor to CUDA when it is available.
    if torch.cuda.is_available():
        img_tensor = img_tensor.cuda()
    print(img_tensor.device)
    # Brighten every pixel, then clamp back into [0, 1]: values above 1.0
    # would otherwise wrap around when converted to 8-bit pixels.
    img_tensor = (img_tensor + 0.2).clamp(0.0, 1.0)

    # Move the tensor back to the CPU and convert it to a PIL image.
    img_tensor = img_tensor.cpu()
    transform = transforms.ToPILImage()
    img = transform(img_tensor)
    # Save the image.
    img.save("./ok.png")
    
    
if __name__ == "__main__":
    test001()
    #test002()
    #test003()

Tensor操作

元素操作

import torch

# 获取元素值
def test001():
    """Print the Python value held by a single-element tensor."""
    single = torch.tensor([18])
    value = single.item()
    print(value)

# 元素值运算
def test002():
    """Demonstrate element-wise arithmetic, first out-of-place and then in-place."""
    data = torch.randint(0, 10, (2, 3))
    print(data)

    # Out-of-place operations return new tensors; ``data`` keeps its values.
    results = (
        data.add(1),
        data.sub(1),
        data.mul(2),
        data.div(3),
        data.pow(2),
    )
    for result in results:
        print(result)

    # In-place variants (trailing underscore) modify the tensor itself. The
    # tensor is converted to float before the in-place division.
    data = data.float()
    data.add_(1).sub_(1).mul_(2).div_(3.0).pow_(2)
    print(data)
    
# 阿达玛积 就是矩阵对应位置的乘法
def test003():
    """Print the Hadamard (element-wise) product of two 2x3 tensors using ``*``."""
    left = torch.tensor([[1, 2, 3], [4, 5, 6]])
    right = torch.tensor([[2, 3, 4], [2, 2, 3]])
    hadamard = left * right
    print(hadamard)


def test004():
    """Print the element-wise product of two 2x3 tensors using ``mul``."""
    left = torch.tensor([[1, 2, 3], [4, 5, 6]])
    right = torch.tensor([[2, 3, 4], [2, 2, 3]])
    product = left.mul(right)
    print(product)


# Tensor相乘
def test005():
    """Print the matrix product of a 2x3 and a 3x2 tensor in three equivalent ways."""
    left = torch.tensor([[1, 2, 3], [4, 5, 6]])
    right = torch.tensor([[3, 2], [2, 3], [5, 3]])

    # The ``@`` operator, ``matmul`` and ``mm`` are each applied to the same pair.
    products = (left @ right, left.matmul(right), left.mm(right))
    for product in products:
        print(product)
        
    
if __name__ == "__main__":
    test001()
    #test002()
    #test003()
    #test004()
    #test005()

索引操作

import torch

# 简单索引
def test001():
    """Demonstrate basic row, column and single-element indexing on a 3x4 tensor."""
    grid = torch.randint(0, 10, (3, 4))
    print(grid)

    # 1. First row.
    first_row = grid[0]
    print("行索引：", first_row)

    # 2. First column.
    first_column = grid[:, 0]
    print("列索引：", first_column)

    # 3. A single element, addressed in two equivalent ways.
    by_pair = grid[0, 0]
    by_chain = grid[0][0]
    print("索引：", by_pair, by_chain)


# 列表索引
def test002():
    """Demonstrate indexing a 3x4 tensor with lists of indices."""
    grid = torch.randint(0, 10, (3, 4))
    print(grid)

    # 1. Paired lists pick the elements at (0, 0), (1, 1) and (2, 1).
    row_ids = [0, 1, 2]
    col_ids = [0, 1, 1]
    print("列表索引：", grid[row_ids, col_ids])

    # 2. A column of row indices combined with a list of column indices:
    #    rows 2 and 1, each restricted to columns 0, 1 and 2.
    row_selector = [[2], [1]]
    col_selector = [0, 1, 2]
    print("行级别列表索引：", grid[row_selector, col_selector])
    

# 布尔索引  多条件索引最好的方式就是将条件分开简化后用单变量表示
def test003():
    """Demonstrate boolean-mask indexing on a 3x4 tensor."""
    grid = torch.randint(0, 10, (3, 4))
    print(grid)

    # 1. Rows whose third element is greater than 3.
    third_column_large = grid[:, 2] > 3
    print(grid[third_column_large])

    # 2. Columns in which the third row's value is greater than 3.
    third_row_large = grid[2] > 3
    print(grid[:, third_row_large])

    # 3. Rows whose second column is even and whose first column exceeds 6.
    second_column_even = grid[:, 1] % 2 == 0
    first_column_large = grid[:, 0] > 6
    print(grid[second_column_even & first_column_large])
    
# 索引赋值   
def test004():
    """Zero the middle columns of a 4x4 identity matrix through slice assignment."""
    matrix = torch.eye(4)
    print(matrix)

    # Every row, all columns except the first and the last.
    middle_columns = slice(1, -1)
    matrix[:, middle_columns] = 0
    print(matrix)
    
    
    
if __name__ == "__main__":
    test001()

张量拼接

import torch

# cat 直接在维度上拼接，类似于numpy中的hstack或者vstack
def test001():
    """Concatenate two 2x3 tensors along dimension 0 and then along dimension 1."""
    upper = torch.tensor([[1, 2, 3], [4, 5, 6]])
    lower = torch.tensor([[7, 8, 9], [10, 11, 12]])

    # dim=0 appends rows, giving
    #   [[ 1,  2,  3], [ 4,  5,  6], [ 7,  8,  9], [10, 11, 12]]
    # dim=1 appends columns, giving
    #   [[ 1,  2,  3,  7,  8,  9], [ 4,  5,  6, 10, 11, 12]]
    for axis in (0, 1):
        joined = torch.cat([upper, lower], dim=axis)
        print(joined)
    
    
# 需要注意的是，除了拼接所在的维度之外，待拼接的张量在其余维度上的大小都必须相同


# torch.stack 将指定维度的数据进行拼接，会导致数据升维
def test002():
    """Stack two 2x3 tensors along a new dimension at positions 0, 1 and 2.

    Unlike ``cat``, ``stack`` inserts a new dimension, so each result has
    three dimensions.
    """
    first = torch.tensor([[1, 2, 3], [4, 5, 6]])
    second = torch.tensor([[7, 8, 9], [10, 11, 12]])

    # dim=0: the two tensors become the two outermost slices
    #   [[[ 1,  2,  3], [ 4,  5,  6]], [[ 7,  8,  9], [10, 11, 12]]]
    # dim=1: matching rows are paired
    #   [[[ 1,  2,  3], [ 7,  8,  9]], [[ 4,  5,  6], [10, 11, 12]]]
    # dim=2: matching elements are paired
    for axis in (0, 1, 2):
        stacked = torch.stack([first, second], dim=axis)
        print(stacked)

if __name__ == "__main__":
    test001()

形状操作

reshape和view

reshape是将张量转换为不同的形状，但是转换后的形状的元素数量与原张量相同。

import torch

# reshape
def test001():
    """Reshape a random 4x3 tensor to 2x2x3 and then to two rows."""
    original = torch.randint(0, 10, (4, 3))
    print(original)

    # 1. An explicit target shape with the same number of elements.
    cube = original.reshape(2, 2, 3)
    print(cube)

    # 2. -1 asks reshape to work out that dimension from the element count.
    two_rows = cube.reshape(2, -1)
    print(two_rows)


if __name__ == "__main__":
    test001()

view进行形状变换的特征：

1、张量在内存中是连续的；

2、返回的是原始张量视图，不重新分配内存，效率更高;

3、如果张量在内存中不连续，view 将无法执行，并抛出错误。

# 对内存连续性进行实验
import torch


def test001():
    """Show that ``view`` needs contiguous memory, and how to recover.

    A transposed tensor is no longer contiguous, so calling ``view`` on it
    raises ``RuntimeError``. The error is reported instead of aborting the
    demo, and the reshape is then done on a contiguous copy.
    """
    tensor = torch.tensor([[1, 2, 3], [4, 5, 6]])
    print("正常情况下的张量：", tensor.is_contiguous())

    # Transposing changes how the tensor is laid out over its memory.
    tensor = tensor.t()
    print("转置操作的张量：", tensor.is_contiguous())
    print(tensor)

    # view on the non-contiguous tensor fails; previously this exception
    # escaped and the final print was never reached.
    try:
        reshaped = tensor.view(2, -1)
    except RuntimeError as err:
        print("view失败：", err)
        # contiguous() makes a contiguous copy that view can work with.
        reshaped = tensor.contiguous().view(2, -1)
    print(reshaped)


if __name__ == "__main__":
    test001()

transpose&permute&flatten

import torch

# transpose的作用是交换张量的两个维度，而它返回的是原张量的视图
def test001():
    """Swap the first two dimensions of a random 3x4x5 tensor with ``transpose``."""
    original = torch.randint(0, 10, (3, 4, 5))
    print(original, original.shape)

    # Exchange dimensions 0 and 1: the shape goes from (3, 4, 5) to (4, 3, 5).
    swapped = original.transpose(0, 1)
    swapped_shape = swapped.shape
    print(swapped, swapped_shape)


# permute 用于改变张量的所有维度顺序。与 transpose 类似，但它可以交换多个维度。
def test002():
    """Reorder all dimensions of a random 3x4x5 tensor with ``permute``."""
    original = torch.randint(0, 10, (3, 4, 5))
    print(original, original.shape)

    # New order (1, 2, 0): the shape goes from (3, 4, 5) to (4, 5, 3).
    new_order = (1, 2, 0)
    reordered = original.permute(*new_order)
    print(reordered, reordered.shape)
    
    
# flatten 用于将张量展平为一维向量。
# flatten参数start_dim：从哪个维度开始展平
# end_dim：在哪个维度结束展平。默认值为-1，表示展平到最后一个维度。
def test003():
    """Flatten a random 3x4x5 tensor from dimension 1 through the last one."""
    original = torch.randint(0, 10, (3, 4, 5))

    # Dimensions 1..-1 are merged, so dimension 0 is kept and the rest collapse.
    first_dim, last_dim = 1, -1
    flattened = original.flatten(first_dim, last_dim)
    print(flattened)
    
    

if __name__ == "__main__":
    test001()

升维和降维

import torch

# squeeze ：用于移除所有大小为 1 的维度，或者移除指定维度的大小为 1 的维度。
def test001():
    """Remove the size-1 leading and trailing dimensions of a 1x4x5x1 tensor."""
    original = torch.randint(0, 10, (1, 4, 5, 1))
    print(original, original.shape)

    # Squeeze dimension 0 first, then the (new) last dimension.
    without_leading = original.squeeze(0)
    squeezed = without_leading.squeeze(-1)
    print(squeezed.shape)


# unsqueeze：用于在指定位置插入一个大小为 1 的新维度。
def test002():
    """Insert a size-1 dimension at the front of a 32x32x3 tensor."""
    original = torch.randint(0, 10, (32, 32, 3))
    print(original.shape)

    # Position 0 puts the new dimension first: (32, 32, 3) -> (1, 32, 32, 3).
    expanded = original.unsqueeze(0)
    print(expanded.shape)
    
    
if __name__ == "__main__":
    test001()

到这里今天的内容就结束了，今天主要讲的还是Pytorch最基础的部分，包括这些常用API的使用等。