Bootstrap

手写神经网络 (来源:《Python神经网络编程》)

3层神经网络(输入层,隐藏层,输出层)

权重更新公式的推导

  1. 误差函数是目标值与实际值之差的平方和,此处对所有 $n$ 个输出节点求和: $\frac{\partial E}{\partial w_{j,k}} = \frac{\partial}{\partial w_{j,k}} \sum_{n} \left( t_{n} - o_{n} \right)^{2}$
  2. 节点 $n$ 的输出 $o_{n}$ 只取决于连接到这个节点的连接,以此简化公式: $\frac{\partial E}{\partial w_{j,k}} = \frac{\partial}{\partial w_{j,k}} \left( t_{k} - o_{k} \right)^{2}$
  3. 使用链式法则: $\frac{\partial E}{\partial w_{j,k}} = \frac{\partial E}{\partial o_{k}} \cdot \frac{\partial o_{k}}{\partial w_{j,k}}$
    $\frac{\partial E}{\partial w_{j,k}} = -2\left( t_{k} - o_{k} \right) \cdot \frac{\partial o_{k}}{\partial w_{j,k}}$
  4. 因为 $o_{k} = \mathrm{sigmoid}\left( \sum_{j} w_{j,k} \cdot o_{j} \right)$,所以 $\frac{\partial o_{k}}{\partial w_{j,k}} = \frac{\partial}{\partial w_{j,k}} \mathrm{sigmoid}\left( \sum_{j} w_{j,k} \cdot o_{j} \right)$,并且 $\frac{\partial}{\partial x} \mathrm{sigmoid}\left( x \right) = \mathrm{sigmoid}\left( x \right)\left( 1 - \mathrm{sigmoid}\left( x \right) \right)$,进而可以推出(记 $e_{k} = t_{k} - o_{k}$;常数因子 2 只影响步长,可并入学习率,故省略) $\frac{\partial E}{\partial w_{j,k}} = -e_{k} \cdot \mathrm{sigmoid}\left( \sum_{j} w_{j,k} \cdot o_{j} \right)\left( 1 - \mathrm{sigmoid}\left( \sum_{j} w_{j,k} \cdot o_{j} \right) \right) \cdot o_{j}$
  5. 将上述公式结合学习率 $\alpha$ 和矩阵运算 $\begin{pmatrix} \Delta w_{1,1} & \Delta w_{2,1} & \cdots \\ \Delta w_{1,2} & \Delta w_{2,2} & \cdots \\ \cdots & \cdots & \cdots \end{pmatrix} = \begin{pmatrix} E_{1} * S_{1}\left( 1 - S_{1} \right) \\ E_{2} * S_{2}\left( 1 - S_{2} \right) \\ \cdots \end{pmatrix} \cdot \begin{pmatrix} O_{1} & O_{2} & \cdots \end{pmatrix}$ 可以得到
    $\Delta w_{j,k} = \alpha \cdot E_{k} \cdot O_{k}\left( 1 - O_{k} \right) \cdot O_{j}^{T}$
import scipy.special
import numpy as np
import matplotlib.pyplot as plt
#Neural network definition
class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Weights are held in two matrices: ``wih`` (input -> hidden, shape
    ``(hnodes, inodes)``) and ``who`` (hidden -> output, shape
    ``(onodes, hnodes)``). ``train`` performs one gradient-descent step on a
    single example; ``query`` runs a forward pass only.
    """

    #Initialise the network
    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Store the layer sizes and learning rate and draw random initial weights.

        Args:
            inputnodes: number of input nodes.
            hiddennodes: number of hidden nodes.
            outputnodes: number of output nodes.
            learningrate: step size applied to every weight update.
        """
        #Layer sizes
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        #Initial weights: normal distribution centred on 0.
        #NOTE(review): the spread is derived from the size of the layer being
        #fed (hnodes / onodes); scaling by the number of incoming links
        #(inodes / hnodes) is the more common choice - confirm which is intended.
        #Weights between the input and hidden layers
        self.wih = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
        #Weights between the hidden and output layers
        self.who = np.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))
        #Learning rate
        self.lr = learningrate
        #Activation function: sigmoid. Referenced directly rather than through
        #a lambda wrapper, which keeps instances picklable.
        self.activation_function = scipy.special.expit

    #Forward pass shared by train() and query()
    def _forward(self, inputs):
        """Propagate the column vector ``inputs`` through both layers.

        Returns:
            A ``(hidden_outputs, final_outputs)`` tuple of column vectors.
        """
        #Signals arriving at the hidden layer, then activated
        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))
        #Signals arriving at the output layer, then activated
        final_outputs = self.activation_function(np.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    #Training function
    def train(self, inputs_list, targets_list):
        """Update both weight matrices in place from one training example.

        Args:
            inputs_list: sequence of ``inodes`` input values.
            targets_list: sequence of ``onodes`` target values.
        """
        #Convert to column vectors
        inputs = np.array(inputs_list, ndmin=2).T
        targets = np.array(targets_list, ndmin=2).T
        hidden_outputs, final_outputs = self._forward(inputs)
        #Output-layer error
        output_errors = targets - final_outputs
        #Hidden-layer error: output error propagated back through who.
        #This must use who from before the update below.
        hidden_errors = np.dot(self.who.T, output_errors)
        #Update the link weights: lr * (error * out * (1 - out)) . previous_out^T
        self.who += self.lr * np.dot(output_errors * final_outputs * (1.0 - final_outputs), hidden_outputs.T)
        self.wih += self.lr * np.dot(hidden_errors * hidden_outputs * (1.0 - hidden_outputs), inputs.T)

    #Query function
    def query(self, inputs_list):
        """Return the network's outputs for ``inputs_list`` as a column vector."""
        #Convert to a column vector
        inputs = np.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs
#Node counts for the three layers
input_nodes = 784
hidden_nodes = 200
output_nodes = 10
#Learning rate
learning_rate = 0.1
#Build the network
n = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)


def _parse_record(record):
    """Split one CSV record into ``(label, scaled_inputs)``.

    The first field is the integer label; the remaining fields are rescaled
    from 0~255 into 0.01~1.0. Zero inputs are avoided because they would make
    the weight updates fail (equal inputs and zero weights fail the same way).
    """
    all_values = record.split(',')
    #np.asarray(..., dtype=np.float64) replaces np.asfarray, which was removed in NumPy 2.0
    inputs = (np.asarray(all_values[1:], dtype=np.float64) / 255.0 * 0.99) + 0.01
    return int(all_values[0]), inputs


#Load the training data set.
#Raw strings keep the backslashes of the Windows path literal; blank lines
#are dropped because they cannot be parsed as records.
with open(r"F:\Python\dataset\mnist\mnist_train.csv", 'r') as training_data_file:
    training_data_list = [line for line in training_data_file if line.strip()]
#Load the test data set
with open(r"F:\Python\dataset\mnist\mnist_test.csv", 'r') as test_data_file:
    test_data_list = [line for line in test_data_file if line.strip()]

#Train the network
#Number of passes over the training data
epochs = 5
for e in range(epochs):
    print("epoch:", e)
    for record in training_data_list:
        correct_label, inputs = _parse_record(record)
        #Targets must lie inside the range the activation function can produce:
        #the logistic function never outputs a value <= 0 or >= 1. Targets outside
        #that range would drive ever larger weights and saturate the network,
        #hence 0.01 / 0.99 instead of 0 / 1.
        targets = np.zeros(output_nodes) + 0.01
        targets[correct_label] = 0.99
        #Train on this example
        n.train(inputs, targets)

#Test
scorecard = []
for record in test_data_list:
    correct_label, inputs = _parse_record(record)
    #Predict with NeuralNetwork.query; the index of the largest output is the predicted label
    outputs = n.query(inputs)
    label = np.argmax(outputs)
    scorecard.append(1 if label == correct_label else 0)
#Accuracy: fraction of test records classified correctly
scorecard_array = np.asarray(scorecard)
print("performance = ", scorecard_array.sum() / scorecard_array.size)