# Bootstrap

# PageRank算法Python实现 (PageRank algorithm: a Python implementation)

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
def getGm(A):
    """Build the state-transition probability matrix Gm from adjacency matrix A.

    Row i of A is read as the out-links of page i: every non-zero entry
    A[i][j] counts as one link from page i to page j. Each row is scaled by
    1 / (number of non-zero entries in that row) and the result is
    transposed, so Gm[j][i] is the weight of moving from page i to page j.
    For a 0/1 adjacency matrix every column of Gm therefore sums to 1.

    A page without any out-link (an all-zero row) previously raised
    ZeroDivisionError. Such a dangling page is now treated as linking to
    every page with equal probability, the usual PageRank convention.

    @A: adjacency matrix of the web link graph (a sequence of rows)
    @return: numpy array containing the transposed, row-scaled matrix
    """
    Gm = []
    for row in A:
        width = len(row)
        out_degree = sum(1 for value in row if value != 0)
        if out_degree == 0:
            # Dangling page: spread uniformly instead of dividing by zero.
            Gm.append([1 / width] * width if width else [])
        else:
            tran_prob = 1 / out_degree  # transition probability per out-link
            Gm.append([tran_prob * value for value in row])
    return np.transpose(Gm)
def getBaseLev(N, P=0.85):
    """Compute the base level (1 - P) * e / N that every page receives.

    The damping factor used to be hard-coded inside this function; it is now
    a keyword parameter whose default keeps the previous behaviour, so a
    caller that uses a different P for the iteration can pass the same value
    here.

    @N: total number of pages
    @P: damping factor (probability of following a hyperlink), default 0.85
    @return: list of N single-element lists, each holding (1 - P) / N
             (an N x 1 column in nested-list form; empty when N is 0)
    """
    # Iterating over the ones-vector e mirrors the formula and yields an
    # empty result for N == 0 without ever dividing by zero.
    return [[(1 - P) * one / N] for one in np.ones(N)]
def getPR(P, Gm, R, PR):
    """Perform one PageRank update step: new_PR = P * (Gm . PR) + R.

    @P: weighting (damping) coefficient, usually around 0.85 - the
        probability of browsing by following hyperlinks
    @Gm: state-transition probability matrix
    @R: base level received by every page
    @PR: current PageRank value of every page node
    @return: the updated PageRank values
    """
    # Core of the algorithm: propagate the ranks through the transition
    # matrix, scale by P, then add the base level.
    propagated = np.dot(Gm, PR)
    return P * propagated + R
def res_vis(A, PR):
    """Visualise the link graph, sizing every node by its PageRank value.

    Nodes are labelled 1..len(A). A non-zero entry A[i][j] is drawn as a
    directed edge from node i + 1 to node j + 1.

    Fixes over the previous version:
    - the row loop follows len(A) instead of a hard-coded 7;
    - all len(A) nodes are added (the last one used to be left out, so an
      isolated last page vanished from the plot);
    - node sizes are looked up per node, so they always match the order in
      which the graph stores its nodes.

    @A: adjacency matrix of the web link graph
    @PR: final PageRank values, read as PR[i][0] for page i
    """
    # nx.Graph() would build an undirected graph; links are directed, so a
    # DiGraph is used.
    all_edges = []
    for i, row in enumerate(A):
        for j, link in enumerate(row):
            if link != 0:  # same "non-zero means link" rule as getGm
                all_edges.append([i + 1, j + 1])
    # (1) create the directed graph
    G = nx.DiGraph()
    # (2) add every node, including pages without any edge
    G.add_nodes_from(range(1, len(A) + 1))
    # (3) add the directed edges
    G.add_edges_from(all_edges)
    # (4) map node label -> PageRank value
    pr = {i + 1: PR[i][0] for i in range(len(PR))}
    # (5) draw
    layout = nx.spring_layout(G)
    plt.figure(1)
    nx.draw(G, pos=layout,
            node_size=[pr[node] * 6000 for node in G.nodes()],
            node_color='m', with_labels=True)
    plt.show()
def main():
    """Run PageRank by power iteration on a 7-page example graph and plot it.

    Starting from all-zero ranks, the update step is repeated until every
    page's rank, rounded to 5 decimals, no longer changes. Each round prints
    the round number and the tab-separated rounded ranks; afterwards the
    graph is drawn with res_vis.
    """
    P = 0.85  # damping factor: probability of browsing by following links
    # Adjacency matrix of the link graph as written: each column lists the
    # out-links of one page.
    A = np.array([[0, 1, 1, 0, 1, 1, 0],
                  [1, 0, 1, 1, 0, 0, 0],
                  [1, 0, 0, 1, 1, 0, 0],
                  [1, 0, 0, 0, 1, 0, 0],
                  [1, 0, 0, 1, 0, 1, 1],
                  [0, 0, 0, 0, 1, 0, 0],
                  [1, 0, 0, 0, 0, 0, 0]])
    A = np.transpose(A)  # now each row lists the out-links of one page
    N = len(A)  # number of pages, derived from the matrix itself

    # Neither the base level nor the transition matrix changes between
    # rounds, so both are computed once instead of on every iteration.
    # NOTE: getBaseLev(N) uses a damping factor of 0.85; keep P in sync.
    R = getBaseLev(N)
    Gm = getGm(A)

    # Initial PageRank values: all zero, as an N x 1 column.
    new_PR = [[0] for _ in range(N)]
    count = 0  # iteration counter
    while True:
        PR = new_PR
        new_PR = getPR(P, Gm, R, PR)
        count += 1
        print("第 %s 轮迭代" % count)
        print("\t".join(str(round(row[0], 5)) for row in new_PR))
        # Stop once every rank is stable to 5 decimal places.
        if all(round(old[0], 5) == round(new[0], 5)
               for old, new in zip(PR, new_PR)):
            break
    print("-------------------")
    print("PageRank值已计算完成")
    res_vis(A, new_PR)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

;