己亥末,庚子春,荆楚大疫,染者数万计。众惶恐,举国防,皆闭户。街无舟车,巷无人烟,时天下震动。然九州一心,青丝白发皆身先士卒,布衣商客皆争相解囊,政医兵者扛鼎逆行为之勇战矣,能者皆竭力之。月余,疫尽去,国泰民安。
在这里先向国内外参与抗疫的各行各业人士致敬!
接下来介绍本文的项目:
pyecharts 是一个用于生成 Echarts 图表的类库。
Echarts 是百度开源的一个 JS 数据可视化库;pyecharts 则是用于生成 Echarts 图表的 Python 类库,实际上就是 Echarts 与 Python 的对接。
详情请看官方文档:https://pyecharts.org/#/zh-cn/intro
话不多说先上效果图:
接下来从头说:
1.数据抓取:
数据源:腾讯疫情实时追踪
首先在该网站按 F12 打开开发者工具,切换到 Network 面板后刷新页面,逐个查看各请求的 Response:
找到了,是这个:https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=jQuery34102867663505226432_1581423137066&_=1581423137067
但是再刷新一下会发现 callback 后面的内容变了,推测其中带有时间戳(callback 是 JSONP 回调函数名,末尾的 _ 参数是防缓存的时间戳),把这两个参数去掉,用https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5试试:
嗯,也可以,就它了,这就是数据源。
先引入所需模块:
import json
import requests
import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType, ChartType
from bs4 import BeautifulSoup
接下来抓取数据:
# Download the epidemic snapshot from the Tencent endpoint.
# requests applies no timeout by default, so one is set explicitly to keep the
# script from hanging forever, and HTTP errors are surfaced before parsing.
_http_reply = requests.get(
    'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5',
    timeout=10,
)
_http_reply.raise_for_status()
reponse = _http_reply.json()
# The 'data' field of the envelope is itself a JSON-encoded string, hence the
# second decoding step.
data = json.loads(reponse['data'])
# When the article was written, print(data.keys()) showed:
#   dict_keys(['lastUpdateTime', 'chinaTotal', 'chinaAdd', 'isShowAdd',
#              'chinaDayList', 'chinaDayAddList', 'dailyNewAddHistory',
#              'dailyDeadRateHistory', 'confirmAddRank', 'areaTree',
#              'articleList'])
# i.e. roughly the cut-off time, new cases, death/heal rates and so on,
# which is what the dashboard needs.
数据处理:
# Domestic (China) figures
lastUpdateTime = data['lastUpdateTime']

# The charts further down show these dict keys as labels, so the English keys
# are replaced with Chinese ones: first add every Chinese key, then drop the
# English originals.
chinaTotal = data['chinaTotal']
for _label, _field in (('确诊', 'confirm'), ('疑似', 'suspect'), ('死亡', 'dead'), ('治愈', 'heal')):
    chinaTotal[_label] = chinaTotal[_field]
for _field in ('confirm', 'suspect', 'dead', 'heal'):
    del chinaTotal[_field]
# NOTE(review): this global shadows the builtin sum(); the name is kept because
# code outside this section may read it.
sum = chinaTotal['确诊'] + chinaTotal['疑似'] + chinaTotal['死亡'] + chinaTotal['治愈']

# chinaAdd is meant to hold the newly added counts, but the author observed a
# negative "suspect" increment in it (the web page showed the same value), so
# the last entry of chinaDayAddList (the per-day increments) is used instead.
chinaAdd = data['chinaAdd']
_latest_day = data['chinaDayAddList'][-1]
for _label, _field in (('新增确诊', 'confirm'), ('新增疑似', 'suspect'), ('新增死亡', 'dead'), ('新增治愈', 'heal')):
    chinaAdd[_label] = _latest_day[_field]
for _field in ('confirm', 'suspect', 'dead', 'heal'):
    del chinaAdd[_field]
# Flatten the area tree into one record per city.
# The children of the first areaTree entry are iterated as provinces, and the
# children of each province as cities.
areaTree = data['areaTree']
china_data = areaTree[0]['children']
china_list = [
    {
        'province': province_node['name'],
        'city': city_node['name'],
        'total': city_node['total'],
        'today': city_node['today'],
    }
    for province_node in china_data
    for city_node in province_node['children']
]
# Data-processing helpers: pull one counter out of a statistics record.
def _stat_field(record, key):
    """Return ``record[key]`` for a dict, or for the string repr of a dict.

    The previous implementation ran ``eval(str(record))``, which executes
    arbitrary expressions coming from remote data and fails on values whose
    repr is not a valid expression (for example ``nan``). Dicts are now
    indexed directly; any other input is parsed with ``ast.literal_eval``,
    which only accepts Python literals.

    Raises:
        KeyError: if the record has no such key.
        ValueError, SyntaxError: if a non-dict input is not a literal.
    """
    if not isinstance(record, dict):
        import ast

        record = ast.literal_eval(str(record))
    return record[key]


def confirm(x):
    """Return the 'confirm' value of record ``x``."""
    return _stat_field(x, 'confirm')


def suspect(x):
    """Return the 'suspect' value of record ``x``."""
    return _stat_field(x, 'suspect')


def dead(x):
    """Return the 'dead' value of record ``x``."""
    return _stat_field(x, 'dead')


def heal(x):
    """Return the 'heal' value of record ``x``."""
    return _stat_field(x, 'heal')
# Build the per-city frame from the flattened records.
china_data = pd.DataFrame(china_list)
china_data.head()
# Expand the nested 'total' and 'today' records into flat columns: the
# cumulative counters keep their plain names, the daily ones get an 'add'
# prefix.
_extractors = (('confirm', confirm), ('suspect', suspect), ('dead', dead), ('heal', heal))
for _column, _extract in _extractors:
    china_data[_column] = china_data['total'].map(_extract)
for _column, _extract in _extractors:
    china_data['add' + _column] = china_data['today'].map(_extract)
# Keep only the flat columns, in display order.
_china_columns = [
    "province", "city",
    "confirm", "suspect", "dead", "heal",
    "addconfirm", "addsuspect", "adddead", "addheal",
]
china_data = china_data[_china_columns]
china_data.head()
# International data: one row per top-level areaTree entry.
global_data = pd.DataFrame(data['areaTree'])
# Same expansion as for the domestic frame: cumulative counters from 'total',
# 'add'-prefixed counters from 'today'.
_extractors = (('confirm', confirm), ('suspect', suspect), ('dead', dead), ('heal', heal))
for _column, _extract in _extractors:
    global_data[_column] = global_data['total'].map(_extract)
for _column, _extract in _extractors:
    global_data['add' + _column] = global_data['today'].map(_extract)
# Attach the names from the lookup spreadsheet; the inner join drops rows
# whose 'name' has no match in the spreadsheet's '中文' column.
world_name = pd.read_excel("世界各国中英文对照.xlsx")
global_data = global_data.merge(world_name, left_on="name", right_on="中文", how="inner")
_global_columns = [
    "name", "英文",
    "confirm", "suspect", "dead", "heal",
    "addconfirm", "addsuspect", "adddead", "addheal",
]
global_data = global_data[_global_columns]
global_data.head()
# Columns kept from both per-day series.
_daily_fields = ['date', 'confirm', 'suspect', 'dead', 'heal']
# Per-day figures
chinaDayList = pd.DataFrame(data['chinaDayList'])[_daily_fields]
chinaDayList.head()
# Per-day increments
chinaDayAddList = pd.DataFrame(data['chinaDayAddList'])[_daily_fields]
chinaDayAddList.head()
数据可视化:
# Top-left pie chart: an outer ring built from chinaTotal and an inner disc
# built from chinaAdd.
# NOTE(review): the outer-ring labels are paired with chinaTotal.values() by
# position, so this presumably relies on chinaTotal holding exactly the four
# relabelled counters in this order — confirm.
_outer_ring = [
    [label, value]
    for label, value in zip(['确 诊 ', '疑 似 ', '死 亡 ', '治 愈 '], chinaTotal.values())
]
_inner_disc = [[label, value] for label, value in chinaAdd.items()]

total_pie = Pie(
    init_opts=opts.InitOpts(
        theme=ThemeType.LIGHT, width='500px', height='350px', bg_color="transparent"
    )
)
total_pie.add("", _outer_ring, center=["50%", "60%"], radius=[75, 100])
total_pie.add("", _inner_disc, center=["50%", "60%"], radius=[0, 50])
total_pie.set_global_opts(
    title_opts=opts.TitleOpts(
        title="全国总量",
        pos_bottom=0,
        title_textstyle_opts=opts.TextStyleOpts(color="#00FFFF"),
    ),
    legend_opts=opts.LegendOpts(textstyle_opts=opts.TextStyleOpts(color="#FFFFFF")),
)
total_pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{c}"))
# Centre panel: world map coloured by the 'confirm' column.
_world_series = [
    [country, count]
    for country, count in zip(global_data["英文"], global_data["confirm"])
]
# Piecewise colour buckets for the visual map.
_world_pieces = [
    {"min": 101, "label": '>100', "color": "#893448"},
    {"min": 10, "max": 100, "label": '10-100', "color": "#fb8146"},
    {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
]

world_map = Map(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
world_map.add("", _world_series, "world", is_map_symbol_show=False)
# NOTE(review): toolbox_opts is passed to set_series_opts here, exactly as in
# the original; it looks like it may have been meant for set_global_opts —
# confirm against the rendered chart before moving it.
world_map.set_series_opts(
    label_opts=opts.LabelOpts(is_show=False),
    toolbox_opts=opts.ToolboxOpts(orient='vertical', pos_right="10%"),
)
world_map.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        background_color="transparent",
        textstyle_opts=opts.TextStyleOpts(color="#F5FFFA"),
        pieces=_world_pieces,
    )
)
# 右下角中国疫情地图绘制
# Sum the 'confirm' column per province and turn the result back into a
# two-column frame.
_province_totals = china_data.groupby("province")["confirm"].sum()
area_data = _province_totals.reset_index()
area_data.columns = ["province", "confirm"]
area_map = (
Map(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
.add("", [list(z) for z in zip(list(area_data["province"]), list(area_data["confirm"]))], "china",
is_map_symbol_show=False, label_opts=opts.LabelOpts(color="#fff"),
tooltip_opts=opts.TooltipOpts(is_show=True), zoom=1.2, center=[105, 30])
.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
.set_global_opts(title_opts=opts.TitleOpts(title="中国疫情分布图", pos_top='5%',
title_textstyle_opts=opts.TextStyleOpts(color="#FF0000")),
visualmap_opts=opts.VisualMapOpts(is_piecewise=True, pos_right=0, pos_bottom=0,
textstyle_opts=opts.TextStyleOpts(color="#F5FFFA"),
pieces=[
{
"min": 1001, "label": '>1000', "color": "#893448"},
{
"min": 500, "max": 1000, "label": '500-1000'