JSON格式
JSON格式的文件使用json模块来处理。
下面看一个JSON文件里面的内容是什么样子的:
就是一个很长的Python列表,其中每个元素都是一个包含五个键的字典:统计日期,月份,周数,周几以及收盘价。
下载收盘数据
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import json
import math
from contextlib import closing
from itertools import groupby

try:
    # Python 2.x
    from urllib2 import urlopen
except ImportError:
    # Python 3.x
    from urllib.request import urlopen  # 1

import pygal
import requests
json_url = 'https://raw.githubusercontent.com/muxuezi/btc/master/btc_close_2017.json'
# closing() guarantees the HTTP response is released even if read() raises;
# it only needs a close() method, so it works with both urllib2 (Python 2)
# and urllib.request (Python 3) response objects.
with closing(urlopen(json_url)) as response:
    # Read the raw response body (bytes).
    req = response.read()
# Save the raw bytes to disk unchanged.
with open('btc_close_2017_urllib.json', 'wb') as f:
    f.write(req)
# Decode the bytes as UTF-8 text and parse the JSON document.
file_urllib = json.loads(req.decode('utf8'))
print(file_urllib)
json_url = 'https://raw.githubusercontent.com/muxuezi/btc/master/btc_close_2017.json'
# Use a timeout so a stalled connection cannot hang the script forever.
req = requests.get(json_url, timeout=30)
# Fail fast on an HTTP error status so an error page is never written to
# disk or handed to the JSON parser as if it were the data set.
req.raise_for_status()
# Save the decoded response text to a file.
with open('btc_close_2017_request.json', 'w') as f:
    f.write(req.text)
# Parse the response body as JSON.
file_requests = req.json()
# Compare with the result of the urllib download (file_urllib).
print(file_urllib == file_requests)
提取相关的数据
下面编写一个小程序来提取btc_close_2017.json文件中的相关信息:
import json
# Load the whole JSON document into a list of per-day records.
filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# Print one line per day, using the values exactly as stored in the file.
row_template = "{} is month{}week{},{},the close price is {}RMB"
row_keys = ('date', 'month', 'week', 'weekday', 'close')
for btc_dict in btc_data:
    print(row_template.format(*[btc_dict[key] for key in row_keys]))
import json
# Load the whole JSON document into a list of per-day records.
filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# Print one line per day after converting the numeric fields to int.
for record in btc_data:
    date = record['date']
    month, week = int(record['month']), int(record['week'])
    weekday = record['weekday']
    # int() is applied directly to the raw 'close' value. If that value is a
    # string containing a decimal point (e.g. '6928.6492'), int() raises
    # ValueError -- the failure this version of the script demonstrates.
    close = int(record['close'])
    print("{} is month{}week{},{},the close price is {}RMB".format(
        date, month, week, weekday, close))
我们运行来试试,看看结果如何
没想到出现了异常错误,在实际工作中,原始数据的格式经常不是统一的,此类数值类型转换造成的ValueError异常十分普遍,这里的原因在于,Python不能直接将包含小数点的字符串‘6928.6492’转换为整数。为了消除这种错误,需要先将字符串转换为浮点数(float),再将浮点数转换为整数(int):
import json
# Load the whole JSON document into a list of per-day records.
filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# Print one line per day with month, week and closing price as integers.
for record in btc_data:
    fields = (
        record['date'],
        int(record['month']),
        int(record['week']),
        record['weekday'],
        # Go through float first: int() cannot parse a string that contains
        # a decimal point. The outer int() then drops the fractional part.
        int(float(record['close'])),
    )
    print("{} is month{}week{},{},the close price is {}RMB".format(*fields))
好了,解决了。
绘制收盘价折线图
之前了解过了pygal绘制条形图(bar chart)和matplotlib绘制折线图(line chart),下面用Pygal来实现收盘价的折线图
import json
import pygal
# Load the whole JSON document into a list of per-day records.
filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# One list per field of the daily records; only dates and close are plotted
# by this script.
dates = []
months = []
weeks = []
weekdays = []
close = []
# Collect every day's values into the per-field lists.
for btc_dict in btc_data:
    dates.append(btc_dict['date'])
    months.append(int(btc_dict['month']))
    weeks.append(int(btc_dict['week']))
    weekdays.append(btc_dict['weekday'])
    # float() first because the price may contain a decimal point;
    # int() then drops the fractional part.
    close.append(int(float(btc_dict['close'])))

line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False)
line_chart.title = '收盘价(RMB)'
line_chart.x_labels = dates
N = 20  # show an x-axis label every 20 days
# Minor x labels are hidden above, so the labels to keep must be declared as
# major labels; without this line N would be unused.
line_chart.x_labels_major = dates[::N]
line_chart.add('收盘价', close)
line_chart.render_to_file('收盘价折线图.svg')
运行后,会在文件保存的目录下面生成一个.svg文件
然后用浏览器打开如下图:
时间序列特征初探
进行时间序列分析总是期望发现趋势(trend)、周期性(seasonality)和噪声(noise),从而能够描述事实、预测未来、做出决策。对数变换(log transformation)可以消除波动中的非线性趋势。这里用Python标准库的数学模块math来实现:我们用以10为底的对数函数math.log10对收盘价取对数,日期仍然保持不变,这种方式称为半对数变换(semi-logarithmic)。
import json
import pygal
import math
# Load the whole JSON document into a list of per-day records.
filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# One list per field of the daily records; only dates and close are plotted
# by this script.
dates = []
months = []
weeks = []
weekdays = []
close = []
# Collect every day's values into the per-field lists.
for btc_dict in btc_data:
    dates.append(btc_dict['date'])
    months.append(int(btc_dict['month']))
    weeks.append(int(btc_dict['week']))
    weekdays.append(btc_dict['weekday'])
    # float() first because the price may contain a decimal point;
    # int() then drops the fractional part.
    close.append(int(float(btc_dict['close'])))

line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False)
line_chart.title = '收盘价对数变换(RMB)'
line_chart.x_labels = dates
N = 20  # show an x-axis label every 20 days
line_chart.x_labels_major = dates[::N]
# Semi-log transform: base-10 logarithm of the price, dates left unchanged.
close_log = [math.log10(price) for price in close]
line_chart.add('log收盘价', close_log)
# Rendered to its own file so it does not overwrite the plain closing-price
# chart, which is saved as '收盘价折线图.svg'.
line_chart.render_to_file('收盘价对数变换折线图.svg')
现在用对数变换剔除非线性趋势之后,整体上涨的趋势更接近线性增长,并且从图中可以看出每个季度末似乎有显著的周期性——3月、6月、9月都出现了波动。那么,按照这样的推算,12月是不是也会出现类似的波动呢?下面看看收盘价的月日均值和周日均值的表现。
收盘价均值
import json
import pygal
import math
from itertools import groupby
# Load the whole JSON document into a list of per-day records.
filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# Split the records into five parallel lists, one per field. Month and week
# become ints; the price goes through float() before int(), which drops the
# fractional part.
dates = [day['date'] for day in btc_data]
months = [int(day['month']) for day in btc_data]
weeks = [int(day['week']) for day in btc_data]
weekdays = [day['weekday'] for day in btc_data]
close = [int(float(day['close'])) for day in btc_data]
N = 20  # one major x-axis label every 20 days

# Chart 1: raw closing price. Minor x labels are hidden, so only every N-th
# date (the major labels) is shown.
line_chart = pygal.Line(show_minor_x_labels=False, x_label_rotation=20)
line_chart.title = '收盘价(¥)'
line_chart.x_labels = dates
line_chart.x_labels_major = dates[::N]
line_chart.add('收盘价', close)
line_chart.render_to_file('收盘价折线图(¥).svg')

# Chart 2: base-10 logarithm of the closing price on the same date axis.
close_log = list(map(math.log10, close))
line_chart = pygal.Line(show_minor_x_labels=False, x_label_rotation=20)
line_chart.title = '收盘价对数变换(¥)'
line_chart.x_labels = dates
line_chart.x_labels_major = dates[::N]
line_chart.add('log收盘价', close_log)
line_chart.render_to_file('收盘价对数变换折线图(¥).svg')
# Bare expression: presumably left for notebook-style display of the chart;
# it has no effect when run as a plain script.
line_chart
def draw_line(x_data, y_data, title, y_legend):
    """Plot the mean of ``y_data`` for each distinct value of ``x_data``.

    The (x, y) pairs are grouped by x, the y values of each group are
    averaged, and the means are drawn as a pygal line chart that is also
    saved to ``title + '.svg'``.

    Args:
        x_data: Grouping keys, one per observation (e.g. month numbers).
        y_data: Numeric values, parallel to ``x_data``.
        title: Chart title; also used as the output file name stem.
        y_legend: Legend label for the plotted series.

    Returns:
        The ``pygal.Line`` chart that was rendered.

    Raises:
        ValueError: If there are no (x, y) pairs to plot.
    """
    xy_map = []
    # groupby() only merges adjacent items, so the pairs are sorted first.
    for x, group in groupby(sorted(zip(x_data, y_data)),
                            key=lambda pair: pair[0]):
        y_list = [v for _, v in group]
        xy_map.append([x, sum(y_list) / len(y_list)])
    if not xy_map:
        # Without this guard the unpacking below fails with an opaque
        # "not enough values to unpack" ValueError.
        raise ValueError('draw_line() needs at least one (x, y) pair')
    # Transpose [[x, mean], ...] into (x, ...) and (mean, ...).
    x_unique, y_mean = zip(*xy_map)
    line_chart = pygal.Line()
    line_chart.title = title
    # The x labels are handed to pygal as strings rather than raw numbers.
    line_chart.x_labels = int_str(x_unique)
    line_chart.add(y_legend, y_mean)
    line_chart.render_to_file(title + '.svg')
    return line_chart
def int_str(list_0):
    """Return a new list holding ``str(x)`` for every item of ``list_0``.

    Args:
        list_0: Any iterable of values (e.g. the numeric x-axis values).

    Returns:
        A list of strings in the same order as the input.
    """
    return [str(item) for item in list_0]
# Daily mean per month, using only the records before 2017-12-01.
idx_month = dates.index('2017-12-01')
line_chart_month = draw_line(
    months[:idx_month], close[:idx_month], '收盘价月日均值(¥)', '月日均值')
line_chart_month

# Daily mean per week, using the records from index 1 up to (not including)
# 2017-12-11. NOTE(review): index 0 is skipped, presumably so that only
# complete weeks are averaged -- confirm against the data.
idx_week = dates.index('2017-12-11')
week_span = slice(1, idx_week)
line_chart_week = draw_line(
    weeks[week_span], close[week_span], '收盘价周日均值(¥)', '周日均值')
line_chart_week

# Mean per day of the week over the same span. Weekday names are mapped to
# 1..7 (Monday = 1) so the groups sort in calendar order.
wd = ['Monday', 'Tuesday', 'Wednesday',
      'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdays_int = [1 + wd.index(day_name) for day_name in weekdays[week_span]]
line_chart_weekday = draw_line(
    weekdays_int, close[week_span], '收盘价星期均值(¥)', '星期均值')
# Replace the numeric labels with weekday names and render again so the
# saved file carries the new labels.
line_chart_weekday.x_labels = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
line_chart_weekday.render_to_file('收盘价星期均值(¥).svg')
line_chart_weekday
# The SVG charts to embed in the dashboard page, in display order.
svg_files = [
    '收盘价折线图(¥).svg', '收盘价对数变换折线图(¥).svg', '收盘价月日均值(¥).svg',
    '收盘价周日均值(¥).svg', '收盘价星期均值(¥).svg'
]
# Write a minimal HTML page that embeds each chart through an <object> tag.
with open('收盘价Dashboard.html', 'w', encoding='utf8') as html_file:
    html_file.write(
        '<html><head><title>收盘价Dashboard</title><meta charset="utf-8"></head><body>\n')
    html_file.writelines(
        ' <object type="image/svg+xml" data="{0}" height=500></object>\n'.format(svg)
        for svg in svg_files)
    html_file.write('</body></html>')
在写上面的代码时,出现了一个错误,如下,总是提示
是x轴的数据有问题:x轴的数值需要先转换为字符串(str)类型,修改方式如下: