本节前半部分的操作以及 LSTM 的堆叠方式都与上一节完全相同,区别只在于模型的最终用途:03 节做的是分类(classification),本节(04)做的是回归(regression)。
导入各类包
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.stats import spearmanr
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Input, concatenate, Embedding, Reshape, BatchNormalization
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns
# Detect GPUs and let TensorFlow allocate GPU memory on demand instead of
# reserving all of it up front.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
if gpu_devices:
    print('Using GPU')
    # Memory growth has to be configured identically on every visible GPU;
    # enabling it on the first device only fails on multi-GPU machines.
    for gpu_device in gpu_devices:
        tf.config.experimental.set_memory_growth(gpu_device, True)
else:
    print('Using CPU')
# Shorthand object for label-based slicing of MultiIndex data.
idx = pd.IndexSlice
# Apply seaborn's 'whitegrid' style to subsequent plots.
sns.set_style('whitegrid')
# Seed NumPy's global RNG for repeatability (only NumPy is seeded in the
# code visible here). Everything up to this point is the same setup that
# was used in the earlier sections.
np.random.seed(42)
# Directory 'results/lstm_embeddings' (presumably where later cells write
# their outputs -- confirm against the rest of the notebook).
results_path = Path('results', 'lstm_embeddings')
# exist_ok=True makes creation idempotent and removes the race between a
# separate exists() check and mkdir().
results_path.mkdir(parents=True, exist_ok=True)
导入数据
这里导入的数据就是我们在01中所创建的数据集。
# Load the 'returns_weekly' table from the HDF5 store and drop its 'label'
# column (a column, not a row), since this section does not use it.
data = (
    pd.read_hdf('data.h5', key='returns_weekly')
    .drop(columns='label')
)

# Replace each ticker symbol in the index with an integer code.
data['ticker'] = pd.factorize(data.index.get_level_values('ticker'))[0]

# Take the calendar month from the 'date' index level, then expand it into
# one indicator column per month (month_1, month_2, ...).
data['month'] = data.index.get_level_values('date').month
data = pd.get_dummies(data, columns=['month'], prefix='month')

# Print the column/dtype summary of the resulting frame.
data.info()
<class 'pandas.core.frame.DataFrame'> MultiIndex: 1547552 entries, ('000001', Timestamp('2017-12-31 00:00:00')) to ('689009', Timestamp('2008-01-02 00:00:00')) Data columns (total 66 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 0 1547552 non-null float64 1 1 1547552 non-null float64 2 2 1547552 non-null float64 3 3 1547552 non-null float64 4 4 1547552 non-null float64 5 5 1547552 non-null float64 6 6 1547552 non-null float64 7 7 1547552 non-null float64 8 8 1547552 non-null float64 9 9 1547552 non-null float64 10 10 1547552 non-null float64 11 11 1547552 non-null float64 12 12 1547552 non-null float64 13 13 1547552 non-null float64 14 14 1547552 non-null float64 15 15 1547552 non-null float64 16 16 1547552 non-null float64 17 17 1547552 non-null float64 18 18 1547552 non-null float64 19 19 1547552 non-null float64 20 20 1547552 non-null float64 21 21 1547552 non-null float64 22 22 1547552 non-null float64 23 23 1547552 non-null float64 24 24 1547552 non-null float64 25 25 1547552 non-null float64 26 26 1547552 non-null float64 27 27 1547552 non-null float64 28 28 1547552 non-null float64 29 29 1547552 non-null float64 30 30 1547552 non-null float64 31 31 1547552 non-null float64 32 32 1547552 non-null float64 33 33 1547552 non-null float64 34 34 1547552 non-null float64 35 35 1547552 non-null float64 36 36 1547552 non-null float64 37 37 1547552 non-null float64 38 38 1547552 non-null float64 39 39 1547552 non-null float64 40 40 1547552 non-null float64 41 41 1547552 non-null float64 42 42 1547552 non-null float64 43 43 1547552 non-null float64 44 44 1547552 non-null float64 45 45 1547552 non-null float64 46 46 1547552 non-null float64 47 47 1547552 non-null float64 48 48 1547552 non-null float64 49 49 1547552 non-null float64 50 50 1547552 non-null float64 51 51 1547552 non-null float64 52 52 1547552 non-null float64 53 label 1547552 non-null int64 54 ticker 1547552 non-null int64 55 month_1 1547552 non-null uint8 56 month_2 1547552 
non-null uint8 57 month_3 1547552 non-null uint8 58 month_4 1547552 non-null uint8 59 month_5 1547552 non-null uint8 60 month_6 1547552 non-null uint8 61 month_7 1547552 non-null uint8 62 month_8 1547