Bootstrap

用LSTM预测股票价格

目录

效果图


import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

# Load the raw quotes and keep only the columns the model uses.
# NOTE(review): the windowing below presumes rows are in ascending date
# order — confirm against the CSV.
gstock_data = pd.read_csv(r'D:\lstm-cnn\lstm-snn\000001_SZSE.csv')
# .copy() so the 'date' assignment below writes to an independent frame
# rather than to a slice of the frame returned by read_csv.
gstock_data = gstock_data[['date', 'open', 'close']].copy()
gstock_data['date'] = pd.to_datetime(gstock_data['date'])
gstock_data = gstock_data.set_index('date', drop=True)

# Chronological split point: the first half trains, the second half tests.
training_size = int(len(gstock_data) * 0.50)

# Fit the scaler on the training rows only, so the test period's min/max do
# not leak into the features the model is trained on. The same fitted
# parameters are then applied to every row; test values may therefore fall
# outside [0, 1], which MinMaxScaler permits by default.
scaler = MinMaxScaler()
scaler.fit(gstock_data.iloc[:training_size])
scaled_data = scaler.transform(gstock_data)
scaled_df = pd.DataFrame(scaled_data, index=gstock_data.index, columns=gstock_data.columns)

# Split the scaled frame into the training and test sets.
train_data = scaled_df.iloc[:training_size]
test_data = scaled_df.iloc[training_size:]


# Build sliding-window sequences and their next-step labels.
def create_sequence(dataset, seq_length=50):
    """Slice *dataset* into overlapping windows and next-row targets.

    Args:
        dataset: DataFrame containing at least the 'open' and 'close' columns.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        A pair ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n, seq_length, 2)`` and ``y`` has shape ``(n, 2)``, where
        ``n = len(dataset) - seq_length``; ``y[i]`` is the row that
        immediately follows window ``X[i]``. When the dataset is not longer
        than ``seq_length`` both arrays are empty but keep those trailing
        dimensions, so downstream ``shape[1]`` lookups still work.
    """
    # Convert once instead of doing a pandas lookup per window.
    values = dataset[['open', 'close']].to_numpy()
    n_features = values.shape[1]
    n_samples = len(values) - seq_length

    if n_samples <= 0:
        empty_X = np.empty((0, seq_length, n_features), dtype=values.dtype)
        empty_y = np.empty((0, n_features), dtype=values.dtype)
        return empty_X, empty_y

    X = np.stack([values[start:start + seq_length] for start in range(n_samples)])
    # The label for the window starting at `start` is row `start + seq_length`.
    y = values[seq_length:].copy()
    return X, y


# Turn each split into sliding windows plus the row that follows each window.
train_seq, train_label = create_sequence(train_data)
test_seq, test_label = create_sequence(test_data)

# LSTM layers take input shaped [samples, time steps, features].
train_seq = train_seq.reshape(train_seq.shape[0], train_seq.shape[1], 2)
test_seq = test_seq.reshape(test_seq.shape[0], test_seq.shape[1], 2)
print("Train sequence shape:", train_seq.shape)
print("Test sequence shape:", test_seq.shape)

# Two stacked LSTM layers with dropout in between, followed by a dense head
# that emits two values per sample (the 'open' and 'close' prices).
model = Sequential([
    LSTM(units=50, return_sequences=True,
         input_shape=(train_seq.shape[1], train_seq.shape[2])),
    Dropout(0.1),
    LSTM(units=50),
    Dense(2),
])

model.compile(optimizer='adam', loss='mean_squared_error')
# The test windows double as the validation set reported during training.
model.fit(
    train_seq,
    train_label,
    batch_size=32,
    epochs=80,
    verbose=1,
    validation_data=(test_seq, test_label),
)

# Predict on the test windows and map the scaled outputs back to price units.
test_predicted = model.predict(test_seq)
test_inverse_predicted = scaler.inverse_transform(test_predicted)

# Prediction i targets the test row at position i + window length, so the
# offset used for alignment must equal the window length of test_seq. A
# different hard-coded value would shift the actual series and the date axis
# against the predictions.
seq_length = test_seq.shape[1]
predicted_length = len(test_predicted)
predicted_index = test_data.index[seq_length:seq_length + predicted_length]

# Collect the predictions in a DataFrame indexed by the dates they refer to.
gs_slic_data = pd.DataFrame(test_inverse_predicted, columns=['open_predicted', 'close_predicted'],
                            index=predicted_index)

# Actual prices for the same dates: skip the first window, undo the scaling,
# and crop to the number of predictions.
test_data_inverse = scaler.inverse_transform(test_data.iloc[seq_length:].values)
test_data_inverse_cropped = test_data_inverse[:predicted_length, :]
gs_slic_data[['open_actual', 'close_actual']] = test_data_inverse_cropped[:, :2]

# Plot actual against predicted prices (only the 'open' price, as an example).
plt.figure(figsize=(10, 6))
plt.plot(gs_slic_data.index, gs_slic_data['open_actual'], label='Actual Open')
plt.plot(gs_slic_data.index, gs_slic_data['open_predicted'], label='Predicted Open', linestyle='--')
plt.xticks(rotation=45)
plt.xlabel('Date', size=15)
plt.ylabel('Stock Price', size=15)
plt.title('Actual vs Predicted for Open Price', size=15)
plt.legend()
plt.show()

效果图

;