import pandas as pd
import os
import datetime
path = os.path.dirname(os.path.realpath(file))
import matplotlib
import pylab as plt
plt.rcParams[‘font.sans-serif’] = [‘SimHei’] #显示中文
from matplotlib.font_manager import FontProperties
plt.rcParams[‘axes.unicode_minus’]=False
import numpy as np
import matplotlib
import pylab as plt
plt.rcParams[‘font.sans-serif’] = [‘SimHei’] #显示中文
from matplotlib.font_manager import FontProperties
plt.rcParams[‘axes.unicode_minus’]=False
import pandas as pd
#setup_seed(123)
import os
import joblib
import lightgbm as lgb
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import Ridge
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
import numpy as np
import joblib
import time
import optuna
from hyperopt import fmin, hp, partial, Trials, tpe,rand
from lightgbm import log_evaluation, early_stopping
path = os.path.dirname(os.path.realpath(file))
import warnings
设置警告过滤器以忽略特定警告
warnings.filterwarnings(“ignore”, message=“No further splits with positive gain”)
def make_shorttime_data(data):
print(“*******************”+“短期预测数据准备”+“**************************”)
###########短期预测-1.数据准备##########
train=data[‘2024-01-21 00:00:00’: ]
test=data[‘2024-05-01 00:15’:]
result=pd.DataFrame(columns=[‘实际功率’, ‘真实值’, ‘预测值’])
result[‘实际功率’]=test[‘实际功率’][‘2024-05-01 00:15’:]#[96+96-1:]
print(train.shape,test.shape)
x_train, y_train,x_test,y_test=train.iloc[:,1].values.reshape([-1,1]),train.iloc[:,0].values.reshape([-1,1]),test.iloc[:,1].values.reshape([-1,1]),test.iloc[:,0].values.reshape([-1,1])
y_train,y_test=y_train.ravel(),y_test.ravel()
print('x_train.shape',x_train.shape,'y_train.shape',y_train.shape)
print('x_test.shape',x_test.shape,'y_test.shape',y_test.shape)
return x_train, y_train,x_test,y_test,result
###########短期预测-2.模型定义##########
def short_stacking_model(x_train, y_train,x_test,y_test):
from mlxtend.regressor import StackingCVRegressor
from mlxtend.data import boston_housing_data
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt
# 初始化基模型
lr = LinearRegression()
svr_lin = SVR(kernel='linear', gamma='auto')
lasso =Lasso()
ridge = Ridge(random_state=2019,)
catboost=CatBoostRegressor(train_dir=path+r'/catboosttrain/',
iterations=200, learning_rate=0.03,
depth=6, l2_leaf_reg=3,
loss_function='MAE',
eval_metric='MAE',
random_seed=2013,
verbose=0)#参数二model=MultiOutputRegressor(model)
lightgbm = LGBMRegressor(objective='regression',verbose=0)
models = [lr, svr_lin, lasso, ridge,catboost,lightgbm]
params = {'lasso__alpha': [0.1, 1.0, 10.0],
'ridge__alpha': [0.1, 1.0, 10.0]}
sclf = StackingCVRegressor(regressors=models, meta_regressor=ridge)
#grid = GridSearchCV(estimator=sclf, param_grid=params, cv=5, refit=True)
#grid.fit(x_train, y_train)
#print(grid.best_score_, grid.best_params_)
sclf.fit(x_train, y_train)
import joblib
#save model
#model=grid(**grid.best_params_)
model=sclf
print('5-fold cross validation scores:\n')
for clf, label in zip([lr, svr_lin, lasso, ridge,catboost,lightgbm,sclf], ['lr', 'svr_lin', 'lasso', 'ridge','catboost','lightgbm','sclf']):
scores = cross_val_score(clf, x_train, y_train, cv=5, scoring='neg_mean_squared_error')
print("Neg. MSE Score: %0.2f (+/- %0.2f) [%s]"%(scores.mean(), scores.std(), label))
return model
###########短期预测-3.模型训练与预测##########
def shorttime_model_make_result(x_train, y_train,x_test,y_test,model,modelname,Cap,result):
model.fit(x_train, y_train)
#modelname=str(model)#type(model).name
print(“=“+modelname+”=”)
#save model
joblib.dump(model, path+r’/model/‘+modelname+r’shorttime.pkl’)
#load model
model= joblib.load(path+r’/model/'+modelname+r’shorttime.pkl’)
x_test=x_test.reshape(-1,1)
y_pred = model.predict(x_test)
#print(y_test.shape,y_pred.shape)
y_test,y_pred=y_test.ravel(),y_pred.ravel()
#print(y_test.shape,y_pred.shape)
# 校正
for j in range(len(y_pred)):
y_pred[j] = np.round(y_pred[j], 3)
if y_pred[j] < 0:
y_pred[j] = float(0)
if y_pred[j]>Cap:
y_pred[j]=Cap
#print(y_test.shape,y_pred.shape)
mse=mean_squared_error(y_test,y_pred)
rmse=np.sqrt(mean_squared_error(y_test,y_pred))
mae=mean_absolute_error(y_test,y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2score=r2_score(y_test, y_pred)
print('mse:',mse)
print('rmse:',rmse)
print('mae',mae)
print('mape',mape)
print('r2score',r2score)
#分辨率参数-dpi,画布大小参数-figsize
#plt.figure(dpi=300,figsize=(24,8))
plt.title(modelname+str("预测结果"))
plt.plot(y_test,label="真实数据")
plt.plot(y_pred,label="预测值")
plt.legend(loc=1)
plt.savefig(path+r"/pictures/"+modelname+"短期.png")
plt.close()
result['真实值']=y_test
result['预测值']=y_pred
result.to_csv(path+r"/result/"+modelname+"短期.csv", sep=',')
def make_ultrashorttime_data(data):
print(““+“超短期预测数据准备”+”********”)
###########超短期预测-1.数据准备##########
data= data[[“实际功率”,“预测风速”]]
x_=data.iloc[:,1].values.reshape([-1,1])
modelshorttime=joblib.load(path+r’/model/CatBoostRegressor_shorttime.pkl’)
data[“短期预测值”]=modelshorttime.predict(x_ )
data= data[["实际功率","预测风速","短期预测值",]]
#删除某行中某个值为0的行
data= data[data['实际功率'] != np.nan]
data=data.fillna(value='0')
train_=data['2024-01-21 00:00': ]
test_=data['2024-04-30 16:30':]
result_=pd.DataFrame(columns=['实际功率', '真实值'])
result_['实际功率']=test_['实际功率']['2024-05-01 00:15':]
result16=pd.DataFrame(columns=['实际功率', '真实值'])
result16['实际功率']=test_['实际功率']['2024-05-01 00:15':]
print(train_.shape,test_.shape)
num_nodes=3
def process(dataset):
#观看过去时间窗口 过去多少天
past_history_size = 16
#预测未来值n天
future_target = 16
x = []
y = []
dataset=dataset.values
for i in range(len(dataset)-past_history_size-future_target+1):
x_1=dataset[i:i+past_history_size,0]
x_2=dataset[i:i+past_history_size,1]
x_3=dataset[i:i+past_history_size,2]
x_4=dataset[i+past_history_size:i+past_history_size+future_target,-1]
x_5=dataset[i+past_history_size:i+past_history_size+future_target,1]
xxxxx=np.concatenate((x_1,x_2,x_3,x_4,x_5),axis=0)
x.append(xxxxx)
y.append(dataset[i+past_history_size:i+past_history_size+future_target,0])
x=np.array(x)
y = np.array(y)
return x ,y
train_x, train_y= process(train_)
test_x,test_y = process(test_)
print(train_x.shape, train_y.shape,test_x.shape,test_y.shape)
return train_x, train_y,test_x,test_y,result_,result16
###########超短期预测-2.模型定义##########
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor
import numpy as np
自定义多输出 StackingCVRegressor 变体
class MultiOutputStackingCVRegressor:
def init(self, base_estimators, meta_regressor, cv=5, random_state=42):
self.base_estimators = base_estimators
self.meta_regressor = meta_regressor
self.cv = cv
self.random_state = random_state
def fit(self, X, Y):
kf = KFold(n_splits=self.cv, shuffle=True, random_state=self.random_state)
n_targets = Y.shape[1]
n_base_learners = len(self.base_estimators)
meta_features = np.zeros((X.shape[0], n_base_learners))
for fold_idx, (train_index, test_index) in enumerate(kf.split(X)):
for target in range(n_targets):
for i, (_, estimator) in enumerate(self.base_estimators):
estimator.fit(X[train_index], Y[train_index, target])
meta_features[test_index, i] = estimator.predict(X[test_index])
# 确保Y的维度与构造的元特征维度相匹配,无需flatten
self.meta_regressor.fit(np.hstack((X, meta_features)), Y)
def predict(self, X):
# 对每个基础学习器预测,组合成元特征
meta_features = np.column_stack([est.predict(X) for _, est in self.base_estimators])
# 使用元学习器进行最终预测
return self.meta_regressor.predict(np.column_stack((X, meta_features)))
def ultrashort_stacking_model(train_x, train_y,test_x,test_y):
# 假设其他必要的导入和变量如 CatBoostRegressor, LGBMRegressor 等已正确定义
base_learners = [
('lr', LinearRegression()),
('ridge', Ridge()),
# 注意:下面的 CatBoostRegressor 和 LGBMRegressor 应当正确导入并初始化
('catboost',CatBoostRegressor(train_dir=path+r'/catboosttrain/', verbose=0)),
('lightgbm',LGBMRegressor(objective='regression', verbose=-1))
]
meta_learner = LinearRegression()
# 实例化自定义的多输出 Stacking 模型
stacking_regressor = MultiOutputStackingCVRegressor(base_learners, meta_learner, cv=5)
# 训练模型
#stacking_regressor.fit(train_x, train_y)
# 预测并评估
#print("X_test.shape",X_test.shape)#(20, 10)
#Y_pred = stacking_regressor.predict(test_x)
#mse = mean_squared_error(test_y, Y_pred)
#print(f"Mean Squared Error: {mse}")
return stacking_regressor
###########超短期预测-3.模型训练与预测-输出最后1个点情况##########
def ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,model,modelname,Cap,result_):
model.fit(train_x, train_y)
print("================="+modelname+"=================")
#save model
joblib.dump(model, path+r'/model/'+modelname+r'_ultrashorttime.pkl')
#load model
model_= joblib.load(path+r'/model/'+modelname+r'_ultrashorttime.pkl')
pred_y = model_.predict(test_x)
test_y,pred_y=test_y[:,-1],pred_y[:,-1]
print(test_y.shape,pred_y.shape)
# 校正
for j in range(len(pred_y)):
pred_y[j] = np.round(pred_y[j], 3)
if pred_y[j] < 0:
pred_y[j] = float(0)
if pred_y[j]>Cap:
pred_y[j]=Cap
mse=mean_squared_error(test_y,pred_y)
rmse=np.sqrt(mean_squared_error(test_y,pred_y))
mae=mean_absolute_error(test_y,pred_y)
mape = mean_absolute_percentage_error(test_y, pred_y)
r2score=r2_score(test_y, pred_y)
print('mse:',mse)
print('rmse:',rmse)
print('mae',mae)
print('mape',mape)
print('r2score',r2score)
#分辨率参数-dpi,画布大小参数-figsize
#plt.figure(dpi=300,figsize=(24,8))
plt.title(modelname+str("预测结果"))
plt.plot(test_y.ravel(),label="真实数据")
plt.plot(pred_y.ravel(),label="预测值")
plt.legend(loc=1)
plt.savefig(path+r"/pictures/"+modelname+"超短期.png")
plt.close()
result_['真实值']=test_y
result_['预测值']=pred_y
result_.to_csv(path+r"/result/"+modelname+"超短期.csv", sep=',')
modelname=[“Catboost”,“Lightgbm”,“Ridge”]
‘’’
ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,catboost_model,modelname[0],Cap)
ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,lightgbm_model,modelname[1],Cap)
ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,ridge_model,modelname[2],Cap)
‘’’
###########超短期预测-4.模型训练与预测-输出16个点情况##########
#########按照要求模型预测16个点,但是为了便于部署模型预测出相应的时间点,在实际中,通常多预测出来一个点,变为预测17个点
#print(““+“超短期16个点预测开始”+”********”)
def ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,model,modelname,Cap,result16):
model.fit(train_x, train_y)
print(“=“+modelname+”=”)
#save model
joblib.dump(model, path+r’/model/‘+modelname+r’ultrashorttime.pkl’)
#load model
model= joblib.load(path+r’/model/'+modelname+r’ultrashorttime.pkl’)
pred_y = model.predict(test_x)
test_y_,pred_y_=test_y[:,-1],pred_y[:,-1]
print(test_y_.shape,pred_y_.shape)
mse=mean_squared_error(test_y_,pred_y_)
rmse=np.sqrt(mean_squared_error(test_y_,pred_y_))
mae=mean_absolute_error(test_y_,pred_y_)
mape = mean_absolute_percentage_error(test_y_, pred_y_)
r2score=r2_score(test_y_, pred_y_)
print(‘mse:’,mse)
print(‘rmse:’,rmse)
print(‘mae’,mae)
print(‘mape’,mape)
print(‘r2score’,r2score)
#分辨率参数-dpi,画布大小参数-figsize
#plt.figure(dpi=300,figsize=(24,8))
plt.title(modelname+str("预测结果"))
plt.plot(test_y_.ravel(),label="真实数据")
plt.plot(pred_y_.ravel(),label="预测值")
plt.legend(loc=1)
plt.savefig(path+r"/pictures/"+modelname+"超短期16个点取最后一个点画图.png")
plt.close()
def correction(jj):
for j in range(len(pred_y[:,jj])):
pred_y[:,jj][j] = np.round(pred_y[:,jj][j], 3)
if pred_y[:,jj][j] < 0:
pred_y[:,jj][j] = float(0)
if pred_y[:,jj][j]>Cap:
pred_y[:,jj][j]=Cap
for j in range(16):
correction(j)
result16['真实值']=test_y_
for i in range(16):
result16['预测值'+str(i)]=pred_y[:,i]
result16.to_csv(path+r"/result/"+modelname+"超短期16个点.csv", sep=',')
modelname=[“Catboost”,“Lightgbm”,“Ridge”]
‘’’
ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,catboost_model,modelname[0],Cap)
ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,lightgbm_model,modelname[1],Cap)
ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,ridge_model,modelname[2],Cap)
‘’’
if name == “main”:
###########场站数据##########
Cap=17
data=pd.read_csv(path+r"/data/with_nwp2024-05-14.csv", parse_dates=True, index_col=‘时间’)
data = data.sort_index()
print(“场站数据:”,data.shape)
print(data.head())
print(“=场站数据相关性=”)
print(data.corr())
data.plot(figsize=(24,10))
plt.savefig(path+r"/pictures/with_nwp.png")
plt.close()
data[‘实际功率’]=data[‘实际功率’].map(lambda x: x if x> 0 else 0)
data= data[[“实际功率”,“预测风速”]]
#删除某行中某个值为0的行
data= data[data[‘实际功率’] != np.nan]
data=data.fillna(value=‘0’)
'''
print("===========================生成短期预测数据集==========================================")
#生成短期预测数据集
x_train, y_train,x_test,y_test,result=make_shorttime_data(data)
print("=========================短期预测stacking方法===========================")
print("1.短期预测stacking方法预测结果:")
short_stacking=short_stacking_model(x_train, y_train,x_test,y_test)
shorttime_model_make_result(x_train, y_train,x_test,y_test,short_stacking,"short_stacking",Cap,result)
'''
print("===========================生成超短期预测数据集==========================================")
#生成超短期预测数据集
train_x, train_y,test_x,test_y,result_,result16=make_ultrashorttime_data(data)
print("=========================超短期预测stacking方法===========================")
print("2.超短期预测stacking方法预测结果:")
ultrashort_stacking=ultrashort_stacking_model(train_x, train_y,test_x,test_y)
ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,ultrashort_stacking,"ultrashort_stacking",Cap,result_)
ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,ultrashort_stacking,"ultrashort_stacking",Cap,result16)