# Optimizing LightGBM hyperparameters with Optuna
import lightgbm as lgb
import optuna
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
# Custom evaluation metric: an equal-weight blend of MSE and MAE.
def custom_metric(y_true, y_pred):
    """Return half the mean squared error plus half the mean absolute error.

    Args:
        y_true: Ground-truth targets, forwarded unchanged to the scikit-learn
            metric functions.
        y_pred: Predicted targets, forwarded unchanged to the scikit-learn
            metric functions.

    Returns:
        ``0.5 * MSE + 0.5 * MAE`` for the given targets and predictions.
    """
    squared_term = mean_squared_error(y_true, y_pred) * 0.5
    absolute_term = mean_absolute_error(y_true, y_pred) * 0.5
    return squared_term + absolute_term
# Optuna objective: each trial trains one LightGBM model and scores it.
def objective(trial):
    """Train a LightGBM regressor with trial-sampled settings and score it.

    Args:
        trial: The Optuna trial object used to sample every hyperparameter.

    Returns:
        The ``custom_metric`` value (0.5 * MSE + 0.5 * MAE) of the model's
        predictions on the held-out 20% validation split. Lower is better.
    """
    # Upper bound on boosting rounds. It must exceed the early-stopping
    # patience, otherwise early stopping can never trigger, and it must be
    # large enough for the smallest learning rates in the search space.
    num_boost_round = 1000
    early_stopping_rounds = 100

    # Search space for the loss function and the boosting algorithm.
    # The local is NOT called ``objective`` so it does not shadow this function.
    loss_name = trial.suggest_categorical(
        'objective', ['regression', 'regression_l1', 'huber', 'fair', 'poisson'])
    boosting_type = trial.suggest_categorical(
        'boosting_type', ['gbdt', 'dart', 'goss', 'rf'])

    # Hyperparameter search space.
    params = {
        'objective': loss_name,
        'boosting_type': boosting_type,
        'metric': 'rmse',
        # Log-uniform in [1e-4, 1e-1].
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
        # Log-uniform in [1e-8, 10.0].
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0.0, 1.0),
    }
    # GOSS is a sampling strategy of its own and cannot be combined with
    # bagging (LightGBM 3.x aborts with "Cannot use bagging in GOSS"), so the
    # bagging parameters are only sampled for the other boosting types.
    if boosting_type != 'goss':
        params['bagging_fraction'] = trial.suggest_float('bagging_fraction', 0.4, 1.0)
        params['bagging_freq'] = trial.suggest_int('bagging_freq', 1, 7)

    # Load the data and make a fixed 80/20 train/validation split.
    # NOTE(review): load_boston was removed in scikit-learn 1.2; this script
    # needs an older scikit-learn or a different dataset loader.
    data = load_boston()
    X_train, X_valid, y_train, y_valid = train_test_split(
        data.data, data.target, test_size=0.2, random_state=42)

    # LightGBM datasets; the validation set reuses the training set's binning.
    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)

    # Train. Early stopping and silent evaluation are requested via callbacks
    # because the ``early_stopping_rounds`` / ``verbose_eval`` keyword
    # arguments of ``lgb.train`` were removed in LightGBM 4.0. Only the
    # held-out set is monitored so that it alone drives ``best_iteration``.
    # NOTE(review): LightGBM documents that early stopping is not available
    # in dart mode; such trials are expected to run all rounds.
    model = lgb.train(
        params,
        dtrain,
        num_boost_round=num_boost_round,
        valid_sets=[dvalid],
        callbacks=[
            lgb.early_stopping(stopping_rounds=early_stopping_rounds, verbose=False),
            lgb.log_evaluation(period=0),
        ],
    )

    # Predict on the validation split using the best iteration found.
    y_pred = model.predict(X_valid, num_iteration=model.best_iteration)

    # Score with the custom metric; this is the value Optuna minimizes.
    return custom_metric(y_valid, y_pred)
# Create the Optuna study and run the search.
# The custom metric is treated as an error measure, so smaller is better.
study = optuna.create_study(direction='minimize')
study.optimize(func=objective, n_trials=100)

# Report the number of trials and the best trial's score and parameters.
print('Number of finished trials:', len(study.trials))
print('Best trial:')
trial = study.best_trial
print(' Value: ', trial.value)
print(' Params: ')
for name, setting in trial.params.items():
    print(' {}: {}'.format(name, setting))

import joblib

# Persist the optimized study object to disk so it can be reloaded later.
joblib.dump(value=study, filename='optuna_study.pkl')