# Optuna 优化 LightGBM (tuning LightGBM hyperparameters with Optuna)

import lightgbm as lgb
import optuna
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Custom evaluation metric: an equal-weight blend of MSE and MAE.
def custom_metric(y_true, y_pred):
    """Return the average of the mean squared error and the mean absolute error.

    Args:
        y_true: ground-truth target values.
        y_pred: predicted target values.

    Returns:
        ``0.5 * MSE + 0.5 * MAE`` computed with scikit-learn's
        ``mean_squared_error`` and ``mean_absolute_error``.
    """
    squared_term = 0.5 * mean_squared_error(y_true, y_pred)
    absolute_term = 0.5 * mean_absolute_error(y_true, y_pred)
    return squared_term + absolute_term

# Objective function minimized by the Optuna study.
def objective(trial):
    """Train LightGBM with trial-suggested hyperparameters and score it.

    Args:
        trial: the Optuna trial used to sample hyperparameters.

    Returns:
        The value of ``custom_metric`` on the 20% hold-out split of the
        dataset (the study minimizes this value).
    """
    # The loss and the boosting algorithm are part of the search space.  The
    # local is named ``loss`` so it does not shadow this function's own name.
    loss = trial.suggest_categorical(
        'objective', ['regression', 'regression_l1', 'huber', 'fair', 'poisson'])
    boosting_type = trial.suggest_categorical(
        'boosting_type', ['gbdt', 'dart', 'goss', 'rf'])

    # Hyperparameter search space.  ``suggest_float`` replaces the deprecated
    # ``suggest_uniform`` / ``suggest_loguniform`` with identical ranges.
    params = {
        'objective': loss,
        'boosting_type': boosting_type,
        'metric': 'rmse',  # metric monitored by early stopping
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0.0, 1.0),
    }

    if boosting_type == 'goss':
        # GOSS performs its own gradient-based sampling and cannot be combined
        # with bagging (LightGBM 3.x aborts with "Cannot use bagging in GOSS"),
        # so bagging is switched off instead of being sampled.
        params['bagging_fraction'] = 1.0
        params['bagging_freq'] = 0
    else:
        # 'rf' needs bagging enabled; bagging_freq >= 1 guarantees that here.
        params['bagging_fraction'] = trial.suggest_float('bagging_fraction', 0.4, 1.0)
        params['bagging_freq'] = trial.suggest_int('bagging_freq', 1, 7)

    # Load the data and hold out 20% for validation (fixed seed so every trial
    # is scored on the same split).
    # NOTE: load_boston was removed in scikit-learn 1.2; running this requires
    # an older scikit-learn or swapping in another regression dataset.
    data = load_boston()
    X_train, X_valid, y_train, y_valid = train_test_split(
        data.data, data.target, test_size=0.2, random_state=42)

    # LightGBM datasets; the validation set reuses the training set's binning.
    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)

    # Early stopping and silent evaluation are passed as callbacks: the
    # ``early_stopping_rounds`` / ``verbose_eval`` keyword arguments were
    # removed from ``lgb.train`` in LightGBM 4.0.  The round budget is raised
    # above the default of 100 so that a patience of 100 rounds can actually
    # stop training early.
    model = lgb.train(
        params,
        dtrain,
        num_boost_round=1000,
        valid_sets=[dtrain, dvalid],
        callbacks=[
            lgb.early_stopping(stopping_rounds=100, verbose=False),
            lgb.log_evaluation(period=0),
        ],
    )

    # Predict with the best iteration found on the validation set.
    y_pred = model.predict(X_valid, num_iteration=model.best_iteration)

    # Score the trial with the custom metric.
    return custom_metric(y_valid, y_pred)

# Create the Optuna study and run the search (this assumes a smaller value of
# the custom metric is better).
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# Report the best trial that was found.
print('Number of finished trials:', len(study.trials))
print('Best trial:')
trial = study.best_trial
print('  Value: ', trial.value)
print('  Params: ')
for param_name, param_value in trial.params.items():
    print(f'    {param_name}: {param_value}')

import joblib

# Persist the optimized Optuna Study object to disk.
joblib.dump(study, 'optuna_study.pkl')
# Optuna 优化 LightGBM

# 悦读