在MATLAB中,gcv函数(出自Per Christian Hansen编写的Regularization Tools工具箱,并非MATLAB自带函数)用于计算并绘制广义交叉验证(Generalized Cross-Validation, GCV)函数,并通过寻找该函数的最小值来确定正则化参数的最优值。
基本功能:GCV函数的基本功能是计算GCV准则,这是一种用于选择正则化参数的方法,特别是在岭回归(Ridge Regression)等正则化模型中得到广泛使用。通过计算模型预测误差来选择最佳的正则化参数,从而优化模型的性能。
工作原理:广义交叉验证是一种自校准方法,用于估计模型的预测误差,并选择最佳的正则化参数。它不需要显式地将数据划分为训练集和测试集,而是使用一种称为广义交叉验证函数的统计量来衡量模型的拟合质量和复杂度。GCV的核心思想是使用自由度调整后的预测误差来评价模型,通过一种无偏估计的方式来逼近交叉验证误差。
应用场景:GCV函数通常用于非线性回归和其他机器学习问题中,以确定模型的复杂度和正则化参数。它可以帮助用户选择合适的模型复杂度,避免过度拟合,并通过自动化参数选择的过程,使用户更轻松地构建高性能的预测模型。此外,GCV还广泛应用于高分辨率遥感图像感兴趣区去噪等领域。
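函数调用:gcv并不是MATLAB的内置函数,而是Regularization Tools工具箱提供的函数,调用格式为 [reg_min,G,reg_param] = gcv(U,s,b,method)。其中U和s(或U和sm = [sigma,mu])必须分别由csvd或cgsvd函数计算得到,b为右端向量,method指定正则化方法('Tikh'、'tsvd'或'dsvd',缺省为'Tikh')。函数会绘制GCV曲线;如果指定了输出参数,还会找出G的最小值,并返回对应的最优正则化参数reg_min。完整代码如下: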
function [reg_min,G,reg_param] = gcv(U,s,b,method)
%GCV Plot the GCV function and find its minimum.
%
% [reg_min,G,reg_param] = gcv(U,s,b,method)
% [reg_min,G,reg_param] = gcv(U,sm,b,method)  ,  sm = [sigma,mu]
%
% Plots the GCV-function
%            || A*x - b ||^2
%    G = ----------------------
%         (trace(I - A*A_I))^2
% as a function of the regularization parameter reg_param. Here, A_I is a
% matrix which produces the regularized solution.
%
% The following methods are allowed:
%    method = 'Tikh' : Tikhonov regularization   (solid line )
%    method = 'tsvd' : truncated SVD or GSVD     (o markers  )
%    method = 'dsvd' : damped SVD or GSVD        (dotted line)
% If method is not specified, 'Tikh' is default.  U and s, or U and sm,
% must be computed by the functions csvd and cgsvd, respectively.
%
% Inputs:
%    U      - m-by-n matrix; only U'*b is used.
%    s      - p-by-1 vector of singular values, or p-by-2 matrix
%             sm = [sigma,mu]; in the latter case the ratios sigma./mu
%             are used, taken in reversed row order.
%    b      - right-hand side.
%    method - method name; only its first four characters are compared.
%
% Outputs:
%    reg_min   - for 'Tikh' and 'dsvd': the lambda returned by fminbnd on
%                the interval bracketing the smallest sampled G value;
%                for 'tsvd': the index k at which G is smallest.
%                Only computed when at least one output is requested.
%    G         - GCV function values at the entries of reg_param.
%    reg_param - for 'Tikh' and 'dsvd': 200 geometrically spaced values
%                running from s(1) down to max(s(p),16*eps*s(1));
%                for 'tsvd': (1:p-1)'.
%
% If any output arguments are specified, then the minimum of G is
% identified and the corresponding reg. parameter reg_min is returned.
% The GCV curve is plotted in every case.
%
% The 'Tikh' and 'dsvd' methods call gcvfun, which is not defined in this
% function and must be available on the path.

% Per Christian Hansen, DTU Compute, Dec. 16, 2003.

% Reference: G. Wahba, "Spline Models for Observational Data",
% SIAM, 1990.

% Set defaults.
if (nargin==3), method='Tikh'; end  % Default method.
npoints = 200;                      % Number of points on the curve.
smin_ratio = 16*eps;                % Smallest lambda relative to s(1).

% Initialization.
[m,n] = size(U); [p,ps] = size(s);
beta = U'*b; beta2 = norm(b)^2 - norm(beta)^2;
if (ps==2)
  % Two-column input: switch to the ratios sigma./mu in reversed order
  % and reverse the first p coefficients accordingly.
  s = s(p:-1:1,1)./s(p:-1:1,2); beta = beta(p:-1:1);
end
find_min = (nargout > 0);

% Intrinsic residual: the part of b outside the range of U.  It is only
% used when U has more rows than columns and the value is positive.
delta0 = 0;
if (m > n && beta2 > 0), delta0 = beta2; end

is_tikh = strncmp(method,'Tikh',4) || strncmp(method,'tikh',4);
is_tsvd = strncmp(method,'tsvd',4) || strncmp(method,'tgsv',4);
is_dsvd = strncmp(method,'dsvd',4) || strncmp(method,'dgsv',4);

if (is_tikh || is_dsvd)

  % Bind the fixed data into a one-argument objective.  A function handle
  % is used instead of the function-name string plus trailing arguments,
  % because fminbnd documents only the (fun,x1,x2,options) call form.
  beta_p = beta(1:p);
  if (is_tikh)
    s2 = s.^2;
    gcv_obj = @(lambda) gcvfun(lambda,s2,beta_p,delta0,m-n);
    line_style = '-';
  else
    gcv_obj = @(lambda) gcvfun(lambda,s,beta_p,delta0,m-n,1);
    line_style = ':';
  end

  % Vector of regularization parameters, stored in decreasing order.
  reg_param = zeros(npoints,1); G = reg_param;
  reg_param(npoints) = max([s(p),s(1)*smin_ratio]);
  ratio = (s(1)/reg_param(npoints))^(1/(npoints-1));
  for i=npoints-1:-1:1, reg_param(i) = ratio*reg_param(i+1); end

  % Vector of GCV-function values.
  for i=1:npoints
    G(i) = gcv_obj(reg_param(i));
  end

  % Plot GCV function.
  loglog(reg_param,G,line_style), xlabel('\lambda'), ylabel('G(\lambda)')
  title('GCV function')

  % Find minimum, if requested.
  if (find_min)
    [minG,minGi] = min(G); % Initial guess.
    % reg_param is decreasing, so index minGi+1 is the lower bound and
    % index minGi-1 is the upper bound of the search interval.
    lambda_lo = reg_param(min(minGi+1,npoints));
    lambda_hi = reg_param(max(minGi-1,1));
    reg_min = fminbnd(gcv_obj,lambda_lo,lambda_hi,...
      optimset('Display','off')); % Minimizer.
    minG = gcv_obj(reg_min); % Minimum of GCV function.
    ax = axis;
    HoldState = ishold; hold on;
    loglog(reg_min,minG,'*r',[reg_min,reg_min],[minG/1000,minG],':r')
    title(['GCV function, minimum at \lambda = ',num2str(reg_min)])
    axis(ax)
    if (~HoldState), hold off; end
  end

elseif (is_tsvd)

  % Residual norms squared, accumulated from the last coefficient upward:
  % rho2(k) = delta0 + sum of abs(beta(j))^2 for j = k+1,...,p.
  rho2 = zeros(p-1,1);
  rho2(p-1) = abs(beta(p))^2 + delta0;
  for k=p-2:-1:1, rho2(k) = rho2(k+1) + abs(beta(k+1))^2; end

  % Vector of GCV-function values.
  G = zeros(p-1,1);
  for k=1:p-1
    G(k) = rho2(k)/(m - k + (n - p))^2;
  end
  reg_param = (1:p-1)';

  % Plot GCV function.
  semilogy(reg_param,G,'o'), xlabel('k'), ylabel('G(k)')
  title('GCV function')

  % Find minimum, if requested.
  if (find_min)
    [minG,reg_min] = min(G);
    ax = axis;
    HoldState = ishold; hold on;
    semilogy(reg_min,minG,'*r',[reg_min,reg_min],[minG/1000,minG],':r')
    title(['GCV function, minimum at k = ',num2str(reg_min)])
    axis(ax);
    if (~HoldState), hold off; end
  end

elseif (strncmp(method,'mtsv',4) || strncmp(method,'ttls',4))
  error('The MTSVD and TTLS methods are not supported')
else
  error('Illegal method')
end