Browse Source

Reworked TrustRegionMinimizer to support various subproblems.

pull/614/head
diluculo 8 years ago
parent
commit
ef5e93697e
  1. 13
      src/Numerics/Optimization/ITrustRegionSubProblem.cs
  2. 54
      src/Numerics/Optimization/Subproblems/QuadraticSubproblem.cs
  3. 32
      src/Numerics/Optimization/Subproblems/Util.cs
  4. 320
      src/Numerics/Optimization/TrustRegionDogLegMinimizer.cs
  5. 261
      src/Numerics/Optimization/TrustRegionMinimizerBase.cs
  6. 12
      src/Numerics/Optimization/TrustRegionSubProblem.cs

13
src/Numerics/Optimization/ITrustRegionSubProblem.cs

@ -0,0 +1,13 @@
using MathNet.Numerics.LinearAlgebra;
namespace MathNet.Numerics.Optimization
{
public interface ITrustRegionSubproblem
{
Vector<double> Pstep { get; }
double PredictedReduction { get; }
bool HitBoundary { get; }
void Solve(IObjectiveModel objective, double radius);
}
}

54
src/Numerics/Optimization/Subproblems/QuadraticSubproblem.cs

@ -0,0 +1,54 @@
using MathNet.Numerics.LinearAlgebra;
namespace MathNet.Numerics.Optimization.Subproblems
{
internal class QuadraticSubproblem : ITrustRegionSubproblem
{
public Vector<double> Pstep { get; private set; }
public double PredictedReduction { get; private set; }
public bool HitBoundary { get; private set; }
public void Solve(IObjectiveModel objective, double delta)
{
var Jacobian = objective.Jacobian;
var Gradient = objective.Gradient;
var Hessian = objective.Hessian;
var RSS = objective.Residue;
// newton point
// the Gauss–Newton step by solving the normal equations
var Pgn = Hessian.PseudoInverse() * Gradient; // Hessian.Solve(Gradient) fails so many times...
// cauchy point
// steepest descent direction is given by
var alpha = Gradient.DotProduct(Gradient) / (Hessian * Gradient).DotProduct(Gradient);
var Psd = alpha * Gradient;
// update step and prectted reduction
if (Pgn.L2Norm() <= delta)
{
// Pgn is inside trust region radius
HitBoundary = false;
Pstep = Pgn;
PredictedReduction = RSS;
}
else if (alpha * Psd.L2Norm() >= delta)
{
// Psd is outside trust region radius
HitBoundary = true;
Pstep = delta / Psd.L2Norm() * Psd;
PredictedReduction = delta * (2.0 * (alpha * Gradient).L2Norm() - delta) / 2.0 / alpha;
}
else
{
// Pstep is intersection of the trust region boundary
HitBoundary = true;
var beta = Util.FindBeta(alpha, Psd, Pgn, delta).Item2;
Pstep = alpha * Psd + beta * (Pgn - alpha * Psd);
PredictedReduction = 0.5 * alpha * (1 - beta) * (1 - beta) * Gradient.DotProduct(Gradient) + beta * (2 - beta) * RSS;
}
}
}
}

32
src/Numerics/Optimization/Subproblems/Util.cs

@ -0,0 +1,32 @@
using MathNet.Numerics.LinearAlgebra;
using System;
namespace MathNet.Numerics.Optimization.Subproblems
{
internal static class Util
{
public static Tuple<double, double> FindBeta(double alpha, Vector<double> sd, Vector<double> gn, double delta)
{
// Pstep is intersection of the trust region boundary
// Pstep = α*Psd + β*(Pgn - α*Psd)
// find r so that ||Pstep|| = Δ
// z = α*Psd, d = (Pgn - z)
// (d^2)β^2 + (2*z*d)β + (z^2 - Δ^2) = 0
//
// positive β is used for the quadratic formula
var z = alpha * sd;
var d = gn - z;
var a = d.DotProduct(d);
var b = 2.0 * z.DotProduct(d);
var c = z.DotProduct(z) - delta * delta;
var aux = b + ((b >= 0) ? 1.0 : -1.0) * Math.Sqrt(b * b - 4.0 * a * c);
var beta1 = -aux / 2.0 / a;
var beta2 = -2.0 * c / aux;
return new Tuple<double, double>(beta1, beta2);
}
}
}

320
src/Numerics/Optimization/TrustRegionDogLegMinimizer.cs

@ -3,324 +3,10 @@ using System;
namespace MathNet.Numerics.Optimization
{
public sealed class TrustRegionDogLegMinimizer
public sealed class TrustRegionDogLegMinimizer : TrustRegionMinimizerBase
{
#region Tolerances and options
/// <summary>
/// The stopping threshold for infinity norm of the gradient.
/// </summary>
public static double GradientTolerance { get; set; }
/// <summary>
/// The stopping threshold for L2 norm of the change of the parameters.
/// </summary>
public static double StepTolerance { get; set; }
/// <summary>
/// The stopping threshold for the function value or L2 norm of the residuals.
/// </summary>
public static double FunctionTolerance { get; set; }
/// <summary>
/// The stopping threshold for the trust region radius.
/// </summary>
public static double RadiusTolerance { get; set; }
/// <summary>
/// The maximum number of iterations.
/// </summary>
public int MaximumIterations { get; set; }
#endregion Tolerances and options
public TrustRegionDogLegMinimizer(double gradientTolerance = 1E-8, double stepTolerance = 1E-8, double functionTolerance = 1E-8, double radiusTolerance = 1E-8, int maximumIterations = -1)
{
FunctionTolerance = functionTolerance;
GradientTolerance = gradientTolerance;
StepTolerance = stepTolerance;
RadiusTolerance = radiusTolerance;
MaximumIterations = maximumIterations;
}
public ModelMinimizationResult FindMinimum(IObjectiveModel objective, Vector<double> initialGuess)
{
if (objective == null)
throw new ArgumentNullException("objective");
if (initialGuess == null)
throw new ArgumentNullException("initialGuess");
return Minimum(objective, initialGuess, GradientTolerance, StepTolerance, FunctionTolerance, RadiusTolerance, MaximumIterations);
}
public ModelMinimizationResult FindMinimum(IObjectiveModel objective, double[] initialGuess)
{
if (objective == null)
throw new ArgumentNullException("objective");
if (initialGuess == null)
throw new ArgumentNullException("initialGuess");
return Minimum(objective, CreateVector.DenseOfArray<double>(initialGuess), GradientTolerance, StepTolerance, FunctionTolerance, RadiusTolerance, MaximumIterations);
}
/// <summary>
/// Non-linear least square fitting by the trust-region dogleg algorithm.
/// </summary>
/// <param name="objective">The objective model, including function, jacobian, observations, and parameter bounds.</param>
/// <param name="initialGuess">The initial guess values.</param>
/// <param name="functionTolerance">The stopping threshold for L2 norm of the residuals.</param>
/// <param name="gradientTolerance">The stopping threshold for infinity norm of the gradient vector.</param>
/// <param name="stepTolerance">The stopping threshold for L2 norm of the change of parameters.</param>
/// <param name="radiusTolerance">The stopping threshold for trust region radius</param>
/// <param name="maximumIterations">The max iterations.</param>
/// <returns></returns>
public static ModelMinimizationResult Minimum(IObjectiveModel objective, Vector<double> initialGuess, double gradientTolerance = 1E-8, double stepTolerance = 1E-8, double functionTolerance = 1E-8, double radiusTolerance = 1E-18, int maximumIterations = -1)
{
// Non-linear least square fitting by the trust-region dogleg algorithm.
//
// The Powell's dogleg method is finding the minimum of a function F(p) that is a sum of squares of nonlinear functions.
//
// For given datum pair (x, y), uncertainties σ (or weighting W = 1 / σ^2) and model function f = f(x; p),
// let's find the parameters of the model so that the sum of the quares of the deviations is minimized.
//
// F(p) = 1/2 * ∑{ Wi * (yi - f(xi; p))^2 }
// pbest = argmin F(p)
//
// Here, we will use the following terms:
// Weighting W is the diagonal matrix and can be decomposed as LL', so L = 1/σ
// Residuals, R = L(y - f(x; p))
// Residual sum of squares, RSS = ||R||^2 = R.DotProduct(R)
// Jacobian J = df(x; p)/dp
// Gradient g = J'W(y − f(x; p)) = J'LR
// Approximated Hessian H = J'WJ
//
// The Powell's dogleg algorithm is summarized as follows:
// initially set trust-region radius, Δ
// repeat
// solve quadratic subproblem
// update Δ:
// let ρ = (RSS - RSSnew) / predRed
// if ρ > 0.75, Δ = 2Δ
// if ρ < 0.25, Δ = Δ/4
// if ρ > eta, P = P + ΔP
//
// References:
// [1]. Madsen, K., H. B. Nielsen, and O. Tingleff.
// "Methods for Non-Linear Least Squares Problems. Technical University of Denmark, 2004. Lecture notes." (2004).
// Available Online from: http://orbit.dtu.dk/files/2721358/imm3215.pdf
// [2]. SciPy
// Available Online from: https://github.com/scipy/scipy/blob/master/scipy/optimize/_trustregion_dogleg.py
double maxDelta = 1000;
double eta = 0;
if (objective == null)
throw new ArgumentNullException("objective");
if (initialGuess == null)
throw new ArgumentNullException("initialGuess");
ExitCondition exitCondition = ExitCondition.None;
// Initialize objective
objective.FunctionEvaluations = 0;
objective.JacobianEvaluations = 0;
// First, calculate function values and setup variables
objective.EvaluateFunction(initialGuess);
var P = objective.Parameters; // current parameters
var RSS = objective.Residue; // Residual Sum of Squares = R'R
var RSSinit = RSS; // RSS at initial gussing parameters
if (maximumIterations < 0)
{
maximumIterations = 200 * (initialGuess.Count + 1);
}
// if R == NaN, stop
if (double.IsNaN(RSS))
{
exitCondition = ExitCondition.InvalidValues;
return new ModelMinimizationResult(objective, -1, exitCondition);
}
// When only function evaluation is needed, set maximumIterations to zero,
if (maximumIterations == 0)
{
exitCondition = ExitCondition.ManuallyStopped;
}
// if ||R||^2 <= fTol, stop
if (RSS <= functionTolerance)
{
exitCondition = ExitCondition.Converged; // SmallRSS
}
// Evaluate projected Hessian, and gradient
objective.EvaluateJacobian(P);
var Hessian = objective.Hessian;
var Gradient = objective.Gradient;
// if ||g||_oo <= gtol, found and stop
if (Gradient.InfinityNorm() <= gradientTolerance)
{
exitCondition = ExitCondition.RelativeGradient; // SmallGradient
}
if (exitCondition != ExitCondition.None)
{
// finalize
objective.EvaluateCovariance(P);
return new ModelMinimizationResult(objective, -1, exitCondition);
}
// initialize trust-region radius, Δ
double delta = Gradient.DotProduct(Gradient) / (Hessian * Gradient).DotProduct(Gradient);
delta = Math.Max(1.0, Math.Min(delta, maxDelta));
int iterations = 0;
while (iterations < maximumIterations && exitCondition == ExitCondition.None)
{
iterations++;
// solve the subproblem
var subprogram = SolveQuadraticSubproblem(objective, delta);
var Pstep = subprogram.Item1;
var predictedReduction = subprogram.Item2;
var hitBoundary = subprogram.Item3;
if (Pstep.L2Norm() <= stepTolerance * (stepTolerance + P.L2Norm()))
{
exitCondition = ExitCondition.RelativePoints; // SmallRelativeParameters
break;
}
var Pnew = P + Pstep; // parameters to test
objective.EvaluateFunction(Pnew);
var RSSnew = objective.Residue;
// calculate the ratio of the actual to the predicted reduction.
double rho = (predictedReduction != 0)
? (RSS - RSSnew) / predictedReduction
: 0;
if (rho > 0.75 && hitBoundary)
{
delta = Math.Min(2.0 * delta, maxDelta);
}
else if (rho < 0.25)
{
delta = delta * 0.25;
if (delta <= radiusTolerance * (radiusTolerance + P.DotProduct(P)))
{
exitCondition = ExitCondition.RelativePoints; // SmallRelativeParameters
break;
}
}
if (rho > eta)
{
// accepted
Pnew.CopyTo(P);
RSS = RSSnew;
// update Jacobian, Hessian, and gradient
objective.EvaluateJacobian(P);
Gradient = objective.Gradient;
Hessian = objective.Hessian;
// if ||g||_oo <= gtol, found and stop
if (Gradient.InfinityNorm() <= gradientTolerance)
{
exitCondition = ExitCondition.RelativeGradient;
}
// if ||R||^2 < fTol, found and stop
if (RSS <= functionTolerance)
{
exitCondition = ExitCondition.Converged; // SmallRSS
}
}
}
if (iterations >= maximumIterations)
{
exitCondition = ExitCondition.ExceedIterations;
}
// finalize
objective.EvaluateCovariance(P);
return new ModelMinimizationResult(objective, iterations, exitCondition);
}
private static Tuple<Vector<double>, double, bool> SolveQuadraticSubproblem(IObjectiveModel objective, double delta)
{
Vector<double> Pstep;
double predictedReduction;
bool hitBoundary = false;
var Jacobian = objective.Jacobian;
var Gradient = objective.Gradient;
var Hessian = objective.Hessian;
var RSS = objective.Residue;
// newton point
// the Gauss–Newton step by solving the normal equations
var Pgn = Hessian.PseudoInverse() * Gradient; // Hessian.Solve(Gradient) fails so many times...
// cauchy point
// steepest descent direction is given by
var alpha = Gradient.DotProduct(Gradient) / (Hessian * Gradient).DotProduct(Gradient);
var Psd = alpha * Gradient;
// update step and prectted reduction
if (Pgn.L2Norm() <= delta)
{
// Pgn is inside trust region radius
hitBoundary = false;
Pstep = Pgn;
predictedReduction = RSS;
}
else if (alpha * Psd.L2Norm() >= delta)
{
// Psd is outside trust region radius
hitBoundary = true;
Pstep = delta / Psd.L2Norm() * Psd;
predictedReduction = delta * (2.0 * (alpha * Gradient).L2Norm() - delta) / 2.0 / alpha;
}
else
{
// Pstep is intersection of the trust region boundary
hitBoundary = true;
var beta = FindBeta(alpha, Psd, Pgn, delta);
Pstep = alpha * Psd + beta * (Pgn - alpha * Psd);
predictedReduction = 0.5 * alpha * (1 - beta) * (1 - beta) * Gradient.DotProduct(Gradient) + beta * (2 - beta) * RSS;
}
return new Tuple<Vector<double>, double, bool>(Pstep, predictedReduction, hitBoundary);
}
private static double FindBeta(double alpha, Vector<double> sd, Vector<double> gn, double delta)
{
// Pstep is intersection of the trust region boundary
// Pstep = α*Psd + β*(Pgn - α*Psd)
// find r so that ||Pstep|| = Δ
// z = α*Psd, d = (Pgn - z)
// (d^2)β^2 + (2*z*d)β + (z^2 - Δ^2) = 0
// get positive β by using the quadratic formula
var z = alpha * sd;
var d = gn - z;
var a = d.DotProduct(d);
var b = 2.0 * z.DotProduct(d);
var c = z.DotProduct(z) - delta * delta;
var aux = b + ((b >= 0) ? 1.0 : -1.0) * Math.Sqrt(b * b - 4.0 * a * c);
var beta = Math.Max(-aux / 2.0 / a, -2.0 * c / aux);
return beta;
}
: base(TrustRegionSubproblem.Quadratic(), gradientTolerance, stepTolerance, functionTolerance, radiusTolerance, maximumIterations)
{ }
}
}

261
src/Numerics/Optimization/TrustRegionMinimizerBase.cs

@ -0,0 +1,261 @@
using MathNet.Numerics.LinearAlgebra;
using System;
namespace MathNet.Numerics.Optimization
{
public abstract class TrustRegionMinimizerBase
{
public static ITrustRegionSubproblem Subproblem;
/// <summary>
/// The stopping threshold for infinity norm of the gradient.
/// </summary>
public static double GradientTolerance { get; set; }
/// <summary>
/// The stopping threshold for L2 norm of the change of the parameters.
/// </summary>
public static double StepTolerance { get; set; }
/// <summary>
/// The stopping threshold for the function value or L2 norm of the residuals.
/// </summary>
public static double FunctionTolerance { get; set; }
/// <summary>
/// The stopping threshold for the trust region radius.
/// </summary>
public static double RadiusTolerance { get; set; }
/// <summary>
/// The maximum number of iterations.
/// </summary>
public int MaximumIterations { get; set; }
public TrustRegionMinimizerBase(ITrustRegionSubproblem subproblem,
double gradientTolerance = 1E-8, double stepTolerance = 1E-8, double functionTolerance = 1E-8, double radiusTolerance = 1E-8, int maximumIterations = -1)
{
if (subproblem == null)
throw new ArgumentNullException("subproblem");
Subproblem = subproblem;
FunctionTolerance = functionTolerance;
GradientTolerance = gradientTolerance;
StepTolerance = stepTolerance;
RadiusTolerance = radiusTolerance;
MaximumIterations = maximumIterations;
}
public ModelMinimizationResult FindMinimum(IObjectiveModel objective, Vector<double> initialGuess)
{
if (objective == null)
throw new ArgumentNullException("objective");
if (initialGuess == null)
throw new ArgumentNullException("initialGuess");
return Minimum(objective, initialGuess, Subproblem, GradientTolerance, StepTolerance, FunctionTolerance, RadiusTolerance, MaximumIterations);
}
public ModelMinimizationResult FindMinimum(IObjectiveModel objective, double[] initialGuess)
{
if (objective == null)
throw new ArgumentNullException("objective");
if (initialGuess == null)
throw new ArgumentNullException("initialGuess");
return Minimum(objective, CreateVector.DenseOfArray<double>(initialGuess), Subproblem, GradientTolerance, StepTolerance, FunctionTolerance, RadiusTolerance, MaximumIterations);
}
/// <summary>
/// Non-linear least square fitting by the trust-region algorithm.
/// </summary>
/// <param name="objective">The objective model, including function, jacobian, observations, and parameter bounds.</param>
/// <param name="initialGuess">The initial guess values.</param>
/// <param name="subproblem">The subproblem</param>
/// <param name="functionTolerance">The stopping threshold for L2 norm of the residuals.</param>
/// <param name="gradientTolerance">The stopping threshold for infinity norm of the gradient vector.</param>
/// <param name="stepTolerance">The stopping threshold for L2 norm of the change of parameters.</param>
/// <param name="radiusTolerance">The stopping threshold for trust region radius</param>
/// <param name="maximumIterations">The max iterations.</param>
/// <returns></returns>
public static ModelMinimizationResult Minimum(IObjectiveModel objective, Vector<double> initialGuess, ITrustRegionSubproblem subproblem, double gradientTolerance = 1E-8, double stepTolerance = 1E-8, double functionTolerance = 1E-8, double radiusTolerance = 1E-18, int maximumIterations = -1)
{
// Non-linear least square fitting by the trust-region algorithm.
//
// For given datum pair (x, y), uncertainties σ (or weighting W = 1 / σ^2) and model function f = f(x; p),
// let's find the parameters of the model so that the sum of the quares of the deviations is minimized.
//
// F(p) = 1/2 * ∑{ Wi * (yi - f(xi; p))^2 }
// pbest = argmin F(p)
//
// Here, we will use the following terms:
// Weighting W is the diagonal matrix and can be decomposed as LL', so L = 1/σ
// Residuals, R = L(y - f(x; p))
// Residual sum of squares, RSS = ||R||^2 = R.DotProduct(R)
// Jacobian J = df(x; p)/dp
// Gradient g = J'W(y − f(x; p)) = J'LR
// Approximated Hessian H = J'WJ
//
// The trust region algorithm is summarized as follows:
// initially set trust-region radius, Δ
// repeat
// solve subproblem
// update Δ:
// let ρ = (RSS - RSSnew) / predRed
// if ρ > 0.75, Δ = 2Δ
// if ρ < 0.25, Δ = Δ/4
// if ρ > eta, P = P + ΔP
//
// References:
// [1]. Madsen, K., H. B. Nielsen, and O. Tingleff.
// "Methods for Non-Linear Least Squares Problems. Technical University of Denmark, 2004. Lecture notes." (2004).
// Available Online from: http://orbit.dtu.dk/files/2721358/imm3215.pdf
// [2]. Nocedal, Jorge, and Stephen J. Wright.
// Numerical optimization (2006): 101-134.
// [3]. SciPy
// Available Online from: https://github.com/scipy/scipy/blob/master/scipy/optimize/_trustregion.py
double maxDelta = 1000;
double eta = 0;
if (objective == null)
throw new ArgumentNullException("objective");
if (initialGuess == null)
throw new ArgumentNullException("initialGuess");
ExitCondition exitCondition = ExitCondition.None;
// Initialize objective
objective.FunctionEvaluations = 0;
objective.JacobianEvaluations = 0;
// First, calculate function values and setup variables
objective.EvaluateFunction(initialGuess);
var P = objective.Parameters; // current parameters
var RSS = objective.Residue; // Residual Sum of Squares = R'R
var RSSinit = RSS; // RSS at initial gussing parameters
if (maximumIterations < 0)
{
maximumIterations = 200 * (initialGuess.Count + 1);
}
// if R == NaN, stop
if (double.IsNaN(RSS))
{
exitCondition = ExitCondition.InvalidValues;
return new ModelMinimizationResult(objective, -1, exitCondition);
}
// When only function evaluation is needed, set maximumIterations to zero,
if (maximumIterations == 0)
{
exitCondition = ExitCondition.ManuallyStopped;
}
// if ||R||^2 <= fTol, stop
if (RSS <= functionTolerance)
{
exitCondition = ExitCondition.Converged; // SmallRSS
}
// Evaluate projected Hessian, and gradient
objective.EvaluateJacobian(P);
var Hessian = objective.Hessian;
var Gradient = objective.Gradient;
// if ||g||_oo <= gtol, found and stop
if (Gradient.InfinityNorm() <= gradientTolerance)
{
exitCondition = ExitCondition.RelativeGradient; // SmallGradient
}
if (exitCondition != ExitCondition.None)
{
// finalize
objective.EvaluateCovariance(P);
return new ModelMinimizationResult(objective, -1, exitCondition);
}
// initialize trust-region radius, Δ
double delta = Gradient.DotProduct(Gradient) / (Hessian * Gradient).DotProduct(Gradient);
delta = Math.Max(1.0, Math.Min(delta, maxDelta));
int iterations = 0;
while (iterations < maximumIterations && exitCondition == ExitCondition.None)
{
iterations++;
// solve the subproblem
subproblem.Solve(objective, delta);
var Pstep = subproblem.Pstep;
var predictedReduction = subproblem.PredictedReduction;
var hitBoundary = subproblem.HitBoundary;
if (Pstep.L2Norm() <= stepTolerance * (stepTolerance + P.L2Norm()))
{
exitCondition = ExitCondition.RelativePoints; // SmallRelativeParameters
break;
}
var Pnew = P + Pstep; // parameters to test
objective.EvaluateFunction(Pnew);
var RSSnew = objective.Residue;
// calculate the ratio of the actual to the predicted reduction.
double rho = (predictedReduction != 0)
? (RSS - RSSnew) / predictedReduction
: 0;
if (rho > 0.75 && hitBoundary)
{
delta = Math.Min(2.0 * delta, maxDelta);
}
else if (rho < 0.25)
{
delta = delta * 0.25;
if (delta <= radiusTolerance * (radiusTolerance + P.DotProduct(P)))
{
exitCondition = ExitCondition.RelativePoints; // SmallRelativeParameters
break;
}
}
if (rho > eta)
{
// accepted
Pnew.CopyTo(P);
RSS = RSSnew;
// update Jacobian, Hessian, and gradient
objective.EvaluateJacobian(P);
Gradient = objective.Gradient;
Hessian = objective.Hessian;
// if ||g||_oo <= gtol, found and stop
if (Gradient.InfinityNorm() <= gradientTolerance)
{
exitCondition = ExitCondition.RelativeGradient;
}
// if ||R||^2 < fTol, found and stop
if (RSS <= functionTolerance)
{
exitCondition = ExitCondition.Converged; // SmallRSS
}
}
}
if (iterations >= maximumIterations)
{
exitCondition = ExitCondition.ExceedIterations;
}
// finalize
objective.EvaluateCovariance(P);
return new ModelMinimizationResult(objective, iterations, exitCondition);
}
}
}

12
src/Numerics/Optimization/TrustRegionSubProblem.cs

@ -0,0 +1,12 @@
using MathNet.Numerics.Optimization.Subproblems;
namespace MathNet.Numerics.Optimization
{
public static class TrustRegionSubproblem
{
public static ITrustRegionSubproblem Quadratic()
{
return new QuadraticSubproblem();
}
}
}
Loading…
Cancel
Save