From b858289fbe64453d304547d9c1d61c84c8d6dc23 Mon Sep 17 00:00:00 2001 From: bdodson Date: Sun, 3 May 2015 02:35:19 -0700 Subject: [PATCH] Broyden-Fletcher-Goldfarb-Shanno optimizer --- src/Numerics/Numerics.csproj | 2 + src/Numerics/RootFinding/BfgsSolver.cs | 108 ++++++++++++++++++++ src/Numerics/RootFinding/WolfeRule.cs | 113 +++++++++++++++++++++ src/UnitTests/RootFindingTests/BfgsTest.cs | 77 ++++++++++++++ src/UnitTests/UnitTests.csproj | 1 + 5 files changed, 301 insertions(+) create mode 100644 src/Numerics/RootFinding/BfgsSolver.cs create mode 100644 src/Numerics/RootFinding/WolfeRule.cs create mode 100644 src/UnitTests/RootFindingTests/BfgsTest.cs diff --git a/src/Numerics/Numerics.csproj b/src/Numerics/Numerics.csproj index b783234e..9507da48 100644 --- a/src/Numerics/Numerics.csproj +++ b/src/Numerics/Numerics.csproj @@ -195,10 +195,12 @@ + + diff --git a/src/Numerics/RootFinding/BfgsSolver.cs b/src/Numerics/RootFinding/BfgsSolver.cs new file mode 100644 index 00000000..a9b4d8a8 --- /dev/null +++ b/src/Numerics/RootFinding/BfgsSolver.cs @@ -0,0 +1,108 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// http://mathnetnumerics.codeplex.com +// +// Copyright (c) 2009-2013 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + + +using System; +using System.Collections.Generic; +using MathNet.Numerics.LinearAlgebra; +using MathNet.Numerics.LinearAlgebra.Double; +using System.Linq; +using System.Text; + +namespace MathNet.Numerics.RootFinding +{ + /// + /// Broyden-Fletcher-Goldfarb-Shanno solver for finding function minima + /// See http://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm + /// Inspired by implementation: https://github.com/PatWie/CppNumericalSolvers/blob/master/src/BfgsSolver.cpp + /// + public static class BfgsSolver + { + private const double gradientTolerance = 1e-5; + private const int maxIterations = 100000; + + /// + /// Finds a minimum of a function by the BFGS quasi-Newton method + /// This uses the function and it's gradient (partial derivatives in each direction) and approximates the Hessian + /// + /// An initial guess + /// Evaluates the function at a point + /// Evaluates the gradient of the function at a point + /// The minimum found + public static Vector Solve(Vector initialGuess, Func, double> functionValue, Func, Vector> functionGradient) + { + int dim = initialGuess.Count; + int iter = 0; + // H represents the approximation of the inverse hessian matrix + // it is updated via the Sherman–Morrison formula (http://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula) + Matrix H = DenseMatrix.CreateIdentity(dim); + + Vector x = initialGuess; + Vector x_old = x; + Vector grad; + + do + { + // search along the direction of the gradient + grad = functionGradient(x); + Vector p = -1 * H * grad; + double rate = WolfeRule.LineSearch(x, p, functionValue, functionGradient); + x = x + rate * p; + Vector grad_old = grad; + + // update the gradient + grad = functionGradient(x); + + Vector s = x - x_old; + Vector y = grad - grad_old; + + double rho = 1.0 / (y * s); + if (iter == 0) + { + // set up an initial hessian + H = (y * s) / (y * y) * DenseMatrix.CreateIdentity(dim); + } + + var sM = s.ToColumnMatrix(); + var yM = y.ToColumnMatrix(); + + // Update the estimate of the hessian + H = H + - rho * (sM * (yM.TransposeThisAndMultiply(H)) + (H * yM).TransposeAndMultiply(sM)) + + rho * rho * (y.DotProduct(H * y) + 1.0 / rho) * (sM.TransposeAndMultiply(sM)); + x_old = x; + iter++; + } + while ((grad.InfinityNorm() > gradientTolerance) && (iter < maxIterations)); + + return x; + } + } +} diff --git a/src/Numerics/RootFinding/WolfeRule.cs b/src/Numerics/RootFinding/WolfeRule.cs new file mode 100644 index 00000000..fa426275 --- /dev/null +++ b/src/Numerics/RootFinding/WolfeRule.cs @@ -0,0 +1,113 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// http://mathnetnumerics.codeplex.com +// +// Copyright (c) 2009-2013 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +using System; +using System.Collections.Generic; +using System.Linq; +using MathNet.Numerics.LinearAlgebra; + +namespace MathNet.Numerics.RootFinding +{ + /// + /// Performs an inexact line search. This is used as a part of quasi-Newton optimization methods to figure + /// out how far to move along a certain gradient. + /// See http://en.wikipedia.org/wiki/Wolfe_conditions + /// Inspired by implementation: https://github.com/PatWie/CppNumericalSolvers/blob/master/src/linesearch/WolfeRule.h + /// + internal static class WolfeRule + { + /// + /// Searches along a line to satisfy the Wolfe conditions (inexact search for minimum) + /// + /// Starting point of search + /// Search direction + /// Evaluates the function being minimized + /// Evaluates the gradient of the function + /// Initial value for the coefficient of z (distance to travel in z direction) + /// + public static double LineSearch( + Vector x0, + Vector z, + Func, double> functionValue, + Func, Vector> functionGradient, + float alphaInit = 1) + { + Vector x = x0; + + // evaluate phi(0) + double phi0 = functionValue(x0); + + // evaluate phi'(0) + Vector grad = functionGradient(x); + double phi0_dash = z * grad; + + double alpha = alphaInit; + + bool decrease_direction = true; + + // 200 guesses max + for (int iter = 0; iter < 200; ++iter) { + + // new guess for phi(alpha) + Vector x_candidate = x + alpha * z; + double phi = functionValue(x_candidate); + + // decrease condition invalid --> shrink interval + if (phi > phi0 + 0.0001 * alpha * phi0_dash) + { + alpha *= 0.5; + decrease_direction = false; + } + else + { + // valid decrease --> test strong wolfe condition + Vector grad2 = functionGradient(x_candidate); + double phi_dash = z * grad2; + + // curvature condition invalid ? + if ((phi_dash < 0.9 * phi0_dash) || !decrease_direction) { + // increase interval + alpha *= 4.0; + } + else { + // both condition are valid --> we are happy + x = x_candidate; + grad = grad2; + phi0 = phi; + return alpha; + } + } + } + + + return alpha; + } + } +} diff --git a/src/UnitTests/RootFindingTests/BfgsTest.cs b/src/UnitTests/RootFindingTests/BfgsTest.cs new file mode 100644 index 00000000..37fc4337 --- /dev/null +++ b/src/UnitTests/RootFindingTests/BfgsTest.cs @@ -0,0 +1,77 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// http://mathnetnumerics.codeplex.com +// +// Copyright (c) 2009-2013 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using MathNet.Numerics.LinearAlgebra; +using NUnit.Framework; +using MathNet.Numerics.LinearAlgebra.Double; +using MathNet.Numerics.RootFinding; + +namespace MathNet.Numerics.UnitTests.RootFindingTests +{ + [TestFixture, Category("RootFinding")] + internal class BfgsTest + { + private const double precision = 1e-4; + + [Test] + public void MinimizeRosenbrock() + { + CheckRosenbrock(15.0, 8.0, expectedMin: 0.0); + CheckRosenbrock(-1.2, 1.0, expectedMin: 0.0); + CheckRosenbrock(-1.2, 100.0, expectedMin: 0.0); + } + + private static void CheckRosenbrock(double a, double b, double expectedMin) + { + var x = BfgsSolver.Solve(new DenseVector(new[] { a, b }), Rosenbrock, RosenbrockGradient); + Precision.AlmostEqual(expectedMin, Rosenbrock(x), precision); + } + + private static double Rosenbrock(Vector x) + { + double t1 = (1 - x[0]); + double t2 = (x[1] - x[0] * x[0]); + return t1 * t1 + 100 * t2 * t2; + } + + private static Vector RosenbrockGradient(Vector x) + { + var grad = new DenseVector(2); + grad[0] = -2 * (1 - x[0]) + 200 * (x[1] - x[0] * x[0]) * (-2 * x[0]); + grad[1] = 200 * (x[1] - x[0] * x[0]); + return grad; + } + } +} diff --git a/src/UnitTests/UnitTests.csproj b/src/UnitTests/UnitTests.csproj index 3b9505a2..8bf0ae7a 100644 --- a/src/UnitTests/UnitTests.csproj +++ b/src/UnitTests/UnitTests.csproj @@ -346,6 +346,7 @@ +