argmin/solver/newton/
newton_method.rs

1// Copyright 2018-2024 argmin developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use crate::core::{ArgminFloat, Error, Gradient, Hessian, IterState, Problem, Solver, KV};
9use argmin_math::{ArgminDot, ArgminInv, ArgminScaledSub};
10#[cfg(feature = "serde1")]
11use serde::{Deserialize, Serialize};
12
13/// # Newton's method
14///
15/// Newton's method iteratively finds the stationary points of a function f by using a second order
16/// approximation of f at the current point.
17///
18/// The stepsize `gamma` can be adjusted with the [`with_gamma`](`Newton::with_gamma`) method. It
19/// must be in `(0, 1])` and defaults to `1`.
20///
21/// ## Requirements on the optimization problem
22///
23/// The optimization problem is required to implement [`Gradient`] and [`Hessian`].
24///
25/// ## Reference
26///
27/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
28/// Springer. ISBN 0-387-30303-0.
29#[derive(Clone, Copy)]
30#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
31pub struct Newton<F> {
32    /// gamma
33    gamma: F,
34}
35
36impl<F> Newton<F>
37where
38    F: ArgminFloat,
39{
40    /// Construct a new instance of [`Newton`]
41    ///
42    /// # Example
43    ///
44    /// ```
45    /// # use argmin::solver::newton::Newton;
46    /// let newton: Newton<f64> = Newton::new();
47    /// ```
48    pub fn new() -> Self {
49        Newton { gamma: float!(1.0) }
50    }
51
52    /// Set step size gamma
53    ///
54    /// Gamma must be in `(0, 1]` and defaults to `1`.
55    ///
56    /// # Example
57    ///
58    /// ```
59    /// # use argmin::solver::newton::Newton;
60    /// # use argmin::core::Error;
61    /// # fn main() -> Result<(), Error> {
62    /// let newton: Newton<f64> = Newton::new().with_gamma(0.4)?;
63    /// # Ok(())
64    /// # }
65    /// ```
66    pub fn with_gamma(mut self, gamma: F) -> Result<Self, Error> {
67        if gamma <= float!(0.0) || gamma > float!(1.0) {
68            return Err(argmin_error!(
69                InvalidParameter,
70                "Newton: gamma must be in  (0, 1]."
71            ));
72        }
73        self.gamma = gamma;
74        Ok(self)
75    }
76}
77
78impl<F> Default for Newton<F>
79where
80    F: ArgminFloat,
81{
82    fn default() -> Newton<F> {
83        Newton::new()
84    }
85}
86
87impl<O, P, G, H, F> Solver<O, IterState<P, G, (), H, (), F>> for Newton<F>
88where
89    O: Gradient<Param = P, Gradient = G> + Hessian<Param = P, Hessian = H>,
90    P: Clone + ArgminScaledSub<P, F, P>,
91    H: ArgminInv<H> + ArgminDot<G, P>,
92    F: ArgminFloat,
93{
94    fn name(&self) -> &str {
95        "Newton method"
96    }
97
98    fn next_iter(
99        &mut self,
100        problem: &mut Problem<O>,
101        mut state: IterState<P, G, (), H, (), F>,
102    ) -> Result<(IterState<P, G, (), H, (), F>, Option<KV>), Error> {
103        let param = state.take_param().ok_or_else(argmin_error_closure!(
104            NotInitialized,
105            concat!(
106                "`Newton` requires an initial parameter vector. ",
107                "Please provide an initial guess via `Executor`s `configure` method."
108            )
109        ))?;
110        let grad = problem.gradient(&param)?;
111        let hessian = problem.hessian(&param)?;
112        let new_param = param.scaled_sub(&self.gamma, &hessian.inv()?.dot(&grad));
113        Ok((state.param(new_param), None))
114    }
115}
116
117#[cfg(test)]
118mod tests {
119    use super::*;
120    use crate::core::ArgminError;
121    #[cfg(feature = "_ndarrayl")]
122    use crate::core::Executor;
123    #[cfg(feature = "_ndarrayl")]
124    use approx::assert_relative_eq;
125
126    test_trait_impl!(newton_method, Newton<f64>);
127
128    #[test]
129    fn test_new() {
130        let solver: Newton<f64> = Newton::new();
131        assert_eq!(solver.gamma.to_ne_bytes(), 1.0f64.to_ne_bytes());
132    }
133
134    #[test]
135    fn test_default() {
136        let solver_new: Newton<f64> = Newton::new();
137        let solver_def: Newton<f64> = Newton::default();
138        assert_eq!(
139            solver_new.gamma.to_ne_bytes(),
140            solver_def.gamma.to_ne_bytes()
141        );
142    }
143
144    #[test]
145    fn test_with_gamma() {
146        for new_gamma in [f64::EPSILON, 0.1, 0.5, 0.9, 1.0] {
147            let solver: Newton<f64> = Newton::new().with_gamma(new_gamma).unwrap();
148            assert_eq!(solver.gamma.to_ne_bytes(), new_gamma.to_ne_bytes());
149        }
150
151        for new_gamma in [1.0 + f64::EPSILON, 2.0, 0.0, -1.0] {
152            let res = Newton::new().with_gamma(new_gamma);
153            assert_error!(
154                res,
155                ArgminError,
156                "Invalid parameter: \"Newton: gamma must be in  (0, 1].\""
157            );
158        }
159    }
160
161    #[cfg(feature = "_ndarrayl")]
162    #[test]
163    fn test_next_iter_param_not_initialized() {
164        use crate::core::State;
165        use ndarray::{Array, Array1, Array2};
166        let mut newton: Newton<f64> = Newton::new();
167
168        struct NewtonProblem {}
169
170        impl Gradient for NewtonProblem {
171            type Param = Array1<f64>;
172            type Gradient = Array1<f64>;
173
174            fn gradient(&self, _p: &Self::Param) -> Result<Self::Gradient, Error> {
175                Ok(Array1::from_vec(vec![1.0, 2.0]))
176            }
177        }
178
179        impl Hessian for NewtonProblem {
180            type Param = Array1<f64>;
181            type Hessian = Array2<f64>;
182
183            fn hessian(&self, _p: &Self::Param) -> Result<Self::Hessian, Error> {
184                Ok(Array::from_shape_vec((2, 2), vec![1.0f64, 0.0, 0.0, 1.0])?)
185            }
186        }
187
188        let res = newton.next_iter(&mut Problem::new(NewtonProblem {}), IterState::new());
189        assert_error!(
190            res,
191            ArgminError,
192            concat!(
193                "Not initialized: \"`Newton` requires an initial parameter vector. ",
194                "Please provide an initial guess via `Executor`s `configure` method.\""
195            )
196        );
197    }
198
199    #[cfg(feature = "_ndarrayl")]
200    #[test]
201    fn test_solver() {
202        use crate::core::State;
203        use ndarray::{Array, Array1, Array2};
204        struct Problem {}
205
206        impl Gradient for Problem {
207            type Param = Array1<f64>;
208            type Gradient = Array1<f64>;
209
210            fn gradient(&self, _p: &Self::Param) -> Result<Self::Gradient, Error> {
211                Ok(Array1::from_vec(vec![1.0, 2.0]))
212            }
213        }
214
215        impl Hessian for Problem {
216            type Param = Array1<f64>;
217            type Hessian = Array2<f64>;
218
219            fn hessian(&self, _p: &Self::Param) -> Result<Self::Hessian, Error> {
220                Ok(Array::from_shape_vec((2, 2), vec![1.0f64, 0.0, 0.0, 1.0])?)
221            }
222        }
223
224        // Single iteration, starting from [0, 0], gamma = 1
225        let problem = Problem {};
226        let solver: Newton<f64> = Newton::new();
227        let init_param = Array1::from_vec(vec![0.0, 0.0]);
228
229        let param = Executor::new(problem, solver)
230            .configure(|config| config.param(init_param).max_iters(1))
231            .run()
232            .unwrap()
233            .state
234            .get_best_param()
235            .unwrap()
236            .clone();
237        assert_relative_eq!(param[0], -1.0, epsilon = f64::EPSILON);
238        assert_relative_eq!(param[1], -2.0, epsilon = f64::EPSILON);
239
240        // Two iterations, starting from [0, 0], gamma = 1
241        let problem = Problem {};
242        let solver: Newton<f64> = Newton::new();
243        let init_param = Array1::from_vec(vec![0.0, 0.0]);
244
245        let param = Executor::new(problem, solver)
246            .configure(|config| config.param(init_param).max_iters(2))
247            .run()
248            .unwrap()
249            .state
250            .get_best_param()
251            .unwrap()
252            .clone();
253        assert_relative_eq!(param[0], -2.0, epsilon = f64::EPSILON);
254        assert_relative_eq!(param[1], -4.0, epsilon = f64::EPSILON);
255
256        // Single iteration, starting from [0, 0], gamma = 0.5
257        let problem = Problem {};
258        let solver: Newton<f64> = Newton::new().with_gamma(0.5).unwrap();
259        let init_param = Array1::from_vec(vec![0.0, 0.0]);
260
261        let param = Executor::new(problem, solver)
262            .configure(|config| config.param(init_param).max_iters(1))
263            .run()
264            .unwrap()
265            .state
266            .get_best_param()
267            .unwrap()
268            .clone();
269        assert_relative_eq!(param[0], -0.5, epsilon = f64::EPSILON);
270        assert_relative_eq!(param[1], -1.0, epsilon = f64::EPSILON);
271
272        // Two iterations, starting from [0, 0], gamma = 0.5
273        let problem = Problem {};
274        let solver: Newton<f64> = Newton::new().with_gamma(0.5).unwrap();
275        let init_param = Array1::from_vec(vec![0.0, 0.0]);
276
277        let param = Executor::new(problem, solver)
278            .configure(|config| config.param(init_param).max_iters(2))
279            .run()
280            .unwrap()
281            .state
282            .get_best_param()
283            .unwrap()
284            .clone();
285        assert_relative_eq!(param[0], -1.0, epsilon = f64::EPSILON);
286        assert_relative_eq!(param[1], -2.0, epsilon = f64::EPSILON);
287    }
288}