argmin/solver/trustregion/
dogleg.rs

1// Copyright 2018-2024 argmin developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use crate::core::{
9    ArgminFloat, Error, Gradient, Hessian, IterState, Problem, Solver, State, TerminationReason,
10    TerminationStatus, TrustRegionRadius, KV,
11};
12use argmin_math::{
13    ArgminAdd, ArgminDot, ArgminInv, ArgminL2Norm, ArgminMul, ArgminSub, ArgminWeightedDot,
14};
15#[cfg(feature = "serde1")]
16use serde::{Deserialize, Serialize};
17
18/// # Dogleg method
19///
20/// The Dogleg method computes the intersection of the trust region boundary with a path given by
21/// the unconstrained minimum along the steepest descent direction and the optimum of the quadratic
22/// approximation of the cost function at the current point.
23///
24/// ## Requirements on the optimization problem
25///
26/// The optimization problem is required to implement [`Gradient`] and [`Hessian`].
27///
28/// ## Reference
29///
30/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
31/// Springer. ISBN 0-387-30303-0.
32#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Default)]
33#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
34pub struct Dogleg<F> {
35    /// Radius
36    radius: F,
37}
38
39impl<F> Dogleg<F>
40where
41    F: ArgminFloat,
42{
43    /// Construct a new instance of [`Dogleg`]
44    ///
45    /// # Example
46    ///
47    /// ```
48    /// # use argmin::solver::trustregion::Dogleg;
49    /// let dl: Dogleg<f64> = Dogleg::new();
50    /// ```
51    pub fn new() -> Self {
52        Dogleg { radius: F::nan() }
53    }
54}
55
56impl<O, F, P, H> Solver<O, IterState<P, P, (), H, (), F>> for Dogleg<F>
57where
58    O: Gradient<Param = P, Gradient = P> + Hessian<Param = P, Hessian = H>,
59    P: Clone
60        + ArgminMul<F, P>
61        + ArgminL2Norm<F>
62        + ArgminDot<P, F>
63        + ArgminAdd<P, P>
64        + ArgminSub<P, P>,
65    H: ArgminInv<H> + ArgminDot<P, P>,
66    F: ArgminFloat,
67{
68    fn name(&self) -> &str {
69        "Dogleg"
70    }
71
72    fn next_iter(
73        &mut self,
74        problem: &mut Problem<O>,
75        mut state: IterState<P, P, (), H, (), F>,
76    ) -> Result<(IterState<P, P, (), H, (), F>, Option<KV>), Error> {
77        let param = state.take_param().ok_or_else(argmin_error_closure!(
78            NotInitialized,
79            concat!(
80                "`Dogleg` requires an initial parameter vector. ",
81                "Please provide an initial guess via `Executor`s `configure` method."
82            )
83        ))?;
84
85        let g = state
86            .take_gradient()
87            .map(Result::Ok)
88            .unwrap_or_else(|| problem.gradient(&param))?;
89
90        let h = state
91            .take_hessian()
92            .map(Result::Ok)
93            .unwrap_or_else(|| problem.hessian(&param))?;
94
95        let pstar;
96
97        // pb = -H^-1g
98        let pb = (h.inv()?).dot(&g).mul(&float!(-1.0));
99
100        if pb.l2_norm() <= self.radius {
101            pstar = pb;
102        } else {
103            // pu = - (g^Tg)/(g^THg) * g
104            let pu = g.mul(&(-g.dot(&g) / g.weighted_dot(&h, &g)));
105
106            let k = pb.sub(&pu); // p^b - p^u
107            let c = pu.dot(&k); // p^u^T * (p^b - p^u)
108            let k = k.dot(&k); // (p^b - p^u)^T (p^b - p^u)
109            let u = pu.dot(&pu); // p^u^T p^u
110
111            let delta_squared = self.radius.powi(2);
112            let t1 = (c.powi(2) + delta_squared * k - k * u).sqrt();
113            let tau = [
114                -(t1 + c - k) / k,
115                (t1 - c + k) / k,
116                (float!(2.0) * c + delta_squared - u) / (float!(2.0) * c),
117            ]
118            .into_iter()
119            // .enumerate()
120            // .map(|(i, t)| {
121            //     println!("tau{}: {}", i, t);
122            //     t
123            // })
124            .filter(|t| !t.is_nan() && !t.is_infinite() && *t >= float!(0.0) && *t <= float!(2.0))
125            .fold(float!(0.0), |acc, t| if t >= acc { t } else { acc });
126
127            if tau >= float!(0.0) && tau < float!(1.0) {
128                pstar = pu.mul(&tau);
129            } else if tau >= float!(1.0) && tau <= float!(2.0) {
130                pstar = pu.add(&pb.sub(&pu).mul(&(tau - float!(1.0))));
131            } else {
132                return Err(argmin_error!(
133                    PotentialBug,
134                    "tau is outside the range [0, 2], this is not supposed to happen."
135                ));
136            }
137        }
138        Ok((state.param(pstar).gradient(g).hessian(h), None))
139    }
140
141    fn terminate(&mut self, state: &IterState<P, P, (), H, (), F>) -> TerminationStatus {
142        if state.get_iter() >= 1 {
143            TerminationStatus::Terminated(TerminationReason::MaxItersReached)
144        } else {
145            TerminationStatus::NotTerminated
146        }
147    }
148}
149
150impl<F: ArgminFloat> TrustRegionRadius<F> for Dogleg<F> {
151    /// Set current radius.
152    ///
153    /// Needed by [`TrustRegion`](`crate::solver::trustregion::TrustRegion`).
154    ///
155    /// # Example
156    ///
157    /// ```
158    /// use argmin::solver::trustregion::{Dogleg, TrustRegionRadius};
159    /// let mut dl: Dogleg<f64> = Dogleg::new();
160    /// dl.set_radius(0.8);
161    /// ```
162    fn set_radius(&mut self, radius: F) {
163        self.radius = radius;
164    }
165}
166
167#[cfg(test)]
168mod tests {
169    use super::*;
170    #[cfg(feature = "_ndarrayl")]
171    use crate::core::ArgminError;
172
173    test_trait_impl!(dogleg, Dogleg<f64>);
174
175    #[test]
176    fn test_new() {
177        let dl: Dogleg<f64> = Dogleg::new();
178
179        let Dogleg { radius } = dl;
180
181        assert_eq!(radius.to_ne_bytes(), f64::NAN.to_ne_bytes());
182    }
183
184    #[cfg(feature = "_ndarrayl")]
185    #[test]
186    fn test_next_iter() {
187        use approx::assert_relative_eq;
188        use ndarray::{Array, Array1, Array2};
189
190        struct TestProblem {}
191
192        impl Gradient for TestProblem {
193            type Param = Array1<f64>;
194            type Gradient = Array1<f64>;
195
196            fn gradient(&self, _p: &Self::Param) -> Result<Self::Gradient, Error> {
197                Ok(Array1::from_vec(vec![0.5, 2.0]))
198            }
199        }
200
201        impl Hessian for TestProblem {
202            type Param = Array1<f64>;
203            type Hessian = Array2<f64>;
204
205            fn hessian(&self, _p: &Self::Param) -> Result<Self::Hessian, Error> {
206                Ok(Array::from_shape_vec((2, 2), vec![1f64, 2.0, 3.0, 4.0])?)
207            }
208        }
209
210        let param: Array1<f64> = Array1::from_vec(vec![-1.0, 1.0]);
211
212        let mut dl: Dogleg<f64> = Dogleg::new();
213        dl.set_radius(1.0);
214
215        // Forgot to initialize the parameter vector
216        let state: IterState<Array1<f64>, Array1<f64>, (), Array2<f64>, (), f64> = IterState::new();
217        let problem = TestProblem {};
218        let res = dl.next_iter(&mut Problem::new(problem), state);
219        assert_error!(
220            res,
221            ArgminError,
222            concat!(
223                "Not initialized: \"`Dogleg` requires an initial parameter vector. Please ",
224                "provide an initial guess via `Executor`s `configure` method.\""
225            )
226        );
227
228        // All good.
229        let state: IterState<Array1<f64>, Array1<f64>, (), Array2<f64>, (), f64> =
230            IterState::new().param(param);
231        let problem = TestProblem {};
232        let (mut state_out, kv) = dl.next_iter(&mut Problem::new(problem), state).unwrap();
233
234        assert!(kv.is_none());
235
236        let s_param = state_out.take_param().unwrap();
237
238        assert_relative_eq!(s_param[0], -0.9730617585026127, epsilon = f64::EPSILON);
239        assert_relative_eq!(s_param[1], 0.2305446033629983, epsilon = f64::EPSILON);
240    }
241}