argmin/solver/gaussnewton/gaussnewton_linesearch.rs

// Copyright 2018-2024 argmin developers
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

use crate::core::{
    ArgminFloat, CostFunction, Error, Executor, Gradient, IterState, Jacobian, LineSearch,
    Operator, OptimizationResult, Problem, Solver, TerminationReason, TerminationStatus, KV,
};
use argmin_math::{ArgminDot, ArgminInv, ArgminL2Norm, ArgminMul, ArgminTranspose};
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};

/// # Gauss-Newton method with line search
///
/// Gauss-Newton method where an appropriate step length is obtained by a line search.
///
/// Requires an initial parameter vector.
///
/// ## Requirements on the optimization problem
///
/// The optimization problem is required to implement [`Operator`] and [`Jacobian`].
///
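/// ## Example
///
/// A typical setup may look like the following sketch (not a runnable doctest:
/// `MyProblem` is a placeholder for a type implementing [`Operator`] and [`Jacobian`],
/// `init_param` for an initial parameter vector, and the choice of line search and
/// iteration count are illustrative):
///
/// ```ignore
/// use argmin::core::Executor;
/// use argmin::solver::gaussnewton::GaussNewtonLS;
/// use argmin::solver::linesearch::{condition::ArmijoCondition, BacktrackingLineSearch};
///
/// // Backtracking line search with an Armijo acceptance condition.
/// let linesearch = BacktrackingLineSearch::new(ArmijoCondition::new(1e-4f64)?);
/// let solver: GaussNewtonLS<_, f64> = GaussNewtonLS::new(linesearch);
///
/// let result = Executor::new(MyProblem {}, solver)
///     .configure(|state| state.param(init_param).max_iters(10))
///     .run()?;
/// ```
///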
/// ## Reference
///
/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
/// Springer. ISBN 0-387-30303-0.
#[derive(Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct GaussNewtonLS<L, F> {
    /// Line search
    linesearch: L,
    /// Tolerance for the stopping criterion based on cost difference
    tol: F,
}

impl<L, F: ArgminFloat> GaussNewtonLS<L, F> {
    /// Construct a new instance of [`GaussNewtonLS`].
    ///
    /// # Example
    ///
    /// ```
    /// # use argmin::solver::gaussnewton::GaussNewtonLS;
    /// # let linesearch = ();
    /// let gauss_newton_ls: GaussNewtonLS<_, f64> = GaussNewtonLS::new(linesearch);
    /// ```
    pub fn new(linesearch: L) -> Self {
        GaussNewtonLS {
            linesearch,
            tol: F::epsilon().sqrt(),
        }
    }

    /// Set tolerance for the stopping criterion based on cost difference.
    ///
    /// Tolerance must be larger than zero and defaults to `sqrt(EPSILON)`.
    ///
    /// # Example
    ///
    /// ```
    /// # use argmin::solver::gaussnewton::GaussNewtonLS;
    /// # use argmin::core::Error;
    /// # fn main() -> Result<(), Error> {
    /// # let linesearch = ();
    /// let gauss_newton_ls = GaussNewtonLS::new(linesearch).with_tolerance(1e-4f64)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn with_tolerance(mut self, tol: F) -> Result<Self, Error> {
        if tol <= float!(0.0) {
            return Err(argmin_error!(
                InvalidParameter,
                "Gauss-Newton-Linesearch: tol must be positive."
            ));
        }
        self.tol = tol;
        Ok(self)
    }
}

impl<O, L, F, P, G, J, U, R> Solver<O, IterState<P, G, J, (), R, F>> for GaussNewtonLS<L, F>
where
    O: Operator<Param = P, Output = U> + Jacobian<Param = P, Jacobian = J>,
    P: Clone + ArgminMul<F, P>,
    G: Clone,
    U: ArgminL2Norm<F>,
    J: Clone
        + ArgminTranspose<J>
        + ArgminInv<J>
        + ArgminDot<J, J>
        + ArgminDot<G, P>
        + ArgminDot<U, G>,
    L: Clone + LineSearch<P, F> + Solver<LineSearchProblem<O, F>, IterState<P, G, (), (), R, F>>,
    F: ArgminFloat,
    R: Clone,
{
    fn name(&self) -> &str {
        "Gauss-Newton method with line search"
    }

    fn next_iter(
        &mut self,
        problem: &mut Problem<O>,
        mut state: IterState<P, G, J, (), R, F>,
    ) -> Result<(IterState<P, G, J, (), R, F>, Option<KV>), Error> {
        let param = state.take_param().ok_or_else(argmin_error_closure!(
            NotInitialized,
            concat!(
                "`GaussNewtonLS` requires an initial parameter vector. ",
                "Please provide an initial guess via `Executor`s `configure` method."
            )
        ))?;
        let residuals = problem.apply(&param)?;
        let jacobian = problem.jacobian(&param)?;
        let jacobian_t = jacobian.clone().t();
        let grad = jacobian_t.dot(&residuals);

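        // Gauss-Newton direction: p solves the normal equations (J^T J) p = J^T r,
        // computed here by explicitly inverting J^T J.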
        let p: P = jacobian_t.dot(&jacobian).inv()?.dot(&grad);

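        // Pass the descent direction -p to the line search.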
        self.linesearch.search_direction(p.mul(&(float!(-1.0))));

        // Perform the line search.
        let OptimizationResult {
            problem: mut line_problem,
            state: mut linesearch_state,
            ..
        } = Executor::new(
            LineSearchProblem::new(problem.take_problem().ok_or_else(argmin_error_closure!(
                PotentialBug,
                "`GaussNewtonLS`: Failed to take `problem` for line search"
            ))?),
            self.linesearch.clone(),
        )
        .configure(|config| config.param(param).gradient(grad).cost(residuals.l2_norm()))
        .ctrlc(false)
        .run()?;

        // `consume_problem` cannot be used here because the problem needed is wrapped in a
        // `LineSearchProblem`, which is itself wrapped in a `Problem`. Therefore this is
        // split into two separate steps: first recover the problem, then merge the function
        // evaluation counts.
        problem.problem = Some(
            line_problem
                .take_problem()
                .ok_or_else(argmin_error_closure!(
                    PotentialBug,
                    "`GaussNewtonLS`: Failed to take `problem` from line search"
                ))?
                .problem,
        );
        problem.consume_func_counts(line_problem);

        Ok((
            state
                .param(
                    linesearch_state
                        .take_param()
                        .ok_or_else(argmin_error_closure!(
                            PotentialBug,
                            "`GaussNewtonLS`: Failed to take `param` from line search state"
                        ))?,
                )
                .cost(linesearch_state.get_cost()),
            None,
        ))
    }

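    // Terminate once the absolute cost change between consecutive iterations falls
    // below the tolerance `tol`.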
    fn terminate(&mut self, state: &IterState<P, G, J, (), R, F>) -> TerminationStatus {
        if (state.get_prev_cost() - state.get_cost()).abs() < self.tol {
            return TerminationStatus::Terminated(TerminationReason::SolverConverged);
        }
        TerminationStatus::NotTerminated
    }
}

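/// Wraps the user-supplied residual problem so the line search can treat it as a scalar
/// optimization problem: the cost is the L2 norm of the residuals and the gradient is
/// computed from the Jacobian (see the `CostFunction` and `Gradient` implementations below).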
#[doc(hidden)]
#[derive(Clone, Default)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
struct LineSearchProblem<O, F> {
    problem: O,
    _phantom: std::marker::PhantomData<F>,
}

impl<O, F> LineSearchProblem<O, F> {
    /// Construct a new [`LineSearchProblem`]
    fn new(operator: O) -> Self {
        LineSearchProblem {
            problem: operator,
            _phantom: std::marker::PhantomData,
        }
    }
}

impl<O, P, F> CostFunction for LineSearchProblem<O, F>
where
    O: Operator<Param = P, Output = P>,
    P: Clone + ArgminL2Norm<F>,
    F: ArgminFloat,
{
    type Param = P;
    type Output = F;

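    // The cost of the line search sub-problem is the L2 norm of the residual vector r(p).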
    fn cost(&self, p: &Self::Param) -> Result<Self::Output, Error> {
        Ok(self.problem.apply(p)?.l2_norm())
    }
}

impl<O, P, J, F> Gradient for LineSearchProblem<O, F>
where
    O: Operator<Param = P, Output = P> + Jacobian<Param = P, Jacobian = J>,
    P: Clone,
    J: ArgminTranspose<J> + ArgminDot<P, P>,
{
    type Param = P;
    type Gradient = P;

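    // Gradient of the squared-residual objective 0.5 * ||r(p)||^2, i.e. J(p)^T r(p).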
    fn gradient(&self, p: &Self::Param) -> Result<Self::Gradient, Error> {
        Ok(self.problem.jacobian(p)?.t().dot(&self.problem.apply(p)?))
    }
}

#[cfg(test)]
#[allow(clippy::let_unit_value)]
mod tests {
    use super::*;
    use crate::core::ArgminError;
    #[cfg(feature = "_ndarrayl")]
    use crate::core::State;
    use crate::solver::linesearch::{condition::ArmijoCondition, BacktrackingLineSearch};
    #[cfg(feature = "_ndarrayl")]
    use approx::assert_relative_eq;

    test_trait_impl!(
        gauss_newton_linesearch_method,
        GaussNewtonLS<BacktrackingLineSearch<Vec<f64>, Vec<f64>, ArmijoCondition<f64>, f64>, f64>
    );

    #[test]
    fn test_new() {
        #[derive(Eq, PartialEq, Debug)]
        struct MyLinesearch {}

        let GaussNewtonLS {
            linesearch: ls,
            tol: t,
        } = GaussNewtonLS::<_, f64>::new(MyLinesearch {});

        assert_eq!(ls, MyLinesearch {});
        assert_eq!(t.to_ne_bytes(), f64::EPSILON.sqrt().to_ne_bytes());
    }

    #[test]
    fn test_tolerance() {
        let tol1: f64 = 1e-4;

        let linesearch = ();
        let GaussNewtonLS { tol: t1, .. } =
            GaussNewtonLS::new(linesearch).with_tolerance(tol1).unwrap();

        assert_eq!(t1.to_ne_bytes(), tol1.to_ne_bytes());
    }

    #[test]
    fn test_tolerance_error_when_negative() {
        let tol = -2.0;
        let error = GaussNewtonLS::new(()).with_tolerance(tol);
        assert_error!(
            error,
            ArgminError,
            "Invalid parameter: \"Gauss-Newton-Linesearch: tol must be positive.\""
        );
    }

    #[test]
    fn test_tolerance_error_when_zero() {
        let tol = 0.0;
        let error = GaussNewtonLS::new(()).with_tolerance(tol);
        assert_error!(
            error,
            ArgminError,
            "Invalid parameter: \"Gauss-Newton-Linesearch: tol must be positive.\""
        );
    }

    #[cfg(feature = "_ndarrayl")]
    #[test]
    fn test_line_search_sub_problem() {
        use ndarray::{Array, Array1, Array2};

        struct TestProblem {}

        impl Operator for TestProblem {
            type Param = Array1<f64>;
            type Output = Array1<f64>;

            fn apply(&self, _p: &Self::Param) -> Result<Self::Output, Error> {
                Ok(Array1::from_vec(vec![0.5, 2.0]))
            }
        }

        impl Gradient for TestProblem {
            type Param = Array1<f64>;
            type Gradient = Array1<f64>;

            fn gradient(&self, _p: &Self::Param) -> Result<Self::Gradient, Error> {
                Ok(Array1::from_vec(vec![1.5, 3.0]))
            }
        }

        impl Jacobian for TestProblem {
            type Param = Array1<f64>;
            type Jacobian = Array2<f64>;

            fn jacobian(&self, _p: &Self::Param) -> Result<Self::Jacobian, Error> {
                Ok(Array::from_shape_vec((2, 2), vec![1f64, 2.0, 3.0, 4.0])?)
            }
        }

        let lsp: LineSearchProblem<_, f64> = LineSearchProblem::new(TestProblem {});

        let res = lsp.cost(&Array1::from_vec(vec![])).unwrap();
        assert_relative_eq!(
            res,
            (0.5f64.powi(2) + 2.0f64.powi(2)).sqrt(),
            epsilon = f64::EPSILON
        );

        let res = lsp.gradient(&Array1::from_vec(vec![])).unwrap();
        assert_relative_eq!(res[0], 1.0 * 0.5 + 3.0 * 2.0, epsilon = f64::EPSILON);
        assert_relative_eq!(res[1], 2.0 * 0.5 + 4.0 * 2.0, epsilon = f64::EPSILON);
    }

    #[cfg(feature = "_ndarrayl")]
    #[test]
    fn test_next_iter_param_not_initialized() {
        use ndarray::{Array, Array1, Array2};

        struct TestProblem {}

        impl Operator for TestProblem {
            type Param = Array1<f64>;
            type Output = Array1<f64>;

            fn apply(&self, _p: &Self::Param) -> Result<Self::Output, Error> {
                Ok(Array1::from_vec(vec![0.5, 2.0]))
            }
        }

        impl Jacobian for TestProblem {
            type Param = Array1<f64>;
            type Jacobian = Array2<f64>;

            fn jacobian(&self, _p: &Self::Param) -> Result<Self::Jacobian, Error> {
                Ok(Array::from_shape_vec((2, 2), vec![1f64, 2.0, 3.0, 4.0])?)
            }
        }

        let linesearch: BacktrackingLineSearch<
            Array1<f64>,
            Array1<f64>,
            ArmijoCondition<f64>,
            f64,
        > = BacktrackingLineSearch::new(ArmijoCondition::new(0.2).unwrap());
        let mut gnls = GaussNewtonLS::<_, f64>::new(linesearch);
        let res = gnls.next_iter(&mut Problem::new(TestProblem {}), IterState::new());
        assert_error!(
            res,
            ArgminError,
            concat!(
                "Not initialized: \"`GaussNewtonLS` requires an initial parameter vector. ",
                "Please provide an initial guess via `Executor`s `configure` method.\""
            )
        );
    }

    #[cfg(feature = "_ndarrayl")]
    #[test]
    fn test_next_iter_regression() {
        use ndarray::{Array, Array1, Array2};
        use std::cell::RefCell;

        struct MyProblem {
            counter: RefCell<usize>,
        }

        impl Operator for MyProblem {
            type Param = Array1<f64>;
            type Output = Array1<f64>;

            fn apply(&self, _p: &Self::Param) -> Result<Self::Output, Error> {
                if *self.counter.borrow() == 0 {
                    let mut c = self.counter.borrow_mut();
                    *c += 1;
                    Ok(Array1::from_vec(vec![0.5, 2.0]))
                } else {
                    Ok(Array1::from_vec(vec![0.3, 1.0]))
                }
            }
        }

        impl Gradient for MyProblem {
            type Param = Array1<f64>;
            type Gradient = Array1<f64>;

            fn gradient(&self, _p: &Self::Param) -> Result<Self::Gradient, Error> {
                Ok(Array1::from_vec(vec![3.2, 1.1]))
            }
        }

        impl Jacobian for MyProblem {
            type Param = Array1<f64>;
            type Jacobian = Array2<f64>;

            fn jacobian(&self, _p: &Self::Param) -> Result<Self::Jacobian, Error> {
                Ok(Array::from_shape_vec((2, 2), vec![1f64, 2.0, 3.0, 4.0])?)
            }
        }

        let problem = MyProblem {
            counter: RefCell::new(0),
        };

        let linesearch: BacktrackingLineSearch<
            Array1<f64>,
            Array1<f64>,
            ArmijoCondition<f64>,
            f64,
        > = BacktrackingLineSearch::new(ArmijoCondition::new(0.2).unwrap());
        let mut gnls = GaussNewtonLS::<_, f64>::new(linesearch);
        let state = IterState::new()
            .param(Array1::from_vec(vec![1.0, 2.0]))
            .jacobian(Array::from_shape_vec((2, 2), vec![1f64, 2.0, 3.0, 4.0]).unwrap());
        let mut problem = Problem::new(problem);
        let (mut state, kv) = gnls.next_iter(&mut problem, state).unwrap();
        state.update();

        assert!(kv.is_none());

        assert_relative_eq!(
            state.param.as_ref().unwrap()[0],
            7.105427357601002e-15,
            epsilon = f64::EPSILON
        );
        assert_relative_eq!(
            state.param.as_ref().unwrap()[1],
            2.25f64,
            epsilon = f64::EPSILON
        );
        assert_relative_eq!(
            state.best_param.as_ref().unwrap()[0],
            7.105427357601002e-15,
            epsilon = f64::EPSILON
        );
        assert_relative_eq!(
            state.best_param.as_ref().unwrap()[1],
            2.25f64,
            epsilon = f64::EPSILON
        );
        assert_relative_eq!(state.cost, 1.044030650891055f64, epsilon = f64::EPSILON);
        assert!(!state.prev_cost.is_finite());
        assert_relative_eq!(
            state.best_cost,
            1.044030650891055f64,
            epsilon = f64::EPSILON
        );
    }
}
468}