argmin/solver/quasinewton/
dfp.rs

1// Copyright 2018-2024 argmin developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use crate::core::{
9    ArgminFloat, CostFunction, Error, Executor, Gradient, IterState, LineSearch,
10    OptimizationResult, Problem, Solver, TerminationReason, TerminationStatus, KV,
11};
12use argmin_math::{ArgminAdd, ArgminDot, ArgminL2Norm, ArgminMul, ArgminSub};
13#[cfg(feature = "serde1")]
14use serde::{Deserialize, Serialize};
15
16/// # Davidon-Fletcher-Powell (DFP) method
17///
18/// The Davidon-Fletcher-Powell algorithm (DFP) is a method for solving unconstrained nonlinear
19/// optimization problems.
20///
21/// The algorithm requires a line search which is provided via the constructor. Additionally an
22/// initial guess for the parameter vector and an initial inverse Hessian is required, which are to
23/// be provided via the [`configure`](`crate::core::Executor::configure`) method of the
24/// [`Executor`](`crate::core::Executor`) (See [`IterState`], in particular [`IterState::param`]
25/// and [`IterState::inv_hessian`]).
26/// In the same way the initial gradient and cost function corresponding to the initial parameter
27/// vector can be provided. If these are not provided, they will be computed during initialization
28/// of the algorithm.
29///
30/// A tolerance on the gradient can be configured with
31/// [`with_tolerance_grad`](`DFP::with_tolerance_grad`): If the norm of the gradient is below
32/// said tolerance, the algorithm stops. It defaults to `sqrt(EPSILON)`.
33///
34/// ## Requirements on the optimization problem
35///
36/// The optimization problem is required to implement [`CostFunction`] and [`Gradient`].
37///
38/// ## Reference
39///
40/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
41/// Springer. ISBN 0-387-30303-0.
42#[derive(Clone)]
43#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
44pub struct DFP<L, F> {
45    /// line search
46    linesearch: L,
47    /// Tolerance for the stopping criterion based on the change of the norm on the gradient
48    tol_grad: F,
49}
50
51impl<L, F> DFP<L, F>
52where
53    F: ArgminFloat,
54{
55    /// Construct a new instance of [`DFP`]
56    ///
57    /// # Example
58    ///
59    /// ```
60    /// # use argmin::solver::quasinewton::DFP;
61    /// # let linesearch = ();
62    /// let dfp: DFP<_, f64> = DFP::new(linesearch);
63    /// ```
64    pub fn new(linesearch: L) -> Self {
65        DFP {
66            linesearch,
67            tol_grad: F::epsilon().sqrt(),
68        }
69    }
70
71    /// The algorithm stops if the norm of the gradient is below `tol_grad`.
72    ///
73    /// The provided value must be non-negative. Defaults to `sqrt(EPSILON)`.
74    ///
75    /// # Example
76    ///
77    /// ```
78    /// # use argmin::solver::quasinewton::DFP;
79    /// # use argmin::core::Error;
80    /// # fn main() -> Result<(), Error> {
81    /// # let linesearch = ();
82    /// let dfp: DFP<_, f64> = DFP::new(linesearch).with_tolerance_grad(1e-6)?;
83    /// # Ok(())
84    /// # }
85    /// ```
86    pub fn with_tolerance_grad(mut self, tol_grad: F) -> Result<Self, Error> {
87        if tol_grad < float!(0.0) {
88            return Err(argmin_error!(
89                InvalidParameter,
90                "`DFP`: gradient tolerance must be >= 0."
91            ));
92        }
93        self.tol_grad = tol_grad;
94        Ok(self)
95    }
96}
97
98impl<O, L, P, G, H, F> Solver<O, IterState<P, G, (), H, (), F>> for DFP<L, F>
99where
100    O: CostFunction<Param = P, Output = F> + Gradient<Param = P, Gradient = G>,
101    P: Clone + ArgminSub<P, P> + ArgminDot<G, F> + ArgminDot<P, H> + ArgminMul<F, P>,
102    G: Clone + ArgminSub<G, G> + ArgminL2Norm<F> + ArgminDot<P, F>,
103    H: Clone + ArgminSub<H, H> + ArgminDot<G, P> + ArgminAdd<H, H> + ArgminMul<F, H>,
104    L: Clone + LineSearch<P, F> + Solver<O, IterState<P, G, (), (), (), F>>,
105    F: ArgminFloat,
106{
107    fn name(&self) -> &str {
108        "DFP"
109    }
110
111    fn init(
112        &mut self,
113        problem: &mut Problem<O>,
114        mut state: IterState<P, G, (), H, (), F>,
115    ) -> Result<(IterState<P, G, (), H, (), F>, Option<KV>), Error> {
116        let param = state.take_param().ok_or_else(argmin_error_closure!(
117            NotInitialized,
118            concat!(
119                "`DFP` requires an initial parameter vector. ",
120                "Please provide an initial guess via `Executor`s `configure` method."
121            )
122        ))?;
123
124        let inv_hessian = state.take_inv_hessian().ok_or_else(argmin_error_closure!(
125            NotInitialized,
126            concat!(
127                "`DFP` requires an initial inverse Hessian. ",
128                "Please provide an initial guess via `Executor`s `configure` method."
129            )
130        ))?;
131
132        let cost = state.get_cost();
133        let cost = if cost.is_infinite() {
134            problem.cost(&param)?
135        } else {
136            cost
137        };
138
139        let grad = state
140            .take_gradient()
141            .map(Result::Ok)
142            .unwrap_or_else(|| problem.gradient(&param))?;
143
144        Ok((
145            state
146                .param(param)
147                .cost(cost)
148                .gradient(grad)
149                .inv_hessian(inv_hessian),
150            None,
151        ))
152    }
153
154    fn next_iter(
155        &mut self,
156        problem: &mut Problem<O>,
157        mut state: IterState<P, G, (), H, (), F>,
158    ) -> Result<(IterState<P, G, (), H, (), F>, Option<KV>), Error> {
159        let param = state.take_param().ok_or_else(argmin_error_closure!(
160            PotentialBug,
161            "`DFP`: Parameter vector in state not set."
162        ))?;
163        let cost = state.get_cost();
164
165        let prev_grad = state.take_gradient().ok_or_else(argmin_error_closure!(
166            PotentialBug,
167            "`DFP`: Gradient in state not set."
168        ))?;
169
170        let inv_hessian = state.take_inv_hessian().ok_or_else(argmin_error_closure!(
171            PotentialBug,
172            "`DFP`: Inverse Hessian in state not set."
173        ))?;
174
175        let p = inv_hessian.dot(&prev_grad).mul(&float!(-1.0));
176
177        self.linesearch.search_direction(p);
178
179        let OptimizationResult {
180            problem: line_problem,
181            state: mut linesearch_state,
182            ..
183        } = Executor::new(problem.take_problem().unwrap(), self.linesearch.clone())
184            .configure(|config| {
185                config
186                    .param(param.clone())
187                    .gradient(prev_grad.clone())
188                    .cost(cost)
189            })
190            .ctrlc(false)
191            .run()?;
192
193        let xk1 = linesearch_state
194            .take_param()
195            .ok_or_else(argmin_error_closure!(
196                PotentialBug,
197                "`DFP`: No parameters returned by line search."
198            ))?;
199
200        let next_cost = linesearch_state.get_cost();
201
202        // take care of function eval counts
203        problem.consume_problem(line_problem);
204
205        let grad = problem.gradient(&xk1)?;
206        let yk = grad.sub(&prev_grad);
207
208        let sk = xk1.sub(&param);
209
210        let yksk: F = yk.dot(&sk);
211
212        let sksk: H = sk.dot(&sk);
213
214        let tmp3: P = inv_hessian.dot(&yk);
215        let tmp4: F = tmp3.dot(&yk);
216        let tmp3: H = tmp3.dot(&tmp3);
217        let tmp3: H = tmp3.mul(&(float!(1.0) / tmp4));
218
219        let inv_hessian = inv_hessian.sub(&tmp3).add(&sksk.mul(&(float!(1.0) / yksk)));
220
221        Ok((
222            state
223                .param(xk1)
224                .cost(next_cost)
225                .gradient(grad)
226                .inv_hessian(inv_hessian),
227            None,
228        ))
229    }
230
231    fn terminate(&mut self, state: &IterState<P, G, (), H, (), F>) -> TerminationStatus {
232        if state.get_gradient().unwrap().l2_norm() < self.tol_grad {
233            return TerminationStatus::Terminated(TerminationReason::SolverConverged);
234        }
235        TerminationStatus::NotTerminated
236    }
237}
238
239#[cfg(test)]
240mod tests {
241    use super::*;
242    use crate::core::{test_utils::TestProblem, ArgminError, State};
243    use crate::solver::linesearch::MoreThuenteLineSearch;
244
245    test_trait_impl!(
246        dfp,
247        DFP<MoreThuenteLineSearch<Vec<f64>, Vec<f64>, f64>, f64>
248    );
249
250    #[test]
251    fn test_new() {
252        #[derive(Eq, PartialEq, Debug)]
253        struct MyFakeLineSearch {}
254
255        let dfp: DFP<_, f64> = DFP::new(MyFakeLineSearch {});
256        let DFP {
257            linesearch,
258            tol_grad,
259        } = dfp;
260
261        assert_eq!(linesearch, MyFakeLineSearch {});
262        assert_eq!(tol_grad.to_ne_bytes(), f64::EPSILON.sqrt().to_ne_bytes());
263    }
264
265    #[test]
266    fn test_with_tolerance_grad() {
267        #[derive(Eq, PartialEq, Debug, Clone, Copy)]
268        struct MyFakeLineSearch {}
269
270        // correct parameters
271        for tol in [1e-6, 0.0, 1e-2, 1.0, 2.0] {
272            let dfp: DFP<_, f64> = DFP::new(MyFakeLineSearch {});
273            let res = dfp.with_tolerance_grad(tol);
274            assert!(res.is_ok());
275
276            let nm = res.unwrap();
277            assert_eq!(nm.tol_grad.to_ne_bytes(), tol.to_ne_bytes());
278        }
279
280        // incorrect parameters
281        for tol in [-f64::EPSILON, -1.0, -100.0, -42.0] {
282            let dfp: DFP<_, f64> = DFP::new(MyFakeLineSearch {});
283            let res = dfp.with_tolerance_grad(tol);
284            assert_error!(
285                res,
286                ArgminError,
287                "Invalid parameter: \"`DFP`: gradient tolerance must be >= 0.\""
288            );
289        }
290    }
291
292    #[test]
293    fn test_init() {
294        let linesearch = MoreThuenteLineSearch::new().with_c(1e-4, 0.9).unwrap();
295
296        let param: Vec<f64> = vec![-1.0, 1.0];
297        let inv_hessian: Vec<Vec<f64>> = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
298
299        let mut dfp: DFP<_, f64> = DFP::new(linesearch);
300
301        // Forgot to initialize the parameter vector
302        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> = IterState::new();
303        let problem = TestProblem::new();
304        let res = dfp.init(&mut Problem::new(problem), state);
305        assert_error!(
306            res,
307            ArgminError,
308            concat!(
309                "Not initialized: \"`DFP` requires an initial parameter vector. Please ",
310                "provide an initial guess via `Executor`s `configure` method.\""
311            )
312        );
313
314        // Forgot initial inverse Hessian guess
315        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> =
316            IterState::new().param(param.clone());
317        let problem = TestProblem::new();
318        let res = dfp.init(&mut Problem::new(problem), state);
319
320        assert_error!(
321            res,
322            ArgminError,
323            concat!(
324                "Not initialized: \"`DFP` requires an initial inverse Hessian. Please ",
325                "provide an initial guess via `Executor`s `configure` method.\""
326            )
327        );
328
329        // All good.
330        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> = IterState::new()
331            .param(param.clone())
332            .inv_hessian(inv_hessian.clone());
333        let problem = TestProblem::new();
334        let (mut state_out, kv) = dfp.init(&mut Problem::new(problem), state).unwrap();
335
336        assert!(kv.is_none());
337
338        let s_param = state_out.take_param().unwrap();
339
340        for (s, p) in s_param.iter().zip(param.iter()) {
341            assert_eq!(s.to_ne_bytes(), p.to_ne_bytes());
342        }
343
344        let s_grad = state_out.take_gradient().unwrap();
345
346        for (s, p) in s_grad.iter().zip(param.iter()) {
347            assert_eq!(s.to_ne_bytes(), p.to_ne_bytes());
348        }
349
350        let s_inv_hessian = state_out.take_inv_hessian().unwrap();
351
352        for (s, h) in s_inv_hessian
353            .iter()
354            .flatten()
355            .zip(inv_hessian.iter().flatten())
356        {
357            assert_eq!(s.to_ne_bytes(), h.to_ne_bytes());
358        }
359
360        assert_eq!(state_out.get_cost().to_ne_bytes(), 1.0f64.to_ne_bytes())
361    }
362
363    #[test]
364    fn test_init_provided_cost() {
365        let linesearch = MoreThuenteLineSearch::new().with_c(1e-4, 0.9).unwrap();
366
367        let param: Vec<f64> = vec![-1.0, 1.0];
368        let inv_hessian: Vec<Vec<f64>> = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
369
370        let mut dfp: DFP<_, f64> = DFP::new(linesearch);
371
372        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> = IterState::new()
373            .param(param)
374            .inv_hessian(inv_hessian)
375            .cost(1234.0);
376
377        let problem = TestProblem::new();
378        let (state_out, kv) = dfp.init(&mut Problem::new(problem), state).unwrap();
379
380        assert!(kv.is_none());
381
382        assert_eq!(state_out.get_cost().to_ne_bytes(), 1234.0f64.to_ne_bytes())
383    }
384
385    #[test]
386    fn test_init_provided_grad() {
387        let linesearch = MoreThuenteLineSearch::new().with_c(1e-4, 0.9).unwrap();
388
389        let param: Vec<f64> = vec![-1.0, 1.0];
390        let gradient: Vec<f64> = vec![4.0, 9.0];
391        let inv_hessian: Vec<Vec<f64>> = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
392
393        let mut dfp: DFP<_, f64> = DFP::new(linesearch);
394
395        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> = IterState::new()
396            .param(param)
397            .inv_hessian(inv_hessian)
398            .gradient(gradient.clone());
399
400        let problem = TestProblem::new();
401        let (mut state_out, kv) = dfp.init(&mut Problem::new(problem), state).unwrap();
402
403        assert!(kv.is_none());
404
405        let s_grad = state_out.take_gradient().unwrap();
406
407        for (s, g) in s_grad.iter().zip(gradient.iter()) {
408            assert_eq!(s.to_ne_bytes(), g.to_ne_bytes());
409        }
410    }
411}