finitediff/lib.rs

// Copyright 2018-2024 argmin developers
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

//! This crate contains a wide range of methods for the calculation of gradients, Jacobians and
//! Hessians using forward and central differences.
//! The methods are implemented for input vectors of type `Vec<f64>` and
//! `ndarray::Array1<f64>`.
//! Central differences are more accurate but require more evaluations of the cost function and are
//! therefore computationally more expensive.
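//!
//! As a rough illustration of this trade-off, here is a minimal one-dimensional sketch of the two
//! schemes (not the crate's internal implementation; the step size `h` is chosen ad hoc here):
//!
//! ```rust
//! let f = |x: f64| x.powi(3);
//! let (x, h) = (2.0f64, 1e-6);
//! // Forward difference: first-order accurate, uses `f(x)` and `f(x + h)`
//! let forward = (f(x + h) - f(x)) / h;
//! // Central difference: second-order accurate, uses `f(x + h)` and `f(x - h)`
//! let central = (f(x + h) - f(x - h)) / (2.0 * h);
//! // The exact derivative of x^3 at x = 2 is 12; the central estimate is closer.
//! assert!((forward - 12.0).abs() < 1e-4);
//! assert!((central - 12.0).abs() < 1e-6);
//! ```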
//!
//! # References
//!
//! Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
//! Springer. ISBN 0-387-30303-0.
//!
//! # Usage
//!
//! Add this to the `dependencies` section of your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! finitediff = "0.1.4"
//! ```
//!
//! To use the provided methods with `ndarray` types, please activate the `ndarray` feature:
//!
//! ```toml
//! [dependencies]
//! finitediff = { version = "0.1.4", features = ["ndarray"] }
//! ```
//!
//! # Examples
//!
//! * [Calculation of the gradient](#calculation-of-the-gradient)
//!   * [For `Vec<f64>`](#for-vecf64)
//!   * [For `ndarray::Array1<f64>`](#for-ndarrayarray1f64)
//! * [Calculation of the Jacobian](#calculation-of-the-jacobian)
//!   * [Full Jacobian](#full-jacobian)
//!   * [Product of the Jacobian `J(x)` with a vector `p`](#product-of-the-jacobian-jx-with-a-vector-p)
//!   * [Sparse Jacobian](#sparse-jacobian)
//! * [Calculation of the Hessian](#calculation-of-the-hessian)
//!   * [Full Hessian](#full-hessian)
//!   * [Product of the Hessian `H(x)` with a vector `p`](#product-of-the-hessian-hx-with-a-vector-p)
//!   * [Calculation of the Hessian without knowledge of the gradient](#calculation-of-the-hessian-without-knowledge-of-the-gradient)
//!   * [Calculation of the sparse Hessian without knowledge of the gradient](#calculation-of-the-sparse-hessian-without-knowledge-of-the-gradient)
//!
//! ## Calculation of the gradient
//!
//! This section illustrates the computation of the gradient of a function `f` at a point `x`,
//! both for `Vec<f64>` and for `ndarray::Array1<f64>` inputs. Forward differences require `n+1`
//! evaluations of the function `f`, while central differences require `2*n` evaluations.
//!
//! ### For `Vec<f64>`
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! use finitediff::vec;
//!
//! // Define function `f(x)`
//! let f = |x: &Vec<f64>| -> Result<f64, anyhow::Error> {
//!     // ...
//! #     Ok(x[0] + x[1].powi(2))
//! };
//!
//! // Point at which gradient should be calculated
//! let x = vec![1.0f64, 1.0];
//!
//! // Calculate gradient of `f` at `x` using forward differences
//! let g_forward = vec::forward_diff(&f);
//! let grad_forward = g_forward(&x)?;
//!
//! // Calculate gradient of `f` at `x` using central differences
//! let g_central = vec::central_diff(&f);
//! let grad_central = g_central(&x)?;
//! #
//! #  // Desired solution
//! #  let res = vec![1.0f64, 2.0];
//! #
//! #  // Check result
//! #  for i in 0..2 {
//! #      assert!((res[i] - grad_forward[i]).abs() < 1e-6);
//! #      assert!((res[i] - grad_central[i]).abs() < 1e-6);
//! #  }
//! # Ok(())
//! # }
//! ```
//!
//! ### For `ndarray::Array1<f64>`
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! # #[cfg(feature = "ndarray")]
//! # {
//! use ndarray::{array, Array1};
//! use finitediff::ndarr;
//!
//! // Define cost function `f(x)`
//! let f = |x: &Array1<f64>| -> Result<f64, anyhow::Error> {
//!     // ...
//! #     Ok(x[0] + x[1].powi(2))
//! };
//!
//! // Point at which gradient should be calculated
//! let x = array![1.0f64, 1.0];
//!
//! // Calculate gradient of `f` at `x` using forward differences
//! let g_forward = ndarr::forward_diff(&f);
//! let grad_forward = g_forward(&x)?;
//!
//! // Calculate gradient of `f` at `x` using central differences
//! let g_central = ndarr::central_diff(&f);
//! let grad_central = g_central(&x)?;
//! #
//! #  // Desired solution
//! #  let res = vec![1.0f64, 2.0];
//! #
//! #  // Check result
//! #  for i in 0..2 {
//! #      assert!((res[i] - grad_forward[i]).abs() < 1e-6);
//! #      assert!((res[i] - grad_central[i]).abs() < 1e-6);
//! #  }
//! # }
//! # Ok(())
//! # }
//! ```
//!
//! ## Calculation of the Jacobian
//!
//! Note that the same interface is also implemented for `ndarray::Array1<f64>` (not shown).
//!
//! ### Full Jacobian
//!
//! Computing the full Jacobian requires `n+1` evaluations of the function `f` with forward
//! differences and `2*n` evaluations with central differences.
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! use finitediff::vec;
//!
//! let f = |x: &Vec<f64>| -> Result<Vec<f64>, anyhow::Error> {
//!     // ...
//! #      Ok(vec![
//! #          2.0 * (x[1].powi(3) - x[0].powi(2)),
//! #          3.0 * (x[1].powi(3) - x[0].powi(2)) + 2.0 * (x[2].powi(3) - x[1].powi(2)),
//! #          3.0 * (x[2].powi(3) - x[1].powi(2)) + 2.0 * (x[3].powi(3) - x[2].powi(2)),
//! #          3.0 * (x[3].powi(3) - x[2].powi(2)) + 2.0 * (x[4].powi(3) - x[3].powi(2)),
//! #          3.0 * (x[4].powi(3) - x[3].powi(2)) + 2.0 * (x[5].powi(3) - x[4].powi(2)),
//! #          3.0 * (x[5].powi(3) - x[4].powi(2)),
//! #      ])
//! };
//!
//! let x = vec![1.0f64, 1.0, 1.0, 1.0, 1.0, 1.0];
//!
//! // Using forward differences
//! let j_forward = vec::forward_jacobian(&f);
//! let jacobian_forward = j_forward(&x)?;
//!
//! // Using central differences
//! let j_central = vec::central_jacobian(&f);
//! let jacobian_central = j_central(&x)?;
//!
//! #  let res = vec![
//! #      vec![-4.0, 6.0, 0.0, 0.0, 0.0, 0.0],
//! #      vec![-6.0, 5.0, 6.0, 0.0, 0.0, 0.0],
//! #      vec![0.0, -6.0, 5.0, 6.0, 0.0, 0.0],
//! #      vec![0.0, 0.0, -6.0, 5.0, 6.0, 0.0],
//! #      vec![0.0, 0.0, 0.0, -6.0, 5.0, 6.0],
//! #      vec![0.0, 0.0, 0.0, 0.0, -6.0, 9.0],
//! #  ];
//! #
//! #  // Check result
//! #  for i in 0..6 {
//! #      for j in 0..6 {
//! #          assert!((res[i][j] - jacobian_forward[i][j]).abs() < 1e-6);
//! #          assert!((res[i][j] - jacobian_central[i][j]).abs() < 1e-6);
//! #      }
//! #  }
//! # Ok(())
//! # }
//! ```
//!
//! ### Product of the Jacobian `J(x)` with a vector `p`
//!
//! Directly computing `J(x)*p` can be much more efficient than computing `J(x)` first and then
//! multiplying it with `p`: the product is essentially a directional derivative, which with
//! forward differences amounts to `(f(x + eps*p) - f(x)) / eps` for a small step `eps`. While
//! computing the full Jacobian `J(x)` requires `n+1` evaluations of `f`, `J(x)*p` only requires `2`.
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! use finitediff::vec;
//!
//! let f = |x: &Vec<f64>| -> Result<Vec<f64>, anyhow::Error> {
//!     // ...
//! #      Ok(vec![
//! #          2.0 * (x[1].powi(3) - x[0].powi(2)),
//! #          3.0 * (x[1].powi(3) - x[0].powi(2)) + 2.0 * (x[2].powi(3) - x[1].powi(2)),
//! #          3.0 * (x[2].powi(3) - x[1].powi(2)) + 2.0 * (x[3].powi(3) - x[2].powi(2)),
//! #          3.0 * (x[3].powi(3) - x[2].powi(2)) + 2.0 * (x[4].powi(3) - x[3].powi(2)),
//! #          3.0 * (x[4].powi(3) - x[3].powi(2)) + 2.0 * (x[5].powi(3) - x[4].powi(2)),
//! #          3.0 * (x[5].powi(3) - x[4].powi(2)),
//! #      ])
//! };
//!
//! let x = vec![1.0f64, 1.0, 1.0, 1.0, 1.0, 1.0];
//! let p = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
//!
//! // using forward differences
//! let j_forward = vec::forward_jacobian_vec_prod(&f);
//! let jacobian_forward = j_forward(&x, &p)?;
//!
//! // using central differences
//! let j_central = vec::central_jacobian_vec_prod(&f);
//! let jacobian_central = j_central(&x, &p)?;
//! #
//! #  let res = vec![8.0, 22.0, 27.0, 32.0, 37.0, 24.0];
//! #
//! #  // Check result
//! #  for i in 0..6 {
//! #      assert!((res[i] - jacobian_forward[i]).abs() < 11.0*1e-6);
//! #      assert!((res[i] - jacobian_central[i]).abs() < 11.0*1e-6);
//! #  }
//! # Ok(())
//! # }
//! ```
//!
//! ### Sparse Jacobian
//!
//! If the Jacobian is sparse, its structure can be exploited using perturbation vectors:
//! variables whose Jacobian columns have no overlapping non-zero rows are grouped together and
//! perturbed simultaneously, which reduces the number of evaluations of `f`. See Nocedal & Wright
//! for details.
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! use finitediff::{vec, PerturbationVector};
//!
//! let f = |x: &Vec<f64>| -> Result<Vec<f64>, anyhow::Error> {
//!     // ...
//! #      Ok(vec![
//! #          2.0 * (x[1].powi(3) - x[0].powi(2)),
//! #          3.0 * (x[1].powi(3) - x[0].powi(2)) + 2.0 * (x[2].powi(3) - x[1].powi(2)),
//! #          3.0 * (x[2].powi(3) - x[1].powi(2)) + 2.0 * (x[3].powi(3) - x[2].powi(2)),
//! #          3.0 * (x[3].powi(3) - x[2].powi(2)) + 2.0 * (x[4].powi(3) - x[3].powi(2)),
//! #          3.0 * (x[4].powi(3) - x[3].powi(2)) + 2.0 * (x[5].powi(3) - x[4].powi(2)),
//! #          3.0 * (x[5].powi(3) - x[4].powi(2)),
//! #      ])
//! };
//!
//! let x = vec![1.0f64, 1.0, 1.0, 1.0, 1.0, 1.0];
//!
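//! // Perturbation vectors (interpretation for this particular example, following the
//! // conventional sparse finite-differences scheme): `add(j, rows)` registers variable `j`
//! // together with the row indices in which column `j` of the Jacobian is non-zero, and each
//! // `PerturbationVector` groups variables whose columns share no rows, so that they can be
//! // perturbed in a single evaluation of `f`.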
//! let pert = vec![
//!     PerturbationVector::new()
//!         .add(0, vec![0, 1])
//!         .add(3, vec![2, 3, 4]),
//!     PerturbationVector::new()
//!         .add(1, vec![0, 1, 2])
//!         .add(4, vec![3, 4, 5]),
//!     PerturbationVector::new()
//!         .add(2, vec![1, 2, 3])
//!         .add(5, vec![4, 5]),
//! ];
//!
//! // using forward differences
//! let j_forward = vec::forward_jacobian_pert(&f);
//! let jacobian_forward = j_forward(&x, &pert)?;
//!
//! // using central differences
//! let j_central = vec::central_jacobian_pert(&f);
//! let jacobian_central = j_central(&x, &pert)?;
//! #
//! #  let res = vec![
//! #      vec![-4.0, 6.0, 0.0, 0.0, 0.0, 0.0],
//! #      vec![-6.0, 5.0, 6.0, 0.0, 0.0, 0.0],
//! #      vec![0.0, -6.0, 5.0, 6.0, 0.0, 0.0],
//! #      vec![0.0, 0.0, -6.0, 5.0, 6.0, 0.0],
//! #      vec![0.0, 0.0, 0.0, -6.0, 5.0, 6.0],
//! #      vec![0.0, 0.0, 0.0, 0.0, -6.0, 9.0],
//! #  ];
//! #
//! #  // Check result
//! #  for i in 0..6 {
//! #      for j in 0..6 {
//! #          assert!((res[i][j] - jacobian_forward[i][j]).abs() < 1e-6);
//! #          assert!((res[i][j] - jacobian_central[i][j]).abs() < 1e-6);
//! #      }
//! #  }
//! # Ok(())
//! # }
//! ```
//!
//! ## Calculation of the Hessian
//!
//! Note that the same interface is also implemented for `ndarray::Array1<f64>` (not shown).
//! The Hessian is computed by applying finite differences to the gradient `g(x)` of the function;
//! only the `nograd` variants further below work on the function itself.
//!
//! ### Full Hessian
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! use finitediff::vec;
//!
//! let g = |x: &Vec<f64>| -> Result<Vec<f64>, anyhow::Error> {
//!     // ...
//! #     Ok(vec![1.0, 2.0 * x[1], x[3].powi(2), 2.0 * x[3] * x[2]])
//! };
//!
//! let x = vec![1.0f64, 1.0, 1.0, 1.0];
//!
//! // using forward differences
//! let h_forward = vec::forward_hessian(&g);
//! let hessian_forward = h_forward(&x)?;
//!
//! // using central differences
//! let h_central = vec::central_hessian(&g);
//! let hessian_central = h_central(&x)?;
//! #
//! #  let res = vec![
//! #      vec![0.0, 0.0, 0.0, 0.0],
//! #      vec![0.0, 2.0, 0.0, 0.0],
//! #      vec![0.0, 0.0, 0.0, 2.0],
//! #      vec![0.0, 0.0, 2.0, 2.0],
//! #  ];
//! #
//! #  // Check result
//! #  for i in 0..4 {
//! #      for j in 0..4 {
//! #          assert!((res[i][j] - hessian_forward[i][j]).abs() < 1e-6);
//! #          assert!((res[i][j] - hessian_central[i][j]).abs() < 1e-6);
//! #      }
//! #  }
//! # Ok(())
//! # }
//! ```
//!
//! ### Product of the Hessian `H(x)` with a vector `p`
//!
//! As with the Jacobian, the product `H(x)*p` can be computed directly from only two evaluations
//! of the gradient `g`, without forming the full Hessian.
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! use finitediff::vec;
//!
//! let g = |x: &Vec<f64>| -> Result<Vec<f64>, anyhow::Error> {
//!     // ...
//! #     Ok(vec![1.0, 2.0 * x[1], x[3].powi(2), 2.0 * x[3] * x[2]])
//! };
//!
//! let x = vec![1.0f64, 1.0, 1.0, 1.0];
//! let p = vec![2.0, 3.0, 4.0, 5.0];
//!
//! // using forward differences
//! let h_forward = vec::forward_hessian_vec_prod(&g);
//! let hessian_forward = h_forward(&x, &p)?;
//!
//! // using central differences
//! let h_central = vec::central_hessian_vec_prod(&g);
//! let hessian_central = h_central(&x, &p)?;
//! #
//! #  let res = vec![0.0, 6.0, 10.0, 18.0];
//! #
//! #  for i in 0..4 {
//! #      assert!((res[i] - hessian_forward[i]).abs() < 1e-6);
//! #      assert!((res[i] - hessian_central[i]).abs() < 1e-6);
//! #  }
//! # Ok(())
//! # }
//! ```
//!
//! ### Calculation of the Hessian without knowledge of the gradient
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! use finitediff::vec;
//!
//! let f = |x: &Vec<f64>| -> Result<f64, anyhow::Error> {
//!     // ...
//! #     Ok(x[0] + x[1].powi(2) + x[2] * x[3].powi(2))
//! };
//!
//! let x = vec![1.0f64, 1.0, 1.0, 1.0];
//!
//! let h = vec::forward_hessian_nograd(&f);
//! let hessian = h(&x)?;
//! #
//! #  let res = vec![
//! #      vec![0.0, 0.0, 0.0, 0.0],
//! #      vec![0.0, 2.0, 0.0, 0.0],
//! #      vec![0.0, 0.0, 0.0, 2.0],
//! #      vec![0.0, 0.0, 2.0, 2.0],
//! #  ];
//! #
//! #  // Check result
//! #  for i in 0..4 {
//! #      for j in 0..4 {
//! #          assert!((res[i][j] - hessian[i][j]).abs() < 1e-6);
//! #      }
//! #  }
//! # Ok(())
//! # }
//! ```
//!
//! ### Calculation of the sparse Hessian without knowledge of the gradient
//!
//! ```rust
//! # fn main() -> Result<(), anyhow::Error> {
//! use finitediff::vec;
//!
//! let f = |x: &Vec<f64>| -> Result<f64, anyhow::Error> {
//!     // ...
//! #     Ok(x[0] + x[1].powi(2) + x[2] * x[3].powi(2))
//! };
//!
//! let x = vec![1.0f64, 1.0, 1.0, 1.0];
//!
//! // Indices at which the Hessian should be evaluated. All other
//! // elements of the Hessian will be zero.
//! let indices = vec![[1, 1], [2, 3], [3, 3]];
//!
//! let h = vec::forward_hessian_nograd_sparse(&f);
//! let hessian = h(&x, indices)?;
//! #
//! #  let res = vec![
//! #      vec![0.0, 0.0, 0.0, 0.0],
//! #      vec![0.0, 2.0, 0.0, 0.0],
//! #      vec![0.0, 0.0, 0.0, 2.0],
//! #      vec![0.0, 0.0, 2.0, 2.0],
//! #  ];
//! #
//! #  // Check result
//! #  for i in 0..4 {
//! #      for j in 0..4 {
//! #          assert!((res[i][j] - hessian[i][j]).abs() < 1e-6);
//! #      }
//! #  }
//! # Ok(())
//! # }
//! ```

pub mod array;
#[cfg(feature = "ndarray")]
pub mod ndarr;
mod pert;
mod utils;
pub mod vec;

pub use pert::{PerturbationVector, PerturbationVectors};