use crate::core::{
ArgminFloat, CostFunction, Error, Executor, Gradient, Hessian, IterState, OptimizationResult,
Problem, Solver, TerminationReason, TerminationStatus, TrustRegionRadius, KV,
};
use argmin_math::{
ArgminAdd, ArgminDot, ArgminL2Norm, ArgminMul, ArgminSub, ArgminWeightedDot, ArgminZeroLike,
};
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};
/// # SR1 trust region method
///
/// Quasi-Newton method using a symmetric-rank-1 (SR1) update of the Hessian
/// approximation, globalized with a trust region. The trust-region subproblem
/// is delegated to the solver `R`.
#[derive(Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct SR1TrustRegion<R, F> {
    /// Safeguard factor `r` for the rank-1 update: the update is skipped unless
    /// |s^T (y - B s)| >= r * ||s|| * ||y - B s|| (see `next_iter`). Default: 1e-8.
    denominator_factor: F,
    /// Solver for the trust-region subproblem (must implement `TrustRegionRadius`).
    subproblem: R,
    /// Current trust-region radius. Default: 1.0.
    radius: F,
    /// Acceptance threshold on the ratio of actual to predicted cost reduction.
    /// Default: 0.5e-3.
    eta: F,
    /// Gradient-norm tolerance used as convergence criterion in `terminate`.
    /// Default: 1e-3.
    tol_grad: F,
}
impl<R, F> SR1TrustRegion<R, F>
where
    F: ArgminFloat,
{
    /// Constructs a new [`SR1TrustRegion`] with the given trust-region
    /// subproblem solver and default parameters (`denominator_factor = 1e-8`,
    /// `radius = 1.0`, `eta = 0.5e-3`, `tol_grad = 1e-3`).
    pub fn new(subproblem: R) -> Self {
        SR1TrustRegion {
            denominator_factor: float!(1e-8),
            subproblem,
            radius: float!(1.0),
            eta: float!(0.5 * 1e-3),
            tol_grad: float!(1e-3),
        }
    }

    /// Sets the denominator (safeguard) factor for the SR1 update.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidParameter` error unless the factor lies in the open
    /// interval (0, 1).
    pub fn with_denominator_factor(mut self, denominator_factor: F) -> Result<Self, Error> {
        if denominator_factor <= float!(0.0) || denominator_factor >= float!(1.0) {
            return Err(argmin_error!(
                InvalidParameter,
                "`SR1TrustRegion`: denominator_factor must be in (0, 1)."
            ));
        }
        self.denominator_factor = denominator_factor;
        Ok(self)
    }

    /// Sets the initial trust-region radius. The absolute value is used, so a
    /// negative input is accepted and made positive.
    #[must_use]
    pub fn with_radius(mut self, radius: F) -> Self {
        self.radius = radius.abs();
        self
    }

    /// Sets eta, the step-acceptance threshold on the ratio of actual to
    /// predicted reduction.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidParameter` error unless eta lies in (0, 10^-3).
    pub fn with_eta(mut self, eta: F) -> Result<Self, Error> {
        // BUGFIX: the upper bound was written `10e-3` (= 10^-2), contradicting
        // the error message and the usual SR1 requirement eta in (0, 10^-3)
        // (Nocedal & Wright, Algorithm 6.2). `1e-3` is the intended constant.
        if eta >= float!(1e-3) || eta <= float!(0.0) {
            return Err(argmin_error!(
                InvalidParameter,
                // Backticks added for consistency with the other error
                // messages of this type.
                "`SR1TrustRegion`: eta must be in (0, 10^-3)."
            ));
        }
        self.eta = eta;
        Ok(self)
    }

    /// Sets the gradient-norm tolerance used as convergence criterion.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidParameter` error if the tolerance is negative.
    pub fn with_tolerance_grad(mut self, tol_grad: F) -> Result<Self, Error> {
        if tol_grad < float!(0.0) {
            return Err(argmin_error!(
                InvalidParameter,
                "`SR1TrustRegion`: gradient tolerance must be >= 0."
            ));
        }
        self.tol_grad = tol_grad;
        Ok(self)
    }
}
impl<O, R, P, G, B, F> Solver<O, IterState<P, G, (), B, (), F>> for SR1TrustRegion<R, F>
where
O: CostFunction<Param = P, Output = F>
+ Gradient<Param = P, Gradient = G>
+ Hessian<Param = P, Hessian = B>,
P: Clone
+ ArgminSub<P, P>
+ ArgminAdd<P, P>
+ ArgminDot<P, F>
+ ArgminDot<P, B>
+ ArgminL2Norm<F>
+ ArgminZeroLike,
G: Clone + ArgminL2Norm<F> + ArgminDot<P, F> + ArgminSub<G, P>,
B: Clone + ArgminDot<P, P> + ArgminAdd<B, B> + ArgminMul<F, B>,
R: Clone + TrustRegionRadius<F> + Solver<O, IterState<P, G, (), B, (), F>>,
F: ArgminFloat + ArgminL2Norm<F>,
{
fn name(&self) -> &str {
"SR1 trust region"
}
fn init(
&mut self,
problem: &mut Problem<O>,
mut state: IterState<P, G, (), B, (), F>,
) -> Result<(IterState<P, G, (), B, (), F>, Option<KV>), Error> {
let param = state.take_param().ok_or_else(argmin_error_closure!(
NotInitialized,
concat!(
"`SR1TrustRegion` requires an initial parameter vector. ",
"Please provide an initial guess via `Executor`s `configure` method."
)
))?;
let cost = state.get_cost();
let cost = if cost.is_infinite() {
problem.cost(¶m)?
} else {
cost
};
let grad = state
.take_gradient()
.map(Result::Ok)
.unwrap_or_else(|| problem.gradient(¶m))?;
let hessian = state
.take_hessian()
.map(Result::Ok)
.unwrap_or_else(|| problem.hessian(¶m))?;
Ok((
state
.param(param)
.cost(cost)
.gradient(grad)
.hessian(hessian),
None,
))
}
fn next_iter(
&mut self,
problem: &mut Problem<O>,
mut state: IterState<P, G, (), B, (), F>,
) -> Result<(IterState<P, G, (), B, (), F>, Option<KV>), Error> {
let xk = state.take_param().ok_or_else(argmin_error_closure!(
PotentialBug,
"`SR1TrustRegion`: Parameter vector in state not set."
))?;
let cost = state.get_cost();
let prev_grad = state.take_gradient().ok_or_else(argmin_error_closure!(
PotentialBug,
"`SR1TrustRegion`: Gradient in state not set."
))?;
let hessian = state.take_hessian().ok_or_else(argmin_error_closure!(
PotentialBug,
"`SR1TrustRegion`: Hessian in state not set."
))?;
self.subproblem.set_radius(self.radius);
let OptimizationResult {
problem: sub_problem,
state: mut sub_state,
..
} = Executor::new(problem.take_problem().unwrap(), self.subproblem.clone())
.configure(|config| {
config
.param(xk.zero_like())
.hessian(hessian.clone())
.gradient(prev_grad.clone())
.cost(cost)
})
.ctrlc(false)
.run()?;
let sk = sub_state.take_param().ok_or_else(argmin_error_closure!(
PotentialBug,
"`SR1TrustRegion`: No parameters returned by line search."
))?;
problem.consume_problem(sub_problem);
let xksk = xk.add(&sk);
let dfk1 = problem.gradient(&xksk)?;
let yk = dfk1.sub(&prev_grad);
let fk1 = problem.cost(&xksk)?;
let ared = cost - fk1;
let tmp1: F = prev_grad.dot(&sk);
let tmp2: F = sk.weighted_dot(&hessian, &sk);
let tmp2: F = tmp2.mul(float!(0.5));
let pred = -tmp1 - tmp2;
let ap = ared / pred;
let (xk1, fk1, dfk1) = if ap > self.eta {
(xksk, fk1, dfk1)
} else {
(xk, cost, prev_grad)
};
self.radius = if ap > float!(0.75) {
if sk.l2_norm() <= float!(0.8) * self.radius {
self.radius
} else {
float!(2.0) * self.radius
}
} else if ap <= float!(0.75) && ap >= float!(0.1) {
self.radius
} else {
float!(0.5) * self.radius
};
let bksk = hessian.dot(&sk);
let ykbksk = yk.sub(&bksk);
let skykbksk: F = sk.dot(&ykbksk);
let hessian_update =
skykbksk.abs() >= self.denominator_factor * sk.l2_norm() * skykbksk.l2_norm();
let hessian = if hessian_update {
let a: B = ykbksk.dot(&ykbksk);
let b: F = sk.dot(&ykbksk);
hessian.add(&a.mul(&(float!(1.0) / b)))
} else {
hessian
};
Ok((
state.param(xk1).cost(fk1).gradient(dfk1).hessian(hessian),
Some(kv!["ared" => ared;
"pred" => pred;
"ap" => ap;
"radius" => self.radius;
"hessian_update" => hessian_update;]),
))
}
fn terminate(&mut self, state: &IterState<P, G, (), B, (), F>) -> TerminationStatus {
if state.get_gradient().unwrap().l2_norm() < self.tol_grad {
return TerminationStatus::Terminated(TerminationReason::SolverConverged);
}
TerminationStatus::NotTerminated
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{test_utils::TestProblem, ArgminError, State};
    use crate::solver::trustregion::CauchyPoint;

    // Verifies that `SR1TrustRegion` implements the traits required of a
    // solver (Clone, Send, Sync, ...).
    test_trait_impl!(sr1, SR1TrustRegion<CauchyPoint<f64>, f64>);

    #[test]
    fn test_new() {
        // `new` must initialize all fields with their documented defaults.
        #[derive(Eq, PartialEq, Debug)]
        struct MyFakeSubProblem {}

        let sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(MyFakeSubProblem {});
        let SR1TrustRegion {
            denominator_factor,
            subproblem,
            radius,
            eta,
            tol_grad,
        } = sr1;

        // Bit-exact float comparison via `to_ne_bytes`.
        assert_eq!(denominator_factor.to_ne_bytes(), 1e-8f64.to_ne_bytes());
        assert_eq!(subproblem, MyFakeSubProblem {});
        assert_eq!(radius.to_ne_bytes(), 1.0f64.to_ne_bytes());
        assert_eq!(eta.to_ne_bytes(), (0.5f64 * 1e-3f64).to_ne_bytes());
        assert_eq!(tol_grad.to_ne_bytes(), 1e-3f64.to_ne_bytes());
    }

    #[test]
    fn test_with_denominator_factor() {
        #[derive(Eq, PartialEq, Debug, Clone, Copy)]
        struct MyFakeSubProblem {}

        // Values strictly inside (0, 1) are accepted and stored as-is.
        for tol in [f64::EPSILON, 1e-8, 1e-6, 1e-2, 1.0 - f64::EPSILON] {
            let sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(MyFakeSubProblem {});
            let res = sr1.with_denominator_factor(tol);
            assert!(res.is_ok());
            let nm = res.unwrap();
            assert_eq!(nm.denominator_factor.to_ne_bytes(), tol.to_ne_bytes());
        }

        // Values outside (0, 1) — including the boundaries — are rejected.
        for tol in [-f64::EPSILON, 0.0, -1.0, 1.0] {
            let sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(MyFakeSubProblem {});
            let res = sr1.with_denominator_factor(tol);
            assert_error!(
                res,
                ArgminError,
                "Invalid parameter: \"`SR1TrustRegion`: denominator_factor must be in (0, 1).\""
            );
        }
    }

    #[test]
    fn test_with_tolerance_grad() {
        #[derive(Eq, PartialEq, Debug, Clone, Copy)]
        struct MyFakeSubProblem {}

        // Non-negative tolerances (including zero) are accepted.
        for tol in [1e-6, 0.0, 1e-2, 1.0, 2.0] {
            let sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(MyFakeSubProblem {});
            let res = sr1.with_tolerance_grad(tol);
            assert!(res.is_ok());
            let nm = res.unwrap();
            assert_eq!(nm.tol_grad.to_ne_bytes(), tol.to_ne_bytes());
        }

        // Negative tolerances are rejected.
        for tol in [-f64::EPSILON, -1.0, -100.0, -42.0] {
            let sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(MyFakeSubProblem {});
            let res = sr1.with_tolerance_grad(tol);
            assert_error!(
                res,
                ArgminError,
                "Invalid parameter: \"`SR1TrustRegion`: gradient tolerance must be >= 0.\""
            );
        }
    }

    #[test]
    fn test_init() {
        let subproblem = CauchyPoint::new();
        let param: Vec<f64> = vec![-1.0, 1.0];

        let mut sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(subproblem);

        // Without an initial parameter vector, `init` must fail.
        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> = IterState::new();
        let problem = TestProblem::new();
        let res = sr1.init(&mut Problem::new(problem), state);
        assert_error!(
            res,
            ArgminError,
            concat!(
                "Not initialized: \"`SR1TrustRegion` requires an initial parameter vector. Please ",
                "provide an initial guess via `Executor`s `configure` method.\""
            )
        );

        // With a parameter vector, `init` fills in cost and gradient.
        // (TestProblem returns the parameter vector as gradient and cost 1.0.)
        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> =
            IterState::new().param(param.clone());
        let problem = TestProblem::new();
        let (mut state_out, kv) = sr1.init(&mut Problem::new(problem), state).unwrap();

        assert!(kv.is_none());

        let s_param = state_out.take_param().unwrap();
        for (s, p) in s_param.iter().zip(param.iter()) {
            assert_eq!(s.to_ne_bytes(), p.to_ne_bytes());
        }

        let s_grad = state_out.take_gradient().unwrap();
        for (s, p) in s_grad.iter().zip(param.iter()) {
            assert_eq!(s.to_ne_bytes(), p.to_ne_bytes());
        }
        assert_eq!(state_out.get_cost().to_ne_bytes(), 1.0f64.to_ne_bytes())
    }

    #[test]
    fn test_init_provided_cost() {
        // A caller-provided cost must be kept, not re-evaluated.
        let subproblem = CauchyPoint::new();
        let param: Vec<f64> = vec![-1.0, 1.0];

        let mut sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(subproblem);

        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> =
            IterState::new().param(param).cost(1234.0);

        let problem = TestProblem::new();
        let (state_out, kv) = sr1.init(&mut Problem::new(problem), state).unwrap();

        assert!(kv.is_none());
        assert_eq!(state_out.get_cost().to_ne_bytes(), 1234.0f64.to_ne_bytes())
    }

    #[test]
    fn test_init_provided_grad() {
        // A caller-provided gradient must be kept, not re-evaluated.
        let subproblem = CauchyPoint::new();
        let param: Vec<f64> = vec![-1.0, 1.0];
        let gradient: Vec<f64> = vec![4.0, 9.0];

        let mut sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(subproblem);

        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> =
            IterState::new().param(param).gradient(gradient.clone());

        let problem = TestProblem::new();
        let (mut state_out, kv) = sr1.init(&mut Problem::new(problem), state).unwrap();

        assert!(kv.is_none());

        let s_grad = state_out.take_gradient().unwrap();
        for (s, g) in s_grad.iter().zip(gradient.iter()) {
            assert_eq!(s.to_ne_bytes(), g.to_ne_bytes());
        }
    }

    #[test]
    fn test_init_provided_hessian() {
        // A caller-provided Hessian must be kept, not re-evaluated.
        let subproblem = CauchyPoint::new();
        let param: Vec<f64> = vec![-1.0, 1.0];
        let gradient: Vec<f64> = vec![4.0, 9.0];
        let hessian: Vec<Vec<f64>> = vec![vec![1.0, 2.0], vec![3.0, 4.0]];

        let mut sr1: SR1TrustRegion<_, f64> = SR1TrustRegion::new(subproblem);

        let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> = IterState::new()
            .param(param)
            .gradient(gradient)
            .hessian(hessian.clone());

        let problem = TestProblem::new();
        let (mut state_out, kv) = sr1.init(&mut Problem::new(problem), state).unwrap();

        assert!(kv.is_none());

        let s_hessian = state_out.take_hessian().unwrap();
        for (s1, g1) in s_hessian.iter().zip(hessian.iter()) {
            for (s, g) in s1.iter().zip(g1.iter()) {
                assert_eq!(s.to_ne_bytes(), g.to_ne_bytes());
            }
        }
    }
}