use crate::core::{
ArgminFloat, CostFunction, Error, Executor, Gradient, Hessian, IterState, OptimizationResult,
Problem, Solver, TerminationStatus, TrustRegionRadius, KV,
};
use crate::solver::trustregion::reduction_ratio;
use argmin_math::{ArgminAdd, ArgminDot, ArgminL2Norm, ArgminWeightedDot};
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};
#[derive(Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct TrustRegion<R, F> {
radius: F,
max_radius: F,
eta: F,
subproblem: R,
fxk: F,
mk0: F,
}
impl<R, F> TrustRegion<R, F>
where
F: ArgminFloat,
{
pub fn new(subproblem: R) -> Self {
TrustRegion {
radius: float!(1.0),
max_radius: float!(100.0),
eta: float!(0.125),
subproblem,
fxk: F::nan(),
mk0: F::nan(),
}
}
pub fn with_radius(mut self, radius: F) -> Result<Self, Error> {
if radius <= float!(0.0) {
return Err(argmin_error!(
InvalidParameter,
"`TrustRegion`: radius must be > 0."
));
}
self.radius = radius;
Ok(self)
}
pub fn with_max_radius(mut self, max_radius: F) -> Result<Self, Error> {
if max_radius <= float!(0.0) {
return Err(argmin_error!(
InvalidParameter,
"`TrustRegion`: maximum radius must be > 0."
));
}
self.max_radius = max_radius;
Ok(self)
}
pub fn with_eta(mut self, eta: F) -> Result<Self, Error> {
if eta >= float!(0.25) || eta < float!(0.0) {
return Err(argmin_error!(
InvalidParameter,
"`TrustRegion`: eta must be in [0, 1/4)."
));
}
self.eta = eta;
Ok(self)
}
}
impl<O, R, F, P, G, H> Solver<O, IterState<P, G, (), H, (), F>> for TrustRegion<R, F>
where
O: CostFunction<Param = P, Output = F>
+ Gradient<Param = P, Gradient = G>
+ Hessian<Param = P, Hessian = H>,
P: Clone + ArgminL2Norm<F> + ArgminDot<P, F> + ArgminDot<G, F> + ArgminAdd<P, P>,
G: Clone,
H: Clone + ArgminDot<P, P>,
R: Clone + TrustRegionRadius<F> + Solver<O, IterState<P, G, (), H, (), F>>,
F: ArgminFloat,
{
fn name(&self) -> &str {
"Trust region"
}
fn init(
&mut self,
problem: &mut Problem<O>,
mut state: IterState<P, G, (), H, (), F>,
) -> Result<(IterState<P, G, (), H, (), F>, Option<KV>), Error> {
let param = state.take_param().ok_or_else(argmin_error_closure!(
NotInitialized,
concat!(
"`TrustRegion` requires an initial parameter vector. ",
"Please provide an initial guess via `Executor`s `configure` method."
)
))?;
let grad = state
.take_gradient()
.map(Result::Ok)
.unwrap_or_else(|| problem.gradient(¶m))?;
let hessian = state
.take_hessian()
.map(Result::Ok)
.unwrap_or_else(|| problem.hessian(¶m))?;
let cost = state.get_cost();
self.fxk = if cost.is_infinite() && cost.is_sign_positive() {
problem.cost(¶m)?
} else {
cost
};
self.mk0 = self.fxk;
Ok((
state
.param(param)
.cost(self.fxk)
.gradient(grad)
.hessian(hessian),
None,
))
}
fn next_iter(
&mut self,
problem: &mut Problem<O>,
mut state: IterState<P, G, (), H, (), F>,
) -> Result<(IterState<P, G, (), H, (), F>, Option<KV>), Error> {
let param = state.take_param().ok_or_else(argmin_error_closure!(
PotentialBug,
"`TrustRegion`: Parameter vector in state not set."
))?;
let grad = state.take_gradient().ok_or_else(argmin_error_closure!(
PotentialBug,
"`TrustRegion`: Gradient in state not set."
))?;
let hessian = state.take_hessian().ok_or_else(argmin_error_closure!(
PotentialBug,
"`TrustRegion`: Hessian in state not set."
))?;
self.subproblem.set_radius(self.radius);
let OptimizationResult {
problem: sub_problem,
state: mut sub_state,
..
} = Executor::new(problem.take_problem().unwrap(), self.subproblem.clone())
.configure(|config| {
config
.param(param.clone())
.gradient(grad.clone())
.hessian(hessian.clone())
})
.ctrlc(false)
.run()?;
let pk = sub_state.take_param().unwrap();
problem.consume_problem(sub_problem);
let new_param = pk.add(¶m);
let fxkpk = problem.cost(&new_param)?;
let mkpk = self.fxk + pk.dot(&grad) + float!(0.5) * pk.weighted_dot(&hessian, &pk);
let rho = reduction_ratio(self.fxk, fxkpk, self.mk0, mkpk);
let pk_norm = pk.l2_norm();
let cur_radius = self.radius;
self.radius = if rho < float!(0.25) {
float!(0.25) * pk_norm
} else if rho > float!(0.75) && (pk_norm - self.radius).abs() <= float!(10.0) * F::epsilon()
{
self.max_radius.min(float!(2.0) * self.radius)
} else {
self.radius
};
Ok((
if rho > self.eta {
self.fxk = fxkpk;
self.mk0 = fxkpk;
let grad = problem.gradient(&new_param)?;
let hessian = problem.hessian(&new_param)?;
state
.param(new_param)
.cost(fxkpk)
.gradient(grad)
.hessian(hessian)
} else {
state
.param(param)
.cost(self.fxk)
.gradient(grad)
.hessian(hessian)
},
Some(kv!("radius" => cur_radius;)),
))
}
fn terminate(&mut self, _state: &IterState<P, G, (), H, (), F>) -> TerminationStatus {
TerminationStatus::NotTerminated
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::core::test_utils::TestProblem;
use crate::core::{ArgminError, State};
use crate::solver::trustregion::{CauchyPoint, Steihaug};
test_trait_impl!(trustregion, TrustRegion<Steihaug<TestProblem, f64>, f64>);
#[test]
fn test_new() {
let cp: CauchyPoint<f64> = CauchyPoint::new();
let tr: TrustRegion<_, f64> = TrustRegion::new(cp);
let TrustRegion {
radius,
max_radius,
eta,
subproblem: _,
fxk,
mk0,
} = tr;
assert_eq!(radius.to_ne_bytes(), 1.0f64.to_ne_bytes());
assert_eq!(max_radius.to_ne_bytes(), 100.0f64.to_ne_bytes());
assert_eq!(eta.to_ne_bytes(), 0.125f64.to_ne_bytes());
assert_eq!(fxk.to_ne_bytes(), f64::NAN.to_ne_bytes());
assert_eq!(mk0.to_ne_bytes(), f64::NAN.to_ne_bytes());
}
#[test]
fn test_with_radius() {
for radius in [f64::EPSILON, 1e-2, 1.0, 2.0, 10.0, 100.0] {
let cp: CauchyPoint<f64> = CauchyPoint::new();
let tr: TrustRegion<_, f64> = TrustRegion::new(cp);
let res = tr.with_radius(radius);
assert!(res.is_ok());
let nm = res.unwrap();
assert_eq!(nm.radius.to_ne_bytes(), radius.to_ne_bytes());
}
for radius in [0.0, -f64::EPSILON, -1.0, -100.0, -42.0] {
let cp: CauchyPoint<f64> = CauchyPoint::new();
let tr: TrustRegion<_, f64> = TrustRegion::new(cp);
let res = tr.with_radius(radius);
assert_error!(
res,
ArgminError,
"Invalid parameter: \"`TrustRegion`: radius must be > 0.\""
);
}
}
#[test]
fn test_with_eta() {
for eta in [0.0, f64::EPSILON, 1e-2, 0.125, 0.25 - f64::EPSILON] {
let cp: CauchyPoint<f64> = CauchyPoint::new();
let tr: TrustRegion<_, f64> = TrustRegion::new(cp);
let res = tr.with_eta(eta);
assert!(res.is_ok());
let nm = res.unwrap();
assert_eq!(nm.eta.to_ne_bytes(), eta.to_ne_bytes());
}
for eta in [-f64::EPSILON, -1.0, -100.0, -42.0, 0.25, 1.0] {
let cp: CauchyPoint<f64> = CauchyPoint::new();
let tr: TrustRegion<_, f64> = TrustRegion::new(cp);
let res = tr.with_eta(eta);
assert_error!(
res,
ArgminError,
"Invalid parameter: \"`TrustRegion`: eta must be in [0, 1/4).\""
);
}
}
#[test]
fn test_init() {
let param: Vec<f64> = vec![1.0, 2.0];
let cp: CauchyPoint<f64> = CauchyPoint::new();
let mut tr: TrustRegion<_, f64> = TrustRegion::new(cp);
let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> = IterState::new();
let problem = TestProblem::new();
let res = tr.init(&mut Problem::new(problem), state);
assert_error!(
res,
ArgminError,
concat!(
"Not initialized: \"`TrustRegion` requires an initial parameter vector. Please ",
"provide an initial guess via `Executor`s `configure` method.\""
)
);
let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, (), f64> =
IterState::new().param(param.clone());
let problem = TestProblem::new();
let (mut state_out, kv) = tr.init(&mut Problem::new(problem), state).unwrap();
assert!(kv.is_none());
let s_param = state_out.take_param().unwrap();
assert_eq!(s_param[0].to_ne_bytes(), param[0].to_ne_bytes());
assert_eq!(s_param[1].to_ne_bytes(), param[1].to_ne_bytes());
let TrustRegion {
radius,
max_radius,
eta,
subproblem: _,
fxk,
mk0,
} = tr;
assert_eq!(radius.to_ne_bytes(), 1.0f64.to_ne_bytes());
assert_eq!(max_radius.to_ne_bytes(), 100.0f64.to_ne_bytes());
assert_eq!(eta.to_ne_bytes(), 0.125f64.to_ne_bytes());
assert_eq!(fxk.to_ne_bytes(), 1.0f64.sqrt().to_ne_bytes());
assert_eq!(mk0.to_ne_bytes(), 1.0f64.to_ne_bytes());
}
}