25 ParameterList &trlist = list.sublist(
"Step").sublist(
"Trust Region");
27 state_->searchSize = trlist.get(
"Initial Radius", -1.0);
28 delMax_ = trlist.get(
"Maximum Radius", ROL_INF<Real>());
29 eta0_ = trlist.get(
"Step Acceptance Threshold", 0.05);
30 eta1_ = trlist.get(
"Radius Shrinking Threshold", 0.05);
31 eta2_ = trlist.get(
"Radius Growing Threshold", 0.9);
32 gamma0_ = trlist.get(
"Radius Shrinking Rate (Negative rho)", 0.0625);
33 gamma1_ = trlist.get(
"Radius Shrinking Rate (Positive rho)", 0.25);
34 gamma2_ = trlist.get(
"Radius Growing Rate", 2.5);
35 TRsafe_ = trlist.get(
"Safeguard Size", 100.0);
36 eps_ = TRsafe_*ROL_EPSILON<Real>();
37 interpRad_ = trlist.get(
"Use Radius Interpolation",
false);
38 verbosity_ = trlist.sublist(
"General").get(
"Output Level", 0);
40 storageNM_ = trlist.get(
"Nonmonotone Storage Size", 0);
41 useNM_ = (storageNM_ <= 0 ? false :
true);
43 ROL::ParameterList &lmlist = trlist.sublist(
"SPG");
44 mu0_ = lmlist.get(
"Sufficient Decrease Parameter", 1e-2);
45 spexp_ = lmlist.get(
"Relative Tolerance Exponent", 1.0);
46 spexp_ = std::max(
static_cast<Real
>(1),std::min(spexp_,
static_cast<Real
>(2)));
47 redlim_ = lmlist.sublist(
"Cauchy Point").get(
"Maximum Number of Reduction Steps", 10);
48 explim_ = lmlist.sublist(
"Cauchy Point").get(
"Maximum Number of Expansion Steps", 10);
49 alpha_ = lmlist.sublist(
"Cauchy Point").get(
"Initial Step Size", 1.0);
50 normAlpha_ = lmlist.sublist(
"Cauchy Point").get(
"Normalize Initial Step Size",
false);
51 interpf_ = lmlist.sublist(
"Cauchy Point").get(
"Reduction Rate", 0.1);
52 extrapf_ = lmlist.sublist(
"Cauchy Point").get(
"Expansion Rate", 10.0);
53 qtol_ = lmlist.sublist(
"Cauchy Point").get(
"Decrease Tolerance", 1e-8);
55 lambdaMin_ = lmlist.sublist(
"Solver").get(
"Minimum Spectral Step Size", 1e-8);
56 lambdaMax_ = lmlist.sublist(
"Solver").get(
"Maximum Spectral Step Size", 1e8);
57 gamma_ = lmlist.sublist(
"Solver").get(
"Sufficient Decrease Tolerance", 1e-4);
58 maxSize_ = lmlist.sublist(
"Solver").get(
"Maximum Storage Size", 10);
59 maxit_ = lmlist.sublist(
"Solver").get(
"Iteration Limit", 25);
60 tol1_ = lmlist.sublist(
"Solver").get(
"Absolute Tolerance", 1e-4);
61 tol2_ = lmlist.sublist(
"Solver").get(
"Relative Tolerance", 1e-2);
62 useMin_ = lmlist.sublist(
"Solver").get(
"Use Smallest Model Iterate",
true);
63 useNMSP_ = lmlist.sublist(
"Solver").get(
"Use Nonmonotone Search",
false);
65 bool useCachyPoint = lmlist.sublist(
"Solver").get(
"Compute Cauchy Point",
true);
66 useSimpleSPG_ = !useCachyPoint;
68 ParameterList &glist = list.sublist(
"General");
70 useInexact_.push_back(glist.get(
"Inexact Objective Function",
false));
71 useInexact_.push_back(glist.get(
"Inexact Gradient",
false));
72 useInexact_.push_back(glist.get(
"Inexact Hessian-Times-A-Vector",
false));
74 ParameterList &ilist = trlist.sublist(
"Inexact").sublist(
"Gradient");
75 scale0_ = ilist.get(
"Tolerance Scaling",
static_cast<Real
>(0.1));
76 scale1_ = ilist.get(
"Relative Tolerance",
static_cast<Real
>(2));
78 ParameterList &vlist = trlist.sublist(
"Inexact").sublist(
"Value");
79 scale_ = vlist.get(
"Tolerance Scaling",
static_cast<Real
>(1.e-1));
80 omega_ = vlist.get(
"Exponent",
static_cast<Real
>(0.9));
81 force_ = vlist.get(
"Forcing Sequence Initial Value",
static_cast<Real
>(1.0));
82 updateIter_ = vlist.get(
"Forcing Sequence Update Frequency",
static_cast<int>(10));
83 forceFactor_ = vlist.get(
"Forcing Sequence Reduction Factor",
static_cast<Real
>(0.1));
85 verbosity_ = list.sublist(
"General").get(
"Output Level",0);
86 writeHeader_ = verbosity_ > 2;
88 useSecantPrecond_ = list.sublist(
"General").sublist(
"Secant").get(
"Use as Preconditioner",
false);
89 useSecantHessVec_ = list.sublist(
"General").sublist(
"Secant").get(
"Use as Hessian",
false);
94 model_ = makePtr<TrustRegionModel_U<Real>>(list,secant,mode);
95 if (secant == nullPtr) {
96 std::string secantType = list.sublist(
"General").sublist(
"Secant").get(
"Type",
"Limited-Memory BFGS");
// NOTE(review): this is a fragment. Only the last parameter of the signature is
// present, and the leading integers appear to be line numbers from the original
// file; the gaps between them mean that conditionals, closing braces and some
// declarations (e.g. L, TRflagNM) are not part of this excerpt.
// Presumably the algorithm's main driver (called `run`) -- confirm against the
// full file. What is visible: it initializes the state, then loops while
// status_->check(*state_) holds, computing a trial step, an actual-vs-predicted
// reduction ratio, a new radius, and updated gradient/model data.
195 std::ostream &outStream ) {
196 const Real
zero(0), one(1);
// inTol starts at 0.1*ROL_OVERFLOW<Real>() and outTol is initialized from it;
// both are handed to computeValue below.
198 Real inTol =
static_cast<Real
>(0.1)*ROL_OVERFLOW<Real>(), outTol(inTol);
// Trial objective value, predicted reduction, reduction ratio, model value.
199 Real ftrial(0), pRed(0), rho(1), q(0);
201 std::vector<std::string> output;
202 initialize(x,g,inTol,obj,bnd,outStream);
// Work vectors: gmod, dwa1 and dwa2 are cloned from g; pwa1..pwa7 are cloned from x.
203 Ptr<Vector<Real>> gmod = g.
clone();
204 Ptr<Vector<Real>> pwa1 = x.
clone(), pwa2 = x.
clone();
205 Ptr<Vector<Real>> pwa3 = x.
clone(), pwa4 = x.
clone();
206 Ptr<Vector<Real>> pwa5 = x.
clone(), pwa6 = x.
clone();
207 Ptr<Vector<Real>> pwa7 = x.
clone();
208 Ptr<Vector<Real>> dwa1 = g.
clone(), dwa2 = g.
clone();
// Bookkeeping for the second (nonmonotone) ratio test: rhoNM is its ratio,
// sigmac/sigmar accumulate predicted reductions, and fr/fc/fmin all start at
// the current objective value.
210 Real rhoNM(0), sigmac(0), sigmar(0);
211 Real fr(state_->value), fc(state_->value), fmin(state_->value);
// Initial output line; the second argument `true` requests the header.
216 if (verbosity_ > 0) writeOutput(outStream,
true);
218 while (status_->check(*state_)) {
// Hand the current iterate and gradient to the trust-region model.
220 model_->setData(obj,*state_->iterateVec,*state_->gradientVec,gtol_);
// gmod starts as a copy of the current gradient and is passed to the
// subproblem solvers below.
224 gmod->set(*state_->gradientVec);
// NOTE(review): the three solver calls below (dpsg_simple; dcauchy followed by
// dpsg) are presumably alternatives selected by a flag such as useSimpleSPG_,
// but the branching statements are not in this excerpt -- confirm.
226 dpsg_simple(x,q,*gmod,*state_->iterateVec,state_->searchSize,*model_,
227 *pwa1,*pwa2,*dwa1,outStream);
230 dcauchy(*state_->stepVec,alpha_,q,*state_->iterateVec,
231 state_->gradientVec->dual(),state_->searchSize,
232 *model_,*dwa1,*dwa2,outStream);
// Move x by the step that dcauchy wrote into stepVec.
233 x.
plus(*state_->stepVec);
239 dpsg(x,q,*gmod,*state_->iterateVec,state_->searchSize,*model_,
240 *pwa1,*pwa2,*pwa3,*pwa4,*pwa5,*pwa6,*pwa7,*dwa1,outStream);
// Step = x - (stored iterate); snorm is its norm.
245 state_->stepVec->set(x); state_->stepVec->axpy(-one,*state_->iterateVec);
246 state_->snorm = state_->stepVec->norm();
// Objective value at the trial point x.
249 ftrial = computeValue(inTol,outTol,pRed,state_->value,state_->iter,x,*state_->iterateVec,obj);
// Ratio test against the current value: analyzeRatio receives rho and TRflag_.
254 TRUtils::analyzeRatio<Real>(rho,TRflag_,state_->value,ftrial,pRed,eps_,outStream,verbosity_>1);
// Second ratio test against the reference value fr with predicted reduction
// pRed+sigmar; afterwards the larger of rho/rhoNM (and its flag) is kept.
256 TRUtils::analyzeRatio<Real>(rhoNM,TRflagNM,fr,ftrial,pRed+sigmar,eps_,outStream,verbosity_>1);
257 TRflag_ = (rho < rhoNM ? TRflagNM : TRflag_);
258 rho = (rho < rhoNM ? rhoNM : rho );
// Reset x to the stored iterate.
// NOTE(review): presumably the step-rejection path; the guarding condition is
// not visible here.
265 x.
set(*state_->iterateVec);
// Two radius updates appear here: interpolation via TRUtils::interpolateRadius,
// and gamma1_ * min(snorm, current radius). Presumably chosen by interpRad_ --
// the selecting conditional is not visible.
269 state_->searchSize = TRUtils::interpolateRadius<Real>(*state_->gradientVec,*state_->stepVec,
270 state_->snorm,pRed,state_->value,ftrial,state_->searchSize,gamma0_,gamma1_,eta2_,
271 outStream,verbosity_>1);
274 state_->searchSize = gamma1_*std::min(state_->snorm,state_->searchSize);
// Gradient recomputation with the boolean argument set to false.
276 computeGradient(x,*state_->gradientVec,*pwa1,state_->searchSize,obj,
false,gtol_,state_->gnorm,outStream);
// Adopt the trial value as the current objective value.
280 state_->value = ftrial;
// Accumulate the predicted reduction into both running sums.
284 sigmac += pRed; sigmar += pRed;
// New best value: reset the candidate fc, its sum sigmac, and the counter L
// (L is declared outside this excerpt).
285 if (ftrial < fmin) { fmin = ftrial; fc = fmin; sigmac =
zero; L = 0; }
// Trial value above the candidate: it becomes the new candidate.
288 if (ftrial > fc) { fc = ftrial; sigmac =
zero; }
// After storageNM_ counts, promote the candidate to the reference value.
289 if (L == storageNM_) { fr = fc; sigmar = sigmac; }
// Enlarge the radius by gamma2_ when rho >= eta2_, capped at delMax_.
293 if (rho >= eta2_) state_->searchSize = std::min(gamma2_*state_->searchSize, delMax_);
// Keep the previous gradient in dwa1; it is passed to model_->update below.
295 dwa1->set(*state_->gradientVec);
296 computeGradient(x,*state_->gradientVec,*pwa1,state_->searchSize,obj,
true,gtol_,state_->gnorm,outStream);
// Store x as the new iterate.
298 state_->iterateVec->set(x);
// Update the model with the step, the old gradient (dwa1) and the new gradient.
300 model_->update(x,*state_->stepVec,*dwa1,*state_->gradientVec,
301 state_->snorm,state_->iter);
// Per-iteration output; writeHeader_ decides whether the header is repeated.
305 if (verbosity_ > 0) writeOutput(outStream,writeHeader_);
// NOTE(review): fragment -- only the tail of the signature is present, and the
// loop headers plus the assignments of gs, qs and cnt are missing from this
// excerpt. Presumably this is `dcauchy`, judging by the call site and the
// "Cauchy point" output below -- confirm.
// What is visible: a step s is computed for step length alpha via dgpstep, the
// quadratic model value q = 0.5*s.apply(H s) + gs is evaluated, and alpha is
// adjusted (reduction limited by redlim_, expansion limited by explim_) using
// the test q <= mu0_*gs.
329 std::ostream &outStream) {
330 const Real half(0.5);
// Tolerance passed to every model.hessVec call in this fragment.
332 Real tol = std::sqrt(ROL_EPSILON<Real>());
334 Real gs(0), snorm(0);
// dgpstep is called with -alpha and its return value is stored in snorm.
// NOTE(review): presumably it fills s with the projected step and returns its
// norm -- dgpstep is defined outside this excerpt.
336 snorm = dgpstep(s,g,x,-alpha,outStream);
// dwa = H*s; nhess_ counts Hessian applications.
341 model.
hessVec(dwa,s,x,tol); nhess_++;
// Model value along s (gs is assigned on a line not shown here).
344 q = half * s.
apply(dwa) + gs;
// interp is true when the decrease test q <= mu0_*gs fails.
345 interp = (q > mu0_*gs);
// Same evaluation repeated; presumably the body of the reduction loop.
353 snorm = dgpstep(s,g,x,-alpha,outStream);
355 model.
hessVec(dwa,s,x,tol); nhess_++;
358 q = half * s.
apply(dwa) + gs;
// Continue while the decrease test fails and fewer than redlim_ steps were taken.
359 search = (q > mu0_*gs) && (cnt < redlim_);
// Expansion phase: only evaluated while the step stays within del and fewer
// than explim_ expansions were taken.
371 snorm = dgpstep(s,g,x,-alpha,outStream);
372 if (snorm <= del && cnt < explim_) {
373 model.
hessVec(dwa,s,x,tol); nhess_++;
376 q = half * s.
apply(dwa) + gs;
// Accept the expanded step if it satisfies the decrease test and q changed by
// more than the relative tolerance qtol_ with respect to qs.
377 if (q <= mu0_*gs && std::abs(q-qs) > qtol_*std::abs(qs)) {
// Recompute the step for the final alpha.
395 snorm = dgpstep(s,g,x,-alpha,outStream);
// Diagnostic output; -q is reported as the model decrease.
397 if (verbosity_ > 1) {
398 outStream <<
" Cauchy point" << std::endl;
399 outStream <<
" Step length (alpha): " << alpha << std::endl;
400 outStream <<
" Step length (alpha*g): " << snorm << std::endl;
401 outStream <<
" Model decrease (pRed): " << -q << std::endl;
403 outStream <<
" Number of extrapolation steps: " << cnt << std::endl;
// NOTE(review): fragment -- only the tail of the signature is present and
// several statements (e.g. the assignments of ss0, ss inside the loop, and the
// SPiter_ increment) are missing from this excerpt. Presumably this is
// `dpsg_simple`, judging by the call site and the argument names -- confirm.
// What is visible: an iteration bounded by maxit_ that applies the model
// Hessian to a projected direction pwa, picks a step length alpha, updates the
// model value q and model gradient gmod, and recomputes a spectral step length
// lambda clamped to [lambdaMin_, lambdaMax_].
419 std::ostream &outStream) {
// safeguard = 100*machine epsilon; used as slack in the radius and curvature tests.
427 const Real half(0.5), one(1), safeguard(1e2*ROL_EPSILON<Real>());
// Tolerance passed to model.hessVec.
428 Real tol(std::sqrt(ROL_EPSILON<Real>()));
429 Real alpha(1), alphaMax(1), s0s0(0), ss0(0), sHs(0), lambdaTmp(1), snorm(0);
// Initial spectral step length: 1/||gmod|| clamped to [lambdaMin_, lambdaMax_].
436 Real coeff = one/gmod.
norm();
437 Real lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
// Project pwa (its construction is on lines not shown); nproj counts projections.
439 proj_->project(pwa,outStream); state_->nproj++;
// gs = gmod applied to pwa, ss = <pwa,pwa>, gnorm = sqrt(ss).
441 Real gs = gmod.
apply(pwa);
442 Real ss = pwa.
dot(pwa);
443 Real gnorm = std::sqrt(ss);
// Stopping tolerance: the smaller of tol1_ and tol2_ times the initial gnorm.
446 const Real gtol = std::min(tol1_,tol2_*gnorm);
449 outStream <<
" Spectral Projected Gradient" << std::endl;
452 while (SPiter_ < maxit_) {
// dwa = H*pwa and sHs = dwa applied to pwa.
456 model.
hessVec(dwa,pwa,x,tol); nhess_++;
457 sHs = dwa.
apply(pwa);
// When gnorm reaches del (within safeguard), cap the step length: alphaMax is
// the root (-ss0 + sqrt(ss0^2 - ss*(s0s0 - del^2)))/ss, limited to at most one.
461 if (gnorm >= del-safeguard) {
463 alphaMax = std::min(one, (-ss0 + std::sqrt(ss0*ss0 - ss*(s0s0-del*del)))/ss);
// Curvature test; the statement taken when sHs <= safeguard is not shown.
// Otherwise alpha is -gs/sHs capped at alphaMax.
465 if (sHs <= safeguard)
468 alpha = std::min(alphaMax, -gs/sHs);
// Update the model value and the model gradient: gmod += alpha*dwa.
471 q += alpha * (gs + half * alpha * sHs);
472 gmod.
axpy(alpha,dwa);
// pwa1 = y - x; s0s0 is its squared norm and snorm its norm.
476 pwa1.
set(y); pwa1.
axpy(-one,x);
477 s0s0 = pwa1.
dot(pwa1);
478 snorm = std::sqrt(s0s0);
480 if (verbosity_ > 1) {
481 outStream << std::endl;
482 outStream <<
" Iterate: " << SPiter_ << std::endl;
483 outStream <<
" Spectral step length (lambda): " << lambda << std::endl;
484 outStream <<
" Step length (alpha): " << alpha << std::endl;
485 outStream <<
" Model decrease (pRed): " << -q << std::endl;
486 outStream <<
" Optimality criterion: " << gnorm << std::endl;
487 outStream <<
" Step norm: " << snorm << std::endl;
488 outStream << std::endl;
// Exit with SPflag_ = 2 once the step norm reaches del (within safeguard).
491 if (snorm >= del - safeguard) { SPflag_ = 2;
break; }
// New spectral step length: ss/sHs, or 1/||gmod|| when sHs <= safeguard;
// then clamped to [lambdaMin_, lambdaMax_].
494 lambdaTmp = (sHs <= safeguard ? one/gmod.
norm() : ss/sHs);
495 lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
497 proj_->project(pwa,outStream); state_->nproj++;
499 gs = gmod.
apply(pwa);
// NOTE(review): ss is presumably refreshed on a line missing between these two.
501 gnorm = std::sqrt(ss);
// Exit with SPflag_ = 0 when gnorm falls to gtol or below.
503 if (gnorm <= gtol) { SPflag_ = 0;
break; }
// Flag 1 marks hitting the iteration limit; otherwise the flag is left as is.
505 SPflag_ = (SPiter_==maxit_) ? 1 : SPflag_;
// NOTE(review): fragment -- only the tail of the signature is present and
// several statements (e.g. how pwa1 is filled, the mqueue push inside the loop,
// the SPiter_ increment and the branching around alphaTmp) are missing from this
// excerpt. Presumably this is `dpsg`, judging by the call site and the argument
// names -- confirm.
// What is visible: an iteration bounded by maxit_ in which trial points are
// formed as y - lambda*pwa1 and passed to dproj, a step length alpha is chosen,
// the model value q and model gradient gmod are updated, and (optionally) the
// iterate with the smallest model value is remembered in ymin/qmin.
523 std::ostream &outStream) {
531 const Real
zero(0), half(0.5), one(1), two(2);
// Tolerance passed to model.hessVec.
532 Real tol(std::sqrt(ROL_EPSILON<Real>()));
533 Real alpha(1), sHs(0), alphaTmp(1), mmax(0), qmin(0), lambdaTmp(1);
// Queue of model values, seeded with the incoming q; its maximum is used in the
// alphaTmp formula below and its length is capped at maxSize_.
534 std::deque<Real> mqueue; mqueue.push_back(q);
// With both options enabled, start tracking the best model value and iterate.
536 if (useNMSP_ && useMin_) { qmin = q; ymin.
set(y); }
// pwa = y - pwa1, then dproj; the norm of the result is the criterion gnorm.
540 pwa.
set(y); pwa.
axpy(-one,pwa1);
541 dproj(pwa,x,del,pwa2,pwa3,pwa4,pwa5,outStream);
543 Real gnorm = pwa.
norm();
// Stopping tolerance: the smaller of tol1_ and tol2_ times the initial gnorm.
544 const Real gtol = std::min(tol1_,tol2_*gnorm);
// Initial spectral step length: 1/||gmod|| clamped to [lambdaMin_, lambdaMax_].
547 Real coeff = one/gmod.
norm();
548 Real lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
// pwa = y - lambda*pwa1, then dproj.
549 pwa.
set(y); pwa.
axpy(-lambda,pwa1);
550 dproj(pwa,x,del,pwa2,pwa3,pwa4,pwa5,outStream);
// gs = gmod applied to pwa, ss = <pwa,pwa>.
552 Real gs = gmod.
apply(pwa);
553 Real ss = pwa.
dot(pwa);
556 outStream <<
" Spectral Projected Gradient" << std::endl;
559 while (SPiter_ < maxit_) {
// dwa = H*pwa and sHs = dwa applied to pwa.
563 model.
hessVec(dwa,pwa,x,tol); nhess_++;
564 sHs = dwa.
apply(pwa);
// mmax is the largest stored model value; alphaTmp is a root of a quadratic in
// alpha built from gamma_, gs, sHs and (q - mmax).
// NOTE(review): presumably only evaluated on the nonmonotone (useNMSP_) path;
// the surrounding conditional is not visible.
568 mmax = *std::max_element(mqueue.begin(),mqueue.end());
569 alphaTmp = (-(one-gamma_)*gs + std::sqrt(std::pow((one-gamma_)*gs,two)-two*sHs*(q-mmax)))/sHs;
// For positive curvature, alpha is alphaTmp clamped to [0,1]; otherwise one.
574 alpha = (sHs >
zero ? std::min(one,std::max(
zero,alphaTmp)) : one);
// Update the model value and the model gradient: gmod += alpha*dwa.
577 q += alpha * (gs + half * alpha * sHs);
578 gmod.
axpy(alpha,dwa);
// Drop the oldest stored value once the queue holds maxSize_ entries.
583 if (
static_cast<int>(mqueue.size())==maxSize_) mqueue.pop_front();
// Remember the iterate with the smallest model value so far.
585 if (useMin_ && q <= qmin) { qmin = q; ymin.
set(y); }
// Recompute pwa = y - pwa1 and pass it through dproj.
590 pwa.
set(y); pwa.
axpy(-one,pwa1);
591 dproj(pwa,x,del,pwa2,pwa3,pwa4,pwa5,outStream);
595 if (verbosity_ > 1) {
596 outStream << std::endl;
597 outStream <<
" Iterate: " << SPiter_ << std::endl;
598 outStream <<
" Spectral step length (lambda): " << lambda << std::endl;
599 outStream <<
" Step length (alpha): " << alpha << std::endl;
600 outStream <<
" Model decrease (pRed): " << -q << std::endl;
601 outStream <<
" Optimality criterion: " << gnorm << std::endl;
602 outStream << std::endl;
// Leave the loop once gnorm drops below gtol.
604 if (gnorm < gtol)
break;
// New spectral step length: ss/sHs, or coeff when sHs is exactly zero; then
// clamped to [lambdaMin_, lambdaMax_].
// NOTE(review): this is an exact floating-point comparison with 0, whereas a
// similar update elsewhere in this file tests `sHs <= safeguard`; a tiny or
// negative sHs is not special-cased here -- confirm this is intended.
608 lambdaTmp = (sHs == 0 ? coeff : ss/sHs);
609 lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
610 pwa.
set(y); pwa.
axpy(-lambda,pwa1);
611 dproj(pwa,x,del,pwa2,pwa3,pwa4,pwa5,outStream);
613 gs = gmod.
apply(pwa);
// With both options enabled, return the best recorded iterate and model value.
616 if (useNMSP_ && useMin_) { q = qmin; y.
set(ymin); }
// Flag 1 marks hitting the iteration limit, 0 otherwise.
617 SPflag_ = (SPiter_==maxit_) ? 1 : 0;
// NOTE(review): fragment -- only the tail of the signature is present, and the
// loop header plus the assignments of f0, f1, m, d1, d2 and the construction of
// y1 are missing from this excerpt. Presumably this is `dproj`, judging by the
// call sites and the "Trust-Region Subproblem Projection" output -- confirm.
// What is visible: a scalar iteration on a parameter t1 (bracketed by t0/tc)
// that repeatedly calls proj_->project and compares values f0/f1/fc against del.
// NOTE(review): the update structure (p, q, r, s ratios with a tolerance-guarded
// fallback) resembles Brent's root-finding method -- confirm against the full
// source before relying on that name.
628 std::ostream &outStream)
const {
630 const Real
zero(0), half(0.5), one(1), two(2), three(3);
// eps is machine epsilon, tol0 = 10*eps, and fudge = 1 - 0.01*sqrt(eps) is the
// lower factor of the acceptance window [fudge*del, del] tested below.
// NOTE(review): `sqrt` is unqualified here, unlike the std::sqrt calls elsewhere
// in this file.
631 const Real eps(ROL_EPSILON<Real>()), tol0(1e1*eps), fudge(1.0-1e-2*sqrt(eps));
632 Real f0(0), f1(0), fc(0), t0(0), t1(1), tc(0), d1(1), d2(1), tol(1);
633 Real p(0), q(0), r(0), s(0), m(0);
// Projection count at entry; the verbose output reports the difference.
634 int cnt(state_->nproj);
// Project y1 and form pwa = y1 - x0.
636 proj_->project(y1,outStream); state_->nproj++;
637 pwa.
set(y1); pwa.
axpy(-one,x0);
// Initialize the third point (tc, fc, yc) from the (t0, f0, y0) point.
644 tc = t0; fc = f0; yc.
set(y0);
// If fc is closer to del than f1, rotate the scalar points so that (t1, f1)
// holds the closer one.
648 if (std::abs(fc-del) < std::abs(f1-del)) {
649 t0 = t1; t1 = tc; tc = t0;
650 f0 = f1; f1 = fc; fc = f0;
// Tolerance scaled by the current |t1|.
653 tol = two*eps*std::abs(t1) + half*tol0;
// Stop with code = 1 when |m| is within tol (m is assigned on a line not shown).
655 if (std::abs(m) <= tol) { code = 1;
break; }
// Stop when f1 lies inside [fudge*del, del].
656 if ((f1 >= fudge*del && f1 <= del))
break;
// Guard: taken when the previous step d1 is below tol or f0 is not farther from
// del than f1; the statements inside this branch are not shown.
657 if (std::abs(d1) < tol || std::abs(f0-del) <= std::abs(f1-del)) {
// Ratios of residuals (value minus del) used to build the candidate step p/q.
661 s = (f1-del)/(f0-del);
667 q = (f0-del)/(fc-del);
668 r = (f1-del)/(fc-del);
669 p = s*(two*m*q*(q-r)-(t1-t0)*(r-one));
670 q = (q-one)*(r-one)*(s-one);
// Sign adjustment of q when p is positive.
672 if (p >
zero) q = -q;
// Acceptance test for the candidate step; its body is not shown.
676 if (two*p < three*m*q-std::abs(tol*q) && p < std::abs(half*s*q)) {
// Shift the current point into (t0, f0, y0) before moving t1.
683 t0 = t1; f0 = f1; y0.
set(y1);
// Move t1 by d2 when that step exceeds tol; otherwise by tol when m is positive
// (any further alternative is on a line not shown).
684 if (std::abs(d2) > tol) t1 += d2;
685 else if (m >
zero) t1 += tol;
// Project again and form pwa = y1 - x0.
688 proj_->project(y1,outStream); state_->nproj++;
689 pwa.
set(y1); pwa.
axpy(-one,x0);
// When f1 and fc fall on the same side of del, reset the third point from
// (t0, f0, y0).
691 if ((f1 > del && fc > del) || (f1 <= del && fc <= del)) {
692 tc = t0; fc = f0; yc.
set(y0);
// On exit code 1 with f1 still above del, return the yc point in x.
696 if (code==1 && f1>del) x.
set(yc);
// Diagnostic output: number of projections performed in this call, the reported
// multiplier/residual pair (tc, fc-del on the code==1 && f1>del path; t1, f1-del
// presumably otherwise -- the `else` line is not shown), and the exit code.
698 if (verbosity_ > 1) {
699 outStream << std::endl;
700 outStream <<
" Trust-Region Subproblem Projection" << std::endl;
701 outStream <<
" Number of polyhedral projections: " << state_->nproj-cnt << std::endl;
702 if (code == 1 && f1 > del) {
703 outStream <<
" Transformed Multiplier: " << tc << std::endl;
704 outStream <<
" Dual Residual: " << fc-del << std::endl;
707 outStream <<
" Transformed Multiplier: " << t1 << std::endl;
708 outStream <<
" Dual Residual: " << f1-del << std::endl;
710 outStream <<
" Exit Code: " << code << std::endl;
711 outStream << std::endl;
943 std::ios_base::fmtflags osFlags(os.flags());
944 os << std::scientific << std::setprecision(6);
945 if ( state_->iter == 0 ) writeName(os);
946 if ( write_header ) writeHeader(os);
947 if ( state_->iter == 0 ) {
949 os << std::setw(6) << std::left << state_->iter;
950 os << std::setw(15) << std::left << state_->value;
951 os << std::setw(15) << std::left << state_->gnorm;
952 os << std::setw(15) << std::left <<
"---";
953 os << std::setw(15) << std::left << state_->searchSize;
954 os << std::setw(10) << std::left << state_->nfval;
955 os << std::setw(10) << std::left << state_->ngrad;
956 os << std::setw(10) << std::left << nhess_;
957 os << std::setw(10) << std::left << state_->nproj;
958 os << std::setw(10) << std::left <<
"---";
959 os << std::setw(10) << std::left <<
"---";
960 os << std::setw(10) << std::left <<
"---";
965 os << std::setw(6) << std::left << state_->iter;
966 os << std::setw(15) << std::left << state_->value;
967 os << std::setw(15) << std::left << state_->gnorm;
968 os << std::setw(15) << std::left << state_->snorm;
969 os << std::setw(15) << std::left << state_->searchSize;
970 os << std::setw(10) << std::left << state_->nfval;
971 os << std::setw(10) << std::left << state_->ngrad;
972 os << std::setw(10) << std::left << nhess_;
973 os << std::setw(10) << std::left << state_->nproj;
974 os << std::setw(10) << std::left << TRflag_;
975 os << std::setw(10) << std::left << SPiter_;
976 os << std::setw(10) << std::left << SPflag_;