1071 Teuchos::LAPACK<int,ScalarType> lapack;
1072 std::vector<int> index(recycleBlocks_);
1073 ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
1074 ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
1083 setParameters(Teuchos::parameterList(*getValidParameters()));
1087 "Belos::RCGSolMgr::solve(): Linear problem is not a valid object.");
1089 "Belos::RCGSolMgr::solve(): Linear problem is not ready, setProblem() has not been called.");
1090 TEUCHOS_TEST_FOR_EXCEPTION((problem_->getLeftPrec() != Teuchos::null)&&(problem_->getRightPrec() != Teuchos::null),
1092 "Belos::RCGSolMgr::solve(): RCG does not support split preconditioning, only set left or right preconditioner.");
1095 Teuchos::RCP<OP> precObj;
1096 if (problem_->getLeftPrec() != Teuchos::null) {
1097 precObj = Teuchos::rcp_const_cast<OP>(problem_->getLeftPrec());
1099 else if (problem_->getRightPrec() != Teuchos::null) {
1100 precObj = Teuchos::rcp_const_cast<OP>(problem_->getRightPrec());
1104 int numRHS2Solve = MVT::GetNumberVecs( *(problem_->getRHS()) );
1105 std::vector<int> currIdx(1);
1109 problem_->setLSIndex( currIdx );
1112 ptrdiff_t dim = MVT::GetGlobalLength( *(problem_->getRHS()) );
1113 if (numBlocks_ > dim) {
1114 numBlocks_ = Teuchos::asSafe<int>(dim);
1115 params_->set(
"Num Blocks", numBlocks_);
1117 "Warning! Requested Krylov subspace dimension is larger than operator dimension!" << std::endl <<
1118 " The maximum number of blocks allowed for the Krylov subspace will be adjusted to " << numBlocks_ << std::endl;
1122 initializeStateStorage();
1125 Teuchos::ParameterList plist;
1126 plist.set(
"Num Blocks",numBlocks_);
1127 plist.set(
"Recycled Blocks",recycleBlocks_);
1130 outputTest_->reset();
1133 bool isConverged =
true;
1137 index.resize(recycleBlocks_);
1138 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1139 Teuchos::RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1140 index.resize(recycleBlocks_);
1141 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1142 Teuchos::RCP<MV> AUtmp = MVT::CloneViewNonConst( *AU_, index );
1144 problem_->applyOp( *Utmp, *AUtmp );
1146 Teuchos::SerialDenseMatrix<int,ScalarType> UTAUtmp( Teuchos::View, *UTAU_, recycleBlocks_, recycleBlocks_ );
1147 MVT::MvTransMv( one, *Utmp, *AUtmp, UTAUtmp );
1149 Teuchos::SerialDenseMatrix<int,ScalarType> AUTAUtmp( Teuchos::View, *AUTAU_, recycleBlocks_, recycleBlocks_ );
1150 if ( precObj != Teuchos::null ) {
1151 index.resize(recycleBlocks_);
1152 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1153 index.resize(recycleBlocks_);
1154 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1155 Teuchos::RCP<MV> PCAU = MVT::CloneViewNonConst( *U1_, index );
1156 OPT::Apply( *precObj, *AUtmp, *PCAU );
1157 MVT::MvTransMv( one, *AUtmp, *PCAU, AUTAUtmp );
1159 MVT::MvTransMv( one, *AUtmp, *AUtmp, AUTAUtmp );
1166 Teuchos::RCP<RCGIter<ScalarType,MV,OP> > rcg_iter;
1171#ifdef BELOS_TEUCHOS_TIME_MONITOR
1172 Teuchos::TimeMonitor slvtimer(*timerSolve_);
1175 while ( numRHS2Solve > 0 ) {
1178 if (printer_->isVerbosity(
Debug ) ) {
1179 if (existU_) printer_->print(
Debug,
"Using recycle space generated from previous call to solve()." );
1180 else printer_->print(
Debug,
"No recycle space exists." );
1184 rcg_iter->resetNumIters();
1187 rcg_iter->setSize( recycleBlocks_, numBlocks_ );
1190 outputTest_->resetNumCalls();
1199 problem_->computeCurrResVec( &*r_ );
1204 Teuchos::SerialDenseMatrix<int,ScalarType> Utr(recycleBlocks_,1);
1205 index.resize(recycleBlocks_);
1206 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1207 Teuchos::RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1208 MVT::MvTransMv( one, *Utmp, *r_, Utr );
1209 Teuchos::SerialDenseMatrix<int,ScalarType> UTAUtmp( Teuchos::View, *UTAU_, recycleBlocks_, recycleBlocks_ );
1210 Teuchos::SerialDenseMatrix<int,ScalarType> LUUTAUtmp( Teuchos::View, *LUUTAU_, recycleBlocks_, recycleBlocks_ );
1211 LUUTAUtmp.assign(UTAUtmp);
1213 lapack.GESV(recycleBlocks_, 1, LUUTAUtmp.values(), LUUTAUtmp.stride(), &(*ipiv_)[0], Utr.values(), Utr.stride(), &info);
1215 "Belos::RCGSolMgr::solve(): LAPACK GESV failed to compute a solution.");
1218 MVT::MvTimesMatAddMv( one, *Utmp, Utr, one, *problem_->getCurrLHSVec() );
1221 index.resize(recycleBlocks_);
1222 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1223 Teuchos::RCP<const MV> AUtmp = MVT::CloneView( *AU_, index );
1224 MVT::MvTimesMatAddMv( -one, *AUtmp, Utr, one, *r_ );
1227 if ( precObj != Teuchos::null ) {
1228 OPT::Apply( *precObj, *r_, *z_ );
1234 MVT::MvTransMv( one, *r_, *z_, *rTz_old_ );
1238 Teuchos::SerialDenseMatrix<int,ScalarType> mu( Teuchos::View, *Delta_, recycleBlocks_, 1);
1239 index.resize(recycleBlocks_);
1240 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1241 Teuchos::RCP<const MV> AUtmp = MVT::CloneView( *AU_, index );
1242 MVT::MvTransMv( one, *AUtmp, *z_, mu );
1243 Teuchos::SerialDenseMatrix<int,ScalarType> LUUTAUtmp( Teuchos::View, *LUUTAU_, recycleBlocks_, recycleBlocks_ );
1246 lapack.GETRS(
TRANS, recycleBlocks_, 1, LUUTAUtmp.values(), LUUTAUtmp.stride(), &(*ipiv_)[0], mu.values(), mu.stride(), &info );
1248 "Belos::RCGSolMgr::solve(): LAPACK GETRS failed to compute a solution.");
1252 Teuchos::RCP<MV> Ptmp = MVT::CloneViewNonConst( *P_, index );
1253 MVT::MvAddMv(one,*z_,zero,*z_,*Ptmp);
1254 MVT::MvTimesMatAddMv( -one, *U_, mu, one, *Ptmp );
1259 Teuchos::RCP<MV> Ptmp = MVT::CloneViewNonConst( *P_, index );
1260 MVT::MvAddMv(one,*z_,zero,*z_,*Ptmp);
1267 index.resize( numBlocks_+1 );
1268 for (
int ii=0; ii<(numBlocks_+1); ++ii) { index[ii] = ii; }
1269 newstate.
P = MVT::CloneViewNonConst( *P_, index );
1270 index.resize( recycleBlocks_ );
1271 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1272 newstate.
U = MVT::CloneViewNonConst( *U_, index );
1273 index.resize( recycleBlocks_ );
1274 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1275 newstate.
AU = MVT::CloneViewNonConst( *AU_, index );
1276 newstate.
Alpha = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Alpha_, numBlocks_, 1 ) );
1277 newstate.
Beta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Beta_, numBlocks_, 1 ) );
1278 newstate.
D = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *D_, numBlocks_, 1 ) );
1279 newstate.
Delta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Delta_, recycleBlocks_, numBlocks_, 0, 1 ) );
1280 newstate.
LUUTAU = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *LUUTAU_, recycleBlocks_, recycleBlocks_ ) );
1286 newstate.
existU = existU_;
1287 newstate.
ipiv = ipiv_;
1290 rcg_iter->initialize(newstate);
1296 rcg_iter->iterate();
1303 if ( convTest_->getStatus() ==
Passed ) {
1312 else if ( maxIterTest_->getStatus() ==
Passed ) {
1314 isConverged =
false;
1322 else if ( rcg_iter->getCurSubspaceDim() == rcg_iter->getMaxSubspaceDim() ) {
1327 if (recycleBlocks_ > 0) {
1331 Teuchos::SerialDenseMatrix<int,ScalarType> Ftmp( Teuchos::View, *F_, numBlocks_, numBlocks_ );
1332 Teuchos::SerialDenseMatrix<int,ScalarType> Gtmp( Teuchos::View, *G_, numBlocks_, numBlocks_ );
1333 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *D_, numBlocks_, 1 );
1334 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *Alpha_, numBlocks_, 1 );
1335 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *Beta_, numBlocks_, 1 );
1336 Ftmp.putScalar(zero);
1337 Gtmp.putScalar(zero);
1338 for (
int ii=0;ii<numBlocks_;ii++) {
1339 Gtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii,0));
1341 Gtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1342 Gtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1344 Ftmp(ii,ii) = Dtmp(ii,0);
1348 Teuchos::SerialDenseMatrix<int,ScalarType> Ytmp( Teuchos::View, *Y_, numBlocks_, recycleBlocks_ );
1349 getHarmonicVecs(Ftmp,Gtmp,Ytmp);
1352 index.resize( numBlocks_ );
1353 for (
int ii=0; ii<numBlocks_; ++ii) { index[ii] = ii; }
1354 Teuchos::RCP<const MV> Ptmp = MVT::CloneView( *P_, index );
1355 index.resize( recycleBlocks_ );
1356 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1357 Teuchos::RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1358 MVT::MvTimesMatAddMv( one, *Ptmp, Ytmp, zero, *U1tmp );
1363 Teuchos::SerialDenseMatrix<int,ScalarType> GYtmp( Teuchos::View, *GY_, numBlocks_, recycleBlocks_ );
1364 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1365 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAU1tmp( Teuchos::View, *AU1TAU1_, recycleBlocks_, recycleBlocks_ );
1366 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1369 Teuchos::SerialDenseMatrix<int,ScalarType> FYtmp( Teuchos::View, *FY_, numBlocks_, recycleBlocks_ );
1370 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1371 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TU1tmp( Teuchos::View, *AU1TU1_, recycleBlocks_, recycleBlocks_ );
1372 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1374 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAPtmp( Teuchos::View, *AU1TAP_, recycleBlocks_, 1 );
1376 AU1TAPtmp.putScalar(zero);
1378 ScalarType alphatmp = -1.0 / Alphatmp(numBlocks_-1,0);
1379 for (
int ii=0; ii<recycleBlocks_; ++ii) {
1380 AU1TAPtmp(ii,0) = Ytmp(numBlocks_-1,ii) * alphatmp;
1388 lastBeta = numBlocks_-1;
1395 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *D_, numBlocks_, 1 );
1396 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAPtmp( Teuchos::View, *AU1TAP_, recycleBlocks_, numBlocks_ );
1397 AU1TAPtmp.scale(Dtmp(0,0));
1399 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *Alpha_, numBlocks_, 1 );
1400 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *Beta_, numBlocks_+1, 1 );
1401 Teuchos::SerialDenseMatrix<int,ScalarType> APTAPtmp( Teuchos::View, *APTAP_, numBlocks_, numBlocks_ );
1402 APTAPtmp.putScalar(zero);
1403 for (
int ii=0; ii<numBlocks_; ii++) {
1404 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii+1,0));
1406 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1407 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1412 Teuchos::SerialDenseMatrix<int,ScalarType> Ftmp( Teuchos::View, *F_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1413 Teuchos::SerialDenseMatrix<int,ScalarType> F11( Teuchos::View, *F_, recycleBlocks_, recycleBlocks_ );
1414 Teuchos::SerialDenseMatrix<int,ScalarType> F12( Teuchos::View, *F_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1415 Teuchos::SerialDenseMatrix<int,ScalarType> F21( Teuchos::View, *F_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1416 Teuchos::SerialDenseMatrix<int,ScalarType> F22( Teuchos::View, *F_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1417 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TU1tmp( Teuchos::View, *AU1TU1_, recycleBlocks_, recycleBlocks_ );
1418 F11.assign(AU1TU1tmp);
1419 F12.putScalar(zero);
1420 F21.putScalar(zero);
1421 F22.putScalar(zero);
1422 for(
int ii=0;ii<numBlocks_;ii++) {
1423 F22(ii,ii) = Dtmp(ii,0);
1427 Teuchos::SerialDenseMatrix<int,ScalarType> Gtmp( Teuchos::View, *G_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1428 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAU1tmp( Teuchos::View, *AU1TAU1_, recycleBlocks_, recycleBlocks_ );
1429 Teuchos::SerialDenseMatrix<int,ScalarType> G11( Teuchos::View, *G_, recycleBlocks_, recycleBlocks_ );
1430 Teuchos::SerialDenseMatrix<int,ScalarType> G12( Teuchos::View, *G_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1431 Teuchos::SerialDenseMatrix<int,ScalarType> G21( Teuchos::View, *G_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1432 Teuchos::SerialDenseMatrix<int,ScalarType> G22( Teuchos::View, *G_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1433 G11.assign(AU1TAU1tmp);
1434 G12.assign(AU1TAPtmp);
1436 for (
int ii=0;ii<recycleBlocks_;++ii)
1437 for (
int jj=0;jj<numBlocks_;++jj)
1438 G21(jj,ii) = G12(ii,jj);
1439 G22.assign(APTAPtmp);
1442 Teuchos::SerialDenseMatrix<int,ScalarType> Ytmp( Teuchos::View, *Y_, (recycleBlocks_+numBlocks_), recycleBlocks_ );
1443 getHarmonicVecs(Ftmp,Gtmp,Ytmp);
1446 index.resize( numBlocks_ );
1447 for (
int ii=0; ii<numBlocks_; ++ii) { index[ii] = ii+1; }
1448 Teuchos::RCP<const MV> Ptmp = MVT::CloneView( *P_, index );
1449 index.resize( recycleBlocks_ );
1450 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1451 Teuchos::RCP<MV> PY2tmp = MVT::CloneViewNonConst( *PY2_, index );
1452 Teuchos::SerialDenseMatrix<int,ScalarType> Y2( Teuchos::View, *Y_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1453 index.resize( recycleBlocks_ );
1454 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1455 Teuchos::RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1456 index.resize( recycleBlocks_ );
1457 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1458 Teuchos::RCP<MV> U1Y1tmp = MVT::CloneViewNonConst( *U1Y1_, index );
1459 Teuchos::SerialDenseMatrix<int,ScalarType> Y1( Teuchos::View, *Y_, recycleBlocks_, recycleBlocks_ );
1460 MVT::MvTimesMatAddMv( one, *Ptmp, Y2, zero, *PY2tmp );
1461 MVT::MvTimesMatAddMv( one, *U1tmp, Y1, zero, *U1Y1tmp );
1462 MVT::MvAddMv(one,*U1Y1tmp, one, *PY2tmp, *U1tmp);
1467 Teuchos::SerialDenseMatrix<int,ScalarType> GYtmp( Teuchos::View, *GY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1468 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1469 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1473 AU1TAPtmp.putScalar(zero);
1474 ScalarType alphatmp = -1.0 / Alphatmp(numBlocks_-1,0);
1475 for (
int ii=0; ii<recycleBlocks_; ++ii) {
1476 AU1TAPtmp(ii,0) = Ytmp(numBlocks_+recycleBlocks_-1,ii) * alphatmp;
1480 Teuchos::SerialDenseMatrix<int,ScalarType> FYtmp( Teuchos::View, *FY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1481 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1483 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1486 lastp = numBlocks_+1;
1487 lastBeta = numBlocks_;
1493 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *Alpha_, numBlocks_, 1 );
1494 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *Beta_, numBlocks_, 1 );
1495 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *D_, numBlocks_, 1 );
1496 Teuchos::SerialDenseMatrix<int,ScalarType> APTAPtmp( Teuchos::View, *APTAP_, numBlocks_, numBlocks_ );
1497 APTAPtmp.putScalar(zero);
1498 for (
int ii=0; ii<numBlocks_; ii++) {
1499 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii,0));
1501 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1502 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1506 Teuchos::SerialDenseMatrix<int,ScalarType> L2tmp( Teuchos::View, *L2_, numBlocks_+1, numBlocks_ );
1507 L2tmp.putScalar(zero);
1508 for(
int ii=0;ii<numBlocks_;ii++) {
1509 L2tmp(ii,ii) = 1./Alphatmp(ii,0);
1510 L2tmp(ii+1,ii) = -1./Alphatmp(ii,0);
1514 Teuchos::SerialDenseMatrix<int,ScalarType> AUTAPtmp( Teuchos::View, *AUTAP_, recycleBlocks_, numBlocks_ );
1515 Teuchos::SerialDenseMatrix<int,ScalarType> UTAUtmp( Teuchos::View, *UTAU_, recycleBlocks_, recycleBlocks_ );
1516 Teuchos::SerialDenseMatrix<int,ScalarType> Deltatmp( Teuchos::View, *Delta_, recycleBlocks_, numBlocks_+1 );
1517 Teuchos::SerialDenseMatrix<int,ScalarType> DeltaL2tmp( Teuchos::View, *DeltaL2_, recycleBlocks_, numBlocks_ );
1518 DeltaL2tmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Deltatmp,L2tmp,zero);
1519 AUTAPtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,UTAUtmp,DeltaL2tmp,zero);
1522 Teuchos::SerialDenseMatrix<int,ScalarType> Ftmp( Teuchos::View, *F_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1523 Teuchos::SerialDenseMatrix<int,ScalarType> F11( Teuchos::View, *F_, recycleBlocks_, recycleBlocks_ );
1524 Teuchos::SerialDenseMatrix<int,ScalarType> F12( Teuchos::View, *F_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1525 Teuchos::SerialDenseMatrix<int,ScalarType> F21( Teuchos::View, *F_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1526 Teuchos::SerialDenseMatrix<int,ScalarType> F22( Teuchos::View, *F_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1527 F11.assign(UTAUtmp);
1528 F12.putScalar(zero);
1529 F21.putScalar(zero);
1530 F22.putScalar(zero);
1531 for(
int ii=0;ii<numBlocks_;ii++) {
1532 F22(ii,ii) = Dtmp(ii,0);
1536 Teuchos::SerialDenseMatrix<int,ScalarType> Gtmp( Teuchos::View, *G_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1537 Teuchos::SerialDenseMatrix<int,ScalarType> G11( Teuchos::View, *G_, recycleBlocks_, recycleBlocks_ );
1538 Teuchos::SerialDenseMatrix<int,ScalarType> G12( Teuchos::View, *G_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1539 Teuchos::SerialDenseMatrix<int,ScalarType> G21( Teuchos::View, *G_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1540 Teuchos::SerialDenseMatrix<int,ScalarType> G22( Teuchos::View, *G_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1541 Teuchos::SerialDenseMatrix<int,ScalarType> AUTAUtmp( Teuchos::View, *AUTAU_, recycleBlocks_, recycleBlocks_ );
1542 G11.assign(AUTAUtmp);
1543 G12.assign(AUTAPtmp);
1545 for (
int ii=0;ii<recycleBlocks_;++ii)
1546 for (
int jj=0;jj<numBlocks_;++jj)
1547 G21(jj,ii) = G12(ii,jj);
1548 G22.assign(APTAPtmp);
1551 Teuchos::SerialDenseMatrix<int,ScalarType> Ytmp( Teuchos::View, *Y_, (recycleBlocks_+numBlocks_), recycleBlocks_ );
1552 getHarmonicVecs(Ftmp,Gtmp,Ytmp);
1555 index.resize( recycleBlocks_ );
1556 for (
int ii=0; ii<(recycleBlocks_); ++ii) { index[ii] = ii; }
1557 Teuchos::RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1558 index.resize( numBlocks_ );
1559 for (
int ii=0; ii<numBlocks_; ++ii) { index[ii] = ii; }
1560 Teuchos::RCP<const MV> Ptmp = MVT::CloneView( *P_, index );
1561 index.resize( recycleBlocks_ );
1562 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1563 Teuchos::RCP<MV> PY2tmp = MVT::CloneViewNonConst( *PY2_, index );
1564 Teuchos::SerialDenseMatrix<int,ScalarType> Y2( Teuchos::View, *Y_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1565 index.resize( recycleBlocks_ );
1566 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1567 Teuchos::RCP<MV> UY1tmp = MVT::CloneViewNonConst( *U1Y1_, index );
1568 Teuchos::SerialDenseMatrix<int,ScalarType> Y1( Teuchos::View, *Y_, recycleBlocks_, recycleBlocks_ );
1569 index.resize( recycleBlocks_ );
1570 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1571 Teuchos::RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1572 MVT::MvTimesMatAddMv( one, *Ptmp, Y2, zero, *PY2tmp );
1573 MVT::MvTimesMatAddMv( one, *Utmp, Y1, zero, *UY1tmp );
1574 MVT::MvAddMv(one,*UY1tmp, one, *PY2tmp, *U1tmp);
1579 Teuchos::SerialDenseMatrix<int,ScalarType> GYtmp( Teuchos::View, *GY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1580 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1581 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAU1tmp( Teuchos::View, *AU1TAU1_, recycleBlocks_, recycleBlocks_ );
1582 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1585 Teuchos::SerialDenseMatrix<int,ScalarType> FYtmp( Teuchos::View, *FY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1586 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1587 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TU1tmp( Teuchos::View, *AU1TU1_, recycleBlocks_, recycleBlocks_ );
1588 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1591 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TUtmp( Teuchos::View, *AU1TU_, recycleBlocks_, recycleBlocks_ );
1592 AU1TUtmp.assign(UTAUtmp);
1595 dold = Dtmp(numBlocks_-1,0);
1602 lastBeta = numBlocks_-1;
1605 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *Alpha_, numBlocks_, 1 );
1606 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *Beta_, numBlocks_+1, 1 );
1607 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *D_, numBlocks_, 1 );
1608 Teuchos::SerialDenseMatrix<int,ScalarType> APTAPtmp( Teuchos::View, *APTAP_, numBlocks_, numBlocks_ );
1609 for (
int ii=0; ii<numBlocks_; ii++) {
1610 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii+1,0));
1612 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1613 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1617 Teuchos::SerialDenseMatrix<int,ScalarType> L2tmp( Teuchos::View, *L2_, numBlocks_+1, numBlocks_ );
1618 for(
int ii=0;ii<numBlocks_;ii++) {
1619 L2tmp(ii,ii) = 1./Alphatmp(ii,0);
1620 L2tmp(ii+1,ii) = -1./Alphatmp(ii,0);
1625 Teuchos::SerialDenseMatrix<int,ScalarType> DeltaL2( Teuchos::View, *DeltaL2_, recycleBlocks_, numBlocks_ );
1626 Teuchos::SerialDenseMatrix<int,ScalarType> Deltatmp( Teuchos::View, *Delta_, recycleBlocks_, numBlocks_+1 );
1627 DeltaL2.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Deltatmp,L2tmp,zero);
1628 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TUDeltaL2( Teuchos::View, *AU1TUDeltaL2_, recycleBlocks_, numBlocks_ );
1629 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TUtmp( Teuchos::View, *AU1TU_, recycleBlocks_, recycleBlocks_ );
1630 AU1TUDeltaL2.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,AU1TUtmp,DeltaL2,zero);
1631 Teuchos::SerialDenseMatrix<int,ScalarType> Y1( Teuchos::View, *Y_, recycleBlocks_, recycleBlocks_ );
1632 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAPtmp( Teuchos::View, *AU1TAP_, recycleBlocks_, numBlocks_ );
1633 AU1TAPtmp.putScalar(zero);
1634 AU1TAPtmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Y1,AU1TUDeltaL2,zero);
1635 Teuchos::SerialDenseMatrix<int,ScalarType> Y2( Teuchos::View, *Y_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1636 ScalarType val = dold * (-Betatmp(0,0)/Alphatmp(0,0));
1637 for(
int ii=0;ii<recycleBlocks_;ii++) {
1638 AU1TAPtmp(ii,0) += Y2(numBlocks_-1,ii)*val;
1642 Teuchos::SerialDenseMatrix<int,ScalarType> Y1TAU1TU( Teuchos::View, *GY_, recycleBlocks_, recycleBlocks_ );
1643 Y1TAU1TU.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Y1,AU1TUtmp,zero);
1644 AU1TUtmp.assign(Y1TAU1TU);
1647 Teuchos::SerialDenseMatrix<int,ScalarType> Ftmp( Teuchos::View, *F_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1648 Teuchos::SerialDenseMatrix<int,ScalarType> F11( Teuchos::View, *F_, recycleBlocks_, recycleBlocks_ );
1649 Teuchos::SerialDenseMatrix<int,ScalarType> F12( Teuchos::View, *F_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1650 Teuchos::SerialDenseMatrix<int,ScalarType> F21( Teuchos::View, *F_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1651 Teuchos::SerialDenseMatrix<int,ScalarType> F22( Teuchos::View, *F_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1652 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TU1tmp( Teuchos::View, *AU1TU1_, recycleBlocks_, recycleBlocks_ );
1653 F11.assign(AU1TU1tmp);
1654 for(
int ii=0;ii<numBlocks_;ii++) {
1655 F22(ii,ii) = Dtmp(ii,0);
1659 Teuchos::SerialDenseMatrix<int,ScalarType> Gtmp( Teuchos::View, *G_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1660 Teuchos::SerialDenseMatrix<int,ScalarType> G11( Teuchos::View, *G_, recycleBlocks_, recycleBlocks_ );
1661 Teuchos::SerialDenseMatrix<int,ScalarType> G12( Teuchos::View, *G_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1662 Teuchos::SerialDenseMatrix<int,ScalarType> G21( Teuchos::View, *G_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1663 Teuchos::SerialDenseMatrix<int,ScalarType> G22( Teuchos::View, *G_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1664 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAU1tmp( Teuchos::View, *AU1TAU1_, recycleBlocks_, recycleBlocks_ );
1665 G11.assign(AU1TAU1tmp);
1666 G12.assign(AU1TAPtmp);
1668 for (
int ii=0;ii<recycleBlocks_;++ii)
1669 for (
int jj=0;jj<numBlocks_;++jj)
1670 G21(jj,ii) = G12(ii,jj);
1671 G22.assign(APTAPtmp);
1674 Teuchos::SerialDenseMatrix<int,ScalarType> Ytmp( Teuchos::View, *Y_, (recycleBlocks_+numBlocks_), recycleBlocks_ );
1675 getHarmonicVecs(Ftmp,Gtmp,Ytmp);
1678 index.resize( numBlocks_ );
1679 for (
int ii=0; ii<numBlocks_; ++ii) { index[ii] = ii+1; }
1680 Teuchos::RCP<const MV> Ptmp = MVT::CloneView( *P_, index );
1681 index.resize( recycleBlocks_ );
1682 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1683 Teuchos::RCP<MV> PY2tmp = MVT::CloneViewNonConst( *PY2_, index );
1684 index.resize( recycleBlocks_ );
1685 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1686 Teuchos::RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1687 index.resize( recycleBlocks_ );
1688 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1689 Teuchos::RCP<MV> U1Y1tmp = MVT::CloneViewNonConst( *U1Y1_, index );
1690 MVT::MvTimesMatAddMv( one, *Ptmp, Y2, zero, *PY2tmp );
1691 MVT::MvTimesMatAddMv( one, *U1tmp, Y1, zero, *U1Y1tmp );
1692 MVT::MvAddMv(one,*U1Y1tmp, one, *PY2tmp, *U1tmp);
1697 Teuchos::SerialDenseMatrix<int,ScalarType> GYtmp( Teuchos::View, *GY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1698 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1699 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1702 Teuchos::SerialDenseMatrix<int,ScalarType> FYtmp( Teuchos::View, *FY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1703 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1704 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1707 dold = Dtmp(numBlocks_-1,0);
1710 lastp = numBlocks_+1;
1711 lastBeta = numBlocks_;
1721 index[0] = lastp-1; index[1] = lastp;
1722 Teuchos::RCP<const MV> Ptmp2 = MVT::CloneView( *P_, index );
1723 index[0] = 0; index[1] = 1;
1724 MVT::SetBlock(*Ptmp2,index,*P_);
1727 (*Beta_)(0,0) = (*Beta_)(lastBeta,0);
1731 Teuchos::SerialDenseMatrix<int,ScalarType> mu1( Teuchos::View, *Delta_, recycleBlocks_, 1, 0, 0 );
1732 Teuchos::SerialDenseMatrix<int,ScalarType> mu2( Teuchos::View, *Delta_, recycleBlocks_, 1, 0, numBlocks_ );
1737 newstate.
P = Teuchos::null;
1738 index.resize( numBlocks_+1 );
1739 for (
int ii=0; ii<(numBlocks_+1); ++ii) { index[ii] = ii+1; }
1740 newstate.
P = MVT::CloneViewNonConst( *P_, index );
1742 newstate.
Beta = Teuchos::null;
1743 newstate.
Beta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Beta_, numBlocks_, 1, 1, 0 ) );
1745 newstate.
Delta = Teuchos::null;
1746 newstate.
Delta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Delta_, recycleBlocks_, numBlocks_, 0, 1 ) );
1751 rcg_iter->initialize(newstate);
1764 TEUCHOS_TEST_FOR_EXCEPTION(
true,std::logic_error,
1765 "Belos::RCGSolMgr::solve(): Invalid return from RCGIter::iterate().");
1768 catch (
const std::exception &e) {
1769 printer_->stream(
Errors) <<
"Error! Caught std::exception in RCGIter::iterate() at iteration "
1770 << rcg_iter->getNumIters() << std::endl
1771 << e.what() << std::endl;
1777 problem_->setCurrLS();
1781 if ( numRHS2Solve > 0 ) {
1784 problem_->setLSIndex( currIdx );
1787 currIdx.resize( numRHS2Solve );
1793 index.resize(recycleBlocks_);
1794 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1795 MVT::SetBlock(*U1_,index,*U_);
1798 if (numRHS2Solve > 0) {
1800 newstate.
P = Teuchos::null;
1801 newstate.
Ap = Teuchos::null;
1802 newstate.
r = Teuchos::null;
1803 newstate.
z = Teuchos::null;
1804 newstate.
U = Teuchos::null;
1805 newstate.
AU = Teuchos::null;
1806 newstate.
Alpha = Teuchos::null;
1807 newstate.
Beta = Teuchos::null;
1808 newstate.
D = Teuchos::null;
1809 newstate.
Delta = Teuchos::null;
1810 newstate.
LUUTAU = Teuchos::null;
1811 newstate.
ipiv = Teuchos::null;
1812 newstate.
rTz_old = Teuchos::null;
1815 index.resize(recycleBlocks_);
1816 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1817 Teuchos::RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1818 index.resize(recycleBlocks_);
1819 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1820 Teuchos::RCP<MV> AUtmp = MVT::CloneViewNonConst( *AU_, index );
1822 problem_->applyOp( *Utmp, *AUtmp );
1824 Teuchos::SerialDenseMatrix<int,ScalarType> UTAUtmp( Teuchos::View, *UTAU_, recycleBlocks_, recycleBlocks_ );
1825 MVT::MvTransMv( one, *Utmp, *AUtmp, UTAUtmp );
1827 Teuchos::SerialDenseMatrix<int,ScalarType> AUTAUtmp( Teuchos::View, *AUTAU_, recycleBlocks_, recycleBlocks_ );
1828 if ( precObj != Teuchos::null ) {
1829 index.resize(recycleBlocks_);
1830 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1831 index.resize(recycleBlocks_);
1832 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1833 Teuchos::RCP<MV> LeftPCAU = MVT::CloneViewNonConst( *U1_, index );
1834 OPT::Apply( *precObj, *AUtmp, *LeftPCAU );
1835 MVT::MvTransMv( one, *AUtmp, *LeftPCAU, AUTAUtmp );
1837 MVT::MvTransMv( one, *AUtmp, *AUtmp, AUTAUtmp );
1850#ifdef BELOS_TEUCHOS_TIME_MONITOR
1855 Teuchos::TimeMonitor::summarize( printer_->stream(
TimingDetails) );
1859 numIters_ = maxIterTest_->getNumIters();
1863 using Teuchos::rcp_dynamic_cast;
1866 const std::vector<MagnitudeType>* pTestValues =
1867 rcp_dynamic_cast<conv_test_type>(convTest_)->
getTestValue();
1869 TEUCHOS_TEST_FOR_EXCEPTION(pTestValues == NULL, std::logic_error,
1870 "Belos::RCGSolMgr::solve(): The convergence test's getTestValue() "
1871 "method returned NULL. Please report this bug to the Belos developers.");
1873 TEUCHOS_TEST_FOR_EXCEPTION(pTestValues->size() < 1, std::logic_error,
1874 "Belos::RCGSolMgr::solve(): The convergence test's getTestValue() "
1875 "method returned a vector of length zero. Please report this bug to the "
1876 "Belos developers.");
1881 achievedTol_ = *std::max_element (pTestValues->begin(), pTestValues->end());