10#ifndef TPETRA_MATRIXMATRIX_DEF_HPP
11#define TPETRA_MATRIXMATRIX_DEF_HPP
13#include "KokkosSparse_Utils.hpp"
14#include "Tpetra_ConfigDefs.hpp"
16#include "Teuchos_VerboseObject.hpp"
17#include "Teuchos_Array.hpp"
19#include "Tpetra_CrsMatrix.hpp"
20#include "Tpetra_BlockCrsMatrix.hpp"
22#include "Tpetra_RowMatrixTransposer.hpp"
25#include "Tpetra_Details_makeColMap.hpp"
26#include "Tpetra_ConfigDefs.hpp"
27#include "Tpetra_Map.hpp"
28#include "Tpetra_Export.hpp"
33#include "Teuchos_FancyOStream.hpp"
35#include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp"
38#include "KokkosSparse_spgemm.hpp"
39#include "KokkosSparse_spadd.hpp"
40#include "Kokkos_Bitset.hpp"
54#include "TpetraExt_MatrixMatrix_OpenMP.hpp"
55#include "TpetraExt_MatrixMatrix_Cuda.hpp"
56#include "TpetraExt_MatrixMatrix_HIP.hpp"
57#include "TpetraExt_MatrixMatrix_SYCL.hpp"
61namespace MatrixMatrix{
69template <
class Scalar,
80 const std::string& label,
81 const Teuchos::RCP<Teuchos::ParameterList>&
params)
96#ifdef HAVE_TPETRA_MMM_TIMINGS
97 std::string
prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
98 using Teuchos::TimeMonitor;
104 const std::string
prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
129 const bool newFlag = !
C.getGraph()->isLocallyIndexed() && !
C.getGraph()->isGloballyIndexed();
135#ifdef USE_OLD_TRANSPOSE
139 using Teuchos::ParameterList;
167 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
168 "must match for matrix-matrix product. op(A) is "
176 prefix <<
"ERROR, dimensions of result C must "
177 "match dimensions of op(A) * op(B). C has " <<
C.getGlobalNumRows()
178 <<
" rows, should have at least " <<
Aouter << std::endl);
186 if (!
C.isFillActive())
C.resumeFill();
200#ifdef HAVE_TPETRA_MMM_TIMINGS
222#ifdef HAVE_TPETRA_MMM_TIMINGS
230 MMdetails::mult_AT_B_newmatrix(
A,
B,
C, label,
params);
247#ifdef HAVE_TPETRA_MMM_TIMINGS
257 C.fillComplete(
Bprime->getDomainMap(),
Aprime->getRangeMap());
275 const std::string& label)
288 std::string
prefix = std::string(
"TpetraExt ") + label + std::string(
": ");
304 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
305 "must match for matrix-matrix product. op(A) is "
312 const LO blocksize =
A->getBlockSize();
314 prefix <<
"ERROR, Blocksizes do not match. A.blocksize = " <<
315 blocksize <<
", B.blocksize = " <<
B->getBlockSize() );
335 MMdetails::import_and_extract_views(*
B,
targetMap_B,
Bview,
A->getGraph()->getImporter(),
336 A->getGraph()->getImporter().is_null());
352 const std::string& label,
353 const Teuchos::RCP<Teuchos::ParameterList>&
params)
365#ifdef HAVE_TPETRA_MMM_TIMINGS
366 std::string
prefix_mmm = std::string(
"TpetraExt ")+ label + std::string(
": ");
367 using Teuchos::TimeMonitor;
371 const std::string
prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
390 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
391 "must match for matrix-matrix product. op(A) is "
399 prefix <<
"ERROR, dimensions of result C must "
400 "match dimensions of op(A) * op(B). C has "<<
C.getGlobalNumRows()
401 <<
" rows, should have at least "<<
Aouter << std::endl);
423#ifdef HAVE_TPETRA_MMM_TIMINGS
432 importParams1->set(
"compute global constants",
params->get(
"compute global constants: temporaries",
false));
434 auto slist =
params->sublist(
"matrixmatrix: kernel params",
false);
438 bool isMM =
slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
442 ip1slist.set(
"isMatrixMatrix_TransferAndFillComplete",
isMM);
460 importParams2->set(
"compute global constants",
params->get(
"compute global constants: temporaries",
false));
462 auto slist =
params->sublist(
"matrixmatrix: kernel params",
false);
464 bool isMM =
slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
470 ip2slist.set(
"isMatrixMatrix_TransferAndFillComplete",
isMM);
476#ifdef HAVE_TPETRA_MMM_TIMINGS
485 bool newFlag = !
C.getGraph()->isLocallyIndexed() && !
C.getGraph()->isGloballyIndexed();
500 typedef Teuchos::ScalarTraits<Scalar> STS;
501 typename STS::magnitudeType
threshold =
params->get(
"remove zeros threshold", STS::magnitude(STS::zero()));
519 using Teuchos::Array;
529 const std::string
prefix =
"TpetraExt::MatrixMatrix::Add(): ";
532 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. "
533 "(Result matrix B is not required to be isFillComplete()).");
535 prefix <<
"ERROR, input matrix B must not be fill complete!");
537 prefix <<
"ERROR, input matrix B must not have static graph!");
539 prefix <<
"ERROR, input matrix B must not be locally indexed!");
541 using Teuchos::ParameterList;
555 typename crs_matrix_type::nonconst_global_inds_host_view_type
a_inds(
"a_inds",
A.getLocalMaxNumRowEntries());
556 typename crs_matrix_type::nonconst_values_host_view_type
a_vals(
"a_vals",
A.getLocalMaxNumRowEntries());
559 if (
scalarB != Teuchos::ScalarTraits<SC>::one())
563 if (
scalarA != Teuchos::ScalarTraits<SC>::zero()) {
565 row =
B.getRowMap()->getGlobalElement(
i);
568 if (
scalarA != Teuchos::ScalarTraits<SC>::one()) {
581Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
590 const Teuchos::RCP<Teuchos::ParameterList>&
params)
593 using Teuchos::rcpFromRef;
595 using Teuchos::ParameterList;
600 params->isParameter(
"Call fillComplete") && !
params->get<
bool>(
"Call fillComplete"),
601 std::invalid_argument,
602 "Tpetra::MatrixMatrix::add(): this version of add() always calls fillComplete\n"
603 "on the result, but you explicitly set 'Call fillComplete' = false in the parameter list. Don't set this explicitly.");
604 params->set(
"Call fillComplete",
true);
616 (!
A.isFillComplete () || !
Brcp->isFillComplete (), std::invalid_argument,
617 "TpetraExt::MatrixMatrix::add(): A and B must both be fill complete.");
628template<
class LO,
class GO,
class LOView,
class GOView,
class LocalMap>
629struct ConvertGlobalToLocalFunctor
635 KOKKOS_FUNCTION
void operator() (
const GO i)
const
637 lids(i) = localColMap.getLocalElement(gids(i));
642 const LocalMap localColMap;
645template <
class Scalar,
659 const Teuchos::RCP<Teuchos::ParameterList>&
params)
663 using Teuchos::rcpFromRef;
664 using Teuchos::rcp_implicit_cast;
665 using Teuchos::rcp_dynamic_cast;
666 using Teuchos::TimeMonitor;
677 using exec_space =
typename crs_graph_type::execution_space;
678 using AddKern = MMdetails::AddKernels<SC,LO,GO,NO>;
679 const char*
prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
680 constexpr bool debug =
false;
682#ifdef HAVE_TPETRA_MMM_TIMINGS
687 std::ostringstream
os;
688 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
689 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
690 std::cerr <<
os.str ();
694 (
C.isLocallyIndexed() ||
C.isGloballyIndexed(), std::invalid_argument,
695 prefix_mmm <<
"C must be a 'new' matrix (neither locally nor globally indexed).");
697 (!
A.isFillComplete () || !
B.isFillComplete (), std::invalid_argument,
698 prefix_mmm <<
"A and B must both be fill complete.");
699#ifdef HAVE_TPETRA_DEBUG
701 if (
A.isFillComplete () &&
B.isFillComplete ()) {
704 !
A.getDomainMap()->locallySameAs (*
B.getDomainMap ())) ||
706 !
A.getDomainMap()->isSameAs (*
B.getRangeMap ())) ||
708 !
A.getRangeMap ()->isSameAs (*
B.getDomainMap ()));
710 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
714 !
A.getRangeMap ()->isSameAs (*
B.getRangeMap ())) ||
716 !
A.getRangeMap ()->isSameAs (*
B.getDomainMap())) ||
718 !
A.getDomainMap()->isSameAs (*
B.getRangeMap ()));
720 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
724 using Teuchos::ParameterList;
732#ifdef HAVE_TPETRA_DEBUG
734 (
Aprime.is_null (), std::logic_error,
736 "Please report this bug to the Tpetra developers.");
743 std::ostringstream
os;
744 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
745 <<
"Form explicit xpose of B" << std::endl;
746 std::cerr <<
os.str ();
751#ifdef HAVE_TPETRA_DEBUG
753 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
755 !
Aprime->isFillComplete () || !
Bprime->isFillComplete (), std::invalid_argument,
756 prefix_mmm <<
"Aprime and Bprime must both be fill complete. "
757 "Please report this bug to the Tpetra developers.");
771 typedef typename AddKern::values_array values_array;
772 typedef typename AddKern::row_ptrs_array row_ptrs_array;
773 typedef typename AddKern::col_inds_array col_inds_array;
779#ifdef HAVE_TPETRA_MMM_TIMINGS
783 if(!(
Aprime->getRowMap()->isSameAs(*(
Bprime->getRowMap()))))
786 auto import = rcp(new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
798 if(Teuchos::nonnull(
params) &&
params->isParameter(
"Call fillComplete"))
812 rowptrs = row_ptrs_array(
"C rowptrs", 0);
818 using global_col_inds_array =
typename AddKern::global_col_inds_array;
819#ifdef HAVE_TPETRA_MMM_TIMINGS
828 std::ostringstream
os;
829 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
830 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
831 std::cerr <<
os.str ();
833 AddKern::convertToGlobalAndAdd(
837 std::ostringstream
os;
838 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
839 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
840 std::cerr <<
os.str ();
842#ifdef HAVE_TPETRA_MMM_TIMINGS
847 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>
851 Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0,
globalColinds.extent(0)),
853 col_inds_array, global_col_inds_array,
854 typename map_type::local_map_type>
856 KokkosSparse::sort_crs_matrix<exec_space, row_ptrs_array, col_inds_array, values_array>(
rowptrs,
localColinds,
vals);
867 auto Arowptrs =
Alocal.graph.row_map;
868 auto Browptrs =
Blocal.graph.row_map;
869 auto Acolinds =
Alocal.graph.entries;
870 auto Bcolinds =
Blocal.graph.entries;
874#ifdef HAVE_TPETRA_MMM_TIMINGS
879 std::ostringstream
os;
880 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
881 <<
"Call AddKern::addSorted(...)" << std::endl;
882 std::cerr <<
os.str ();
884#if KOKKOSKERNELS_VERSION >= 40299
885 AddKern::addSorted(
Avals, Arowptrs, Acolinds,
alpha,
Bvals, Browptrs, Bcolinds,
beta,
Aprime->getGlobalNumCols(),
vals,
rowptrs,
colinds);
887 AddKern::addSorted(
Avals, Arowptrs, Acolinds,
alpha,
Bvals, Browptrs, Bcolinds,
beta,
vals,
rowptrs,
colinds);
893#ifdef HAVE_TPETRA_MMM_TIMINGS
898 std::ostringstream
os;
899 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
900 <<
"Call AddKern::addUnsorted(...)" << std::endl;
901 std::cerr <<
os.str ();
903 AddKern::addUnsorted(
Avals, Arowptrs, Acolinds,
alpha,
Bvals, Browptrs, Bcolinds,
beta,
Aprime->getGlobalNumCols(),
vals,
rowptrs,
colinds);
911 #ifdef HAVE_TPETRA_MMM_TIMINGS
918 std::ostringstream
os;
919 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
920 <<
"Create Cimport" << std::endl;
921 std::cerr <<
os.str ();
928 std::ostringstream
os;
929 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
930 <<
"Create Cexport" << std::endl;
931 std::cerr <<
os.str ();
937 std::ostringstream
os;
938 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
939 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
940 std::cerr <<
os.str ();
962 using Teuchos::Array;
963 using Teuchos::ArrayRCP;
964 using Teuchos::ArrayView;
967 using Teuchos::rcp_dynamic_cast;
968 using Teuchos::rcpFromRef;
969 using Teuchos::tuple;
972 typedef Teuchos::ScalarTraits<Scalar> STS;
980 std::string
prefix =
"TpetraExt::MatrixMatrix::Add(): ";
983 !
A.isFillComplete () || !
B.isFillComplete (), std::invalid_argument,
984 prefix <<
"A and B must both be fill complete before calling this function.");
988 prefix <<
"C is null (must be allocated), but A.haveGlobalConstants() is false. "
989 "Please report this bug to the Tpetra developers.");
991 prefix <<
"C is null (must be allocated), but B.haveGlobalConstants() is false. "
992 "Please report this bug to the Tpetra developers.");
995#ifdef HAVE_TPETRA_DEBUG
1002 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
1009 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
1013 using Teuchos::ParameterList;
1027#ifdef HAVE_TPETRA_DEBUG
1029 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
1042#ifdef HAVE_TPETRA_DEBUG
1044 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
1050 if (!
C.is_null ()) {
1054 C->setAllToScalar (STS::zero ());
1064 if(
Aprime->getRowMap()->isSameAs(*
Bprime->getRowMap())) {
1074 C =
rcp (
new crs_matrix_type (
Aprime->getRowMap (),
Aprime->getGlobalMaxNumRowEntries() +
Bprime->getGlobalMaxNumRowEntries()));
1078#ifdef HAVE_TPETRA_DEBUG
1080 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1082 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1084 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1092 for (
int k = 0;
k < 2; ++
k) {
1093 typename crs_matrix_type::nonconst_global_inds_host_view_type
Indices;
1094 typename crs_matrix_type::nonconst_values_host_view_type
Values;
1102#ifdef HAVE_TPETRA_DEBUG
1104 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1107#ifdef HAVE_TPETRA_DEBUG
1109 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1115 size_t numEntries =
Mat[
k]->getNumEntriesInGlobalRow (
globalRow);
1116 if (numEntries > 0) {
1117 if(numEntries >
Indices.extent(0)) {
1118 Kokkos::resize(
Indices, numEntries);
1119 Kokkos::resize(
Values, numEntries);
1123 if (
scalar[
k] != STS::one ()) {
1124 for (
size_t j = 0;
j < numEntries; ++
j) {
1133 prefix <<
"sumIntoGlobalValues failed to add entries from A or B into C.");
1135 C->insertGlobalValues (
globalRow, numEntries,
1142 C->fillComplete(
C->getDomainMap (),
1162 std::string
prefix =
"TpetraExt::MatrixMatrix::Add(): ";
1165 prefix <<
"C must not be null");
1167 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
C_ =
C;
1225template<
class Scalar,
1227 class GlobalOrdinal,
1229void mult_AT_B_newmatrix(
1230 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1231 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1232 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1233 const std::string & label,
1234 const Teuchos::RCP<Teuchos::ParameterList>& params)
1239 typedef LocalOrdinal LO;
1240 typedef GlobalOrdinal GO;
1242 typedef CrsMatrixStruct<SC,LO,GO,NO> crs_matrix_struct_type;
1243 typedef RowMatrixTransposer<SC,LO,GO,NO> transposer_type;
1245#ifdef HAVE_TPETRA_MMM_TIMINGS
1246 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1247 using Teuchos::TimeMonitor;
1248 RCP<TimeMonitor> MM = rcp (
new TimeMonitor
1249 (*TimeMonitor::getNewTimer (prefix_mmm +
"MMM-T Transpose")));
1255 transposer_type transposer (rcpFromRef (A), label + std::string(
"XP: "));
1257 using Teuchos::ParameterList;
1258 RCP<ParameterList> transposeParams (
new ParameterList);
1259 transposeParams->set (
"sort",
true);
1260 if(! params.is_null ()) {
1261 transposeParams->set (
"compute global constants",
1262 params->get (
"compute global constants: temporaries",
1265 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Atrans =
1266 transposer.createTransposeLocal (transposeParams);
1271#ifdef HAVE_TPETRA_MMM_TIMINGS
1273 MM = rcp (
new TimeMonitor
1274 (*TimeMonitor::getNewTimer (prefix_mmm + std::string (
"MMM-T I&X"))));
1278 crs_matrix_struct_type Aview;
1279 crs_matrix_struct_type Bview;
1280 RCP<const Import<LO, GO, NO> > dummyImporter;
1283 RCP<Teuchos::ParameterList> importParams1 (
new ParameterList);
1284 if (! params.is_null ()) {
1285 importParams1->set (
"compute global constants",
1286 params->get (
"compute global constants: temporaries",
1288 auto slist = params->sublist (
"matrixmatrix: kernel params",
false);
1289 bool isMM = slist.get (
"isMatrixMatrix_TransferAndFillComplete",
false);
1290 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1291 int mm_optimization_core_count =
1293 mm_optimization_core_count =
1294 slist.get (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1295 int mm_optimization_core_count2 =
1296 params->get (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1297 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1298 mm_optimization_core_count = mm_optimization_core_count2;
1300 auto & sip1 = importParams1->sublist (
"matrixmatrix: kernel params",
false);
1301 sip1.set (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1302 sip1.set (
"isMatrixMatrix_TransferAndFillComplete", isMM);
1303 sip1.set (
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1306 MMdetails::import_and_extract_views (*Atrans, Atrans->getRowMap (),
1307 Aview, dummyImporter,
true,
1308 label, importParams1);
1310 RCP<ParameterList> importParams2 (
new ParameterList);
1311 if (! params.is_null ()) {
1312 importParams2->set (
"compute global constants",
1313 params->get (
"compute global constants: temporaries",
1315 auto slist = params->sublist (
"matrixmatrix: kernel params",
false);
1316 bool isMM = slist.get (
"isMatrixMatrix_TransferAndFillComplete",
false);
1317 bool overrideAllreduce = slist.get (
"MM_TAFC_OverrideAllreduceCheck",
false);
1318 int mm_optimization_core_count =
1320 mm_optimization_core_count =
1321 slist.get (
"MM_TAFC_OptimizationCoreCount",
1322 mm_optimization_core_count);
1323 int mm_optimization_core_count2 =
1324 params->get (
"MM_TAFC_OptimizationCoreCount",
1325 mm_optimization_core_count);
1326 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1327 mm_optimization_core_count = mm_optimization_core_count2;
1329 auto & sip2 = importParams2->sublist (
"matrixmatrix: kernel params",
false);
1330 sip2.set (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1331 sip2.set (
"isMatrixMatrix_TransferAndFillComplete", isMM);
1332 sip2.set (
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1335 if(B.getRowMap()->isSameAs(*Atrans->getColMap())){
1336 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label,importParams2);
1339 MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter,
false, label,importParams2);
1342#ifdef HAVE_TPETRA_MMM_TIMINGS
1344 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T AB-core"))));
1347 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ctemp;
1350 bool needs_final_export = ! Atrans->getGraph ()->getExporter ().is_null();
1351 if (needs_final_export) {
1355 Ctemp = rcp (&C,
false);
1358 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label,params);
1363#ifdef HAVE_TPETRA_MMM_TIMINGS
1365 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T exportAndFillComplete"))));
1368 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Crcp (&C,
false);
1370 if (needs_final_export) {
1371 ParameterList labelList;
1372 labelList.set(
"Timer Label", label);
1373 if(!params.is_null()) {
1374 ParameterList& params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
1375 ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
1377 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1378 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1379 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
1380 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count,
"Core Count above which the optimized neighbor discovery is used");
1381 bool isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1382 bool overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1384 labelList_subList.set (
"isMatrixMatrix_TransferAndFillComplete", isMM,
1385 "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
1386 labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
1387 labelList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
1389 Ctemp->exportAndFillComplete (Crcp,
1390 *Ctemp->getGraph ()->getExporter (),
1393 rcp (&labelList,
false));
1395#ifdef HAVE_TPETRA_MMM_STATISTICS
1396 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1402template<
class Scalar,
1404 class GlobalOrdinal,
1407 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1408 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1409 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1410 const std::string& ,
1411 const Teuchos::RCP<Teuchos::ParameterList>& )
1413 using Teuchos::Array;
1414 using Teuchos::ArrayRCP;
1415 using Teuchos::ArrayView;
1416 using Teuchos::OrdinalTraits;
1417 using Teuchos::null;
1419 typedef Teuchos::ScalarTraits<Scalar> STS;
1421 LocalOrdinal C_firstCol = Bview.
colMap->getMinLocalIndex();
1422 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1424 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1425 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1427 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getLocalElementList();
1428 ArrayView<const GlobalOrdinal> bcols_import = null;
1429 if (Bview.importColMap != null) {
1430 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1431 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1433 bcols_import = Bview.importColMap->getLocalElementList();
1436 size_t C_numCols = C_lastCol - C_firstCol +
1437 OrdinalTraits<LocalOrdinal>::one();
1438 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1439 OrdinalTraits<LocalOrdinal>::one();
1441 if (C_numCols_import > C_numCols)
1442 C_numCols = C_numCols_import;
1444 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1445 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1446 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1448 Array<Scalar> C_row_i = dwork;
1449 Array<GlobalOrdinal> C_cols = iwork;
1450 Array<size_t> c_index = iwork2;
1451 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1452 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1454 size_t C_row_i_length, j, k, last_index;
1457 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1458 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getLocalNumElements(),LO_INVALID);
1459 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getLocalNumElements(),LO_INVALID);
1460 if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
1462 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1463 Aview.colMap->getMaxLocalIndex(); i++)
1468 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1469 Aview.colMap->getMaxLocalIndex(); i++) {
1470 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1471 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1472 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1473 else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1483 auto Arowptr = Aview.origMatrix->getLocalRowPtrsHost();
1484 auto Acolind = Aview.origMatrix->getLocalIndicesHost();
1485 auto Avals = Aview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1486 auto Browptr = Bview.origMatrix->getLocalRowPtrsHost();
1487 auto Bcolind = Bview.origMatrix->getLocalIndicesHost();
1488 auto Bvals = Bview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1489 decltype(Browptr) Irowptr;
1490 decltype(Bcolind) Icolind;
1491 decltype(Bvals) Ivals;
1492 if(!Bview.importMatrix.is_null()) {
1493 Irowptr = Bview.importMatrix->getLocalRowPtrsHost();
1494 Icolind = Bview.importMatrix->getLocalIndicesHost();
1495 Ivals = Bview.importMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1498 bool C_filled = C.isFillComplete();
1500 for (
size_t i = 0; i < C_numCols; i++)
1501 c_index[i] = OrdinalTraits<size_t>::invalid();
1504 size_t Arows = Aview.rowMap->getLocalNumElements();
1505 for(
size_t i=0; i<Arows; ++i) {
1509 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1515 C_row_i_length = OrdinalTraits<size_t>::zero();
1517 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1518 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1519 const Scalar Aval = Avals[k];
1520 if (Aval == STS::zero())
1523 if (Ak == LO_INVALID)
1526 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1527 LocalOrdinal col = Bcolind[j];
1530 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1533 C_row_i[C_row_i_length] = Aval*Bvals[j];
1534 C_cols[C_row_i_length] = col;
1535 c_index[col] = C_row_i_length;
1540 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Bvals[j]);
1545 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1546 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1547 C_cols[ii] = bcols[C_cols[ii]];
1548 combined_index[ii] = C_cols[ii];
1549 combined_values[ii] = C_row_i[ii];
1551 last_index = C_row_i_length;
1557 C_row_i_length = OrdinalTraits<size_t>::zero();
1559 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1560 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1561 const Scalar Aval = Avals[k];
1562 if (Aval == STS::zero())
1565 if (Ak!=LO_INVALID)
continue;
1567 Ak = Acol2Irow[Acolind[k]];
1568 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1569 LocalOrdinal col = Icolind[j];
1572 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1575 C_row_i[C_row_i_length] = Aval*Ivals[j];
1576 C_cols[C_row_i_length] = col;
1577 c_index[col] = C_row_i_length;
1583 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Ivals[j]);
1588 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1589 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1590 C_cols[ii] = bcols_import[C_cols[ii]];
1591 combined_index[last_index] = C_cols[ii];
1592 combined_values[last_index] = C_row_i[ii];
1599 C.sumIntoGlobalValues(
1601 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1602 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1604 C.insertGlobalValues(
1606 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1607 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1613template<
class Scalar,
1615 class GlobalOrdinal,
1617void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1618 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1619 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1621 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1622 Mview.maxNumRowEntries = Mview.indices[0].size();
1624 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1625 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1626 Mview.maxNumRowEntries = Mview.indices[i].size();
1631template<
class CrsMatrixType>
1632size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1634 size_t Aest = 100, Best=100;
1635 if (A.getLocalNumEntries() >= A.getLocalNumRows())
1636 Aest = (A.getLocalNumRows() > 0) ? A.getLocalNumEntries()/A.getLocalNumRows() : 100;
1637 if (B.getLocalNumEntries() >= B.getLocalNumRows())
1638 Best = (B.getLocalNumRows() > 0) ? B.getLocalNumEntries()/B.getLocalNumRows() : 100;
1640 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1641 nnzperrow *= nnzperrow;
1643 return (
size_t)(A.getLocalNumRows()*nnzperrow*0.75 + 100);
1653template<
class Scalar,
1655 class GlobalOrdinal,
1657void mult_A_B_newmatrix(
1658 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1659 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1660 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1661 const std::string& label,
1662 const Teuchos::RCP<Teuchos::ParameterList>& params)
1664 using Teuchos::Array;
1665 using Teuchos::ArrayRCP;
1666 using Teuchos::ArrayView;
1671 typedef LocalOrdinal LO;
1672 typedef GlobalOrdinal GO;
1674 typedef Import<LO,GO,NO> import_type;
1675 typedef Map<LO,GO,NO> map_type;
1678 typedef typename map_type::local_map_type local_map_type;
1680 typedef typename KCRS::StaticCrsGraphType graph_t;
1681 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1682 typedef typename NO::execution_space execution_space;
1683 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1684 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1686#ifdef HAVE_TPETRA_MMM_TIMINGS
1687 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1688 using Teuchos::TimeMonitor;
1689 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM M5 Cmap")))));
1691 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1694 RCP<const import_type> Cimport;
1695 RCP<const map_type> Ccolmap;
1696 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1697 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1698 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1699 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1700 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1701 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1702 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1703 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1710 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
1712 if (Bview.importMatrix.is_null()) {
1715 Ccolmap = Bview.colMap;
1716 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1718 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1719 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1720 KOKKOS_LAMBDA(
const LO i) {
1733 if (!Bimport.is_null() && !Iimport.is_null()) {
1734 Cimport = Bimport->setUnion(*Iimport);
1736 else if (!Bimport.is_null() && Iimport.is_null()) {
1737 Cimport = Bimport->setUnion();
1739 else if (Bimport.is_null() && !Iimport.is_null()) {
1740 Cimport = Iimport->setUnion();
1743 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1745 Ccolmap = Cimport->getTargetMap();
1750 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1751 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1758 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
1759 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1760 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
1761 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1763 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
1764 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1773 C.replaceColMap(Ccolmap);
1791 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
1792 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
1794 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
1795 GO aidx = Acolmap_local.getGlobalElement(i);
1796 LO B_LID = Browmap_local.getLocalElement(aidx);
1797 if (B_LID != LO_INVALID) {
1798 targetMapToOrigRow(i) = B_LID;
1799 targetMapToImportRow(i) = LO_INVALID;
1801 LO I_LID = Irowmap_local.getLocalElement(aidx);
1802 targetMapToOrigRow(i) = LO_INVALID;
1803 targetMapToImportRow(i) = I_LID;
1810 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
1816template<
class Scalar,
1818 class GlobalOrdinal,
1820void mult_A_B_newmatrix(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1821 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1822 Teuchos::RCP<BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &C)
1824 using Teuchos::null;
1825 using Teuchos::Array;
1826 using Teuchos::ArrayRCP;
1827 using Teuchos::ArrayView;
1832 typedef LocalOrdinal LO;
1833 typedef GlobalOrdinal GO;
1835 typedef Import<LO,GO,NO> import_type;
1836 typedef Map<LO,GO,NO> map_type;
1837 typedef BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> block_crs_matrix_type;
1838 typedef typename block_crs_matrix_type::crs_graph_type graph_t;
1841 typedef typename map_type::local_map_type local_map_type;
1842 typedef typename block_crs_matrix_type::local_matrix_device_type KBSR;
1843 typedef typename KBSR::device_type device_t;
1844 typedef typename KBSR::StaticCrsGraphType static_graph_t;
1845 typedef typename static_graph_t::row_map_type::non_const_type lno_view_t;
1846 typedef typename static_graph_t::entries_type::non_const_type lno_nnz_view_t;
1847 typedef typename KBSR::values_type::non_const_type scalar_view_t;
1848 typedef typename NO::execution_space execution_space;
1849 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1850 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1852 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1855 RCP<const import_type> Cimport;
1856 RCP<const map_type> Ccolmap;
1857 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1858 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1859 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1860 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1861 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1862 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1863 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1864 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1871 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
1873 if (Bview.importMatrix.is_null()) {
1876 Ccolmap = Bview.colMap;
1877 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1879 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1880 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1881 KOKKOS_LAMBDA(
const LO i) {
1894 if (!Bimport.is_null() && !Iimport.is_null()) {
1895 Cimport = Bimport->setUnion(*Iimport);
1897 else if (!Bimport.is_null() && Iimport.is_null()) {
1898 Cimport = Bimport->setUnion();
1900 else if (Bimport.is_null() && !Iimport.is_null()) {
1901 Cimport = Iimport->setUnion();
1904 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1906 Ccolmap = Cimport->getTargetMap();
1913 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
1914 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1915 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",
1916 range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),
1917 KOKKOS_LAMBDA(
const LO i) {
1918 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1920 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",
1921 range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),
1922 KOKKOS_LAMBDA(
const LO i) {
1923 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1943 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),
1944 Aview.colMap->getLocalNumElements());
1945 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),
1946 Aview.colMap->getLocalNumElements());
1948 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",
1949 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),
1950 KOKKOS_LAMBDA(
const LO i) {
1951 GO aidx = Acolmap_local.getGlobalElement(i);
1952 LO B_LID = Browmap_local.getLocalElement(aidx);
1953 if (B_LID != LO_INVALID) {
1954 targetMapToOrigRow(i) = B_LID;
1955 targetMapToImportRow(i) = LO_INVALID;
1957 LO I_LID = Irowmap_local.getLocalElement(aidx);
1958 targetMapToOrigRow(i) = LO_INVALID;
1959 targetMapToImportRow(i) = I_LID;
1964 using KernelHandle =
1965 KokkosKernels::Experimental::KokkosKernelsHandle<
typename lno_view_t::const_value_type,
1966 typename lno_nnz_view_t::const_value_type,
1967 typename scalar_view_t::const_value_type,
1968 typename device_t::execution_space,
1969 typename device_t::memory_space,
1970 typename device_t::memory_space>;
1971 int team_work_size = 16;
1972 std::string myalg(
"SPGEMM_KK_MEMORY");
1973 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
1976 kh.create_spgemm_handle(alg_enum);
1977 kh.set_team_work_size(team_work_size);
1980 const KBSR Amat = Aview.origMatrix->getLocalMatrixDevice();
1981 const KBSR Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,
1982 targetMapToOrigRow,targetMapToImportRow,
1983 Bcol2Ccol,Icol2Ccol,
1984 Ccolmap.getConst()->getLocalNumElements());
1986 RCP<graph_t> graphC;
1987 typename KBSR::values_type values;
1992 KokkosSparse::block_spgemm_symbolic(kh, Amat,
false, Bmerged,
false, Cmat);
1993 KokkosSparse::block_spgemm_numeric (kh, Amat,
false, Bmerged,
false, Cmat);
1994 kh.destroy_spgemm_handle();
1997 graphC = rcp(
new graph_t(Cmat.graph, Aview.origMatrix->getRowMap(), Ccolmap.getConst()));
1998 values = Cmat.values;
2000 C = rcp (
new block_crs_matrix_type (*graphC, values, Aview.blocksize));
2006template<
class Scalar,
2008 class GlobalOrdinal,
2010 class LocalOrdinalViewType>
2011void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2012 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2013 const LocalOrdinalViewType & targetMapToOrigRow,
2014 const LocalOrdinalViewType & targetMapToImportRow,
2015 const LocalOrdinalViewType & Bcol2Ccol,
2016 const LocalOrdinalViewType & Icol2Ccol,
2017 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2018 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2019 const std::string& label,
2020 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2021#ifdef HAVE_TPETRA_MMM_TIMINGS
2022 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2023 using Teuchos::TimeMonitor;
2024 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore"))));
2027 using Teuchos::Array;
2028 using Teuchos::ArrayRCP;
2029 using Teuchos::ArrayView;
2034 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2035 typedef typename KCRS::StaticCrsGraphType graph_t;
2036 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2037 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2038 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2039 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2042 typedef LocalOrdinal LO;
2043 typedef GlobalOrdinal GO;
2045 typedef Map<LO,GO,NO> map_type;
2046 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2047 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2048 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2051 RCP<const map_type> Ccolmap = C.getColMap();
2052 size_t m = Aview.origMatrix->getLocalNumRows();
2053 size_t n = Ccolmap->getLocalNumElements();
2054 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
2057 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2058 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2060 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2061 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2062 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2064 c_lno_view_t Irowptr;
2065 lno_nnz_view_t Icolind;
2066 scalar_view_t Ivals;
2067 if(!Bview.importMatrix.is_null()) {
2068 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2069 Irowptr = lclB.graph.row_map;
2070 Icolind = lclB.graph.entries;
2071 Ivals = lclB.values;
2072 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getLocalMaxNumRowEntries());
2075#ifdef HAVE_TPETRA_MMM_TIMINGS
2076 RCP<TimeMonitor> MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2086 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2087 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2088 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2089 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2099 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2100 std::vector<size_t> c_status(n, ST_INVALID);
2110 size_t CSR_ip = 0, OLD_ip = 0;
2111 for (
size_t i = 0; i < m; i++) {
2114 Crowptr[i] = CSR_ip;
2117 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2118 LO Aik = Acolind[k];
2119 const SC Aval = Avals[k];
2120 if (Aval == SC_ZERO)
2123 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2130 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2133 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2134 LO Bkj = Bcolind[j];
2135 LO Cij = Bcol2Ccol[Bkj];
2137 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2139 c_status[Cij] = CSR_ip;
2140 Ccolind[CSR_ip] = Cij;
2141 Cvals[CSR_ip] = Aval*Bvals[j];
2145 Cvals[c_status[Cij]] += Aval*Bvals[j];
2156 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2157 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2158 LO Ikj = Icolind[j];
2159 LO Cij = Icol2Ccol[Ikj];
2161 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
2163 c_status[Cij] = CSR_ip;
2164 Ccolind[CSR_ip] = Cij;
2165 Cvals[CSR_ip] = Aval*Ivals[j];
2168 Cvals[c_status[Cij]] += Aval*Ivals[j];
2175 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1])*b_max_nnz_per_row) > CSR_alloc) {
2177 Kokkos::resize(Ccolind,CSR_alloc);
2178 Kokkos::resize(Cvals,CSR_alloc);
2183 Crowptr[m] = CSR_ip;
2186 Kokkos::resize(Ccolind,CSR_ip);
2187 Kokkos::resize(Cvals,CSR_ip);
2189#ifdef HAVE_TPETRA_MMM_TIMINGS
2191 auto MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix Final Sort")));
2195 if (params.is_null() || params->get(
"sort entries",
true))
2196 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2197 C.setAllValues(Crowptr,Ccolind, Cvals);
2200#ifdef HAVE_TPETRA_MMM_TIMINGS
2202 auto MM4(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix ESFC")));
2214 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2215 labelList->set(
"Timer Label",label);
2216 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2217 RCP<const Export<LO,GO,NO> > dummyExport;
2218 C.expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2219#ifdef HAVE_TPETRA_MMM_TIMINGS
2221 MM2 = Teuchos::null;
2227template<
class Scalar,
2229 class GlobalOrdinal,
2232 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2233 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2234 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2235 const std::string& label,
2236 const Teuchos::RCP<Teuchos::ParameterList>& params)
2238 using Teuchos::Array;
2239 using Teuchos::ArrayRCP;
2240 using Teuchos::ArrayView;
2245 typedef LocalOrdinal LO;
2246 typedef GlobalOrdinal GO;
2248 typedef Import<LO,GO,NO> import_type;
2249 typedef Map<LO,GO,NO> map_type;
2252 typedef typename map_type::local_map_type local_map_type;
2254 typedef typename KCRS::StaticCrsGraphType graph_t;
2255 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2256 typedef typename NO::execution_space execution_space;
2257 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2258 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2260#ifdef HAVE_TPETRA_MMM_TIMINGS
2261 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2262 using Teuchos::TimeMonitor;
2263 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse Cmap"))));
2265 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2268 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2269 RCP<const map_type> Ccolmap = C.getColMap();
2270 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2271 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2272 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2273 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2274 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2275 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2278 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
2282 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2283 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2286 if (!Bview.importMatrix.is_null()) {
2287 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2288 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2290 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2291 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2292 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2298 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2299 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2300 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2301 GO aidx = Acolmap_local.getGlobalElement(i);
2302 LO B_LID = Browmap_local.getLocalElement(aidx);
2303 if (B_LID != LO_INVALID) {
2304 targetMapToOrigRow(i) = B_LID;
2305 targetMapToImportRow(i) = LO_INVALID;
2307 LO I_LID = Irowmap_local.getLocalElement(aidx);
2308 targetMapToOrigRow(i) = LO_INVALID;
2309 targetMapToImportRow(i) = I_LID;
2316 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2320template<
class Scalar,
2322 class GlobalOrdinal,
2324 class LocalOrdinalViewType>
2325void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2326 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2327 const LocalOrdinalViewType & targetMapToOrigRow,
2328 const LocalOrdinalViewType & targetMapToImportRow,
2329 const LocalOrdinalViewType & Bcol2Ccol,
2330 const LocalOrdinalViewType & Icol2Ccol,
2331 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2332 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2333 const std::string& label,
2334 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2335#ifdef HAVE_TPETRA_MMM_TIMINGS
2336 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2337 using Teuchos::TimeMonitor;
2338 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
2339 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2348 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2349 typedef typename KCRS::StaticCrsGraphType graph_t;
2350 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2351 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2352 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2355 typedef LocalOrdinal LO;
2356 typedef GlobalOrdinal GO;
2358 typedef Map<LO,GO,NO> map_type;
2359 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2360 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2361 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2364 RCP<const map_type> Ccolmap = C.getColMap();
2365 size_t m = Aview.origMatrix->getLocalNumRows();
2366 size_t n = Ccolmap->getLocalNumElements();
2369 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2370 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2371 const KCRS & Cmat = C.getLocalMatrixHost();
2373 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2374 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2375 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2376 scalar_view_t Cvals = Cmat.values;
2378 c_lno_view_t Irowptr;
2379 lno_nnz_view_t Icolind;
2380 scalar_view_t Ivals;
2381 if(!Bview.importMatrix.is_null()) {
2382 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2383 Irowptr = lclB.graph.row_map;
2384 Icolind = lclB.graph.entries;
2385 Ivals = lclB.values;
2388#ifdef HAVE_TPETRA_MMM_TIMINGS
2389 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2401 std::vector<size_t> c_status(n, ST_INVALID);
2404 size_t CSR_ip = 0, OLD_ip = 0;
2405 for (
size_t i = 0; i < m; i++) {
2408 OLD_ip = Crowptr[i];
2409 CSR_ip = Crowptr[i+1];
2410 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2411 c_status[Ccolind[k]] = k;
2417 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2418 LO Aik = Acolind[k];
2419 const SC Aval = Avals[k];
2420 if (Aval == SC_ZERO)
2423 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2425 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2427 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2428 LO Bkj = Bcolind[j];
2429 LO Cij = Bcol2Ccol[Bkj];
2431 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2432 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2433 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2435 Cvals[c_status[Cij]] += Aval * Bvals[j];
2440 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2441 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2442 LO Ikj = Icolind[j];
2443 LO Cij = Icol2Ccol[Ikj];
2445 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2446 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2447 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2449 Cvals[c_status[Cij]] += Aval * Ivals[j];
2455#ifdef HAVE_TPETRA_MMM_TIMINGS
2456 auto MM3 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse ESFC"))));
2459 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2465template<
class Scalar,
2467 class GlobalOrdinal,
2469void jacobi_A_B_newmatrix(
2471 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2472 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2473 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2474 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2475 const std::string& label,
2476 const Teuchos::RCP<Teuchos::ParameterList>& params)
2478 using Teuchos::Array;
2479 using Teuchos::ArrayRCP;
2480 using Teuchos::ArrayView;
2484 typedef LocalOrdinal LO;
2485 typedef GlobalOrdinal GO;
2488 typedef Import<LO,GO,NO> import_type;
2489 typedef Map<LO,GO,NO> map_type;
2490 typedef typename map_type::local_map_type local_map_type;
2494 typedef typename KCRS::StaticCrsGraphType graph_t;
2495 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2496 typedef typename NO::execution_space execution_space;
2497 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2498 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2501#ifdef HAVE_TPETRA_MMM_TIMINGS
2502 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2503 using Teuchos::TimeMonitor;
2504 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi M5 Cmap"))));
2506 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2509 RCP<const import_type> Cimport;
2510 RCP<const map_type> Ccolmap;
2511 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
2512 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
2513 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
2514 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2515 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2516 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2517 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2518 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2525 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
2527 if (Bview.importMatrix.is_null()) {
2530 Ccolmap = Bview.colMap;
2534 Kokkos::RangePolicy<execution_space, LO> range (0,
static_cast<LO
> (Bview.colMap->getLocalNumElements ()));
2535 Kokkos::parallel_for (range, KOKKOS_LAMBDA (
const size_t i) {
2536 Bcol2Ccol(i) =
static_cast<LO
> (i);
2547 if (!Bimport.is_null() && !Iimport.is_null()){
2548 Cimport = Bimport->setUnion(*Iimport);
2549 Ccolmap = Cimport->getTargetMap();
2551 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2552 Cimport = Bimport->setUnion();
2554 }
else if(Bimport.is_null() && !Iimport.is_null()) {
2555 Cimport = Iimport->setUnion();
2558 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2560 Ccolmap = Cimport->getTargetMap();
2562 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2563 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2570 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2571 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2572 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2573 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2575 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2576 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2585 C.replaceColMap(Ccolmap);
2603 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2604 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2605 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2606 GO aidx = Acolmap_local.getGlobalElement(i);
2607 LO B_LID = Browmap_local.getLocalElement(aidx);
2608 if (B_LID != LO_INVALID) {
2609 targetMapToOrigRow(i) = B_LID;
2610 targetMapToImportRow(i) = LO_INVALID;
2612 LO I_LID = Irowmap_local.getLocalElement(aidx);
2613 targetMapToOrigRow(i) = LO_INVALID;
2614 targetMapToImportRow(i) = I_LID;
2621 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2630template<
class Scalar,
2632 class GlobalOrdinal,
2634 class LocalOrdinalViewType>
2635void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
2636 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Dinv,
2637 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2638 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2639 const LocalOrdinalViewType & targetMapToOrigRow,
2640 const LocalOrdinalViewType & targetMapToImportRow,
2641 const LocalOrdinalViewType & Bcol2Ccol,
2642 const LocalOrdinalViewType & Icol2Ccol,
2643 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2644 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2645 const std::string& label,
2646 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2648#ifdef HAVE_TPETRA_MMM_TIMINGS
2649 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2650 using Teuchos::TimeMonitor;
2651 auto MM(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Nemwmatrix SerialCore")));
2654 using Teuchos::Array;
2655 using Teuchos::ArrayRCP;
2656 using Teuchos::ArrayView;
2661 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2662 typedef typename KCRS::StaticCrsGraphType graph_t;
2663 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2664 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2665 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2666 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2669 typedef typename scalar_view_t::memory_space scalar_memory_space;
2672 typedef LocalOrdinal LO;
2673 typedef GlobalOrdinal GO;
2676 typedef Map<LO,GO,NO> map_type;
2677 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2678 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2681 RCP<const map_type> Ccolmap = C.getColMap();
2682 size_t m = Aview.origMatrix->getLocalNumRows();
2683 size_t n = Ccolmap->getLocalNumElements();
2684 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
2687 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2688 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2690 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2691 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2692 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2694 c_lno_view_t Irowptr;
2695 lno_nnz_view_t Icolind;
2696 scalar_view_t Ivals;
2697 if(!Bview.importMatrix.is_null()) {
2698 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2699 Irowptr = lclB.graph.row_map;
2700 Icolind = lclB.graph.entries;
2701 Ivals = lclB.values;
2702 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getLocalMaxNumRowEntries());
2707 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
2715 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2716 Array<size_t> c_status(n, ST_INVALID);
2725 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2726 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2727 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2728 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2729 size_t CSR_ip = 0, OLD_ip = 0;
2731 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2745 for (
size_t i = 0; i < m; i++) {
2748 Crowptr[i] = CSR_ip;
2749 SC minusOmegaDval = -omega*Dvals(i,0);
2752 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2753 Scalar Bval = Bvals[j];
2754 if (Bval == SC_ZERO)
2756 LO Bij = Bcolind[j];
2757 LO Cij = Bcol2Ccol[Bij];
2760 c_status[Cij] = CSR_ip;
2761 Ccolind[CSR_ip] = Cij;
2762 Cvals[CSR_ip] = Bvals[j];
2767 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2768 LO Aik = Acolind[k];
2769 const SC Aval = Avals[k];
2770 if (Aval == SC_ZERO)
2773 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2775 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2777 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2778 LO Bkj = Bcolind[j];
2779 LO Cij = Bcol2Ccol[Bkj];
2781 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2783 c_status[Cij] = CSR_ip;
2784 Ccolind[CSR_ip] = Cij;
2785 Cvals[CSR_ip] = minusOmegaDval* Aval * Bvals[j];
2789 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Bvals[j];
2795 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2796 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2797 LO Ikj = Icolind[j];
2798 LO Cij = Icol2Ccol[Ikj];
2800 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2802 c_status[Cij] = CSR_ip;
2803 Ccolind[CSR_ip] = Cij;
2804 Cvals[CSR_ip] = minusOmegaDval* Aval * Ivals[j];
2807 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Ivals[j];
2814 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1]+1)*b_max_nnz_per_row) > CSR_alloc) {
2816 Kokkos::resize(Ccolind,CSR_alloc);
2817 Kokkos::resize(Cvals,CSR_alloc);
2821 Crowptr[m] = CSR_ip;
2824 Kokkos::resize(Ccolind,CSR_ip);
2825 Kokkos::resize(Cvals,CSR_ip);
2828#ifdef HAVE_TPETRA_MMM_TIMINGS
2829 auto MM2(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix Final Sort")));
2836 C.replaceColMap(Ccolmap);
2843 if (params.is_null() || params->get(
"sort entries",
true))
2844 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2845 C.setAllValues(Crowptr,Ccolind, Cvals);
2848#ifdef HAVE_TPETRA_MMM_TIMINGS
2849 auto MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix ESFC")));
2860 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2861 labelList->set(
"Timer Label",label);
2862 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2863 RCP<const Export<LO,GO,NO> > dummyExport;
2864 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2872template<
class Scalar,
2874 class GlobalOrdinal,
2876void jacobi_A_B_reuse(
2878 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2879 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2880 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2881 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2882 const std::string& label,
2883 const Teuchos::RCP<Teuchos::ParameterList>& params)
2885 using Teuchos::Array;
2886 using Teuchos::ArrayRCP;
2887 using Teuchos::ArrayView;
2891 typedef LocalOrdinal LO;
2892 typedef GlobalOrdinal GO;
2895 typedef Import<LO,GO,NO> import_type;
2896 typedef Map<LO,GO,NO> map_type;
2899 typedef typename map_type::local_map_type local_map_type;
2901 typedef typename KCRS::StaticCrsGraphType graph_t;
2902 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2903 typedef typename NO::execution_space execution_space;
2904 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2905 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2907#ifdef HAVE_TPETRA_MMM_TIMINGS
2908 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2909 using Teuchos::TimeMonitor;
2910 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse Cmap"))));
2912 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2915 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2916 RCP<const map_type> Ccolmap = C.getColMap();
2917 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2918 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2919 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2920 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2921 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2922 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2925 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
2929 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2930 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2933 if (!Bview.importMatrix.is_null()) {
2934 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2935 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
2937 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2938 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2939 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2945 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2946 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2947 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2948 GO aidx = Acolmap_local.getGlobalElement(i);
2949 LO B_LID = Browmap_local.getLocalElement(aidx);
2950 if (B_LID != LO_INVALID) {
2951 targetMapToOrigRow(i) = B_LID;
2952 targetMapToImportRow(i) = LO_INVALID;
2954 LO I_LID = Irowmap_local.getLocalElement(aidx);
2955 targetMapToOrigRow(i) = LO_INVALID;
2956 targetMapToImportRow(i) = I_LID;
2961#ifdef HAVE_TPETRA_MMM_TIMINGS
2967 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2973template<
class Scalar,
2975 class GlobalOrdinal,
2977 class LocalOrdinalViewType>
2978void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
2979 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2980 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2981 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2982 const LocalOrdinalViewType & targetMapToOrigRow,
2983 const LocalOrdinalViewType & targetMapToImportRow,
2984 const LocalOrdinalViewType & Bcol2Ccol,
2985 const LocalOrdinalViewType & Icol2Ccol,
2986 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2987 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2988 const std::string& label,
2989 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2990#ifdef HAVE_TPETRA_MMM_TIMINGS
2991 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2992 using Teuchos::TimeMonitor;
2993 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore"))));
2994 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
3002 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
3003 typedef typename KCRS::StaticCrsGraphType graph_t;
3004 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
3005 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3006 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3007 typedef typename scalar_view_t::memory_space scalar_memory_space;
3010 typedef LocalOrdinal LO;
3011 typedef GlobalOrdinal GO;
3013 typedef Map<LO,GO,NO> map_type;
3014 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3015 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3016 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
3019 RCP<const map_type> Ccolmap = C.getColMap();
3020 size_t m = Aview.origMatrix->getLocalNumRows();
3021 size_t n = Ccolmap->getLocalNumElements();
3024 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
3025 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
3026 const KCRS & Cmat = C.getLocalMatrixHost();
3028 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
3029 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
3030 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
3031 scalar_view_t Cvals = Cmat.values;
3033 c_lno_view_t Irowptr;
3034 lno_nnz_view_t Icolind;
3035 scalar_view_t Ivals;
3036 if(!Bview.importMatrix.is_null()) {
3037 auto lclB = Bview.importMatrix->getLocalMatrixHost();
3038 Irowptr = lclB.graph.row_map;
3039 Icolind = lclB.graph.entries;
3040 Ivals = lclB.values;
3045 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
3047#ifdef HAVE_TPETRA_MMM_TIMINGS
3048 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore - Compare"))));
3056 std::vector<size_t> c_status(n, ST_INVALID);
3059 size_t CSR_ip = 0, OLD_ip = 0;
3060 for (
size_t i = 0; i < m; i++) {
3064 OLD_ip = Crowptr[i];
3065 CSR_ip = Crowptr[i+1];
3066 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
3067 c_status[Ccolind[k]] = k;
3073 SC minusOmegaDval = -omega*Dvals(i,0);
3076 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
3077 Scalar Bval = Bvals[j];
3078 if (Bval == SC_ZERO)
3080 LO Bij = Bcolind[j];
3081 LO Cij = Bcol2Ccol[Bij];
3083 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3084 std::runtime_error,
"Trying to insert a new entry into a static graph");
3086 Cvals[c_status[Cij]] = Bvals[j];
3090 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
3091 LO Aik = Acolind[k];
3092 const SC Aval = Avals[k];
3093 if (Aval == SC_ZERO)
3096 if (targetMapToOrigRow[Aik] != LO_INVALID) {
3098 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
3100 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
3101 LO Bkj = Bcolind[j];
3102 LO Cij = Bcol2Ccol[Bkj];
3104 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3105 std::runtime_error,
"Trying to insert a new entry into a static graph");
3107 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
3112 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
3113 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
3114 LO Ikj = Icolind[j];
3115 LO Cij = Icol2Ccol[Ikj];
3117 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3118 std::runtime_error,
"Trying to insert a new entry into a static graph");
3120 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
3126#ifdef HAVE_TPETRA_MMM_TIMINGS
3127 MM2 = Teuchos::null;
3128 MM2 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
3131 C.fillComplete(C.getDomainMap(), C.getRangeMap());
3132#ifdef HAVE_TPETRA_MMM_TIMINGS
3133 MM2 = Teuchos::null;
3142template<
class Scalar,
3144 class GlobalOrdinal,
3146void import_and_extract_views(
3147 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
3148 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3149 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3150 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3151 bool userAssertsThereAreNoRemotes,
3152 const std::string& label,
3153 const Teuchos::RCP<Teuchos::ParameterList>& params)
3155 using Teuchos::Array;
3156 using Teuchos::ArrayView;
3159 using Teuchos::null;
3162 typedef LocalOrdinal LO;
3163 typedef GlobalOrdinal GO;
3166 typedef Map<LO,GO,NO> map_type;
3167 typedef Import<LO,GO,NO> import_type;
3168 typedef CrsMatrix<SC,LO,GO,NO> crs_matrix_type;
3170#ifdef HAVE_TPETRA_MMM_TIMINGS
3171 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
3172 using Teuchos::TimeMonitor;
3173 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Alloc"))));
3181 Aview.deleteContents();
3183 Aview.origMatrix = rcp(&A,
false);
3185 Aview.origMatrix->getApplyHelper();
3186 Aview.origRowMap = A.getRowMap();
3187 Aview.rowMap = targetMap;
3188 Aview.colMap = A.getColMap();
3189 Aview.domainMap = A.getDomainMap();
3190 Aview.importColMap = null;
3191 RCP<const map_type> rowMap = A.getRowMap();
3192 const int numProcs = rowMap->getComm()->getSize();
3195 if (userAssertsThereAreNoRemotes || numProcs < 2)
3198 RCP<const import_type> importer;
3199 if (params != null && params->isParameter(
"importer")) {
3200 importer = params->get<RCP<const import_type> >(
"importer");
3203#ifdef HAVE_TPETRA_MMM_TIMINGS
3205 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap"))));
3210 RCP<const map_type> remoteRowMap;
3211 size_t numRemote = 0;
3213 if (!prototypeImporter.is_null() &&
3214 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3215 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3217#ifdef HAVE_TPETRA_MMM_TIMINGS
3218 TimeMonitor MM2 = *TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap-Mode1"));
3220 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3221 numRemote = prototypeImporter->getNumRemoteIDs();
3223 Array<GO> remoteRows(numRemote);
3224 for (
size_t i = 0; i < numRemote; i++)
3225 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3227 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3228 rowMap->getIndexBase(), rowMap->getComm()));
3231 }
else if (prototypeImporter.is_null()) {
3233#ifdef HAVE_TPETRA_MMM_TIMINGS
3234 TimeMonitor MM2 = *TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap-Mode2"));
3236 ArrayView<const GO> rows = targetMap->getLocalElementList();
3237 size_t numRows = targetMap->getLocalNumElements();
3239 Array<GO> remoteRows(numRows);
3240 for(
size_t i = 0; i < numRows; ++i) {
3241 const LO mlid = rowMap->getLocalElement(rows[i]);
3243 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3244 remoteRows[numRemote++] = rows[i];
3246 remoteRows.resize(numRemote);
3247 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3248 rowMap->getIndexBase(), rowMap->getComm()));
3257 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3258 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3267#ifdef HAVE_TPETRA_MMM_TIMINGS
3269 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Collective-0"))));
3273 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3275 if (globalMaxNumRemote > 0) {
3276#ifdef HAVE_TPETRA_MMM_TIMINGS
3278 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-2"))));
3282 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3284 importer = rcp(
new import_type(rowMap, remoteRowMap));
3286 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3290 params->set(
"importer", importer);
3293 if (importer != null) {
3294#ifdef HAVE_TPETRA_MMM_TIMINGS
3296 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-3"))));
3300 Teuchos::ParameterList labelList;
3301 labelList.set(
"Timer Label", label);
3302 auto & labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
3305 bool overrideAllreduce =
false;
3308 Teuchos::ParameterList params_sublist;
3309 if(!params.is_null()) {
3310 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3311 params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
3312 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3313 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3314 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
3315 isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
3316 overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
3318 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
3319 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3320 labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
3323 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3325 Aview.importMatrix->getApplyHelper();
3331 sprintf(str,
"import_matrix.%d.dat",count);
3336#ifdef HAVE_TPETRA_MMM_STATISTICS
3337 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3341#ifdef HAVE_TPETRA_MMM_TIMINGS
3343 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-4"))));
3347 Aview.importColMap = Aview.importMatrix->getColMap();
3348#ifdef HAVE_TPETRA_MMM_TIMINGS
3355template<
class Scalar,
3357 class GlobalOrdinal,
3359void import_and_extract_views(
3360 const BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& M,
3361 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3362 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview,
3363 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3364 bool userAssertsThereAreNoRemotes)
3366 using Teuchos::Array;
3367 using Teuchos::ArrayView;
3370 using Teuchos::null;
3373 typedef LocalOrdinal LO;
3374 typedef GlobalOrdinal GO;
3377 typedef Map<LO,GO,NO> map_type;
3378 typedef Import<LO,GO,NO> import_type;
3379 typedef BlockCrsMatrix<SC,LO,GO,NO> blockcrs_matrix_type;
3387 Mview.deleteContents();
3391 Mview.origMatrix->getApplyHelper();
3392 Mview.origRowMap = M.getRowMap();
3393 Mview.rowMap = targetMap;
3394 Mview.colMap = M.getColMap();
3395 Mview.importColMap = null;
3396 RCP<const map_type> rowMap = M.getRowMap();
3397 const int numProcs = rowMap->getComm()->getSize();
3400 if (userAssertsThereAreNoRemotes || numProcs < 2)
return;
3404 RCP<const map_type> remoteRowMap;
3405 size_t numRemote = 0;
3407 if (!prototypeImporter.is_null() &&
3408 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3409 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3412 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3413 numRemote = prototypeImporter->getNumRemoteIDs();
3415 Array<GO> remoteRows(numRemote);
3416 for (
size_t i = 0; i < numRemote; i++)
3417 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3419 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3420 rowMap->getIndexBase(), rowMap->getComm()));
3423 }
else if (prototypeImporter.is_null()) {
3426 ArrayView<const GO> rows = targetMap->getLocalElementList();
3427 size_t numRows = targetMap->getLocalNumElements();
3429 Array<GO> remoteRows(numRows);
3430 for(
size_t i = 0; i < numRows; ++i) {
3431 const LO mlid = rowMap->getLocalElement(rows[i]);
3433 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3434 remoteRows[numRemote++] = rows[i];
3436 remoteRows.resize(numRemote);
3437 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3438 rowMap->getIndexBase(), rowMap->getComm()));
3447 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3448 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3456 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3458 RCP<const import_type> importer;
3460 if (globalMaxNumRemote > 0) {
3463 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3465 importer = rcp(
new import_type(rowMap, remoteRowMap));
3467 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match M.getRowMap()!");
3470 if (importer != null) {
3475 Mview.importMatrix->getApplyHelper();
3478 Mview.importColMap = Mview.importMatrix->getColMap();
3484template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3486merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3487 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3488 const LocalOrdinalViewType & Acol2Brow,
3489 const LocalOrdinalViewType & Acol2Irow,
3490 const LocalOrdinalViewType & Bcol2Ccol,
3491 const LocalOrdinalViewType & Icol2Ccol,
3492 const size_t mergedNodeNumCols) {
3496 typedef typename KCRS::StaticCrsGraphType graph_t;
3497 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3498 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3499 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3501 const KCRS & Ak = Aview.
origMatrix->getLocalMatrixDevice();
3502 const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
3505 if(!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3509 RCP<const KCRS> Ik_;
3510 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrixDevice());
3511 const KCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3513 if(Ik!=0) Iks = *Ik;
3514 size_t merge_numrows = Ak.numCols();
3516 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3518 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3521 typedef typename Node::execution_space execution_space;
3522 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3523 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3524 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3525 if(
final) Mrowptr(i) = update;
3528 if(Acol2Brow(i)!=LO_INVALID)
3529 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3531 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3534 if(
final && i+1==merge_numrows)
3535 Mrowptr(i+1)=update;
3539 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3540 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"),merge_nnz);
3541 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"),merge_nnz);
3544 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3545 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3546 if(Acol2Brow(i)!=LO_INVALID) {
3547 size_t row = Acol2Brow(i);
3548 size_t start = Bk.graph.row_map(row);
3549 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3550 Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
3551 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3555 size_t row = Acol2Irow(i);
3556 size_t start = Iks.graph.row_map(row);
3557 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3558 Mvalues(j) = Iks.values(j-Mrowptr(i)+start);
3559 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3564 KCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind);
3575template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3576const typename Tpetra::BlockCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_device_type
3577merge_matrices(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3578 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3579 const LocalOrdinalViewType & Acol2Brow,
3580 const LocalOrdinalViewType & Acol2Irow,
3581 const LocalOrdinalViewType & Bcol2Ccol,
3582 const LocalOrdinalViewType & Icol2Ccol,
3583 const size_t mergedNodeNumCols)
3586 typedef typename Tpetra::BlockCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_device_type KBCRS;
3587 typedef typename KBCRS::StaticCrsGraphType graph_t;
3588 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3589 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3590 typedef typename KBCRS::values_type::non_const_type scalar_view_t;
3593 const KBCRS & Ak = Aview.
origMatrix->getLocalMatrixDevice();
3594 const KBCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
3597 if(!Bview.importMatrix.is_null() ||
3598 (Bview.importMatrix.is_null() &&
3599 (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3604 RCP<const KBCRS> Ik_;
3605 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KBCRS>(Bview.importMatrix->getLocalMatrixDevice());
3606 const KBCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3608 if(Ik!=0) Iks = *Ik;
3609 size_t merge_numrows = Ak.numCols();
3612 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3614 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3617 typedef typename Node::execution_space execution_space;
3618 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3619 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3620 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3621 if(
final) Mrowptr(i) = update;
3624 if(Acol2Brow(i)!=LO_INVALID)
3625 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3627 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3630 if(
final && i+1==merge_numrows)
3631 Mrowptr(i+1)=update;
3635 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3636 const int blocksize = Ak.blockDim();
3637 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"),merge_nnz);
3638 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"),merge_nnz*blocksize*blocksize);
3641 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3642 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3643 if(Acol2Brow(i)!=LO_INVALID) {
3644 size_t row = Acol2Brow(i);
3645 size_t start = Bk.graph.row_map(row);
3646 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3647 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3649 for (
int b=0; b<blocksize*blocksize; ++b) {
3650 const int val_indx = j*blocksize*blocksize + b;
3651 const int b_val_indx = (j-Mrowptr(i)+
start)*blocksize*blocksize + b;
3652 Mvalues(val_indx) = Bk.values(b_val_indx);
3657 size_t row = Acol2Irow(i);
3658 size_t start = Iks.graph.row_map(row);
3659 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3660 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3662 for (
int b=0; b<blocksize*blocksize; ++b) {
3663 const int val_indx = j*blocksize*blocksize + b;
3664 const int b_val_indx = (j-Mrowptr(i)+
start)*blocksize*blocksize + b;
3665 Mvalues(val_indx) = Iks.values(b_val_indx);
3672 KBCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind, blocksize);
3682template<
typename SC,
typename LO,
typename GO,
typename NO>
3683void AddKernels<SC, LO, GO, NO>::
3685 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3686 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3687 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3688 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3689 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3690 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3691 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3692 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3693#
if KOKKOSKERNELS_VERSION >= 40299
3696 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3697 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3698 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
3700 using Teuchos::TimeMonitor;
3701 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3702 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3703 auto nrows = Arowptrs.extent(0) - 1;
3704 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3705 typename AddKern::KKH handle;
3706 handle.create_spadd_handle(
true);
3707 auto addHandle = handle.get_spadd_handle();
3708#ifdef HAVE_TPETRA_MMM_TIMINGS
3709 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
3711 KokkosSparse::Experimental::spadd_symbolic
3713#
if KOKKOSKERNELS_VERSION >= 40299
3714 nrows, numGlobalCols,
3716 Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3718 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3719 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3720#ifdef HAVE_TPETRA_MMM_TIMINGS
3722 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted numeric")));
3724 KokkosSparse::Experimental::spadd_numeric(&handle,
3725#
if KOKKOSKERNELS_VERSION >= 40299
3726 nrows, numGlobalCols,
3728 Arowptrs, Acolinds, Avals, scalarA,
3729 Browptrs, Bcolinds, Bvals, scalarB,
3730 Crowptrs, Ccolinds, Cvals);
3733template<
typename SC,
typename LO,
typename GO,
typename NO>
3734void AddKernels<SC, LO, GO, NO>::
3736 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3737 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3738 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3739 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3740 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3741 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3742 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3743 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3744#
if KOKKOSKERNELS_VERSION >= 40299
3749 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3750 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3751 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
3753 using Teuchos::TimeMonitor;
3754 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3755 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3756 auto nrows = Arowptrs.extent(0) - 1;
3757 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3758 typedef MMdetails::AddKernels<SC, LO, GO, NO> AddKern;
3759 typename AddKern::KKH handle;
3760 handle.create_spadd_handle(
false);
3761 auto addHandle = handle.get_spadd_handle();
3762#ifdef HAVE_TPETRA_MMM_TIMINGS
3763 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() unsorted symbolic")));
3765 KokkosSparse::Experimental::spadd_symbolic
3767#
if KOKKOSKERNELS_VERSION >= 40299
3768 nrows, numGlobalCols,
3770 Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3772 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3773 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3774#ifdef HAVE_TPETRA_MMM_TIMINGS
3776 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() unsorted kernel: unsorted numeric")));
3778 KokkosSparse::Experimental::spadd_numeric(&handle,
3779#
if KOKKOSKERNELS_VERSION >= 40299
3780 nrows, numGlobalCols,
3782 Arowptrs, Acolinds, Avals, scalarA,
3783 Browptrs, Bcolinds, Bvals, scalarB,
3784 Crowptrs, Ccolinds, Cvals);
3787template<
typename GO,
3788 typename LocalIndicesType,
3789 typename GlobalIndicesType,
3790 typename ColMapType>
3791struct ConvertLocalToGlobalFunctor
3793 ConvertLocalToGlobalFunctor(
3794 const LocalIndicesType& colindsOrig_,
3795 const GlobalIndicesType& colindsConverted_,
3796 const ColMapType& colmap_) :
3797 colindsOrig (colindsOrig_),
3798 colindsConverted (colindsConverted_),
3801 KOKKOS_INLINE_FUNCTION
void
3802 operator() (
const GO i)
const
3804 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
3806 LocalIndicesType colindsOrig;
3807 GlobalIndicesType colindsConverted;
3811template<
typename SC,
typename LO,
typename GO,
typename NO>
3812void MMdetails::AddKernels<SC, LO, GO, NO>::
3813convertToGlobalAndAdd(
3814 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
3815 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3816 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
3817 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3818 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
3819 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
3820 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3821 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3822 typename MMdetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds)
3824 using Teuchos::TimeMonitor;
3827 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GO, impl_scalar_type,
3828 typename NO::execution_space,
typename NO::memory_space,
typename NO::memory_space>;
3830 const values_array Avals = A.values;
3831 const values_array Bvals = B.values;
3832 const col_inds_array Acolinds = A.graph.entries;
3833 const col_inds_array Bcolinds = B.graph.entries;
3834 auto Arowptrs = A.graph.row_map;
3835 auto Browptrs = B.graph.row_map;
3836 global_col_inds_array AcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"A colinds (converted)"), Acolinds.extent(0));
3837 global_col_inds_array BcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"B colinds (converted)"), Bcolinds.extent(0));
3838#ifdef HAVE_TPETRA_MMM_TIMINGS
3839 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: " + std::string(
"column map conversion"))));
3841 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(Acolinds, AcolindsConverted, AcolMap);
3842 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
3843 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(Bcolinds, BcolindsConverted, BcolMap);
3844 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
3846 handle.create_spadd_handle(
false);
3847 auto addHandle = handle.get_spadd_handle();
3848#ifdef HAVE_TPETRA_MMM_TIMINGS
3850 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted symbolic")));
3852 auto nrows = Arowptrs.extent(0) - 1;
3853 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3854 KokkosSparse::Experimental::spadd_symbolic
3856#
if KOKKOSKERNELS_VERSION >= 40299
3859 Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
3860 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3861 Ccolinds = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3862#ifdef HAVE_TPETRA_MMM_TIMINGS
3864 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted numeric")));
3866 KokkosSparse::Experimental::spadd_numeric(&handle,
3867#
if KOKKOSKERNELS_VERSION >= 40299
3870 Arowptrs, AcolindsConverted, Avals, scalarA,
3871 Browptrs, BcolindsConverted, Bvals, scalarB,
3872 Crowptrs, Ccolinds, Cvals);
3888#define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
3890 void MatrixMatrix::Multiply( \
3891 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3893 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3895 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3896 bool call_FillComplete_on_result, \
3897 const std::string & label, \
3898 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3901 void MatrixMatrix::Multiply( \
3902 const Teuchos::RCP<const BlockCrsMatrix< SCALAR , LO , GO , NODE > >& A, \
3904 const Teuchos::RCP<const BlockCrsMatrix< SCALAR , LO , GO , NODE > >& B, \
3906 Teuchos::RCP<BlockCrsMatrix< SCALAR , LO , GO , NODE > >& C, \
3907 const std::string & label); \
3910 void MatrixMatrix::Jacobi( \
3912 const Vector< SCALAR, LO, GO, NODE > & Dinv, \
3913 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3914 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3915 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3916 bool call_FillComplete_on_result, \
3917 const std::string & label, \
3918 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3921 void MatrixMatrix::Add( \
3922 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3925 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3928 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > >& C); \
3931 void MatrixMatrix::Add( \
3932 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3935 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3938 const Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > >& C); \
3941 void MatrixMatrix::Add( \
3942 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3945 CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3949 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \
3950 MatrixMatrix::add<SCALAR, LO, GO, NODE> \
3951 (const SCALAR & alpha, \
3952 const bool transposeA, \
3953 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3954 const SCALAR & beta, \
3955 const bool transposeB, \
3956 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3957 const Teuchos::RCP<const Map<LO, GO, NODE> >& domainMap, \
3958 const Teuchos::RCP<const Map<LO, GO, NODE> >& rangeMap, \
3959 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3963 MatrixMatrix::add< SCALAR , LO, GO , NODE > \
3964 (const SCALAR & alpha, \
3965 const bool transposeA, \
3966 const CrsMatrix< SCALAR , LO, GO , NODE >& A, \
3967 const SCALAR& beta, \
3968 const bool transposeB, \
3969 const CrsMatrix< SCALAR , LO, GO , NODE >& B, \
3970 CrsMatrix< SCALAR , LO, GO , NODE >& C, \
3971 const Teuchos::RCP<const Map<LO, GO , NODE > >& domainMap, \
3972 const Teuchos::RCP<const Map<LO, GO , NODE > >& rangeMap, \
3973 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3975 template struct MMdetails::AddKernels<SCALAR, LO, GO, NODE>; \
3977 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const CrsMatrix<SCALAR, LO, GO, NODE>& M, \
3978 Teuchos::RCP<const Map<LO, GO, NODE> > targetMap, \
3979 CrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3980 Teuchos::RCP<const Import<LO,GO, NODE> > prototypeImporter, \
3981 bool userAssertsThereAreNoRemotes, \
3982 const std::string& label, \
3983 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3985 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const BlockCrsMatrix<SCALAR, LO, GO, NODE>& M, \
3986 Teuchos::RCP<const Map<LO, GO, NODE> > targetMap, \
3987 BlockCrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3988 Teuchos::RCP<const Import<LO,GO, NODE> > prototypeImporter, \
3989 bool userAssertsThereAreNoRemotes);
Matrix Market file readers and writers for Tpetra objects.
Forward declaration of some Tpetra Matrix Matrix objects.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static void writeSparseFile(const std::string &filename, const sparse_matrix_type &matrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
void start()
Start the deep_copy counter.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeCrsMatrixZeros(CrsMatrixType &matrix, typename Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitudeType const &threshold=Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitude(Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::zero()))
Remove zero entries from a matrix.
size_t global_size_t
Global size_t object.