12#ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
13#define TPETRA_BLOCKCRSMATRIX_DEF_HPP
21#include "Tpetra_BlockMultiVector.hpp"
24#include "Teuchos_TimeMonitor.hpp"
25#ifdef HAVE_TPETRA_DEBUG
29#include "KokkosSparse_spmv.hpp"
36 struct BlockCrsRowStruct {
37 T totalNumEntries, totalNumBytes, maxRowLength;
38 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
39 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
40 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
41 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
46 KOKKOS_INLINE_FUNCTION
47 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
48 a.totalNumEntries += b.totalNumEntries;
49 a.totalNumBytes += b.totalNumBytes;
50 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
53 template<
typename T,
typename ExecSpace>
54 struct BlockCrsReducer {
55 typedef BlockCrsReducer reducer;
57 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
60 KOKKOS_INLINE_FUNCTION
61 BlockCrsReducer(value_type &val) : value(&val) {}
63 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
64 KOKKOS_INLINE_FUNCTION
void join(value_type &dst,
const value_type &src)
const { dst += src; }
65 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
66 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
67 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
76template<
class Scalar,
class LO,
class GO,
class Node>
77class GetLocalDiagCopy {
79 typedef typename Node::device_type device_type;
80 typedef size_t diag_offset_type;
81 typedef Kokkos::View<
const size_t*, device_type,
82 Kokkos::MemoryUnmanaged> diag_offsets_type;
83 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
84 typedef typename global_graph_type::local_graph_device_type local_graph_device_type;
85 typedef typename local_graph_device_type::row_map_type row_offsets_type;
86 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
87 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
88 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
91 GetLocalDiagCopy (
const diag_type& diag,
92 const values_type& val,
93 const diag_offsets_type& diagOffsets,
94 const row_offsets_type& ptr,
97 diagOffsets_ (diagOffsets),
99 blockSize_ (blockSize),
100 offsetPerBlock_ (blockSize_*blockSize_),
104 KOKKOS_INLINE_FUNCTION
void
105 operator() (
const LO& lclRowInd)
const
110 const size_t absOffset = ptr_[lclRowInd];
113 const size_t relOffset = diagOffsets_[lclRowInd];
116 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
121 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
122 const_little_block_type;
123 const_little_block_type D_in (val_.data () + pointOffset,
124 blockSize_, blockSize_);
125 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
131 diag_offsets_type diagOffsets_;
132 row_offsets_type ptr_;
139 template<
class Scalar,
class LO,
class GO,
class Node>
141 BlockCrsMatrix<Scalar, LO, GO, Node>::
142 markLocalErrorAndGetStream ()
144 * (this->localError_) =
true;
145 if ((*errs_).is_null ()) {
146 *errs_ = Teuchos::rcp (
new std::ostringstream ());
151 template<
class Scalar,
class LO,
class GO,
class Node>
166 template<
class Scalar,
class LO,
class GO,
class Node>
172 rowMeshMap_ (* (
graph.getRowMap ())),
184 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
185 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
186 "rows (isSorted() is false). This class assumes sorted rows.");
188 graphRCP_ = Teuchos::rcpFromRef(graph_);
197 "BlockCrsMatrix constructor: The input blockSize = " <<
blockSize <<
198 " <= 0. The block size must be positive.");
208 *pointImporter_ = Teuchos::rcp
214 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
215 Kokkos::deep_copy(ptrHost_,
ptr_h);
218 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
220 Kokkos::deep_copy (indHost_,
ind_h);
223 val_ =
decltype (val_) (impl_scalar_type_dualview(
"val",
numValEnt));
227 template<
class Scalar,
class LO,
class GO,
class Node>
230 const typename local_matrix_device_type::values_type&
values,
234 rowMeshMap_ (* (
graph.getRowMap ())),
245 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
246 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
247 "rows (isSorted() is false). This class assumes sorted rows.");
249 graphRCP_ = Teuchos::rcpFromRef(graph_);
258 "BlockCrsMatrix constructor: The input blockSize = " <<
blockSize <<
259 " <= 0. The block size must be positive.");
269 *pointImporter_ = Teuchos::rcp
275 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
276 Kokkos::deep_copy(ptrHost_,
ptr_h);
279 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
280 Kokkos::deep_copy (indHost_,
ind_h);
284 val_ =
decltype (val_) (
values);
288 template<
class Scalar,
class LO,
class GO,
class Node>
296 rowMeshMap_ (* (
graph.getRowMap ())),
308 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
309 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
310 "rows (isSorted() is false). This class assumes sorted rows.");
312 graphRCP_ = Teuchos::rcpFromRef(graph_);
321 "BlockCrsMatrix constructor: The input blockSize = " <<
blockSize <<
322 " <= 0. The block size must be positive.");
328 *pointImporter_ = Teuchos::rcp
334 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
336 Kokkos::deep_copy(ptrHost_,
ptr_h);
339 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
341 Kokkos::deep_copy (indHost_,
ind_h);
344 val_ =
decltype (val_) (impl_scalar_type_dualview(
"val",
numValEnt));
349 template<
class Scalar,
class LO,
class GO,
class Node>
350 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
355 return Teuchos::rcp (
new map_type (domainPointMap_));
358 template<
class Scalar,
class LO,
class GO,
class Node>
359 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
364 return Teuchos::rcp (
new map_type (rangePointMap_));
367 template<
class Scalar,
class LO,
class GO,
class Node>
368 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
372 return graph_.getRowMap();
375 template<
class Scalar,
class LO,
class GO,
class Node>
376 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
380 return graph_.getColMap();
383 template<
class Scalar,
class LO,
class GO,
class Node>
388 return graph_.getGlobalNumRows();
391 template<
class Scalar,
class LO,
class GO,
class Node>
396 return graph_.getLocalNumRows();
399 template<
class Scalar,
class LO,
class GO,
class Node>
404 return graph_.getLocalMaxNumRowEntries();
407 template<
class Scalar,
class LO,
class GO,
class Node>
412 Teuchos::ETransp
mode,
418 mode != Teuchos::NO_TRANS &&
mode != Teuchos::TRANS &&
mode != Teuchos::CONJ_TRANS,
419 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
420 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
421 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
424 !
X.isConstantStride() || !
Y.isConstantStride(),
425 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
426 "X and Y must both be constant stride");
435 catch (std::invalid_argument&
e) {
437 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
438 "apply: Either the input MultiVector X or the output MultiVector Y "
439 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
440 "graph. BlockMultiVector's constructor threw the following exception: "
450 }
catch (std::invalid_argument&
e) {
452 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
453 "apply: The implementation method applyBlock complained about having "
454 "an invalid argument. It reported the following: " <<
e.what ());
455 }
catch (std::logic_error&
e) {
457 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
458 "apply: The implementation method applyBlock complained about a "
459 "possible bug in its implementation. It reported the following: "
461 }
catch (std::exception&
e) {
463 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
464 "apply: The implementation method applyBlock threw an exception which "
465 "is neither std::invalid_argument nor std::logic_error, but is a "
466 "subclass of std::exception. It reported the following: "
470 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
471 "apply: The implementation method applyBlock threw an exception which "
472 "is not an instance of a subclass of std::exception. This probably "
473 "indicates a bug. Please report this to the Tpetra developers.");
477 template<
class Scalar,
class LO,
class GO,
class Node>
482 Teuchos::ETransp
mode,
487 X.getBlockSize () !=
Y.getBlockSize (), std::invalid_argument,
488 "Tpetra::BlockCrsMatrix::applyBlock: "
489 "X and Y have different block sizes. X.getBlockSize() = "
490 <<
X.getBlockSize () <<
" != Y.getBlockSize() = "
491 <<
Y.getBlockSize () <<
".");
493 if (
mode == Teuchos::NO_TRANS) {
495 }
else if (
mode == Teuchos::TRANS ||
mode == Teuchos::CONJ_TRANS) {
499 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
500 "applyBlock: Invalid 'mode' argument. Valid values are "
501 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
505 template<
class Scalar,
class LO,
class GO,
class Node>
517 "destMatrix is required to be null.");
533 template<
class Scalar,
class LO,
class GO,
class Node>
545 "destMatrix is required to be null.");
561 template<
class Scalar,
class LO,
class GO,
class Node>
566 auto val_d = val_.getDeviceView(Access::OverwriteAll);
570 template<
class Scalar,
class LO,
class GO,
class Node>
584 template <
class Scalar,
class LO,
class GO,
class Node>
588 Kokkos::MemoryUnmanaged>& offsets)
const
590 graph_.getLocalDiagOffsets (offsets);
593 template <
class Scalar,
class LO,
class GO,
class Node>
597 Kokkos::MemoryUnmanaged>& diag,
599 Kokkos::MemoryUnmanaged>& offsets)
const
601 using Kokkos::parallel_for;
602 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
604 const LO
lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getLocalNumElements ());
605 const LO
blockSize = this->getBlockSize ();
608 static_cast<LO
> (diag.extent (1)) <
blockSize ||
609 static_cast<LO
> (diag.extent (2)) <
blockSize,
610 std::invalid_argument,
prefix <<
611 "The input Kokkos::View is not big enough to hold all the data.");
613 (
static_cast<LO
> (offsets.size ()) <
lclNumMeshRows, std::invalid_argument,
614 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
617 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
624 auto val_d = val_.getDeviceView(Access::ReadOnly);
627 graph_.getLocalGraphDevice ().row_map, blockSize_));
630 template<
class Scalar,
class LO,
class GO,
class Node>
645 template<
class Scalar,
class LO,
class GO,
class Node>
658 template<
class Scalar,
class LO,
class GO,
class Node>
662 nonconst_local_inds_host_view_type &
Indices,
663 nonconst_values_host_view_type &
Values,
670 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
677 for (LO
i=0;
i<
numInds*blockSize_*blockSize_; ++
i) {
683 template<
class Scalar,
class LO,
class GO,
class Node>
691 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
696 return static_cast<LO
> (0);
699 const LO
LINV = Teuchos::OrdinalTraits<LO>::invalid ();
716 template<
class Scalar,
class LO,
class GO,
class Node>
724 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
729 return static_cast<LO
> (0);
736 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
737 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
743 if (offsets[
k] !=
STINV) {
754 template<
class Scalar,
class LO,
class GO,
class Node>
762 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
767 return static_cast<LO
> (0);
769 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
770 auto val_out = getValuesHost(localRowInd);
771 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
773 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
774 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
775 size_t pointOffset = 0;
778 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
779 const size_t blockOffset = offsets[k]*perBlockSize;
780 if (blockOffset != STINV) {
781 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
782 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
791 template<
class Scalar,
class LO,
class GO,
class Node>
799 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
804 return static_cast<LO
> (0);
812 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
813 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
829 template<
class Scalar,
class LO,
class GO,
class Node>
831 impl_scalar_type_dualview::t_host::const_type
835 return val_.getHostView(Access::ReadOnly);
838 template<
class Scalar,
class LO,
class GO,
class Node>
839 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
840 impl_scalar_type_dualview::t_dev::const_type
841 BlockCrsMatrix<Scalar, LO, GO, Node>::
842 getValuesDevice ()
const
844 return val_.getDeviceView(Access::ReadOnly);
847 template<
class Scalar,
class LO,
class GO,
class Node>
848 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
849 impl_scalar_type_dualview::t_host
853 return val_.getHostView(Access::ReadWrite);
856 template<
class Scalar,
class LO,
class GO,
class Node>
858 impl_scalar_type_dualview::t_dev
862 return val_.getDeviceView(Access::ReadWrite);
865 template<
class Scalar,
class LO,
class GO,
class Node>
866 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
867 impl_scalar_type_dualview::t_host::const_type
871 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
872 auto val = val_.getHostView(Access::ReadOnly);
877 template<
class Scalar,
class LO,
class GO,
class Node>
879 impl_scalar_type_dualview::t_dev::const_type
883 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
884 auto val = val_.getDeviceView(Access::ReadOnly);
889 template<
class Scalar,
class LO,
class GO,
class Node>
894 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
895 auto val = val_.getHostView(Access::ReadWrite);
900 template<
class Scalar,
class LO,
class GO,
class Node>
905 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
906 auto val = val_.getDeviceView(Access::ReadWrite);
911 template<
class Scalar,
class LO,
class GO,
class Node>
917 if (
numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
918 return static_cast<LO
> (0);
924 template<
class Scalar,
class LO,
class GO,
class Node>
925 typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
929 auto numCols = this->graph_.getColMap()->getLocalNumElements();
930 auto val = val_.getDeviceView(Access::ReadWrite);
931 const LO
blockSize = this->getBlockSize ();
932 const auto graph = this->graph_.getLocalGraphDevice ();
934 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
941 template<
class Scalar,
class LO,
class GO,
class Node>
946 const Teuchos::ETransp
mode,
957 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
958 "transpose and conjugate transpose modes are not yet implemented.");
961 template<
class Scalar,
class LO,
class GO,
class Node>
963 BlockCrsMatrix<Scalar, LO, GO, Node>::
964 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
965 BlockMultiVector<Scalar, LO, GO, Node>& Y,
971 typedef ::Tpetra::Import<LO, GO, Node> import_type;
972 typedef ::Tpetra::Export<LO, GO, Node> export_type;
973 const Scalar zero = STS::zero ();
974 const Scalar one = STS::one ();
975 RCP<const import_type>
import = graph_.getImporter ();
977 RCP<const export_type> theExport = graph_.getExporter ();
978 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
984 else if (beta != one) {
989 const BMV* X_colMap = NULL;
990 if (
import.is_null ()) {
994 catch (std::exception& e) {
995 TEUCHOS_TEST_FOR_EXCEPTION
996 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
997 "operator= threw an exception: " << std::endl << e.what ());
1007 if ((*X_colMap_).is_null () ||
1008 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1009 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1010 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1011 static_cast<LO
> (X.getNumVectors ())));
1013 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1017 X_colMap = &(**X_colMap_);
1019 catch (std::exception& e) {
1020 TEUCHOS_TEST_FOR_EXCEPTION
1021 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1022 "operator= threw an exception: " << std::endl << e.what ());
1026 BMV* Y_rowMap = NULL;
1027 if (theExport.is_null ()) {
1030 else if ((*Y_rowMap_).is_null () ||
1031 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1032 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1033 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1034 static_cast<LO
> (X.getNumVectors ())));
1036 Y_rowMap = &(**Y_rowMap_);
1038 catch (std::exception& e) {
1039 TEUCHOS_TEST_FOR_EXCEPTION(
1040 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1041 "operator= threw an exception: " << std::endl << e.what ());
1046 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1048 catch (std::exception& e) {
1049 TEUCHOS_TEST_FOR_EXCEPTION
1050 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1051 "an exception: " << e.what ());
1054 TEUCHOS_TEST_FOR_EXCEPTION
1055 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1056 "an exception not a subclass of std::exception.");
1059 if (! theExport.is_null ()) {
1066template <
class Scalar,
class LO,
class GO,
class Node>
1067void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
1068 const BlockMultiVector<Scalar, LO, GO, Node> &X,
1069 BlockMultiVector<Scalar, LO, GO, Node> &Y,
const Scalar alpha,
1070 const Scalar beta) {
1072 "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
1073 const impl_scalar_type alpha_impl = alpha;
1074 const auto graph = this->graph_.getLocalGraphDevice();
1076 mv_type X_mv = X.getMultiVectorView();
1077 mv_type Y_mv = Y.getMultiVectorView();
1078 auto X_lcl = X_mv.getLocalViewDevice(Access::ReadOnly);
1079 auto Y_lcl = Y_mv.getLocalViewDevice(Access::ReadWrite);
1081#if KOKKOSKERNELS_VERSION >= 40299
1082 auto A_lcl = getLocalMatrixDevice();
1083 if(!applyHelper.get()) {
1085 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
1087 if(applyHelper->shouldUseIntRowptrs())
1089 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
1091 &applyHelper->handle_int, KokkosSparse::NoTranspose,
1092 alpha_impl, A_lcl_int_rowptrs, X_lcl, beta, Y_lcl);
1097 &applyHelper->handle, KokkosSparse::NoTranspose,
1098 alpha_impl, A_lcl, X_lcl, beta, Y_lcl);
1101 auto A_lcl = getLocalMatrixDevice();
1102 KokkosSparse::spmv(KokkosSparse::NoTranspose, alpha_impl, A_lcl, X_lcl, beta,
1108 template<
class Scalar,
class LO,
class GO,
class Node>
1110 BlockCrsMatrix<Scalar, LO, GO, Node>::
1111 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1112 const LO colIndexToFind,
1113 const LO hint)
const
1115 const size_t absStartOffset = ptrHost_[localRowIndex];
1116 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1117 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1119 const LO*
const curInd = indHost_.data () + absStartOffset;
1122 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1129 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1134 const LO maxNumEntriesForLinearSearch = 32;
1135 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1140 const LO* beg = curInd;
1141 const LO* end = curInd + numEntriesInRow;
1142 std::pair<const LO*, const LO*> p =
1143 std::equal_range (beg, end, colIndexToFind);
1144 if (p.first != p.second) {
1146 relOffset =
static_cast<LO
> (p.first - beg);
1150 for (LO k = 0; k < numEntriesInRow; ++k) {
1151 if (colIndexToFind == curInd[k]) {
1161 template<
class Scalar,
class LO,
class GO,
class Node>
1163 BlockCrsMatrix<Scalar, LO, GO, Node>::
1164 offsetPerBlock ()
const
1166 return offsetPerBlock_;
1169 template<
class Scalar,
class LO,
class GO,
class Node>
1171 BlockCrsMatrix<Scalar, LO, GO, Node>::
1172 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1173 const size_t pointOffset)
const
1176 const LO rowStride = blockSize_;
1177 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1180 template<
class Scalar,
class LO,
class GO,
class Node>
1182 BlockCrsMatrix<Scalar, LO, GO, Node>::
1183 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1184 const size_t pointOffset)
const
1187 const LO rowStride = blockSize_;
1188 return little_block_type (val + pointOffset, blockSize_, rowStride);
1191 template<
class Scalar,
class LO,
class GO,
class Node>
1192 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1193 BlockCrsMatrix<Scalar, LO, GO, Node>::
1194 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1195 const size_t pointOffset)
const
1198 const LO rowStride = blockSize_;
1199 const size_t bs2 = blockSize_ * blockSize_;
1200 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1203 template<
class Scalar,
class LO,
class GO,
class Node>
1205 BlockCrsMatrix<Scalar, LO, GO, Node>::
1206 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1208 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1210 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1211 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1212 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1213 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1214 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1215 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1219 return little_block_type ();
1223 template<
class Scalar,
class LO,
class GO,
class Node>
1224 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1225 BlockCrsMatrix<Scalar, LO, GO, Node>::
1226 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1228 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1230 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1231 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1232 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1233 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1234 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1235 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1239 return little_block_host_type ();
1244 template<
class Scalar,
class LO,
class GO,
class Node>
1246 BlockCrsMatrix<Scalar, LO, GO, Node>::
1247 checkSizes (const ::Tpetra::SrcDistObject& source)
1250 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1251 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1254 std::ostream& err = markLocalErrorAndGetStream ();
1255 err <<
"checkSizes: The source object of the Import or Export "
1256 "must be a BlockCrsMatrix with the same template parameters as the "
1257 "target object." << endl;
1262 if (src->getBlockSize () != this->getBlockSize ()) {
1263 std::ostream& err = markLocalErrorAndGetStream ();
1264 err <<
"checkSizes: The source and target objects of the Import or "
1265 <<
"Export must have the same block sizes. The source's block "
1266 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1267 <<
"size = " << this->getBlockSize () <<
"." << endl;
1269 if (! src->graph_.isFillComplete ()) {
1270 std::ostream& err = markLocalErrorAndGetStream ();
1271 err <<
"checkSizes: The source object of the Import or Export is "
1272 "not fill complete. Both source and target objects must be fill "
1273 "complete." << endl;
1275 if (! this->graph_.isFillComplete ()) {
1276 std::ostream& err = markLocalErrorAndGetStream ();
1277 err <<
"checkSizes: The target object of the Import or Export is "
1278 "not fill complete. Both source and target objects must be fill "
1279 "complete." << endl;
1281 if (src->graph_.getColMap ().is_null ()) {
1282 std::ostream& err = markLocalErrorAndGetStream ();
1283 err <<
"checkSizes: The source object of the Import or Export does "
1284 "not have a column Map. Both source and target objects must have "
1285 "column Maps." << endl;
1287 if (this->graph_.getColMap ().is_null ()) {
1288 std::ostream& err = markLocalErrorAndGetStream ();
1289 err <<
"checkSizes: The target object of the Import or Export does "
1290 "not have a column Map. Both source and target objects must have "
1291 "column Maps." << endl;
1295 return ! (* (this->localError_));
1298 template<
class Scalar,
class LO,
class GO,
class Node>
1302 (const ::Tpetra::SrcDistObject&
source,
1303 const size_t numSameIDs,
1310 using ::Tpetra::Details::Behavior;
1311 using ::Tpetra::Details::dualViewStatusToString;
1312 using ::Tpetra::Details::ProfilingRegion;
1316 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1317 const bool debug = Behavior::debug();
1318 const bool verbose = Behavior::verbose();
1323 std::ostringstream
os;
1324 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1325 os <<
"Proc " <<
myRank <<
": BlockCrsMatrix::copyAndPermute : " <<
endl;
1330 if (* (this->localError_)) {
1331 std::ostream&
err = this->markLocalErrorAndGetStream ();
1333 <<
"The target object of the Import or Export is already in an error state."
1342 std::ostringstream
os;
1346 std::cerr <<
os.str ();
1353 std::ostream&
err = this->markLocalErrorAndGetStream ();
1361 std::ostream&
err = this->markLocalErrorAndGetStream ();
1363 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1370 std::ostream&
err = this->markLocalErrorAndGetStream ();
1372 <<
"The source (input) object of the Import or "
1373 "Export is either not a BlockCrsMatrix, or does not have the right "
1374 "template parameters. checkSizes() should have caught this. "
1375 "Please report this bug to the Tpetra developers." <<
endl;
1380#ifdef HAVE_TPETRA_DEBUG
1395#ifdef HAVE_TPETRA_DEBUG
1405 std::ostringstream
os;
1407 <<
"canUseLocalColumnIndices: "
1411 std::cerr <<
os.str ();
1420#ifdef HAVE_TPETRA_DEBUG
1421 if (!
srcRowMap.isNodeLocalElement (localRow)) {
1429 values_host_view_type
vals;
1434 if (numEntries > 0) {
1436 if (
err != numEntries) {
1438 if (!
dstRowMap.isNodeLocalElement (localRow)) {
1439#ifdef HAVE_TPETRA_DEBUG
1449 for (LO
k = 0;
k < numEntries; ++
k) {
1452#ifdef HAVE_TPETRA_DEBUG
1468 values_host_view_type
vals;
1471 if (numEntries > 0) {
1473 if (
err != numEntries) {
1475#ifdef HAVE_TPETRA_DEBUG
1476 for (LO
c = 0;
c < numEntries; ++
c) {
1488 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries ();
1494 values_host_view_type
vals;
1501 }
catch (std::exception&
e) {
1503 std::ostringstream
os;
1504 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1505 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1506 << localRow <<
", src->getLocalRowView() threw an exception: "
1508 std::cerr <<
os.str ();
1513 if (numEntries > 0) {
1514 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (
lclDstCols.size ())) {
1517 std::ostringstream
os;
1518 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1519 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1520 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1522 std::cerr <<
os.str ();
1529 for (LO
j = 0;
j < numEntries; ++
j) {
1533#ifdef HAVE_TPETRA_DEBUG
1541 }
catch (std::exception&
e) {
1543 std::ostringstream
os;
1544 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1545 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1546 << localRow <<
", this->replaceLocalValues() threw an exception: "
1548 std::cerr <<
os.str ();
1552 if (
err != numEntries) {
1555 std::ostringstream
os;
1556 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1557 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" "
1558 "localRow " << localRow <<
", this->replaceLocalValues "
1559 "returned " <<
err <<
" instead of numEntries = "
1560 << numEntries <<
endl;
1561 std::cerr <<
os.str ();
1574 values_host_view_type
vals;
1579 }
catch (std::exception&
e) {
1581 std::ostringstream
os;
1582 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1583 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"permute\" "
1585 <<
", src->getLocalRowView() threw an exception: " <<
e.what ();
1586 std::cerr <<
os.str ();
1591 if (numEntries > 0) {
1592 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (
lclDstCols.size ())) {
1599 for (LO
j = 0;
j < numEntries; ++
j) {
1603#ifdef HAVE_TPETRA_DEBUG
1609 if (
err != numEntries) {
1618 std::ostream&
err = this->markLocalErrorAndGetStream ();
1619#ifdef HAVE_TPETRA_DEBUG
1620 err <<
"copyAndPermute: The graph structure of the source of the "
1621 "Import or Export must be a subset of the graph structure of the "
1623 err <<
"invalidSrcCopyRows = [";
1627 typename std::set<LO>::const_iterator
itp1 =
it;
1633 err <<
"], invalidDstCopyRows = [";
1637 typename std::set<LO>::const_iterator
itp1 =
it;
1643 err <<
"], invalidDstCopyCols = [";
1647 typename std::set<LO>::const_iterator
itp1 =
it;
1653 err <<
"], invalidDstPermuteCols = [";
1657 typename std::set<LO>::const_iterator
itp1 =
it;
1663 err <<
"], invalidPermuteFromRows = [";
1667 typename std::set<LO>::const_iterator
itp1 =
it;
1675 err <<
"copyAndPermute: The graph structure of the source of the "
1676 "Import or Export must be a subset of the graph structure of the "
1682 std::ostringstream
os;
1683 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1684 const bool lclSuccess = ! (* (this->localError_));
1685 os <<
"*** Proc " <<
myRank <<
": copyAndPermute "
1690 os <<
": error messages: " << this->errorMessages ();
1692 std::cerr <<
os.str ();
1716 template<
class LO,
class GO>
1722 using ::Tpetra::Details::PackTraits;
1732 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
1733 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1734 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1735 return numEntLen + gidsLen + valsLen;
1749 template<
class ST,
class LO,
class GO>
1751 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
1752 const size_t offset,
1753 const size_t numBytes,
1756 using Kokkos::subview;
1757 using ::Tpetra::Details::PackTraits;
1759 if (numBytes == 0) {
1761 return static_cast<size_t> (0);
1765 const size_t theNumBytes = PackTraits<LO>::packValueCount (numEntLO);
1766 TEUCHOS_TEST_FOR_EXCEPTION
1767 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1768 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
1770 const char*
const inBuf = imports.data () + offset;
1771 const size_t actualNumBytes = PackTraits<LO>::unpackValue (numEntLO, inBuf);
1772 TEUCHOS_TEST_FOR_EXCEPTION
1773 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1774 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
1776 return static_cast<size_t> (numEntLO);
1783 template<
class ST,
class LO,
class GO>
1785 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
1786 const size_t offset,
1787 const size_t numEnt,
1788 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
1789 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
1790 const size_t numBytesPerValue,
1791 const size_t blockSize)
1793 using Kokkos::subview;
1794 using ::Tpetra::Details::PackTraits;
1800 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1803 const LO numEntLO =
static_cast<size_t> (numEnt);
1805 const size_t numEntBeg = offset;
1806 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
1807 const size_t gidsBeg = numEntBeg + numEntLen;
1808 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1809 const size_t valsBeg = gidsBeg + gidsLen;
1810 const size_t valsLen = numScalarEnt * numBytesPerValue;
1812 char*
const numEntOut = exports.data () + numEntBeg;
1813 char*
const gidsOut = exports.data () + gidsBeg;
1814 char*
const valsOut = exports.data () + valsBeg;
1816 size_t numBytesOut = 0;
1818 numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
1821 Kokkos::pair<int, size_t> p;
1822 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
1823 errorCode += p.first;
1824 numBytesOut += p.second;
1826 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
1827 errorCode += p.first;
1828 numBytesOut += p.second;
1831 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1832 TEUCHOS_TEST_FOR_EXCEPTION
1833 (numBytesOut != expectedNumBytes, std::logic_error,
1834 "packRowForBlockCrs: numBytesOut = " << numBytesOut
1835 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1837 TEUCHOS_TEST_FOR_EXCEPTION
1838 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
1839 "PackTraits::packArray returned a nonzero error code");
1845 template<
class ST,
class LO,
class GO>
1847 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
1848 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
1849 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
1850 const size_t offset,
1851 const size_t numBytes,
1852 const size_t numEnt,
1853 const size_t numBytesPerValue,
1854 const size_t blockSize)
1856 using ::Tpetra::Details::PackTraits;
1858 if (numBytes == 0) {
1862 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1863 TEUCHOS_TEST_FOR_EXCEPTION
1864 (
static_cast<size_t> (imports.extent (0)) <= offset,
1865 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1866 << imports.extent (0) <<
" <= offset = " << offset <<
".");
1867 TEUCHOS_TEST_FOR_EXCEPTION
1868 (
static_cast<size_t> (imports.extent (0)) < offset + numBytes,
1869 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1870 << imports.extent (0) <<
" < offset + numBytes = "
1871 << (offset + numBytes) <<
".");
1876 const size_t numEntBeg = offset;
1877 const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
1878 const size_t gidsBeg = numEntBeg + numEntLen;
1879 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1880 const size_t valsBeg = gidsBeg + gidsLen;
1881 const size_t valsLen = numScalarEnt * numBytesPerValue;
1883 const char*
const numEntIn = imports.data () + numEntBeg;
1884 const char*
const gidsIn = imports.data () + gidsBeg;
1885 const char*
const valsIn = imports.data () + valsBeg;
1887 size_t numBytesOut = 0;
1890 numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
1891 TEUCHOS_TEST_FOR_EXCEPTION
1892 (
static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
1893 "unpackRowForBlockCrs: Expected number of entries " << numEnt
1894 <<
" != actual number of entries " << numEntOut <<
".");
1897 Kokkos::pair<int, size_t> p;
1898 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
1899 errorCode += p.first;
1900 numBytesOut += p.second;
1902 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
1903 errorCode += p.first;
1904 numBytesOut += p.second;
1907 TEUCHOS_TEST_FOR_EXCEPTION
1908 (numBytesOut != numBytes, std::logic_error,
1909 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1910 <<
" != numBytes = " << numBytes <<
".");
1912 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1913 TEUCHOS_TEST_FOR_EXCEPTION
1914 (numBytesOut != expectedNumBytes, std::logic_error,
1915 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1916 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1918 TEUCHOS_TEST_FOR_EXCEPTION
1919 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
1920 "PackTraits::unpackArray returned a nonzero error code");
1926 template<
class Scalar,
class LO,
class GO,
class Node>
1930 (const ::Tpetra::SrcDistObject&
source,
1935 Kokkos::DualView<
size_t*,
1939 using ::Tpetra::Details::Behavior;
1940 using ::Tpetra::Details::dualViewStatusToString;
1941 using ::Tpetra::Details::ProfilingRegion;
1942 using ::Tpetra::Details::PackTraits;
1944 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space
host_exec;
1948 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
1950 const bool debug = Behavior::debug();
1951 const bool verbose = Behavior::verbose();
1956 std::ostringstream
os;
1957 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1958 os <<
"Proc " <<
myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
1963 if (* (this->localError_)) {
1964 std::ostream&
err = this->markLocalErrorAndGetStream ();
1966 <<
"The target object of the Import or Export is already in an error state."
1975 std::ostringstream
os;
1977 <<
prefix <<
" " << dualViewStatusToString (
exportLIDs,
"exportLIDs") << std::endl
1978 <<
prefix <<
" " << dualViewStatusToString (exports,
"exports") << std::endl
1980 std::cerr <<
os.str ();
1987 std::ostream&
err = this->markLocalErrorAndGetStream ();
1989 <<
"exportLIDs.extent(0) = " <<
exportLIDs.extent (0)
1991 <<
"." << std::endl;
1995 std::ostream&
err = this->markLocalErrorAndGetStream ();
1996 err <<
prefix <<
"exportLIDs be sync'd to host." << std::endl;
2002 std::ostream&
err = this->markLocalErrorAndGetStream ();
2004 <<
"The source (input) object of the Import or "
2005 "Export is either not a BlockCrsMatrix, or does not have the right "
2006 "template parameters. checkSizes() should have caught this. "
2007 "Please report this bug to the Tpetra developers." << std::endl;
2023 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2027 auto val_host = val_.getHostView(Access::ReadOnly);
2029 PackTraits<impl_scalar_type>
2065 std::ostringstream
os;
2067 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2069 std::cerr <<
os.str ();
2075 if(exports.extent(0) != totalNumBytes)
2077 const std::string
oldLabel = exports.view_device().label ();
2079 exports = Kokkos::DualView<packet_type*, buffer_device_type>(
newLabel, totalNumBytes);
2081 if (totalNumEntries > 0) {
2086 Kokkos::parallel_scan
2088 [=](
const size_t &
i,
size_t &update,
const bool &
final) {
2089 if (
final)
offset(
i) = update;
2094 std::ostream&
err = this->markLocalErrorAndGetStream ();
2096 <<
"At end of method, the final offset (in bytes) "
2098 << totalNumBytes <<
". "
2099 <<
"Please report this bug to the Tpetra developers." << std::endl;
2113 exports.modify_host();
2115 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2118 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2123 Kokkos::parallel_for
2125 [=](
const typename policy_type::member_type &
member) {
2126 const size_t i =
member.league_rank();
2127 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2132 values_host_view_type
vals;
2150 (exports.view_host(),
2153 Kokkos::View<const GO*, host_exec>(
gblColInds.data(), maxRowLength),
2162 std::ostringstream
os;
2164 <<
"numBytes computed from packRowForBlockCrs is different from "
2165 <<
"precomputed offset values, LID = " <<
i << std::endl;
2166 std::cerr <<
os.str ();
2175 std::ostringstream
os;
2176 const bool lclSuccess = ! (* (this->localError_));
2180 std::cerr <<
os.str ();
2184 template<
class Scalar,
class LO,
class GO,
class Node>
2192 Kokkos::DualView<
size_t*,
2197 using ::Tpetra::Details::Behavior;
2198 using ::Tpetra::Details::dualViewStatusToString;
2199 using ::Tpetra::Details::ProfilingRegion;
2200 using ::Tpetra::Details::PackTraits;
2203 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2205 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2206 const bool verbose = Behavior::verbose ();
2211 std::ostringstream
os;
2212 auto map = this->graph_.getRowMap ();
2213 auto comm =
map.is_null () ? Teuchos::null :
map->getComm ();
2214 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2215 os <<
"Proc " <<
myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2219 std::cerr <<
os.str ();
2224 if (* (this->localError_)) {
2225 std::ostream&
err = this->markLocalErrorAndGetStream ();
2226 std::ostringstream
os;
2227 os <<
prefix <<
"{Im/Ex}port target is already in error." <<
endl;
2229 std::cerr <<
os.str ();
2239 std::ostream&
err = this->markLocalErrorAndGetStream ();
2240 std::ostringstream
os;
2245 std::cerr <<
os.str ();
2254 std::ostream&
err = this->markLocalErrorAndGetStream ();
2255 std::ostringstream
os;
2257 <<
"Invalid CombineMode value " <<
combineMode <<
". Valid "
2258 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2261 std::cerr <<
os.str ();
2267 if (this->graph_.getColMap ().is_null ()) {
2268 std::ostream&
err = this->markLocalErrorAndGetStream ();
2269 std::ostringstream
os;
2270 os <<
prefix <<
"Target matrix's column Map is null." <<
endl;
2272 std::cerr <<
os.str ();
2284 const size_t blockSize = this->getBlockSize ();
2294 auto val_host = val_.getHostView(Access::ReadOnly);
2296 PackTraits<impl_scalar_type>::packValueCount
2299 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries ();
2303 std::ostringstream
os;
2311 std::cerr <<
os.str ();
2316 std::ostringstream
os;
2317 os <<
prefix <<
"Nothing to unpack. Done!" << std::endl;
2318 std::cerr <<
os.str ();
2325 if (imports.need_sync_host ()) {
2326 imports.sync_host ();
2338 std::ostream&
err = this->markLocalErrorAndGetStream ();
2339 std::ostringstream
os;
2340 os <<
prefix <<
"All input DualViews must be sync'd to host by now. "
2341 <<
"imports_nc.need_sync_host()="
2342 << (imports.need_sync_host () ?
"true" :
"false")
2343 <<
", numPacketsPerLID.need_sync_host()="
2345 <<
", importLIDs.need_sync_host()="
2346 << (
importLIDs.need_sync_host () ?
"true" :
"false")
2349 std::cerr <<
os.str ();
2365 Kokkos::parallel_scan
2366 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets",
policy,
2367 [=] (
const size_t &
i,
size_t &update,
const bool &
final) {
2368 if (
final)
offset(
i) = update;
2379 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2383 using policy_type = Kokkos::TeamPolicy<host_exec>;
2391 using pair_type = Kokkos::pair<size_t, size_t>;
2394 getValuesHostNonConst();
2396 Kokkos::parallel_for
2397 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack",
policy,
2398 [=] (
const typename policy_type::member_type&
member) {
2399 const size_t i =
member.league_rank();
2400 Kokkos::View<GO*, host_scratch_space>
gblColInds
2402 Kokkos::View<LO*, host_scratch_space>
lclColInds
2404 Kokkos::View<impl_scalar_type*, host_scratch_space>
vals
2419 std::ostringstream
os;
2421 <<
"At i = " <<
i <<
", numEnt = " <<
numEnt
2424 std::cerr <<
os.str ();
2443 imports.view_host(),
2447 catch (std::exception&
e) {
2450 std::ostringstream
os;
2451 os <<
prefix <<
"At i = " <<
i <<
", unpackRowForBlockCrs threw: "
2452 <<
e.what () <<
endl;
2453 std::cerr <<
os.str ();
2461 std::ostringstream
os;
2463 <<
"At i = " <<
i <<
", numBytes = " <<
numBytes
2466 std::cerr <<
os.str();
2474 if (
lidsOut(
k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2476 std::ostringstream
os;
2478 <<
"At i = " <<
i <<
", GID " <<
gidsOut(
k)
2479 <<
" is not owned by the calling process."
2481 std::cerr <<
os.str();
2489 const LO*
const lidsRaw =
const_cast<const LO*
> (
lidsOut.data ());
2503 std::ostringstream
os;
2505 <<
" != numCombd = " <<
numCombd <<
"."
2507 std::cerr <<
os.str ();
2516 std::ostream&
err = this->markLocalErrorAndGetStream ();
2519 err <<
" Please run again with the environment variable setting "
2520 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2526 std::ostringstream
os;
2527 const bool lclSuccess = ! (* (this->localError_));
2531 std::cerr <<
os.str ();
2535 template<
class Scalar,
class LO,
class GO,
class Node>
2539 using Teuchos::TypeNameTraits;
2540 std::ostringstream
os;
2541 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2542 <<
"Template parameters: { "
2549 <<
", Global dimensions: ["
2550 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2551 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
2552 <<
", Block size: " << getBlockSize ()
2558 template<
class Scalar,
class LO,
class GO,
class Node>
2562 const Teuchos::EVerbosityLevel
verbLevel)
const
2564 using Teuchos::ArrayRCP;
2565 using Teuchos::CommRequest;
2566 using Teuchos::FancyOStream;
2567 using Teuchos::getFancyOStream;
2568 using Teuchos::ireceive;
2569 using Teuchos::isend;
2570 using Teuchos::outArg;
2571 using Teuchos::TypeNameTraits;
2572 using Teuchos::VERB_DEFAULT;
2573 using Teuchos::VERB_NONE;
2574 using Teuchos::VERB_LOW;
2575 using Teuchos::VERB_MEDIUM;
2577 using Teuchos::VERB_EXTREME;
2579 using Teuchos::wait;
2583 const Teuchos::EVerbosityLevel
vl =
2593 out <<
"\"Tpetra::BlockCrsMatrix\":" <<
endl;
2596 out <<
"Template parameters:" <<
endl;
2605 <<
"Global dimensions: ["
2606 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2607 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" <<
endl;
2614 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2615 const int myRank = comm.getRank ();
2619 getRowMap()->describe (
out,
vl);
2621 if (! getColMap ().
is_null ()) {
2622 if (getColMap() == getRowMap()) {
2624 out <<
"Column Map: same as row Map" <<
endl;
2631 getColMap ()->describe (
out,
vl);
2634 if (! getDomainMap ().
is_null ()) {
2635 if (getDomainMap () == getRowMap ()) {
2637 out <<
"Domain Map: same as row Map" <<
endl;
2640 else if (getDomainMap () == getColMap ()) {
2642 out <<
"Domain Map: same as column Map" <<
endl;
2649 getDomainMap ()->describe (
out,
vl);
2652 if (! getRangeMap ().
is_null ()) {
2653 if (getRangeMap () == getDomainMap ()) {
2655 out <<
"Range Map: same as domain Map" <<
endl;
2658 else if (getRangeMap () == getRowMap ()) {
2660 out <<
"Range Map: same as row Map" <<
endl;
2667 getRangeMap ()->describe (
out,
vl);
2673 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2674 const int myRank = comm.getRank ();
2675 const int numProcs = comm.getSize ();
2682 Teuchos::OSTab
tab2 (
os);
2693 values_host_view_type
vals;
2794 template<
class Scalar,
class LO,
class GO,
class Node>
2795 Teuchos::RCP<const Teuchos::Comm<int> >
2799 return graph_.getComm();
2803 template<
class Scalar,
class LO,
class GO,
class Node>
2808 return graph_.getGlobalNumCols();
2812 template<
class Scalar,
class LO,
class GO,
class Node>
2817 return graph_.getLocalNumCols();
2820 template<
class Scalar,
class LO,
class GO,
class Node>
2825 return graph_.getIndexBase();
2828 template<
class Scalar,
class LO,
class GO,
class Node>
2833 return graph_.getGlobalNumEntries();
2836 template<
class Scalar,
class LO,
class GO,
class Node>
2841 return graph_.getLocalNumEntries();
2844 template<
class Scalar,
class LO,
class GO,
class Node>
2849 return graph_.getNumEntriesInGlobalRow(
globalRow);
2853 template<
class Scalar,
class LO,
class GO,
class Node>
2858 return graph_.getGlobalMaxNumRowEntries();
2861 template<
class Scalar,
class LO,
class GO,
class Node>
2866 return graph_.hasColMap();
2870 template<
class Scalar,
class LO,
class GO,
class Node>
2875 return graph_.isLocallyIndexed();
2878 template<
class Scalar,
class LO,
class GO,
class Node>
2883 return graph_.isGloballyIndexed();
2886 template<
class Scalar,
class LO,
class GO,
class Node>
2891 return graph_.isFillComplete ();
2894 template<
class Scalar,
class LO,
class GO,
class Node>
2902 template<
class Scalar,
class LO,
class GO,
class Node>
2906 nonconst_global_inds_host_view_type &,
2907 nonconst_values_host_view_type &,
2911 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
2912 "This class doesn't support global matrix indexing.");
2916 template<
class Scalar,
class LO,
class GO,
class Node>
2920 global_inds_host_view_type &,
2921 values_host_view_type &)
const
2924 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
2925 "This class doesn't support global matrix indexing.");
2929 template<
class Scalar,
class LO,
class GO,
class Node>
2933 local_inds_host_view_type &
colInds,
2934 values_host_view_type &
vals)
const
2936 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
2937 colInds = local_inds_host_view_type();
2938 vals = values_host_view_type();
2949 template<
class Scalar,
class LO,
class GO,
class Node>
2953 local_inds_host_view_type &
colInds,
2954 nonconst_values_host_view_type &
vals)
const
2956 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
2957 colInds = nonconst_local_inds_host_view_type();
2958 vals = nonconst_values_host_view_type();
2970 template<
class Scalar,
class LO,
class GO,
class Node>
2991 LO
bs = getBlockSize();
2992 for(
size_t r=0;
r<getLocalNumRows();
r++)
2996 for(
int b=0;
b<
bs;
b++)
3007 template<
class Scalar,
class LO,
class GO,
class Node>
3010 leftScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3013 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3014 "not implemented.");
3018 template<
class Scalar,
class LO,
class GO,
class Node>
3021 rightScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3024 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3025 "not implemented.");
3029 template<
class Scalar,
class LO,
class GO,
class Node>
3030 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3037 template<
class Scalar,
class LO,
class GO,
class Node>
3038 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3043 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3044 "not implemented.");
3054#define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3055 template class BlockCrsMatrix< S, LO, GO, NODE >;
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh,...
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
LO local_ordinal_type
The type of local indices.
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
size_t getLocalNumRows() const override
get the local number of block rows
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
local_matrix_device_type getLocalMatrixDevice() const
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
std::string description() const override
One-line description of this object.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh,...
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row.
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
Kokkos::View< const impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
char packet_type
Implementation detail; tells.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
global_size_t getGlobalNumRows() const override
get the global number of block rows
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
MultiVector for multiple degrees of freedom per mesh point.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Struct that holds views of the contents of a CrsMatrix.
One or more distributed dense vectors.
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
@ ZERO
Replace old values with zero.