10#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
11#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
13#include "Tpetra_BlockMultiVector_decl.hpp"
16#include "Teuchos_OrdinalTraits.hpp"
21template<
class Scalar,
class LO,
class GO,
class Node>
29template<
class Scalar,
class LO,
class GO,
class Node>
37template<
class Scalar,
class LO,
class GO,
class Node>
38Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
39BlockMultiVector<Scalar, LO, GO, Node>::
42 typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
43 const BMV* src_bmv =
dynamic_cast<const BMV*
> (&src);
44 TEUCHOS_TEST_FOR_EXCEPTION(
45 src_bmv ==
nullptr, std::invalid_argument,
"Tpetra::"
46 "BlockMultiVector: The source object of an Import or Export to a "
47 "BlockMultiVector, must also be a BlockMultiVector.");
48 return Teuchos::rcp (src_bmv,
false);
51template<
class Scalar,
class LO,
class GO,
class Node>
56 meshMap_ (
in.meshMap_),
57 pointMap_ (
in.pointMap_),
59 blockSize_ (
in.blockSize_)
62template<
class Scalar,
class LO,
class GO,
class Node>
74template<
class Scalar,
class LO,
class GO,
class Node>
87template<
class Scalar,
class LO,
class GO,
class Node>
98 if (
X_mv.getCopyOrView () == Teuchos::View) {
103 else if (
X_mv.getCopyOrView () == Teuchos::Copy) {
111 const size_t numCols =
X_mv.getNumVectors ();
113 Teuchos::Array<size_t>
cols (0);
120 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
121 "should never happen. Please report this bug to the Tpetra developers.");
128 X_view->getCopyOrView () != Teuchos::View, std::logic_error,
"Tpetra::"
129 "BlockMultiVector constructor: We just set a MultiVector "
130 "to have view semantics, but it claims that it doesn't have view "
131 "semantics. This should never happen. "
132 "Please report this bug to the Tpetra developers.");
142template<
class Scalar,
class LO,
class GO,
class Node>
152 blockSize_ (
X.getBlockSize ())
155template<
class Scalar,
class LO,
class GO,
class Node>
162 pointMap_ (makePointMapRCP (
newMeshMap,
X.getBlockSize ())),
163 mv_ (
X.mv_, pointMap_,
offset *
X.getBlockSize ()),
164 blockSize_ (
X.getBlockSize ())
167template<
class Scalar,
class LO,
class GO,
class Node>
174template<
class Scalar,
class LO,
class GO,
class Node>
180 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
183 static_cast<GST> (
meshMap.getGlobalNumElements ());
185 static_cast<size_t> (
meshMap.getLocalNumElements ());
220template<
class Scalar,
class LO,
class GO,
class Node>
221Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
226 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
229 static_cast<GST> (
meshMap.getGlobalNumElements ());
231 static_cast<size_t> (
meshMap.getLocalNumElements ());
265template<
class Scalar,
class LO,
class GO,
class Node>
273 typename const_little_vec_type::HostMirror::const_type
X_src (
reinterpret_cast<const impl_scalar_type*
> (
vals),
276 using exec_space =
typename device_type::execution_space;
281template<
class Scalar,
class LO,
class GO,
class Node>
296template<
class Scalar,
class LO,
class GO,
class Node>
304 if (
localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
312template<
class Scalar,
class LO,
class GO,
class Node>
320 typename const_little_vec_type::HostMirror::const_type
X_src (
reinterpret_cast<const impl_scalar_type*
> (
vals),
325template<
class Scalar,
class LO,
class GO,
class Node>
340template<
class Scalar,
class LO,
class GO,
class Node>
348 if (
localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
357template<
class Scalar,
class LO,
class GO,
class Node>
358typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
362 const Access::ReadOnlyStruct)
const
365 return const_little_host_vec_type();
367 const size_t blockSize = getBlockSize();
368 auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
369 LO startRow = localRowIndex*blockSize;
370 LO endRow = startRow + blockSize;
371 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
376template<
class Scalar,
class LO,
class GO,
class Node>
377typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
381 const Access::OverwriteAllStruct)
384 return little_host_vec_type();
387 auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
395template<
class Scalar,
class LO,
class GO,
class Node>
396typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
400 const Access::ReadWriteStruct)
403 return little_host_vec_type();
405 const size_t blockSize = getBlockSize();
406 auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
407 LO startRow = localRowIndex*blockSize;
408 LO endRow = startRow + blockSize;
409 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
414template<
class Scalar,
class LO,
class GO,
class Node>
415Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
416BlockMultiVector<Scalar, LO, GO, Node>::
420 using Teuchos::rcpFromRef;
426 typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
427 const this_BMV_type* srcBlkVec =
dynamic_cast<const this_BMV_type*
> (&src);
428 if (srcBlkVec ==
nullptr) {
429 const mv_type* srcMultiVec =
dynamic_cast<const mv_type*
> (&src);
430 if (srcMultiVec ==
nullptr) {
434 return rcp (
new mv_type ());
436 return rcp (srcMultiVec,
false);
439 return rcpFromRef (srcBlkVec->mv_);
443template<
class Scalar,
class LO,
class GO,
class Node>
447 return ! getMultiVectorFromSrcDistObject (src).is_null ();
450template<
class Scalar,
class LO,
class GO,
class Node>
454 const size_t numSameIDs,
462 (
true, std::logic_error,
463 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
464 "instead, create a point importer using makePointMap function.");
467template<
class Scalar,
class LO,
class GO,
class Node>
473 Kokkos::DualView<packet_type*,
475 Kokkos::DualView<
size_t*,
480 (
true, std::logic_error,
481 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
482 "instead, create a point importer using makePointMap function.");
485template<
class Scalar,
class LO,
class GO,
class Node>
490 Kokkos::DualView<packet_type*,
492 Kokkos::DualView<
size_t*,
498 (
true, std::logic_error,
499 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
500 "instead, create a point importer using makePointMap function.");
503template<
class Scalar,
class LO,
class GO,
class Node>
507 return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
511template<
class Scalar,
class LO,
class GO,
class Node>
518template<
class Scalar,
class LO,
class GO,
class Node>
525template<
class Scalar,
class LO,
class GO,
class Node>
536template <
typename Scalar,
typename ViewY,
typename ViewD,
typename ViewX>
537struct BlockWiseMultiply {
538 typedef typename ViewD::size_type Size;
541 typedef typename ViewD::device_type Device;
542 typedef typename ViewD::non_const_value_type ImplScalar;
543 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
545 template <
typename View>
546 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
547 typename View::device_type, Unmanaged>;
552 const Size block_size_;
564 KOKKOS_INLINE_FUNCTION
565 void operator() (
const Size k)
const {
566 const auto zero = Kokkos::ArithTraits<Scalar>::zero();
567 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
568 const auto num_vecs = X_.extent(1);
569 for (Size i = 0; i < num_vecs; ++i) {
570 Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
571 auto X_curBlk = Kokkos::subview(X_, kslice, i);
572 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
581template <
typename Scalar,
typename ViewY,
typename ViewD,
typename ViewX>
582inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
583createBlockWiseMultiply (
const int block_size,
const Scalar& alpha,
584 const ViewY& Y,
const ViewD& D,
const ViewX& X) {
585 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
588template <
typename ViewY,
592 typename LO =
typename ViewY::size_type>
593class BlockJacobiUpdate {
595 typedef typename ViewD::device_type Device;
596 typedef typename ViewD::non_const_value_type ImplScalar;
597 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
599 template <
typename ViewType>
600 using UnmanagedView = Kokkos::View<
typename ViewType::data_type,
601 typename ViewType::array_layout,
602 typename ViewType::device_type,
604 typedef UnmanagedView<ViewY> UnMViewY;
605 typedef UnmanagedView<ViewD> UnMViewD;
606 typedef UnmanagedView<ViewZ> UnMViewZ;
616 BlockJacobiUpdate (
const ViewY& Y,
620 const Scalar& beta) :
621 blockSize_ (D.extent (1)),
629 static_assert (
static_cast<int> (ViewY::rank) == 1,
630 "Y must have rank 1.");
631 static_assert (
static_cast<int> (ViewD::rank) == 3,
"D must have rank 3.");
632 static_assert (
static_cast<int> (ViewZ::rank) == 1,
633 "Z must have rank 1.");
639 KOKKOS_INLINE_FUNCTION
void
640 operator() (
const LO& k)
const
643 using Kokkos::subview;
644 typedef Kokkos::pair<LO, LO> range_type;
645 typedef Kokkos::ArithTraits<Scalar> KAT;
649 auto D_curBlk = subview (D_, k, ALL (), ALL ());
650 const range_type kslice (k*blockSize_, (k+1)*blockSize_);
654 auto Z_curBlk = subview (Z_, kslice);
655 auto Y_curBlk = subview (Y_, kslice);
657 if (beta_ == KAT::zero ()) {
660 else if (beta_ != KAT::one ()) {
671 class LO =
typename ViewD::size_type>
673blockJacobiUpdate (
const ViewY& Y,
679 static_assert (Kokkos::is_view<ViewY>::value,
"Y must be a Kokkos::View.");
680 static_assert (Kokkos::is_view<ViewD>::value,
"D must be a Kokkos::View.");
681 static_assert (Kokkos::is_view<ViewZ>::value,
"Z must be a Kokkos::View.");
682 static_assert (
static_cast<int> (ViewY::rank) ==
static_cast<int> (ViewZ::rank),
683 "Y and Z must have the same rank.");
684 static_assert (
static_cast<int> (ViewD::rank) == 3,
"D must have rank 3.");
686 const auto lclNumMeshRows = D.extent (0);
688#ifdef HAVE_TPETRA_DEBUG
692 const auto blkSize = D.extent (1);
693 const auto lclNumPtRows = lclNumMeshRows * blkSize;
694 TEUCHOS_TEST_FOR_EXCEPTION
695 (Y.extent (0) != lclNumPtRows, std::invalid_argument,
696 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) <<
" != "
697 "D.extent(0)*D.extent(1) = " << lclNumMeshRows <<
" * " << blkSize
698 <<
" = " << lclNumPtRows <<
".");
699 TEUCHOS_TEST_FOR_EXCEPTION
700 (Y.extent (0) != Z.extent (0), std::invalid_argument,
701 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) <<
" != "
702 "Z.extent(0) = " << Z.extent (0) <<
".");
703 TEUCHOS_TEST_FOR_EXCEPTION
704 (Y.extent (1) != Z.extent (1), std::invalid_argument,
705 "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) <<
" != "
706 "Z.extent(1) = " << Z.extent (1) <<
".");
709 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
710 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
712 range_type range (0,
static_cast<LO
> (lclNumMeshRows));
713 Kokkos::parallel_for (range, functor);
718template<
class Scalar,
class LO,
class GO,
class Node>
726 typedef typename device_type::execution_space
exec_space;
729 if (
alpha == STS::zero ()) {
730 this->putScalar (STS::zero ());
733 const LO
blockSize = this->getBlockSize ();
735 auto X_lcl =
X.mv_.getLocalViewDevice (Access::ReadOnly);
736 auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
749template<
class Scalar,
class LO,
class GO,
class Node>
759 using Kokkos::subview;
764 const LO
numVecs = mv_.getNumVectors ();
766 if (
alpha == STS::zero ()) {
770 Z.update (STS::one (),
X, -STS::one ());
772 auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
773 auto Z_lcl =
Z.mv_.getLocalViewDevice (Access::ReadWrite);
788#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
789 template class BlockMultiVector< S, LO, GO, NODE >;
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
const mv_type & getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
BlockMultiVector()
Default constructor.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
LO local_ordinal_type
The type of local indices.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Struct that holds views of the contents of a CrsMatrix.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Abstract base class for objects that can be the source of an Import or Export operation.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
CombineMode
Rule for combining data in an Import or Export.