10#ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
11#define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
13#include "TpetraCore_config.h"
14#include "Teuchos_Array.hpp"
15#include "Teuchos_ArrayView.hpp"
59namespace PackCrsMatrixImpl {
67template<
class OutputOffsetsViewType,
69 class InputOffsetsViewType,
70 class InputLocalRowIndicesViewType,
71 class InputLocalRowPidsViewType,
73#ifdef HAVE_TPETRA_DEBUG
// Element types of the view template parameters, with any const qualifier removed.
81 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
82 typedef typename CountsViewType::non_const_value_type count_type;
83 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
84 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
85 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The output offsets view determines the device type used by this functor.
87 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time requirements on the template parameters.  For the two output
// views, "value_type equals non_const_value_type" is how nonconst-ness is checked.
88 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
89 typename device_type::execution_space>::value,
90 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
91 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
92 "OutputOffsetsViewType must be a Kokkos::View.");
93 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
94 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
95 static_assert (std::is_integral<output_offset_type>::value,
96 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
97 static_assert (Kokkos::is_view<CountsViewType>::value,
98 "CountsViewType must be a Kokkos::View.");
// Compare against count_type (CountsViewType's own nonconst value type), not
// output_offset_type: the check is about constness of CountsViewType, and the
// two views are not otherwise required to share an element type.
99 static_assert (std::is_same<typename CountsViewType::value_type, count_type>::value,
100 "CountsViewType must be a nonconst Kokkos::View.");
101 static_assert (std::is_integral<count_type>::value,
102 "The type of each entry of CountsViewType must be a built-in integer type.");
103 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
104 "InputOffsetsViewType must be a Kokkos::View.");
105 static_assert (std::is_integral<input_offset_type>::value,
106 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
107 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
108 "InputLocalRowIndicesViewType must be a Kokkos::View.");
109 static_assert (std::is_integral<local_row_index_type>::value,
110 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
133 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
135 if (
numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
136 std::ostringstream
os;
138 <<
" != counts.extent(0) = " << counts_.extent (0)
143 static_cast<size_t> (outputOffsets_.extent (0))) {
144 std::ostringstream
os;
146 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
155 output_offset_type& update,
156 const bool final)
const
159 if (
curInd <
static_cast<local_row_index_type
> (0)) {
167 if (
curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
172 outputOffsets_(
curInd) = update;
175 if (
curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
177 if (
static_cast<size_t> (
lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
178 static_cast<local_row_index_type
> (
lclRow) <
static_cast<local_row_index_type
> (0)) {
186 const count_type
count =
187 static_cast<count_type
> (rowOffsets_(
lclRow+1) - rowOffsets_(
lclRow));
194 static_cast<count_type
> (0) :
195 sizeOfLclCount_ +
count * (sizeOfGblColInd_ +
196 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
212 auto error_h = Kokkos::create_mirror_view (error_);
217 Kokkos::deep_copy (
error_h, error_);
// Row offsets of the local matrix; rowOffsets_(r+1) - rowOffsets_(r) is the
// number of entries in local row r.
224 typename InputOffsetsViewType::const_type rowOffsets_;
// Local indices of the rows to pack; its extent(0) is the number of rows to pack.
225 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
// Process IDs; when this view is non-empty, sizeOfPid_ bytes are added per
// matrix entry to the row's packed size.
226 typename InputLocalRowPidsViewType::const_type lclRowPids_;
// Byte sizes used to compute each row's packed size: the per-row entry count,
// and per entry the global column index and (optionally) the process ID.
227 count_type sizeOfLclCount_;
228 count_type sizeOfGblColInd_;
229 count_type sizeOfPid_;
// NOTE(review): presumably the per-entry byte size of a matrix value; the line
// that uses it is not visible in this excerpt -- confirm.
230 count_type sizeOfValue_;
// Single-int error flag on device_type; it is copied to a host mirror to be read.
231 Kokkos::View<int, device_type> error_;
248typename CountsViewType::non_const_value_type
254 const typename CountsViewType::non_const_value_type
sizeOfLclCount,
256 const typename CountsViewType::non_const_value_type
sizeOfPid,
257 const typename CountsViewType::non_const_value_type
sizeOfValue)
261 typename InputLocalRowIndicesViewType::const_type,
262 typename InputLocalRowPidsViewType::const_type>
functor_type;
263 typedef typename CountsViewType::non_const_value_type count_type;
264 typedef typename OutputOffsetsViewType::size_type size_type;
265 typedef typename OutputOffsetsViewType::execution_space execution_space;
266 typedef typename functor_type::local_row_index_type LO;
267 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
268 const char prefix[] =
"computeNumPacketsAndOffsets: ";
270 count_type
count = 0;
278 (
rowOffsets.extent (0) <=
static_cast<size_type
> (1),
279 std::invalid_argument,
prefix <<
"There is at least one row to pack, "
280 "but the matrix has no rows. lclRowInds.extent(0) = " <<
285 static_cast<size_type
> (
numRowsToPack + 1), std::invalid_argument,
286 prefix <<
"Output dimension does not match number of rows to pack. "
288 <<
" != lclRowInds.extent(0) + 1 = "
303 (
errCode != 0, std::runtime_error,
prefix <<
"parallel_scan error code "
313 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
319 <<
total <<
"." << std::endl;
323 os <<
"outputOffsets: [";
330 os <<
"]" << std::endl;
338 os <<
"]" << std::endl;
352 using Tpetra::Details::getEntryOnHost;
353 return static_cast<count_type
> (getEntryOnHost (
outputOffsets,
373template<
class ST,
class ColumnMap,
class BufferDeviceType>
375Kokkos::pair<int, size_t>
377 const Kokkos::View<char*, BufferDeviceType>& exports,
383 const size_t num_bytes_per_value,
384 const bool pack_pids)
386 using Kokkos::subview;
387 using LO =
typename ColumnMap::local_ordinal_type;
388 using GO =
typename ColumnMap::global_ordinal_type;
406 static_cast<size_t> (0);
438 error_code +=
p.first;
442 if (error_code != 0) {
454template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
455struct PackCrsMatrixFunctor {
458 typedef typename local_matrix_device_type::value_type ST;
461 typedef typename local_matrix_device_type::device_type DT;
463 typedef Kokkos::View<const size_t*, BufferDeviceType>
464 num_packets_per_lid_view_type;
465 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
466 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
470 typedef typename num_packets_per_lid_view_type::non_const_value_type
472 typedef typename offsets_view_type::non_const_value_type
474 typedef Kokkos::pair<int, LO> value_type;
476 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
477 "local_map_type::local_ordinal_type and "
478 "local_matrix_device_type::ordinal_type must be the same.");
480 local_matrix_device_type local_matrix;
481 local_map_type local_col_map;
482 exports_view_type exports;
483 num_packets_per_lid_view_type num_packets_per_lid;
484 export_lids_view_type export_lids;
485 source_pids_view_type source_pids;
486 offsets_view_type offsets;
487 size_t num_bytes_per_value;
511 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
514 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
520 using ::Tpetra::Details::OrdinalTraits;
524 KOKKOS_INLINE_FUNCTION
void
525 join (value_type& dst,
const value_type& src)
const
529 if (src.first != 0 && dst.first == 0) {
// Reduction body: packs the row export_lids[i] into exports starting at
// offsets[i], and records a failure in dst as (error code, i).
// Error codes set here: 1 = export_lid is past the last local row,
// 2 = the row's byte range does not fit in exports, 3 = the number of bytes
// packed differs from num_packets_per_lid(i); any other nonzero code is the
// one returned by packCrsMatrixRow.
// dst.first == 0 means "no error recorded yet" (the same convention join
// uses), so an error is stored only while dst.first is still 0; this keeps
// the first error instead of discarding it.
534 KOKKOS_INLINE_FUNCTION
535 void operator() (
const LO i, value_type& dst)
const
537 const size_t offset = offsets[i];
538 const LO export_lid = export_lids[i];
539 const size_t buf_size = exports.size();
540 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map is indexed with export_lid here, before export_lid is
// range-checked against numRows() below -- an invalid export_lid is read out
// of bounds before it is reported.  Consider validating first.
541 const size_t num_ent =
542 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
543 - local_matrix.graph.row_map[export_lid]);
553 if (export_lid >= local_matrix.numRows ()) {
// Record only if no earlier error is already stored.
554 if (dst.first == 0) {
555 dst = Kokkos::make_pair (1, i);
559 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
560 if (dst.first == 0) {
561 dst = Kokkos::make_pair (2, i);
// Entry range of this row in the matrix's values and column-index arrays.
571 const auto row_beg = local_matrix.graph.row_map[export_lid];
572 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
573 auto vals_in = subview (local_matrix.values,
574 Kokkos::make_pair (row_beg, row_end));
575 auto lids_in = subview (local_matrix.graph.entries,
576 Kokkos::make_pair (row_beg, row_end));
577 typedef local_map_type LMT;
578 typedef BufferDeviceType BDT;
579 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
580 source_pids, vals_in, offset,
581 num_ent, num_bytes_per_value,
// p is (error code, number of bytes packed) for this row.
583 int error_code_this_row = p.first;
584 size_t num_bytes_packed_this_row = p.second;
585 if (error_code_this_row != 0) {
586 if (dst.first == 0) {
587 dst = Kokkos::make_pair (error_code_this_row, i);
590 else if (num_bytes_packed_this_row != num_bytes) {
591 if (dst.first == 0) {
592 dst = Kokkos::make_pair (3, i);
605template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
609 const Kokkos::View<char*, BufferDeviceType>& exports,
613 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
614 const size_t num_bytes_per_value,
615 const bool pack_pids)
618 using DT =
typename LocalMatrix::device_type;
619 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
620 const char prefix[] =
"Tpetra::Details::do_pack: ";
622 if (export_lids.extent (0) != 0) {
624 (
static_cast<size_t> (offsets.extent (0)) !=
625 static_cast<size_t> (export_lids.extent (0) + 1),
626 std::invalid_argument,
prefix <<
"offsets.extent(0) = "
627 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
628 << export_lids.extent (0) <<
") + 1.");
630 (export_lids.extent (0) != num_packets_per_lid.extent (0),
631 std::invalid_argument,
prefix <<
"export_lids.extent(0) = " <<
632 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
633 << num_packets_per_lid.extent (0) <<
".");
638 (pack_pids && exports.extent (0) != 0 &&
639 source_pids.extent (0) == 0, std::invalid_argument,
prefix <<
640 "pack_pids is true, and exports.extent(0) = " <<
641 exports.extent (0) <<
" != 0, meaning that we need to pack at "
642 "least one matrix entry, but source_pids.extent(0) = 0.");
648 num_packets_per_lid, export_lids,
649 source_pids, offsets, num_bytes_per_value,
652 typename pack_functor_type::value_type
result;
653 range_type
range (0, num_packets_per_lid.extent (0));
660 (
true, std::runtime_error,
prefix <<
"PackCrsMatrixFunctor "
661 "reported error code " <<
result.first <<
" for the first "
662 "bad row " <<
result.second <<
".");
695template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
698 Kokkos::DualView<char*, BufferDeviceType>& exports,
699 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
700 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
701 const Kokkos::View<const int*, typename NT::device_type>&
export_pids,
703 const bool pack_pids)
706 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
711 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
712 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
713 constexpr bool debug =
false;
715 auto local_matrix =
sourceMatrix.getLocalMatrixDevice ();
716 auto local_col_map =
sourceMatrix.getColMap ()->getLocalMap ();
724 static_cast<size_t> (export_lids.extent (0));
727 static_cast<size_t> (num_packets_per_lid.extent (0)),
728 std::invalid_argument,
prefix <<
"num_export_lids.extent(0) = "
730 << num_packets_per_lid.extent (0) <<
".");
733 (num_packets_per_lid.data () ==
NULL, std::invalid_argument,
735 "num_packets_per_lid.data() = "
736 << num_packets_per_lid.data () <<
" == NULL.");
743 size_t num_bytes_per_value = 0;
759 if (local_matrix.values.extent(0) > 0) {
760 const ST&
val = local_matrix.values(0);
763 using Teuchos::reduceAll;
767 Teuchos::outArg (num_bytes_per_value));
771 exports = exports_view_type (
"exports", 0);
781 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
782 local_matrix.graph.row_map, export_lids,
788 if (
count >
static_cast<size_t> (exports.extent (0))) {
789 exports = exports_view_type (
"exports",
count);
791 std::ostringstream
os;
792 os <<
"*** exports resized to " <<
count << std::endl;
793 std::cerr <<
os.str ();
797 std::ostringstream
os;
798 os <<
"*** count: " <<
count <<
", exports.extent(0): "
799 << exports.extent (0) << std::endl;
800 std::cerr <<
os.str ();
807 (pack_pids && exports.extent (0) != 0 &&
809 "pack_pids is true, and exports.extent(0) = " <<
810 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
811 "one matrix entry, but export_pids.extent(0) = 0.");
813 typedef typename std::decay<
decltype (local_matrix)>::type
814 local_matrix_device_type;
815 typedef typename std::decay<
decltype (local_col_map)>::type
818 exports.modify_device ();
821 (local_matrix, local_col_map,
exports_d, num_packets_per_lid,
822 export_lids,
export_pids, offsets, num_bytes_per_value,
829template<
typename ST,
typename LO,
typename GO,
typename NT>
832 Teuchos::Array<char>& exports,
834 const Teuchos::ArrayView<const LO>&
exportLIDs,
840 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
842 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
852 "num_packets_per_lid");
866 Kokkos::DualView<char*, buffer_device_type>
exports_dv;
867 constexpr bool pack_pids =
false;
868 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
885 if (
static_cast<size_t> (exports.size ()) !=
886 static_cast<size_t> (
exports_dv.extent (0))) {
889 Kokkos::View<char*, host_dev_type>
exports_h (exports.getRawPtr (),
895template<
typename ST,
typename LO,
typename GO,
typename NT>
908 Kokkos::View<int*, device_type>
exportPIDs_d (
"exportPIDs", 0);
909 constexpr bool pack_pids =
false;
922 "Tpetra::Details::packCrsMatrixNew",
925 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
930template<
typename ST,
typename LO,
typename GO,
typename NT>
935 const Teuchos::ArrayView<const LO>&
exportLIDs,
936 const Teuchos::ArrayView<const int>&
sourcePIDs,
941 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
942 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace>
host_dev_type;
944 typename local_matrix_device_type::device_type
outputDevice;
945 typedef typename NT::execution_space execution_space;
949 std::unique_ptr<std::string>
prefix;
951 const int myRank = [&] () {
953 if (
map.get () ==
nullptr) {
956 auto comm =
map->getComm ();
957 if (comm.get () ==
nullptr) {
960 return comm->getRank ();
962 std::ostringstream
os;
963 os <<
"Proc " <<
myRank <<
": packCrsMatrixWithOwningPIDs: ";
964 prefix = std::unique_ptr<std::string> (
new std::string (
os.str ()));
966 std::ostringstream
os2;
968 std::cerr <<
os2.str ();
979 "num_packets_per_lid");
995 constexpr bool pack_pids =
true;
997 PackCrsMatrixImpl::packCrsMatrix
1001 catch (std::exception&
e) {
1003 std::ostringstream
os;
1004 os << *
prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1005 <<
e.what () << std::endl;
1006 std::cerr <<
os.str ();
1012 std::ostringstream
os;
1013 os << *
prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1014 "not a subclass of std::exception" << std::endl;
1015 std::cerr <<
os.str ();
1029 catch (std::exception&
e) {
1031 std::ostringstream
os;
1032 os << *
prefix <<
"Kokkos::deep_copy threw: " <<
e.what () << std::endl;
1033 std::cerr <<
os.str ();
1039 std::ostringstream
os;
1040 os << *
prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1041 "of std::exception" << std::endl;
1042 std::cerr <<
os.str ();
1049 std::ostringstream
os;
1050 os << *
prefix <<
"done" << std::endl;
1051 std::cerr <<
os.str ();
1058#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1060 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1061 Teuchos::Array<char>&, \
1062 const Teuchos::ArrayView<size_t>&, \
1063 const Teuchos::ArrayView<const LO>&, \
1066 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1067 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1068 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1069 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1072 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1073 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1074 const Teuchos::ArrayView<size_t>&, \
1075 const Teuchos::ArrayView<const LO>&, \
1076 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Struct that holds views of the contents of a CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Implementation details of Tpetra.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject interface.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of bytes.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.