10#ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
11#define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
13#include "TpetraCore_config.h"
14#include "Teuchos_Array.hpp"
15#include "Teuchos_ArrayView.hpp"
55namespace PackCrsGraphImpl {
63template<
class OutputOffsetsViewType,
65 class InputOffsetsViewType,
66 class InputLocalRowIndicesViewType,
67 class InputLocalRowPidsViewType,
69#ifdef HAVE_TPETRA_DEBUG
77 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
78 typedef typename CountsViewType::non_const_value_type count_type;
79 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
80 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
81 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
83 typedef typename OutputOffsetsViewType::device_type device_type;
84 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
85 typename device_type::execution_space>::value,
86 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
87 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
88 "OutputOffsetsViewType must be a Kokkos::View.");
89 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
90 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
91 static_assert (std::is_integral<output_offset_type>::value,
92 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
93 static_assert (Kokkos::is_view<CountsViewType>::value,
94 "CountsViewType must be a Kokkos::View.");
95 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
96 "CountsViewType must be a nonconst Kokkos::View.");
97 static_assert (std::is_integral<count_type>::value,
98 "The type of each entry of CountsViewType must be a built-in integer type.");
99 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
100 "InputOffsetsViewType must be a Kokkos::View.");
101 static_assert (std::is_integral<input_offset_type>::value,
102 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
103 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
104 "InputLocalRowIndicesViewType must be a Kokkos::View.");
105 static_assert (std::is_integral<local_row_index_type>::value,
106 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
121 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
123 if (
numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
124 std::ostringstream
os;
126 <<
" != counts.extent(0) = " << counts_.extent (0)
131 static_cast<size_t> (outputOffsets_.extent (0))) {
132 std::ostringstream
os;
134 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
143 output_offset_type& update,
144 const bool final)
const
147 if (
curInd <
static_cast<local_row_index_type
> (0)) {
155 if (
curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
160 outputOffsets_(
curInd) = update;
163 if (
curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
165 if (
static_cast<size_t> (
lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
166 static_cast<local_row_index_type
> (
lclRow) <
static_cast<local_row_index_type
> (0)) {
174 const count_type
count =
175 static_cast<count_type
> (rowOffsets_(
lclRow+1) - rowOffsets_(
lclRow));
180 ?
static_cast<count_type
>(0)
181 :
count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
196 auto error_h = Kokkos::create_mirror_view (error_);
201 Kokkos::deep_copy (
error_h, error_);
// Read-only row offsets of the source graph.  The functor differences
// rowOffsets_(lclRow+1) and rowOffsets_(lclRow) to get a row's entry count.
209 typename InputOffsetsViewType::const_type rowOffsets_;
// Read-only list of local row indices; its extent(0) is taken as the
// number of rows to pack.
210 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
// Read-only per-row PIDs (presumably owning process IDs -- confirm against
// the caller).  When this view is nonempty, a row's packet count is the
// entry count times 2 instead of times 1.
211 typename InputLocalRowPidsViewType::const_type lclRowPids_;
// Rank-0 int view holding the functor's error code.  It is read back
// through a host mirror (create_mirror_view + deep_copy) elsewhere in
// this functor.
212 Kokkos::View<int, device_type> error_;
229typename CountsViewType::non_const_value_type
238 typename InputLocalRowIndicesViewType::const_type,
239 typename InputLocalRowPidsViewType::const_type>
functor_type;
240 typedef typename CountsViewType::non_const_value_type count_type;
241 typedef typename OutputOffsetsViewType::size_type size_type;
242 typedef typename OutputOffsetsViewType::execution_space execution_space;
243 typedef typename functor_type::local_row_index_type LO;
244 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
245 const char prefix[] =
"computeNumPacketsAndOffsets: ";
247 count_type
count = 0;
255 (
rowOffsets.extent (0) <=
static_cast<size_type
> (1),
256 std::invalid_argument,
prefix <<
"There is at least one row to pack, "
257 "but the graph has no rows. lclRowInds.extent(0) = " <<
262 static_cast<size_type
> (
numRowsToPack + 1), std::invalid_argument,
263 prefix <<
"Output dimension does not match number of rows to pack. "
265 <<
" != lclRowInds.extent(0) + 1 = "
273 Kokkos::parallel_scan (
"Tpetra::Details::computeNumPacketsAndOffsets::scan", range_type (0,
numRowsToPack + 1),
f);
278 (
errCode != 0, std::runtime_error,
prefix <<
"parallel_scan error code "
288 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
294 <<
total <<
"." << std::endl;
298 os <<
"outputOffsets: [";
305 os <<
"]" << std::endl;
313 os <<
"]" << std::endl;
327 using Tpetra::Details::getEntryOnHost;
328 return static_cast<count_type
> (getEntryOnHost (
outputOffsets,
351 const Kokkos::View<Packet*, BufferDeviceType>& exports,
356 const bool pack_pids)
358 using LO =
typename LocalMapType::local_ordinal_type;
359 using GO =
typename LocalMapType::global_ordinal_type;
363 return static_cast<size_t>(0);
394struct PackCrsGraphFunctor {
400 using num_packets_per_lid_view_type =
401 Kokkos::View<const size_t*, BufferDeviceType>;
402 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
403 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
404 using export_lids_view_type =
406 using source_pids_view_type =
410 typename num_packets_per_lid_view_type::non_const_value_type;
// Element type of the offsets view with const removed.
411 using offset_type =
typename offsets_view_type::non_const_value_type;
// Reduction result: (error code, index i of the offending export entry).
// operator() stores codes 1, 2 and 3 via Kokkos::make_pair(code, i);
// join() tests `first` against 0 to decide whether an error is present.
412 using value_type = Kokkos::pair<int, LO>;
// The graph's column-index type must be the map's local ordinal type.
414 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
415 "local_map_type::local_ordinal_type and "
416 "local_graph_type::data_type must be the same.");
// Source graph; operator() reads its row_map, entries and numRows().
418 local_graph_type local_graph;
// Local column map, forwarded to packRow() as its col_map argument.
419 local_map_type local_col_map;
// Output buffer that packRow() fills; its size() bounds the offset check.
420 exports_view_type exports;
// Expected packet count per export entry, compared against what packRow()
// reports for that row.
421 num_packets_per_lid_view_type num_packets_per_lid;
// Local row index to pack for each export entry i.
422 export_lids_view_type export_lids;
// PIDs forwarded to packRow() as its pids_in argument.
423 source_pids_view_type source_pids;
// Per-entry starting position in `exports`, forwarded to packRow().
424 offsets_view_type offsets;
446 static_cast<LO
> (local_graph.row_map.extent (0));
449 std::logic_error,
"local_graph.row_map.extent(0) = "
455 using ::Tpetra::Details::OrdinalTraits;
459 KOKKOS_INLINE_FUNCTION
void
460 join (value_type& dst,
const value_type& src)
const
464 if (src.first != 0 && dst.first == 0) {
469 KOKKOS_INLINE_FUNCTION
470 void operator() (
const LO i, value_type& dst)
const
// Where this entry's data starts in the export buffer (passed to packRow
// as `offset` and range-checked against buf_size below).
472 const size_t offset = offsets[i];
// Local row of the graph that export entry i refers to.
473 const LO export_lid = export_lids[i];
// Total capacity of the export buffer, used for the bounds check.
474 const size_t buf_size = exports.size();
// Packet count the caller expects this row to produce.
475 const size_t num_packets_this_lid = num_packets_per_lid(i);
// Number of entries in row export_lid, from consecutive row offsets.
// NOTE(review): row_map is indexed with export_lid (and export_lid+1)
// here, but the comparison of export_lid against local_graph.numRows()
// only appears later in this function.  An out-of-range export_lid would
// therefore be used as an index before it is rejected -- confirm, and
// consider computing num_ent after that check.
476 const size_t num_ent =
477 static_cast<size_t> (local_graph.row_map[export_lid+1]
478 - local_graph.row_map[export_lid]);
488 if (export_lid >=
static_cast<LO
>(local_graph.numRows())) {
489 if (dst.first != 0) {
490 dst = Kokkos::make_pair (1, i);
494 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
495 if (dst.first != 0) {
496 dst = Kokkos::make_pair (2, i);
506 const auto row_beg = local_graph.row_map[export_lid];
507 const auto row_end = local_graph.row_map[export_lid + 1];
508 auto lids_in = Kokkos::subview (local_graph.entries,
509 Kokkos::make_pair (row_beg, row_end));
510 size_t num_ent_packed_this_row =
511 packRow (local_col_map, exports, lids_in,
512 source_pids, offset, num_ent, pack_pids);
513 if (num_ent_packed_this_row != num_packets_this_lid) {
514 if (dst.first != 0) {
515 dst = Kokkos::make_pair (3, i);
528template<
class Packet,
531 class BufferDeviceType>
535 const Kokkos::View<Packet*, BufferDeviceType>& exports,
538 >::input_array_type& num_packets_per_lid,
541 >::input_array_type& export_lids,
544 >::input_array_type& source_pids,
545 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
546 const bool pack_pids)
549 using execution_space =
typename LocalGraph::device_type::execution_space;
550 using range_type = Kokkos::RangePolicy<execution_space, LO>;
551 const char prefix[] =
"Tpetra::Details::PackCrsGraphImpl::do_pack: ";
553 if (export_lids.extent (0) != 0) {
555 (
static_cast<size_t> (offsets.extent (0)) !=
556 static_cast<size_t> (export_lids.extent (0) + 1),
557 std::invalid_argument,
prefix <<
"offsets.extent(0) = "
558 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
559 << export_lids.extent (0) <<
") + 1.");
561 (export_lids.extent (0) != num_packets_per_lid.extent (0),
562 std::invalid_argument,
prefix <<
"export_lids.extent(0) = " <<
563 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
564 << num_packets_per_lid.extent (0) <<
".");
569 (pack_pids && exports.extent (0) != 0 &&
570 source_pids.extent (0) == 0, std::invalid_argument,
prefix <<
571 "pack_pids is true, and exports.extent(0) = " <<
572 exports.extent (0) <<
" != 0, meaning that we need to pack at "
573 "least one graph entry, but source_pids.extent(0) = 0.");
580 num_packets_per_lid, export_lids,
581 source_pids, offsets, pack_pids);
583 typename pack_functor_type::value_type
result;
584 range_type
range (0, num_packets_per_lid.extent (0));
585 Kokkos::parallel_reduce (
"Tpetra::Details::computeNumPacketsAndOffsets::reduce",
range,
f,
result);
590 std::ostringstream
os;
592 os <<
"invalid local row index";
594 else if (
result.first == 2) {
595 os <<
"invalid offset";
598 (
true, std::runtime_error,
prefix <<
"PackCrsGraphFunctor "
599 "reported error code " <<
result.first <<
" (" <<
os.str ()
600 <<
") for the first bad row " <<
result.second <<
".");
630template<
typename LO,
typename GO,
typename NT>
641 >& num_packets_per_lid,
651 const bool pack_pids)
655 using packet_type =
typename crs_graph_type::packet_type;
656 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
657 using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
658 using local_graph_device_type =
typename crs_graph_type::local_graph_device_type;
660 const char prefix[] =
"Tpetra::Details::packCrsGraph: ";
661 constexpr bool debug =
false;
663 local_graph_device_type local_graph =
sourceGraph.getLocalGraphDevice ();
664 local_map_type local_col_map =
sourceGraph.getColMap ()->getLocalMap ();
674 std::invalid_argument,
prefix <<
"num_export_lids.extent(0) = "
676 << num_packets_per_lid.extent (0) <<
".");
679 (num_packets_per_lid.data () ==
nullptr, std::invalid_argument,
681 "num_packets_per_lid.data() = "
682 << num_packets_per_lid.data () <<
" == NULL.");
686 exports = exports_view_type (
"exports", 0);
700 if (
count >
size_t (exports.extent (0))) {
701 exports = exports_view_type (
"exports",
count);
703 std::ostringstream
os;
704 os <<
"*** exports resized to " <<
count << std::endl;
705 std::cerr <<
os.str ();
709 std::ostringstream
os;
710 os <<
"*** count: " <<
count <<
", exports.extent(0): "
711 << exports.extent (0) << std::endl;
712 std::cerr <<
os.str ();
719 (pack_pids && exports.extent (0) != 0 &&
721 "pack_pids is true, and exports.extent(0) = " <<
722 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
723 "one graph entry, but export_pids.extent(0) = 0.");
725 exports.modify_device ();
728 (local_graph, local_col_map,
exports_d, num_packets_per_lid,
735template<
typename LO,
typename GO,
typename NT>
740 const Teuchos::ArrayView<const LO>&
exportLIDs,
743 using Kokkos::HostSpace;
744 using Kokkos::MemoryUnmanaged;
747 using packet_type =
typename crs_graph_type::packet_type;
748 using BDT =
typename crs_graph_type::buffer_device_type;
759 "num_packets_per_lid");
768 Kokkos::DualView<packet_type*, BDT>
exports_dv;
769 constexpr bool pack_pids =
false;
775 "num_packets_per_lid_d's non_const_value_type should be size_t.");
780 "num_packets_per_lid_d's BDT should be size_t.");
785 "export_lids_d's device_type should be BDT.");
790 "export_pids_d's non_const_value_type should be int.");
795 "export_pids_d's device_type should be BDT.");
797 PackCrsGraphImpl::packCrsGraph
808 using execution_space =
typename BDT::execution_space;
816 if (
static_cast<size_t> (exports.size ()) !=
817 static_cast<size_t> (
exports_dv.extent (0))) {
821 exports_h (exports.getRawPtr (), exports.size ());
824 execution_space().fence();
829template<
typename LO,
typename GO,
typename NT>
832 const Kokkos::DualView<
836 const Kokkos::DualView<
846 > num_packets_per_lid,
848 const bool pack_pids)
852 using BDT =
typename crs_graph_type::buffer_device_type;
853 using PT =
typename crs_graph_type::packet_type;
855 using LGT =
typename crs_graph_type::local_graph_device_type;
856 using LMT =
typename crs_graph_type::map_type::local_map_type;
857 const char prefix[] =
"Tpetra::Details::packCrsGraphNew: ";
860 const LMT local_col_map =
sourceGraph.getColMap ()->getLocalMap ();
868 static_cast<size_t> (export_lids.extent (0));
871 static_cast<size_t> (num_packets_per_lid.extent (0)),
872 std::invalid_argument,
prefix <<
"num_export_lids.extent(0) = "
874 << num_packets_per_lid.extent (0) <<
".");
877 num_packets_per_lid.view_device ().data () ==
nullptr,
879 <<
" != 0, but num_packets_per_lid.view_device().data() = nullptr.");
887 using offsets_type = Kokkos::View<size_t*, BDT>;
892 num_packets_per_lid.clear_sync_state ();
893 num_packets_per_lid.modify_device ();
894 using PackCrsGraphImpl::computeNumPacketsAndOffsets;
896 computeNumPacketsAndOffsets (offsets, num_packets_per_lid.view_device (),
898 export_lids.view_device (),
902 if (
count >
static_cast<size_t> (exports.extent (0))) {
910 (pack_pids && exports.extent (0) != 0 &&
912 "pack_pids is true, and exports.extent(0) = " <<
913 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
914 "one graph entry, but export_pids.extent(0) = 0.");
916 exports.modify_device ();
917 using PackCrsGraphImpl::do_pack;
919 exports.view_device (),
920 num_packets_per_lid.view_device (),
921 export_lids.view_device (),
926template<
typename LO,
typename GO,
typename NT>
935 const Teuchos::ArrayView<const LO>&
exportLIDs,
936 const Teuchos::ArrayView<const int>&
sourcePIDs,
939 using Kokkos::HostSpace;
940 using Kokkos::MemoryUnmanaged;
943 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
953 "num_packets_per_lid");
969 constexpr bool pack_pids =
true;
970 PackCrsGraphImpl::packCrsGraph
979 using execution_space =
typename buffer_device_type::execution_space;
980 Kokkos::deep_copy (execution_space(),
982 execution_space().fence();
988#define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
990 Details::packCrsGraph<LO, GO, NT> ( \
991 const CrsGraph<LO, GO, NT>&, \
992 Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
993 const Teuchos::ArrayView<size_t>&, \
994 const Teuchos::ArrayView<const LO>&, \
997 Details::packCrsGraphNew<LO, GO, NT> ( \
998 const CrsGraph<LO, GO, NT>&, \
999 const Kokkos::DualView< \
1001 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1002 const Kokkos::DualView< \
1004 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1006 CrsGraph<LO,GO,NT>::packet_type*, \
1007 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1010 CrsGraph<LO,GO,NT>::buffer_device_type>, \
1014 Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
1015 const CrsGraph<LO, GO, NT>&, \
1016 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1017 const Teuchos::ArrayView<size_t>&, \
1018 const Teuchos::ArrayView<const LO>&, \
1019 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsGraph class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
CountsViewType::non_const_value_type computeNumPacketsAndOffsets(const OutputOffsetsViewType &outputOffsets, const CountsViewType &counts, const InputOffsetsViewType &rowOffsets, const InputLocalRowIndicesViewType &lclRowInds, const InputLocalRowPidsViewType &lclRowPids)
Compute the number of packets and offsets for the pack procedure.
void do_pack(const LocalGraph &local_graph, const LocalMap &local_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const typename PackTraits< size_t >::input_array_type &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type &export_lids, const typename PackTraits< int >::input_array_type &source_pids, const Kokkos::View< const size_t *, BufferDeviceType > &offsets, const bool pack_pids)
Perform the pack operation for the graph.
KOKKOS_FUNCTION size_t packRow(const LocalMapType &col_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const InputLidsType &lids_in, const InputPidsType &pids_in, const size_t offset, const size_t num_ent, const bool pack_pids)
Packs a single row of the CrsGraph.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
Struct that holds views of the contents of a CrsMatrix.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Implementation details of Tpetra.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interface...
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.