Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockCrsMatrix_Helpers_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
11#define TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
12
14
15#include "Tpetra_BlockCrsMatrix.hpp"
16#include "Tpetra_CrsMatrix.hpp"
17#include "Tpetra_HashTable.hpp"
18#include "Tpetra_Import.hpp"
19#include "Tpetra_Map.hpp"
20#include "Tpetra_MultiVector.hpp"
21#include "Teuchos_ParameterList.hpp"
22#include "Teuchos_ScalarTraits.hpp"
23#include <ctime>
24#include <fstream>
25
26namespace Tpetra {
27
28 template<class Scalar, class LO, class GO, class Node>
30 Teuchos::ParameterList pl;
31 std::ofstream out;
32 out.open(fileName.c_str());
34 }
35
36 template<class Scalar, class LO, class GO, class Node>
37 void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName, Teuchos::ParameterList const &params) {
38 std::ofstream out;
39 out.open(fileName.c_str());
41 }
42
43 template<class Scalar, class LO, class GO, class Node>
45 Teuchos::ParameterList pl;
47 }
48
49 template<class Scalar, class LO, class GO, class Node>
50 void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
51
52 using Teuchos::RCP;
53 using Teuchos::rcp;
54
55 typedef Teuchos::OrdinalTraits<GO> TOT;
56 typedef BlockCrsMatrix<Scalar, LO, GO, Node> block_crs_matrix_type;
57 typedef Tpetra::Import<LO, GO, Node> import_type;
58 typedef Tpetra::Map<LO, GO, Node> map_type;
60 typedef Tpetra::CrsGraph<LO, GO, Node> crs_graph_type;
61
62 RCP<const map_type> const rowMap = A.getRowMap(); //"mesh" map
63 RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
64 const int myRank = comm->getRank();
65 const size_t numProcs = comm->getSize();
66
67 // If true, force use of the import strip-mining infrastructure. This is useful for debugging on one process.
68 bool alwaysUseParallelAlgorithm = false;
69 if (params.isParameter("always use parallel algorithm"))
70 alwaysUseParallelAlgorithm = params.get<bool>("always use parallel algorithm");
71 bool printMatrixMarketHeader = true;
72 if (params.isParameter("print MatrixMarket header"))
73 printMatrixMarketHeader = params.get<bool>("print MatrixMarket header");
74
76 std::time_t now = std::time(NULL);
77
78 const std::string dataTypeStr =
79 Teuchos::ScalarTraits<Scalar>::isComplex ? "complex" : "real";
80
81 // Explanation of first line of file:
82 // - "%%MatrixMarket" is the tag for Matrix Market format.
83 // - "matrix" is what we're printing.
84 // - "coordinate" means sparse (triplet format), rather than dense.
85 // - "real" / "complex" is the type (in an output format sense,
86 // not in a C++ sense) of each value of the matrix. (The
87 // other two possibilities are "integer" (not really necessary
88 // here) and "pattern" (no values, just graph).)
89 os << "%%MatrixMarket matrix coordinate " << dataTypeStr << " general" << std::endl;
90 os << "% time stamp: " << ctime(&now);
91 os << "% written from " << numProcs << " processes" << std::endl;
92 os << "% point representation of Tpetra::BlockCrsMatrix" << std::endl;
93 size_t numRows = A.getGlobalNumRows();
94 size_t numCols = A.getGlobalNumCols();
95 os << "% " << numRows << " block rows, " << numCols << " block columns" << std::endl;
96 const LO blockSize = A.getBlockSize();
97 os << "% block size " << blockSize << std::endl;
98 os << numRows*blockSize << " " << numCols*blockSize << " " << A.getGlobalNumEntries()*blockSize*blockSize << std::endl;
99 }
100
103 } else {
104 size_t numRows = rowMap->getLocalNumElements();
105
106 //Create source map
107 RCP<const map_type> allMeshGidsMap = rcp(new map_type(TOT::invalid(), numRows, A.getIndexBase(), comm));
108 //Create and populate vector of mesh GIDs corresponding to this pid's rows.
109 //This vector will be imported one pid's worth of information at a time to pid 0.
110 mv_type allMeshGids(allMeshGidsMap,1);
111 Teuchos::ArrayRCP<GO> allMeshGidsData = allMeshGids.getDataNonConst(0);
112
113 for (size_t i=0; i<numRows; i++)
114 allMeshGidsData[i] = rowMap->getGlobalElement(i);
115 allMeshGidsData = Teuchos::null;
116
117 // Now construct a RowMatrix on PE 0 by strip-mining the rows of the input matrix A.
118 size_t stripSize = allMeshGids.getGlobalLength() / numProcs;
119 size_t remainder = allMeshGids.getGlobalLength() % numProcs;
120 size_t curStart = 0;
121 size_t curStripSize = 0;
122 Teuchos::Array<GO> importMeshGidList;
123 for (size_t i=0; i<numProcs; i++) {
124 if (myRank==0) { // Only PE 0 does this part
126 if (i<remainder) curStripSize++; // handle leftovers
127 importMeshGidList.resize(curStripSize); // Set size of vector to max needed
128 for (size_t j=0; j<curStripSize; j++) importMeshGidList[j] = j + curStart + A.getIndexBase();
130 }
131 // The following import map should be non-trivial only on PE 0.
133 std::runtime_error, "Tpetra::blockCrsMatrixWriter: (pid "
134 << myRank << ") map size should be zero, but is " << curStripSize);
135 RCP<map_type> importMeshGidMap = rcp(new map_type(TOT::invalid(), importMeshGidList(), A.getIndexBase(), comm));
139
140 // importMeshGids now has a list of GIDs for the current strip of matrix rows.
141 // Use these values to build another importer that will get rows of the matrix.
142
143 // The following import map will be non-trivial only on PE 0.
144 Teuchos::ArrayRCP<const GO> importMeshGidsData = importMeshGids.getData(0);
145 Teuchos::Array<GO> importMeshGidsGO;
146 importMeshGidsGO.reserve(importMeshGidsData.size());
147 for (typename Teuchos::ArrayRCP<const GO>::size_type j=0; j<importMeshGidsData.size(); ++j)
149 RCP<const map_type> importMap = rcp(new map_type(TOT::invalid(), importMeshGidsGO(), rowMap->getIndexBase(), comm) );
150
151 import_type importer(rowMap,importMap );
152 size_t numEntriesPerRow = A.getCrsGraph().getGlobalMaxNumRowEntries();
154 RCP<const map_type> domainMap = A.getCrsGraph().getDomainMap();
155 graph->doImport(A.getCrsGraph(), importer, INSERT);
156 graph->fillComplete(domainMap, importMap);
157
158 block_crs_matrix_type importA(*graph, A.getBlockSize());
159 importA.doImport(A, importer, INSERT);
160
161 // Finally we are ready to write this strip of the matrix
163 }
164 }
165 }
166
167 template<class Scalar, class LO, class GO, class Node>
168 void writeMatrixStrip(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
169 using Teuchos::RCP;
170 using map_type = Tpetra::Map<LO, GO, Node>;
172 using bcrs_local_inds_host_view_type = typename bcrs_type::local_inds_host_view_type;
173 using bcrs_values_host_view_type = typename bcrs_type::values_host_view_type;
174 using impl_scalar_type = typename bcrs_type::impl_scalar_type;
175
176 size_t numRows = A.getGlobalNumRows();
177 RCP<const map_type> rowMap = A.getRowMap();
178 RCP<const map_type> colMap = A.getColMap();
179 RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
180 const int myRank = comm->getRank();
181
182 const size_t meshRowOffset = rowMap->getIndexBase();
183 const size_t meshColOffset = colMap->getIndexBase();
185 std::runtime_error, "Tpetra::writeMatrixStrip: "
186 "mesh row index base != mesh column index base");
187
188 if (myRank !=0) {
189
190 TEUCHOS_TEST_FOR_EXCEPTION(A.getLocalNumRows() != 0,
191 std::runtime_error, "Tpetra::writeMatrixStrip: pid "
192 << myRank << " should have 0 rows but has " << A.getLocalNumRows());
193 TEUCHOS_TEST_FOR_EXCEPTION(A.getLocalNumCols() != 0,
194 std::runtime_error, "Tpetra::writeMatrixStrip: pid "
195 << myRank << " should have 0 columns but has " << A.getLocalNumCols());
196
197 } else {
198
199 TEUCHOS_TEST_FOR_EXCEPTION(numRows != A.getLocalNumRows(),
200 std::runtime_error, "Tpetra::writeMatrixStrip: "
201 "number of rows on pid 0 does not match global number of rows");
202
203
204 int err = 0;
205 const LO blockSize = A.getBlockSize();
206 const size_t numLocalRows = A.getLocalNumRows();
207 bool precisionChanged=false;
208 int oldPrecision = 0; // avoid "unused variable" warning
209 if (params.isParameter("precision")) {
210 oldPrecision = os.precision(params.get<int>("precision"));
211 precisionChanged=true;
212 }
213 int pointOffset = 1;
214 if (params.isParameter("zero-based indexing")) {
215 if (params.get<bool>("zero-based indexing") == true)
216 pointOffset = 0;
217 }
218
219 size_t localRowInd;
221
222 // Get a view of the current row.
225 LO numEntries;
226 A.getLocalRowView (localRowInd, localColInds, vals); numEntries = localColInds.extent(0);
227 GO globalMeshRowID = rowMap->getGlobalElement(localRowInd) - meshRowOffset;
228
229 for (LO k = 0; k < numEntries; ++k) {
230 GO globalMeshColID = colMap->getGlobalElement(localColInds[k]) - meshColOffset;
231 const impl_scalar_type* curBlock = vals.data() + blockSize * blockSize * k;
232 // Blocks are stored in row-major format.
233 for (LO j = 0; j < blockSize; ++j) {
235 for (LO i = 0; i < blockSize; ++i) {
237 const impl_scalar_type curVal = curBlock[i + j * blockSize];
238
239 os << globalPointRowID << " " << globalPointColID << " ";
240 if (Teuchos::ScalarTraits<impl_scalar_type>::isComplex) {
241 // Matrix Market format wants complex values to be
242 // written as space-delimited pairs. See Bug 6469.
244 << Teuchos::ScalarTraits<impl_scalar_type>::imag (curVal);
245 }
246 else {
247 os << curVal;
248 }
249 os << std::endl;
250 }
251 }
252 }
253 }
255 os.precision(oldPrecision);
257 std::runtime_error, "Tpetra::writeMatrixStrip: "
258 "error getting view of local row " << localRowInd);
259
260 }
261
262 }
263
264 template<class Scalar, class LO, class GO, class Node>
265 Teuchos::RCP<Tpetra::CrsGraph<LO, GO, Node> >
267 {
268 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph", getBlockCrsGraph0);
269 /*
270 ASSUMPTIONS:
271
272 1) In point matrix, all entries associated with a little block are present (even if they are zero).
273 2) For given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
274 3) Point column map and block column map are ordered consistently.
275 */
276
277 using Teuchos::Array;
278 using Teuchos::ArrayView;
279 using Teuchos::RCP;
280
281 typedef Tpetra::Map<LO,GO,Node> map_type;
282 typedef Tpetra::CrsGraph<LO,GO,Node> crs_graph_type;
283 typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
284
285 using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
286 using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
287 using entries_type = typename local_graph_device_type::entries_type::non_const_type;
288
289 using offset_type = typename row_map_type::non_const_value_type;
290
291 using execution_space = typename Node::execution_space;
292 using range_type = Kokkos::RangePolicy<execution_space, LO>;
293
294 const map_type &pointRowMap = *(pointMatrix.getRowMap());
296
297 const map_type &pointColMap = *(pointMatrix.getColMap());
298 const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
299 const map_type &pointRangeMap = *(pointMatrix.getRangeMap());
300
301 {
302 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::createMeshMaps", getBlockCrsGraph1);
307 Kokkos::DefaultExecutionSpace().fence();
308 }
309
310 if(meshColMap.is_null()) throw std::runtime_error("ERROR: Cannot create mesh colmap");
311
312 auto localMeshColMap = meshColMap->getLocalMap();
313 auto localPointColMap = pointColMap.getLocalMap();
314 auto localPointRowMap = pointRowMap.getLocalMap();
315
317
318 const offset_type bs2 = blockSize * blockSize;
319
320 if (use_LID) {
321 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::LID", getBlockCrsGraph2);
322 auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
323 auto pointRowptr = pointLocalGraph.row_map;
324 auto pointColind = pointLocalGraph.entries;
325
327 std::runtime_error, "Tpetra::getBlockCrsGraph: "
328 "local number of non zero entries is not a multiple of blockSize^2 ");
329
330 LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
331 row_map_type blockRowptr("blockRowptr", block_rows+1);
332 entries_type blockColind("blockColind", pointColind.extent(0)/(bs2));
333
334 Kokkos::parallel_for("fillMesh",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) {
335
336 const LO offset_b = pointRowptr(i*blockSize)/bs2;
337 const LO offset_b_max = pointRowptr((i+1)*blockSize)/bs2;
338
339 if (i==block_rows-1)
342
343 const LO offset_p = pointRowptr(i*blockSize);
344
345 for (LO k=0; k<offset_b_max-offset_b; ++k) {
347 }
348 });
349
352 Kokkos::DefaultExecutionSpace().fence();
353 }
354 else {
355 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::GID", getBlockCrsGraph3);
356 auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
357 auto pointRowptr = pointLocalGraph.row_map;
358 auto pointColind = pointLocalGraph.entries;
359
361 std::runtime_error, "Tpetra::getBlockCrsGraph: "
362 "local number of non zero entries is not a multiple of blockSize^2 ");
363
364 LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
365 row_map_type blockRowptr("blockRowptr", block_rows+1);
366 entries_type blockColind("blockColind", pointColind.extent(0)/(bs2));
367
368 Kokkos::parallel_for("fillMesh",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) {
369
370 const LO offset_b = pointRowptr(i*blockSize)/bs2;
371 const LO offset_b_max = pointRowptr((i+1)*blockSize)/bs2;
372
373 if (i==block_rows-1)
376
377 const LO offset_p = pointRowptr(i*blockSize);
378 const LO offset_p_max = pointRowptr((i+1)*blockSize);
379
380 LO filled_block = 0;
381 for (LO p_i=0; p_i<offset_p_max-offset_p; ++p_i) {
382 auto bcol_GID = localPointColMap.getGlobalElement(pointColind(offset_p + p_i))/blockSize;
383 auto bcol_LID = localMeshColMap.getLocalElement(bcol_GID);
384
385 bool visited = false;
386 for (LO k=0; k<filled_block; ++k) {
387 if (blockColind(offset_b + k) == bcol_LID)
388 visited = true;
389 }
390 if (!visited) {
392 ++filled_block;
393 }
394 }
395 });
396
399 Kokkos::DefaultExecutionSpace().fence();
400 }
401
402 return meshCrsGraph;
403 }
404
405 template<class Scalar, class LO, class GO, class Node>
406 Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >
408 {
409 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix", convertToBlockCrsMatrix0);
410 /*
411 ASSUMPTIONS:
412
413 1) In point matrix, all entries associated with a little block are present (even if they are zero).
414 2) For given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
415 3) Point column map and block column map are ordered consistently.
416 */
417
418 using Teuchos::Array;
419 using Teuchos::ArrayView;
420 using Teuchos::RCP;
421
422 typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
423 typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
424
425 using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
426 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
427 using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
428 using values_type = typename local_matrix_device_type::values_type::non_const_type;
429
430 using offset_type = typename row_map_type::non_const_value_type;
431
432 using execution_space = typename Node::execution_space;
433 using range_type = Kokkos::RangePolicy<execution_space, LO>;
434
436
437 const offset_type bs2 = blockSize * blockSize;
438
440
441 if (use_LID) {
442 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix::LID", convertToBlockCrsMatrix1);
443 auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
444 auto pointRowptr = pointLocalGraph.row_map;
445 auto pointColind = pointLocalGraph.entries;
446
447 offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
448 values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
449 auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
450 auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;
451
452 Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
453 const offset_type blkBeg = blockRowptr[i];
454 const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
455
456 // For each block in the row...
457 for (offset_type block=0; block < numBlocks; block++) {
458
459 // For each entry in the block...
465 }
466 }
467
468 }
469 });
470 blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
471 Kokkos::DefaultExecutionSpace().fence();
472 }
473 else {
474 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix::GID", convertToBlockCrsMatrix2);
475 auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
476 auto localPointColMap = pointMatrix.getColMap()->getLocalMap();
477
478 values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
479 {
480 auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
481 auto pointRowptr = pointLocalGraph.row_map;
482 auto pointColind = pointLocalGraph.entries;
483
484 offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
485 auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
486 auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;
487 auto blockColind = meshCrsGraph->getLocalGraphDevice().entries;
488
489 row_map_type pointGColind("pointGColind", pointColind.extent(0));
490
491 Kokkos::parallel_for("computePointGColind",range_type(0,pointColind.extent(0)),KOKKOS_LAMBDA(const LO i) {
492 pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i));
493 });
494
495 row_map_type blockGColind("blockGColind", blockColind.extent(0));
496
497 Kokkos::parallel_for("computeBlockGColind",range_type(0,blockGColind.extent(0)),KOKKOS_LAMBDA(const LO i) {
498 blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i));
499 });
500
501 Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
502 const offset_type blkBeg = blockRowptr[i];
503 const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
504
505 for (offset_type point_i=0; point_i < pointRowptr[i*blockSize + 1] - pointRowptr[i*blockSize]; point_i++) {
506
507 offset_type block_inv=static_cast<offset_type>(-1);
508 offset_type little_col_inv=static_cast<offset_type>(-1);
509 for (offset_type block_2=0; block_2 < numBlocks; block_2++) {
514 break;
515 }
516 }
517 if (block_inv!=static_cast<offset_type>(-1))
518 break;
519 }
520
523 }
524 }
525 });
526 }
527 blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
528 Kokkos::DefaultExecutionSpace().fence();
529 }
530
531 return blockMatrix;
532
533 }
534
535 template<class Scalar, class LO, class GO, class Node>
536 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LO, GO, Node>>
538 {
540 using execution_space = typename Node::execution_space;
541 using dev_row_view_t = typename crs_t::local_graph_device_type::row_map_type::non_const_type;
542 using dev_col_view_t = typename crs_t::local_graph_device_type::entries_type::non_const_type;
543 using dev_val_view_t = typename crs_t::local_matrix_device_type::values_type::non_const_type;
544 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
545 using team_policy = Kokkos::TeamPolicy<execution_space>;
546 using member_type = typename team_policy::member_type;
547 using scratch_view = Kokkos::View<bool*, typename execution_space::scratch_memory_space>;
548 using Ordinal = typename dev_row_view_t::non_const_value_type;
549
550 // Get structure / values
551 auto local = pointMatrix.getLocalMatrixDevice();
552 auto row_ptrs = local.graph.row_map;
553 auto col_inds = local.graph.entries;
554 auto values = local.values;
555 const auto nrows = pointMatrix.getLocalNumRows();
556
557 //
558 // Populate all active blocks, they must be fully populated with entries so fill with zeroes
559 //
560
561 // Make row workspace views
562 dev_row_view_t new_rowmap("new_rowmap", nrows+1);
563 const auto blocks_per_row = nrows / blockSize; // assumes square matrix
564 dev_row_view_t active_block_row_map("active_block_row_map", blocks_per_row + 1);
565 const int max_threads = execution_space::concurrency();
566 assert(blockSize > 1);
567 assert(nrows % blockSize == 0);
568 const int mem_level = 1;
569 const int bytes = scratch_view::shmem_size(blocks_per_row);
570
571 if (max_threads >= blockSize) {
572 // Prefer 1 team per block since this will require a lot less scratch memory
573 team_policy tp(blocks_per_row, blockSize);
574
575 // Count active blocks
576 Kokkos::parallel_for("countActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(const member_type& team) {
577 Ordinal block_row = team.league_rank();
578
580 Kokkos::single(
581 Kokkos::PerTeam(team), [&] () {
584 }
585 });
586 team.team_barrier();
587
588 // All threads in a team scan a blocks-worth of rows to see which
589 // blocks are active
590 Kokkos::parallel_for(
591 Kokkos::TeamThreadRange(team, blockSize), [&] (Ordinal block_offset) {
592
594 Ordinal row_itr = row_ptrs(row);
595 Ordinal row_end = row_ptrs(row+1);
596
602 // This block has at least one nnz in this row
604 ++row_itr;
605 if (row_itr == row_end) break;
607 }
608 }
609 });
610
611 team.team_barrier();
612
613 Kokkos::single(
614 Kokkos::PerTeam(team), [&] () {
615 Ordinal count = 0;
618 ++count;
619 }
620 }
622 });
623 });
624 }
625 else {
626 // We don't have enough parallelism to make a thread team handle a block, so just
627 // have 1 thread per block
628 team_policy tp(blocks_per_row, 1);
629
630 // Count active blocks
631 Kokkos::parallel_for("countActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(const member_type& team) {
632 Ordinal block_row = team.league_rank();
633
637 }
638
639 // One thread scans a blocks-worth of rows to see which blocks are active
642 Ordinal row_itr = row_ptrs(row);
643 Ordinal row_end = row_ptrs(row+1);
644
650 // This block has at least one nnz in this row
652 ++row_itr;
653 if (row_itr == row_end) break;
655 }
656 }
657 }
658
659 Ordinal count = 0;
662 ++count;
663 }
664 }
666 });
667 }
668
670#if KOKKOSKERNELS_VERSION >= 40199
671 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
672 execution_space>(active_block_row_map.extent(0), active_block_row_map, nnz_block_count);
673#else
674 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
676#endif
678
679 // Find active blocks
680 if (max_threads >= blockSize) {
681 // Prefer 1 team per block since this will require a lot less scratch memory
682 team_policy tp(blocks_per_row, blockSize);
683
684 Kokkos::parallel_for("findActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(const member_type& team) {
685 Ordinal block_row = team.league_rank();
686
688 Kokkos::single(
689 Kokkos::PerTeam(team), [&] () {
692 }
693 });
694 team.team_barrier();
695
696 // All threads in a team scan a blocks-worth of rows to see which
697 // blocks are active
698 Kokkos::parallel_for(
699 Kokkos::TeamThreadRange(team, blockSize), [&] (Ordinal block_offset) {
700
702 Ordinal row_itr = row_ptrs(row);
703 Ordinal row_end = row_ptrs(row+1);
704
710 // This block has at least one nnz in this row
712 ++row_itr;
713 if (row_itr == row_end) break;
715 }
716 }
717 });
718
719 team.team_barrier();
720
721 Kokkos::single(
722 Kokkos::PerTeam(team), [&] () {
727 ++offset;
728 }
729 }
730 });
731 });
732 }
733 else {
734 team_policy tp(blocks_per_row, 1);
735
736 Kokkos::parallel_for("findActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(const member_type& team) {
737 Ordinal block_row = team.league_rank();
738
742 }
743
744 // One thread scans a blocks-worth of rows to see which blocks are active
747 Ordinal row_itr = row_ptrs(row);
748 Ordinal row_end = row_ptrs(row+1);
749
755 // This block has at least one nnz in this row
757 ++row_itr;
758 if (row_itr == row_end) break;
760 }
761 }
762 }
763
768 ++offset;
769 }
770 }
771 });
772 }
773
774 // Sizing
775 Kokkos::parallel_for("sizing", range_type(0, nrows), KOKKOS_LAMBDA(const size_t row) {
776 const auto block_row = row / blockSize;
780 new_rowmap(row) = row_nnz;
781 });
782
784#if KOKKOSKERNELS_VERSION >= 40199
785 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
786 execution_space>(new_rowmap.extent(0), new_rowmap, new_nnz_count);
787#else
788 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
789 dev_row_view_t, execution_space>(new_rowmap.extent(0), new_rowmap, new_nnz_count);
790#endif
791 // Now populate cols and vals
794 Kokkos::parallel_for("entries", range_type(0, nrows), KOKKOS_LAMBDA(const size_t row) {
795 Ordinal row_itr = row_ptrs(row);
796 Ordinal row_end = row_ptrs(row+1);
798
802
811 // Already a non-zero entry
813 ++row_itr;
814 }
815 }
816 }
817 });
818
819 // Create new, filled CRS
820 auto crs_row_map = pointMatrix.getRowMap();
821 auto crs_col_map = pointMatrix.getColMap();
822 Teuchos::RCP<crs_t> result = Teuchos::rcp(new crs_t(crs_row_map, crs_col_map, new_rowmap, new_col_ids, new_vals));
823 result->fillComplete();
824 return result;
825 }
826
827 template<class Scalar, class LO, class GO, class Node>
828 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LO, GO, Node>>
830 {
832 using dev_row_view_t = typename crs_t::local_graph_device_type::row_map_type::non_const_type;
833 using dev_col_view_t = typename crs_t::local_graph_device_type::entries_type::non_const_type;
834 using dev_val_view_t = typename crs_t::local_matrix_device_type::values_type::non_const_type;
835 using impl_scalar_t = typename Kokkos::ArithTraits<Scalar>::val_type;
836 using STS = Kokkos::ArithTraits<impl_scalar_t>;
837 using Ordinal = typename dev_row_view_t::non_const_value_type;
838 using execution_space = typename Node::execution_space;
839 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
840
841 // Get structure / values
842 auto local = pointMatrix.getLocalMatrixHost();
843 auto row_ptrs = local.graph.row_map;
844 auto col_inds = local.graph.entries;
845 auto values = local.values;
846 const auto nrows = pointMatrix.getLocalNumRows();
847
848 // Sizing and rows
849 dev_row_view_t new_rowmap("new_rowmap", nrows+1);
850 const impl_scalar_t zero = STS::zero();
851 Kokkos::parallel_for("sizing", range_type(0, nrows), KOKKOS_LAMBDA(const size_t row) {
852 const Ordinal row_nnz_begin = row_ptrs(row);
853 Ordinal row_nnzs = 0;
854 for (Ordinal row_nnz = row_nnz_begin; row_nnz < row_ptrs(row+1); ++row_nnz) {
855 const impl_scalar_t value = values(row_nnz);
856 if (value != zero) {
857 ++row_nnzs;
858 }
859 }
860 new_rowmap(row) = row_nnzs;
861 });
862
863 Ordinal real_nnzs = 0;
864#if KOKKOSKERNELS_VERSION >= 40199
865 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
866 execution_space>(new_rowmap.extent(0), new_rowmap, real_nnzs);
867#else
868 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
869 dev_row_view_t, execution_space>(new_rowmap.extent(0), new_rowmap, real_nnzs);
870#endif
871 // Now populate cols and vals
872 dev_col_view_t new_col_ids("new_col_ids", real_nnzs);
873 dev_val_view_t new_vals("new_vals", real_nnzs);
874 Kokkos::parallel_for("entries", range_type(0, nrows), KOKKOS_LAMBDA(const size_t row) {
875 Ordinal row_nnzs = 0;
879 const impl_scalar_t value = values(old_row_nnz);
880 if (value != zero) {
883 ++row_nnzs;
884 }
885 }
886 });
887
888 // Create new, unfilled CRS
889 auto crs_row_map = pointMatrix.getRowMap();
890 auto crs_col_map = pointMatrix.getColMap();
891 Teuchos::RCP<crs_t> result = Teuchos::rcp(new crs_t(crs_row_map, crs_col_map, new_rowmap, new_col_ids, new_vals));
892 result->fillComplete();
893 return result;
894 }
895
896 template<class LO, class GO, class Node>
897 Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
899 {
900 typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
901 typedef Tpetra::Map<LO,GO,Node> map_type;
902
903 if(use_LID) {
904
905 using execution_space = typename Node::execution_space;
906 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
907
908 auto pointGlobalID = pointMap.getMyGlobalIndicesDevice();
909 LO block_rows = pointGlobalID.extent(0)/blockSize;
910 Kokkos::View<GO*, typename map_type::device_type> meshGlobalID("meshGlobalID", block_rows);
911
912 Kokkos::parallel_for("fillMeshMap",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) {
914 });
915
916 Teuchos::RCP<const map_type> meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGlobalID, 0, pointMap.getComm()) );
917 return meshMap;
918 }
919 else {
920 //calculate mesh GIDs
921 Teuchos::ArrayView<const GO> pointGids = pointMap.getLocalElementList();
922 Teuchos::Array<GO> meshGids;
923 GO indexBase = pointMap.getIndexBase();
924
925 // Use hash table to track whether we've encountered this GID previously. This will happen
926 // when striding through the point DOFs in a block. It should not happen otherwise.
927 // I don't use sort/make unique because I don't want to change the ordering.
928 meshGids.reserve(pointGids.size());
930 for (int i=0; i<pointGids.size(); ++i) {
932 if (hashTable.get(meshGid) == -1) {
933 hashTable.add(meshGid,1); //(key,value)
934 meshGids.push_back(meshGid);
935 }
936 }
937
938 Teuchos::RCP<const map_type> meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGids(), 0, pointMap.getComm()) );
939 return meshMap;
940 }
941 }
942
943
944 template<class LO, class GO, class Node>
945 Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
947 {
948 typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
949 typedef Tpetra::Map<LO,GO,Node> map_type;
950
951 //calculate mesh GIDs
952 Teuchos::ArrayView<const GO> blockGids = blockMap.getLocalElementList();
953 Teuchos::Array<GO> pointGids(blockGids.size() * blockSize);
954 GO indexBase = blockMap.getIndexBase();
955
956 for(LO i=0, ct=0; i<(LO)blockGids.size(); i++) {
958 for(LO j=0; j<blockSize; j++, ct++)
960 }
961
962 Teuchos::RCP<const map_type> pointMap = Teuchos::rcp( new map_type(TOT::invalid(), pointGids(), 0, blockMap.getComm()) );
963 return pointMap;
964
965 }
966
967
968 template<class Scalar, class LO, class GO, class Node>
969 Teuchos::RCP<CrsMatrix<Scalar, LO, GO, Node> >
971
972 using Teuchos::Array;
973 using Teuchos::ArrayView;
974 using Teuchos::RCP;
975
976 typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
977 typedef Tpetra::Map<LO,GO,Node> map_type;
978 typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
979
980 using crs_graph_type = typename block_crs_matrix_type::crs_graph_type;
981 using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
982 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
983 using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
984 using entries_type = typename local_graph_device_type::entries_type::non_const_type;
985 using values_type = typename local_matrix_device_type::values_type::non_const_type;
986
987 using row_map_type_const = typename local_graph_device_type::row_map_type;
988 using entries_type_const = typename local_graph_device_type::entries_type;
989
990 using little_block_type = typename block_crs_matrix_type::const_little_block_type;
991 using offset_type = typename row_map_type::non_const_value_type;
992 using column_type = typename entries_type::non_const_value_type;
993
994 using execution_space = typename Node::execution_space;
995 using range_type = Kokkos::RangePolicy<execution_space, LO>;
996
997
998 LO blocksize = blockMatrix.getBlockSize();
999 const offset_type bs2 = blocksize * blocksize;
1000 size_t block_nnz = blockMatrix.getLocalNumEntries();
1001 size_t point_nnz = block_nnz * bs2;
1002
1003 // We can get these from the blockMatrix directly
1006
1007 // We need to generate the row/col Map ourselves.
1010
1013
1014 // Get the last few things
1015
1016 const crs_graph_type & blockGraph = blockMatrix.getCrsGraph();
1017 LO point_rows = (LO) pointRowMap->getLocalNumElements();
1018 LO block_rows = (LO) blockRowMap->getLocalNumElements();
1019 auto blockValues = blockMatrix.getValuesDevice();
1020 auto blockLocalGraph = blockGraph.getLocalGraphDevice();
1023
1024 // Generate the point matrix rowptr / colind / values
1025 row_map_type rowptr("row_map", point_rows+1);
1026 entries_type colind("entries", point_nnz);
1027 values_type values("values", point_nnz);
1028
1029 // Pre-fill the rowptr
1030 Kokkos::parallel_for("fillRowPtr",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
1031 offset_type base = blockRowptr[i];
1032 offset_type diff = blockRowptr[i+1] - base;
1033 for(LO j=0; j<blocksize; j++) {
1034 rowptr[i*blocksize +j] = base*bs2 + j*diff*blocksize;
1035 }
1036
1037 // Fill the last guy, if we're on the final entry
1038 if(i==block_rows-1) {
1039 rowptr[block_rows*blocksize] = blockRowptr[i+1] * bs2;
1040 }
1041 });
1042
1043
1044 Kokkos::parallel_for("copyEntriesAndValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
1045 const offset_type blkBeg = blockRowptr[i];
1046 const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
1047
1048 // For each block in the row...
1049 for (offset_type block=0; block < numBlocks; block++) {
1051 little_block_type my_block(blockValues.data () + (blkBeg+block) * bs2, blocksize, blocksize);
1052
1053 // For each entry in the block...
1054 for(LO little_row=0; little_row<blocksize; little_row++) {
1055 offset_type point_row_offset = rowptr[i*blocksize + little_row];
1056 for(LO little_col=0; little_col<blocksize; little_col++) {
1059 }
1060 }
1061
1062 }
1063 });
1064
1065 // Generate the matrix
1067 pointCrsMatrix->setAllValues(rowptr,colind,values);
1068
1069 // FUTURE OPTIMIZATION: Directly compute import/export, rather than letting ESFC do it
1070 pointCrsMatrix->expertStaticFillComplete(pointDomainMap,pointRangeMap);
1071
1072 return pointCrsMatrix;
1073 }
1074
1075
1076} // namespace Tpetra
1077
1078//
1079// Explicit instantiation macro for blockCrsMatrixWriter (various
1080// overloads), writeMatrixStrip, and convertToBlockCrsMatrix.
1081//
1082// Must be expanded from within the Tpetra namespace!
1083//
1084#define TPETRA_BLOCKCRSMATRIX_HELPERS_INSTANT(S,LO,GO,NODE) \
1085 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName); \
1086 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName, Teuchos::ParameterList const &params); \
1087 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os); \
1088 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
1089 template void writeMatrixStrip(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
1090 template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize, bool use_LID); \
1091 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > fillLogicalBlocks(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
1092 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > unfillFormerBlockCrs(const CrsMatrix<S, LO, GO, NODE>& pointMatrix); \
1093 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > convertToCrsMatrix(const BlockCrsMatrix<S, LO, GO, NODE>& blockMatrix); \
1094 template Teuchos::RCP<CrsGraph<LO, GO, NODE>> getBlockCrsGraph(const Tpetra::CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize, bool use_local_ID);
1095
1096//
1097// Explicit instantiation macro for createMeshMap / createPointMap
1098//
1099#define TPETRA_CREATEMESHMAP_INSTANT(LO,GO,NODE) \
1100 template Teuchos::RCP<const Map<LO,GO,NODE> > createMeshMap (const LO& blockSize, const Map<LO,GO,NODE>& pointMap, bool use_local_ID); \
1101 template Teuchos::RCP<const Map<LO,GO,NODE> > createPointMap (const LO& blockSize, const Map<LO,GO,NODE>& blockMap);
1102
1103#endif // TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
Struct that holds views of the contents of a CrsMatrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, Node > > fillLogicalBlocks(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize)
Fill all point entries in a logical block of a CrsMatrix with zeroes. This should be called before co...
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, Node > > unfillFormerBlockCrs(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix)
Unfill all point entries in a logical block of a CrsMatrix with zeroes. This can be called after conv...
void writeMatrixStrip(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::ostream &os, Teuchos::ParameterList const &params)
Helper function called by blockCrsMatrixWriter.
Teuchos::RCP< Tpetra::CrsGraph< LO, GO, Node > > getBlockCrsGraph(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize, bool use_local_ID=true)
Non-member constructor that creates the CrsGraph of a BlockCrsMatrix from an existing point CrsMatrix...
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createPointMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &blockMap)
Helper function to generate a point map from a block map (with a given block size) GIDs associated wi...
Teuchos::RCP< CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > createCrsGraph(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, size_t maxNumEntriesPerRow=0, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Nonmember function to create an empty CrsGraph given a row Map and the max number of entries allowed ...
Teuchos::RCP< CrsMatrix< Scalar, LO, GO, Node > > convertToCrsMatrix(const Tpetra::BlockCrsMatrix< Scalar, LO, GO, Node > &blockMatrix)
Non-member constructor that creates a point CrsMatrix from an existing BlockCrsMatrix.
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createMeshMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &pointMap, bool use_local_ID=false)
Helper function to generate a mesh map from a point map. Important! It's assumed that point GIDs asso...
void blockCrsMatrixWriter(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::string const &fileName)
Helper function to write a BlockCrsMatrix. Calls the 3-argument version.
@ INSERT
Insert new values that don't currently exist.
Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > convertToBlockCrsMatrix(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize, bool use_local_ID=true)
Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix.