22#include "Teuchos_OrdinalTraits.hpp"
23#include "Teuchos_TestForException.hpp"
24#include "TpetraCore_config.h"
26#include "KokkosKernels_config.h"
63#if not(defined(WIN) && (_MSC_VER >= 1900))
71namespace BehaviorDetails {
/// Prefix reserved for Tpetra's own environment variables.  Any variable
/// found in the environment whose name starts with this prefix must also
/// appear in RECOGNIZED_VARS, otherwise it is rejected as unrecognized.
constexpr std::string_view RESERVED_PREFIX = "TPETRA_";

// Names of the environment variables that are looked up at run time.
// Note that CUDA_LAUNCH_BLOCKING and MM_TAFC_OptimizationCoreCount do not
// carry the reserved prefix.
constexpr std::string_view ASSUME_GPU_AWARE_MPI =
    "TPETRA_ASSUME_GPU_AWARE_MPI";
constexpr std::string_view CUDA_LAUNCH_BLOCKING = "CUDA_LAUNCH_BLOCKING";
constexpr std::string_view MM_TAFC_OptimizationCoreCount =
    "MM_TAFC_OptimizationCoreCount";
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
constexpr std::string_view ROW_IMBALANCE_THRESHOLD =
    "TPETRA_ROW_IMBALANCE_THRESHOLD";
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH =
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
constexpr std::string_view VECTOR_DEVICE_THRESHOLD =
    "TPETRA_VECTOR_DEVICE_THRESHOLD";
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
constexpr std::string_view USE_TEUCHOS_TIMERS = "TPETRA_USE_TEUCHOS_TIMERS";
constexpr std::string_view USE_KOKKOS_PROFILING =
    "TPETRA_USE_KOKKOS_PROFILING";
constexpr std::string_view DEBUG = "TPETRA_DEBUG";
constexpr std::string_view VERBOSE = "TPETRA_VERBOSE";
constexpr std::string_view TIMING = "TPETRA_TIMING";
constexpr std::string_view HIERARCHICAL_UNPACK = "TPETRA_HIERARCHICAL_UNPACK";
constexpr std::string_view SKIP_COPY_AND_PERMUTE =
    "TPETRA_SKIP_COPY_AND_PERMUTE";
constexpr std::string_view FUSED_RESIDUAL = "TPETRA_FUSED_RESIDUAL";
constexpr std::string_view OVERLAP = "TPETRA_OVERLAP";
constexpr std::string_view SPACES_ID_WARN_LIMIT =
    "TPETRA_SPACES_ID_WARN_LIMIT";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY =
    "TPETRA_TIME_KOKKOS_DEEP_COPY";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
constexpr std::string_view TIME_KOKKOS_FENCE = "TPETRA_TIME_KOKKOS_FENCE";
constexpr std::string_view TIME_KOKKOS_FUNCTIONS =
    "TPETRA_TIME_KOKKOS_FUNCTIONS";

/// Build a std::array of string views from the given arguments.
///
/// The array length is deduced from the number of arguments, so the list
/// below never needs a hand-maintained element count.
///
/// \param elems Values convertible to std::string_view.
/// \return An array holding one string view per argument, in argument order.
template <typename... Elems>
constexpr std::array<std::string_view, sizeof...(Elems)>
make_array(Elems &&... elems) {
  return {std::forward<Elems>(elems)...};
}

/// Every environment variable name this file recognizes.  A variable that
/// starts with RESERVED_PREFIX but is not listed here is reported as an
/// unrecognized Tpetra variable.
constexpr auto RECOGNIZED_VARS = make_array(
    ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
    VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
    MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
    HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
    USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
    HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
    SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
    TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
/// Cache of parsed "named" boolean environment variables.  The outer key is
/// the environment variable's name; the inner map goes from a name found in
/// that variable's value to a flag, and also holds a "DEFAULT" entry.
std::map<std::string, std::map<std::string, bool> > namedVariableMap_;

/// Set to true when verbose behavior has been disabled programmatically.
bool verboseDisabled_ = false;

/// Set to true when timing has been disabled programmatically.
bool timingDisabled_ = false;
/// Classification of an environment variable's value.
enum EnvironmentVariableState
{
  EnvironmentVariableIsSet_ON,   ///< Value spells "on" (1 / YES / TRUE / ON).
  EnvironmentVariableIsSet_OFF,  ///< Value spells "off" (0 / NO / FALSE / OFF).
  EnvironmentVariableIsSet,      ///< Value is set to something else.
  EnvironmentVariableIsNotSet    ///< Variable is not set.
};
/// Return an upper-cased copy of \c s.
///
/// \param s String to convert; taken by value so the copy can be modified
///   in place and returned.
/// \return \c s with every character passed through std::toupper.
std::string stringToUpper (std::string s)
{
  // std::toupper requires a value representable as unsigned char, so the
  // lambda takes its argument as unsigned char rather than plain char.
  std::transform (s.begin (), s.end (), s.begin (),
                  [] (unsigned char c) {
                    return static_cast<char> (std::toupper (c));
                  });
  return s;
}
161 split(
const std::string_view s,
162 std::function<
void(
const std::string&)> f,
165 typedef std::string::size_type size_type;
166 size_type cur_pos, last_pos=0, length=s.length();
167 while(last_pos < length + 1)
169 cur_pos = s.find_first_of(sep, last_pos);
170 if(cur_pos == std::string::npos)
174 if(cur_pos!=last_pos) {
175 auto token = std::string(s.data()+last_pos, (size_type)cur_pos-last_pos);
178 last_pos = cur_pos + 1;
183 EnvironmentVariableState
184 environmentVariableState(
const std::string& environmentVariableValue)
186 std::string v = stringToUpper(environmentVariableValue);
187 if (v ==
"1" || v ==
"YES" || v ==
"TRUE" || v ==
"ON")
189 return EnvironmentVariableIsSet_ON;
190 else if (v ==
"0" || v ==
"NO" || v ==
"FALSE" || v ==
"OFF")
192 return EnvironmentVariableIsSet_OFF;
194 return EnvironmentVariableIsSet;
198 setEnvironmentVariableMap (
const char environmentVariableName[],
199 std::map<std::string,std::map<std::string, bool> >& valsMap,
200 const bool defaultValue)
208 valsMap[environmentVariableName] = map<string,bool>{{
"DEFAULT", defaultValue}};
210 const char* varVal = getenv (environmentVariableName);
211 if (varVal ==
nullptr) {
218 const string varStr(varVal);
219 vector<string> names;
220 split(varStr, [&](
const string& x){names.push_back(x);});
221 for (
auto const& name: names) {
222 auto state = environmentVariableState(name);
223 if (state == EnvironmentVariableIsSet_ON) {
226 valsMap[environmentVariableName][
"DEFAULT"] =
true;
228 else if (state == EnvironmentVariableIsSet_OFF) {
231 valsMap[environmentVariableName][
"DEFAULT"] =
false;
236 valsMap[environmentVariableName][name] =
true;
243 idempotentlyGetNamedEnvironmentVariableAsBool (
const char name[],
245 const char environmentVariableName[],
246 const bool defaultValue)
248 using BehaviorDetails::namedVariableMap_;
250 setEnvironmentVariableMap (environmentVariableName,
255 auto thisEnvironmentVariableMap = namedVariableMap_[environmentVariableName];
256 auto thisEnvironmentVariable = thisEnvironmentVariableMap.find(name);
257 if (thisEnvironmentVariable != thisEnvironmentVariableMap.end())
258 return thisEnvironmentVariable->second;
259 return thisEnvironmentVariableMap[
"DEFAULT"];
264T getEnvironmentVariable(
const std::string_view environmentVariableName,
265 const T defaultValue) {
266 const char prefix[] =
"Tpetra::Details::Behavior: ";
268 const char *varVal = std::getenv(environmentVariableName.data());
269 if (varVal ==
nullptr) {
272 std::stringstream ss(varVal);
276 TEUCHOS_TEST_FOR_EXCEPTION(!ss, std::out_of_range,
277 prefix <<
"Environment "
279 << environmentVariableName
283 <<
" that cannot be parsed as a "
284 <<
typeid(T).name() <<
".");
292bool getEnvironmentVariable<bool>(
293 const std::string_view environmentVariableName,
const bool defaultValue) {
294 const char *varVal = std::getenv(environmentVariableName.data());
295 bool retVal = defaultValue;
296 if (varVal !=
nullptr) {
297 auto state = environmentVariableState(std::string(varVal));
298 if (state == EnvironmentVariableIsSet_ON)
300 else if (state == EnvironmentVariableIsSet_OFF)
313getEnvironmentVariable<size_t>(
const std::string_view environmentVariableName,
314 const size_t defaultValue) {
315 const char prefix[] =
"Tpetra::Details::Behavior: ";
317 const char *varVal = std::getenv(environmentVariableName.data());
318 if (varVal ==
nullptr) {
321 long long val = std::stoll(stringToUpper(varVal));
322 if (val <
static_cast<long long>(0)) {
324 return std::numeric_limits<size_t>::max();
326 if (
sizeof(
long long) >
sizeof(
size_t)) {
330 constexpr long long maxSizeT =
331 static_cast<long long>(std::numeric_limits<size_t>::max());
332 TEUCHOS_TEST_FOR_EXCEPTION(
333 val > maxSizeT, std::out_of_range,
334 prefix <<
"Environment "
336 << environmentVariableName
339 << val <<
" larger than the largest size_t value " << maxSizeT
342 return static_cast<size_t>(val);
347T idempotentlyGetEnvironmentVariable(
348 T &value,
bool &initialized,
const std::string_view environmentVariableName,
349 const T defaultValue) {
351 value = getEnvironmentVariable<T>(environmentVariableName, defaultValue);
358 constexpr bool debugDefault () {
359#ifdef HAVE_TPETRA_DEBUG
366 constexpr bool verboseDefault () {
370 constexpr bool timingDefault () {
374 constexpr bool assumeMpiIsGPUAwareDefault () {
375#ifdef TPETRA_ASSUME_GPU_AWARE_MPI
382 constexpr bool cudaLaunchBlockingDefault () {
386 constexpr bool hierarchicalUnpackDefault () {
395 static bool once =
false;
398 const char prefix[] =
"Tpetra::Details::Behavior: ";
400#if defined(WIN) && (_MSC_VER >= 1900)
409 const std::string_view
ev(*
env);
414 [&](
const std::string &
s) {
423 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
424 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
425 BehaviorDetails::RESERVED_PREFIX) {
426 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.
begin(),
427 BehaviorDetails::RECOGNIZED_VARS.
end(), name);
429 it == BehaviorDetails::RECOGNIZED_VARS.
end(), std::out_of_range,
432 << name <<
"\" (prefixed with \""
433 << BehaviorDetails::RESERVED_PREFIX
434 <<
"\") is not a recognized Tpetra variable.");
446 static bool initialized_ =
false;
452 if (BehaviorDetails::verboseDisabled_)
458 static bool initialized_ =
false;
464 if (BehaviorDetails::timingDisabled_)
470 static bool initialized_ =
false;
479 static bool initialized_ =
false;
481 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
489 static bool initialized_ =
false;
491 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
498 static bool initialized_ =
false;
500 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
508 static bool initialized_ =
false;
510 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
518 static bool initialized_ =
false;
520 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
528 static bool initialized_ =
false;
530 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
538 static bool initialized_ =
false;
540 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
546#ifdef HAVE_TPETRA_INST_CUDA
553 static bool initialized_ =
false;
555 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
560#ifdef HAVE_TPETRA_INST_CUDA
563 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
567 static bool initialized_ =
false;
569 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
577 static bool initialized_ =
false;
586 static bool initialized_ =
false;
588 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
595 static bool initialized_ =
false;
601 if (BehaviorDetails::verboseDisabled_)
606 static bool initialized_ =
false;
612 BehaviorDetails::verboseDisabled_ =
false;
616 BehaviorDetails::verboseDisabled_ =
true;
620 if (BehaviorDetails::timingDisabled_)
625 static bool initialized_ =
false;
638 static bool initialized_ =
false;
647 static bool initialized_ =
false;
649 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
654#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
655 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
656 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
663 static bool initialized_ =
false;
672 static bool initialized_ =
false;
681 static bool initialized_ =
false;
683 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
691 static bool initialized_ =
false;
693 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
701 static bool initialized_ =
false;
703 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
711 static bool initialized_ =
false;
713 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
721 static bool initialized_ =
false;
730 static bool initialized_ =
false;
732 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Struct that holds views of the contents of a CrsMatrix.
static bool timing()
Whether Tpetra is in timing mode.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static void disable_timing()
Disable timing, programmatically.
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static void enable_timing()
Enable timing, programmatically.
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool verbose()
Whether Tpetra is in verbose mode.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products,...
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
Implementation details of Tpetra.
Namespace Tpetra contains the class and methods constituting the Tpetra library.