3997 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3999 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
4000 insertNonownedGlobalValues (
const GlobalOrdinal globalRow,
4001 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4002 const Teuchos::ArrayView<const Scalar>& values)
4004 using Teuchos::Array;
4005 typedef GlobalOrdinal GO;
4006 typedef typename Array<GO>::size_type size_type;
4008 const size_type numToInsert = indices.size ();
4011 std::pair<Array<GO>, Array<Scalar> >& curRow =
nonlocals_[globalRow];
4012 Array<GO>& curRowInds = curRow.first;
4013 Array<Scalar>& curRowVals = curRow.second;
4014 const size_type newCapacity = curRowInds.size () + numToInsert;
4015 curRowInds.reserve (newCapacity);
4016 curRowVals.reserve (newCapacity);
4017 for (size_type k = 0; k < numToInsert; ++k) {
4018 curRowInds.push_back (indices[k]);
4019 curRowVals.push_back (values[k]);
4023 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4030 using Teuchos::Comm;
4031 using Teuchos::outArg;
4034 using Teuchos::REDUCE_MAX;
4035 using Teuchos::REDUCE_MIN;
4036 using Teuchos::reduceAll;
4038 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crs_matrix_type;
4040 typedef GlobalOrdinal GO;
4041 typedef typename Teuchos::Array<GO>::size_type size_type;
4042 const char tfecfFuncName[] =
"globalAssemble: ";
4043 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
4045 const bool verbose = Behavior::verbose(
"CrsMatrix");
4046 std::unique_ptr<std::string> prefix;
4048 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4049 std::ostringstream os;
4050 os << *prefix <<
"nonlocals_.size()=" <<
nonlocals_.size()
4052 std::cerr << os.str();
4054 RCP<const Comm<int> > comm =
getComm ();
4056 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4057 (!
isFillActive (), std::runtime_error,
"Fill must be active before "
4058 "you may call this method.");
4060 const size_t myNumNonlocalRows =
nonlocals_.size ();
4067 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4068 int someoneHasNonlocalRows = 0;
4069 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4070 outArg (someoneHasNonlocalRows));
4071 if (someoneHasNonlocalRows == 0) {
4085 RCP<const map_type> nonlocalRowMap;
4086 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4088 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4089 size_type curPos = 0;
4091 ++mapIter, ++curPos) {
4092 myNonlocalGblRows[curPos] = mapIter->first;
4095 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4096 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4103 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4104 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4105 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4106 merge2 (gblCols_newEnd, vals_newEnd,
4107 gblCols.begin (), gblCols.end (),
4108 vals.begin (), vals.end ());
4109 gblCols.erase (gblCols_newEnd, gblCols.end ());
4110 vals.erase (vals_newEnd, vals.end ());
4111 numEntPerNonlocalRow[curPos] = gblCols.size ();
4122 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4124 auto iter = std::min_element (myNonlocalGblRows.begin (),
4125 myNonlocalGblRows.end ());
4126 if (iter != myNonlocalGblRows.end ()) {
4127 myMinNonlocalGblRow = *iter;
4130 GO gblMinNonlocalGblRow = 0;
4131 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4132 outArg (gblMinNonlocalGblRow));
4133 const GO indexBase = gblMinNonlocalGblRow;
4134 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4135 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4144 std::ostringstream os;
4145 os << *prefix <<
"Create nonlocal matrix" << endl;
4146 std::cerr << os.str();
4148 RCP<crs_matrix_type> nonlocalMatrix =
4149 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4151 size_type curPos = 0;
4153 ++mapIter, ++curPos) {
4154 const GO gblRow = mapIter->first;
4156 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4157 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4159 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4172 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4174 int isLocallyComplete = 1;
4176 if (origRowMapIsOneToOne) {
4178 std::ostringstream os;
4179 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4180 std::cerr << os.str();
4182 export_type exportToOrig (nonlocalRowMap, origRowMap);
4184 isLocallyComplete = 0;
4187 std::ostringstream os;
4188 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4189 std::cerr << os.str();
4196 std::ostringstream os;
4197 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4198 std::cerr << os.str();
4205 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4207 isLocallyComplete = 0;
4215 std::ostringstream os;
4216 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4218 std::cerr << os.str();
4220 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4222 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4228 std::ostringstream os;
4229 os << *prefix <<
"Free nonlocalMatrix" << endl;
4230 std::cerr << os.str();
4232 nonlocalMatrix = Teuchos::null;
4236 std::ostringstream os;
4237 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4238 std::cerr << os.str();
4240 import_type importToOrig (oneToOneRowMap, origRowMap);
4249 std::ostringstream os;
4250 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4251 std::cerr << os.str();
4263 int isGloballyComplete = 0;
4264 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4265 outArg (isGloballyComplete));
4266 TEUCHOS_TEST_FOR_EXCEPTION
4267 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4268 "you called insertGlobalValues with a global row index which is not in "
4269 "the matrix's row Map on any process in its communicator.");
4272 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4275 resumeFill (
const Teuchos::RCP<Teuchos::ParameterList>& params)
4278 myGraph_->resumeFill (params);
4280#if KOKKOSKERNELS_VERSION >= 40299
4282 applyHelper.reset();
4287 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4291 return getCrsGraphRef ().haveGlobalConstants ();
4294 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4297 fillComplete (
const Teuchos::RCP<Teuchos::ParameterList>& params)
4299 const char tfecfFuncName[] =
"fillComplete(params): ";
4301 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4302 (this->
getCrsGraph ().is_null (), std::logic_error,
4303 "getCrsGraph() returns null. This should not happen at this point. "
4304 "Please report this bug to the Tpetra developers.");
4314 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4315 Teuchos::RCP<const map_type> domainMap = rangeMap;
4320 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4323 fillComplete (
const Teuchos::RCP<const map_type>& domainMap,
4324 const Teuchos::RCP<const map_type>& rangeMap,
4325 const Teuchos::RCP<Teuchos::ParameterList>& params)
4329 using Teuchos::ArrayRCP;
4333 const char tfecfFuncName[] =
"fillComplete: ";
4334 ProfilingRegion regionFillComplete
4335 (
"Tpetra::CrsMatrix::fillComplete");
4336 const bool verbose = Behavior::verbose(
"CrsMatrix");
4337 std::unique_ptr<std::string> prefix;
4339 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4340 std::ostringstream os;
4341 os << *prefix << endl;
4342 std::cerr << os.str ();
4345 "Tpetra::CrsMatrix::fillCompete",
4348 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4350 "Matrix fill state must be active (isFillActive() "
4351 "must be true) before you may call fillComplete().");
4352 const int numProcs = this->
getComm ()->getSize ();
4362 bool assertNoNonlocalInserts =
false;
4365 bool sortGhosts =
true;
4367 if (! params.is_null ()) {
4368 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4369 assertNoNonlocalInserts);
4370 if (params->isParameter (
"sort column map ghost gids")) {
4371 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4373 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4374 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
4379 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4381 if (! this->myGraph_.is_null ()) {
4382 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4385 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4395 if (needGlobalAssemble) {
4399 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4401 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4402 "An invalid entry (i.e., with row index not in the row Map) must have "
4403 "been submitted to the CrsMatrix.");
4414#ifdef HAVE_TPETRA_DEBUG
4432 const bool domainMapsMatch =
4433 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4434 const bool rangeMapsMatch =
4435 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4437 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4438 (! domainMapsMatch, std::runtime_error,
4439 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4440 "The graph cannot be changed because it was given to the CrsMatrix "
4441 "constructor as const. You can fix this by passing in the graph's "
4442 "domain Map and range Map to the matrix's fillComplete call.");
4444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4445 (! rangeMapsMatch, std::runtime_error,
4446 "The CrsMatrix's range Map does not match the graph's range Map. "
4447 "The graph cannot be changed because it was given to the CrsMatrix "
4448 "constructor as const. You can fix this by passing in the graph's "
4449 "domain Map and range Map to the matrix's fillComplete call.");
4462 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4465 Teuchos::Array<int> remotePIDs (0);
4466 const bool mustBuildColMap = ! this->
hasColMap ();
4467 if (mustBuildColMap) {
4468 this->myGraph_->makeColMap (remotePIDs);
4473 const std::pair<size_t, std::string> makeIndicesLocalResult =
4474 this->myGraph_->makeIndicesLocal(verbose);
4479 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4480 (makeIndicesLocalResult.first != 0, std::runtime_error,
4481 makeIndicesLocalResult.second);
4483 const bool sorted = this->myGraph_->isSorted ();
4484 const bool merged = this->myGraph_->isMerged ();
4490 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4496 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4497 params->get (
"compute global constants",
true);
4498 if (callGraphComputeGlobalConstants) {
4499 this->myGraph_->computeGlobalConstants ();
4502 this->myGraph_->computeLocalConstants ();
4504 this->myGraph_->fillComplete_ =
true;
4505 this->myGraph_->checkInternalState ();
4513 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
4519 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4523 const Teuchos::RCP<const map_type> & rangeMap,
4524 const Teuchos::RCP<const import_type>& importer,
4525 const Teuchos::RCP<const export_type>& exporter,
4526 const Teuchos::RCP<Teuchos::ParameterList> ¶ms)
4528#ifdef HAVE_TPETRA_MMM_TIMINGS
4530 if(!params.is_null())
4531 label = params->get(
"Timer Label",label);
4532 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4533 using Teuchos::TimeMonitor;
4535 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4538 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4540 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4541 "must be true) before calling fillComplete().");
4542 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4543 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4546#ifdef HAVE_TPETRA_MMM_TIMINGS
4547 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
4550 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4554#ifdef HAVE_TPETRA_MMM_TIMINGS
4555 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4566#ifdef HAVE_TPETRA_DEBUG
4567 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
isFillActive(), std::logic_error,
4568 ": We're at the end of fillComplete(), but isFillActive() is true. "
4569 "Please report this bug to the Tpetra developers.");
4570 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!
isFillComplete(), std::logic_error,
4571 ": We're at the end of fillComplete(), but isFillActive() is true. "
4572 "Please report this bug to the Tpetra developers.");
4575#ifdef HAVE_TPETRA_MMM_TIMINGS
4576 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4583 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4589 LocalOrdinal* beg = cols;
4590 LocalOrdinal* end = cols + rowLen;
4591 LocalOrdinal* newend = beg;
4593 LocalOrdinal* cur = beg + 1;
4597 while (cur != end) {
4598 if (*cur != *newend) {
4615 return newend - beg;
4618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4624 typedef LocalOrdinal LO;
4625 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4626 host_execution_space;
4627 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4628 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4629 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4631 if (! sorted || ! merged) {
4632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4633 (this->
isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4634 "\"static\" (const) graph, since the matrix does not own the graph.");
4635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4636 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4637 "this matrix claims ! isStaticGraph(). "
4638 "Please report this bug to the Tpetra developers.");
4639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4641 "this method if the graph's storage has already been optimized. "
4642 "Please report this bug to the Tpetra developers.");
4646 size_t totalNumDups = 0;
4651 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
4653 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4654 [=] (
const LO lclRow,
size_t& numDups) {
4655 size_t rowBegin = rowBegins_(lclRow);
4656 size_t rowLen = rowLengths_(lclRow);
4657 LO* cols = cols_.data() + rowBegin;
4660 sort2 (cols, cols + rowLen, vals);
4664 rowLengths_(lclRow) = newRowLength;
4665 numDups += rowLen - newRowLength;
4678 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4689 using Teuchos::rcp_const_cast;
4690 using Teuchos::rcpFromRef;
4691 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4692 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4698 if (alpha ==
ZERO) {
4701 }
else if (beta != ONE) {
4715 RCP<const import_type> importer = this->
getGraph ()->getImporter ();
4716 RCP<const export_type> exporter = this->
getGraph ()->getExporter ();
4722 const bool Y_is_overwritten = (beta ==
ZERO);
4725 const bool Y_is_replicated =
4734 if (Y_is_replicated && this->
getComm ()->getRank () > 0) {
4741 RCP<const MV> X_colMap;
4742 if (importer.is_null ()) {
4752 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4757 X_colMap = rcpFromRef (X_in);
4761 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
4770 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4771 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4785 if (! exporter.is_null ()) {
4786 this->
localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha,
ZERO);
4788 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4794 if (Y_is_overwritten) {
4827 this->
localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4831 this->
localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4839 if (Y_is_replicated) {
4840 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
4845 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4850 const Teuchos::ETransp mode,
4855 using Teuchos::null;
4858 using Teuchos::rcp_const_cast;
4859 using Teuchos::rcpFromRef;
4860 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4863 if (alpha ==
ZERO) {
4876 else if (beta ==
ZERO) {
4894 RCP<const import_type> importer = this->
getGraph ()->getImporter ();
4895 RCP<const export_type> exporter = this->
getGraph ()->getExporter ();
4901 const bool Y_is_overwritten = (beta ==
ZERO);
4902 if (Y_is_replicated && this->
getComm ()->getRank () > 0) {
4908 X = rcp (
new MV (X_in, Teuchos::Copy));
4910 X = rcpFromRef (X_in);
4914 if (importer != Teuchos::null) {
4922 if (exporter != Teuchos::null) {
4933 if (! exporter.is_null ()) {
4934 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4942 if (importer != Teuchos::null) {
4943 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
4954 if (Y_is_overwritten) {
4971 MV Y (Y_in, Teuchos::Copy);
4975 this->
localApply (*X, Y_in, mode, alpha, beta);
4982 if (Y_is_replicated) {
4983 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
4988 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4993 const Teuchos::ETransp mode,
4994 const Scalar& alpha,
4995 const Scalar& beta)
const
4998 using Teuchos::NO_TRANS;
4999 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5006 const char tfecfFuncName[] =
"localApply: ";
5007 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5011 const bool transpose = (mode != Teuchos::NO_TRANS);
5012 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5014 getColMap ()->getLocalNumElements (), std::runtime_error,
5015 "NO_TRANS case: X has the wrong number of local rows. "
5017 "getColMap()->getLocalNumElements() = " <<
5018 getColMap ()->getLocalNumElements () <<
".");
5019 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5021 getRowMap ()->getLocalNumElements (), std::runtime_error,
5022 "NO_TRANS case: Y has the wrong number of local rows. "
5024 "getRowMap()->getLocalNumElements() = " <<
5025 getRowMap ()->getLocalNumElements () <<
".");
5026 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5028 getRowMap ()->getLocalNumElements (), std::runtime_error,
5029 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5031 <<
" != getRowMap()->getLocalNumElements() = "
5032 <<
getRowMap ()->getLocalNumElements () <<
".");
5033 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5035 getColMap ()->getLocalNumElements (), std::runtime_error,
5036 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5038 <<
" != getColMap()->getLocalNumElements() = "
5039 <<
getColMap ()->getLocalNumElements () <<
".");
5040 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5042 "fill complete. You must call fillComplete() (possibly with "
5043 "domain and range Map arguments) without an intervening "
5044 "resumeFill() call before you may call this method.");
5045 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5047 std::runtime_error,
"X and Y must be constant stride.");
5052 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5053 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5054 && X_lcl.extent(0) != 0,
5055 std::runtime_error,
"X and Y may not alias one another.");
5058#if KOKKOSKERNELS_VERSION >= 40299
5061 if(!applyHelper.get()) {
5064 bool useMergePath =
false;
5065#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
5071 if constexpr(std::is_same_v<execution_space, Kokkos::Cuda>) {
5073 LocalOrdinal maxRowImbalance = 0;
5078 useMergePath =
true;
5081 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
5082 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
5086 const char* modeKK =
nullptr;
5089 case Teuchos::NO_TRANS:
5090 modeKK = KokkosSparse::NoTranspose;
break;
5091 case Teuchos::TRANS:
5092 modeKK = KokkosSparse::Transpose;
break;
5093 case Teuchos::CONJ_TRANS:
5094 modeKK = KokkosSparse::ConjugateTranspose;
break;
5096 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
5099 if(applyHelper->shouldUseIntRowptrs())
5101 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
5103 &applyHelper->handle_int, modeKK,
5109 &applyHelper->handle, modeKK,
5114 LocalOrdinal maxRowImbalance = 0;
5120 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5122 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
5126 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5131 Teuchos::ETransp mode,
5136 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5138 TEUCHOS_TEST_FOR_EXCEPTION
5140 fnName <<
": Cannot call apply() until fillComplete() "
5141 "has been called.");
5143 if (mode == Teuchos::NO_TRANS) {
5144 ProfilingRegion regionNonTranspose (fnName);
5148 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5154 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5156 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5161 typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> output_matrix_type;
5162 const char tfecfFuncName[] =
"convert: ";
5164 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5165 (! this->
isFillComplete (), std::runtime_error,
"This matrix (the source "
5166 "of the conversion) is not fill complete. You must first call "
5167 "fillComplete() (possibly with the domain and range Map) without an "
5168 "intervening call to resumeFill(), before you may call this method.");
5170 RCP<output_matrix_type> newMatrix
5175 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5176 this->getLocalMatrixDevice ().values);
5186 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5193 const char tfecfFuncName[] =
"checkInternalState: ";
5194 const char err[] =
"Internal state is not consistent. "
5195 "Please report this bug to the Tpetra developers.";
5199 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5200 (staticGraph_.is_null (), std::logic_error, err);
5204 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5205 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5206 std::logic_error, err);
5208 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5210 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5211 "but its graph is NOT fill complete.");
5214 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5215 (staticGraph_->indicesAreAllocated () &&
5216 staticGraph_->getLocalAllocationSize() > 0 &&
5217 staticGraph_->getLocalNumRows() > 0 &&
5218 valuesUnpacked_wdv.extent (0) == 0,
5219 std::logic_error, err);
5223 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5228 std::ostringstream os;
5230 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
5231 if (this->getObjectLabel () !=
"") {
5232 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5235 os <<
"isFillComplete: true"
5242 os <<
"isFillComplete: false"
5249 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5252 describe (Teuchos::FancyOStream &out,
5253 const Teuchos::EVerbosityLevel verbLevel)
const
5257 using Teuchos::ArrayView;
5258 using Teuchos::Comm;
5260 using Teuchos::TypeNameTraits;
5261 using Teuchos::VERB_DEFAULT;
5262 using Teuchos::VERB_NONE;
5263 using Teuchos::VERB_LOW;
5264 using Teuchos::VERB_MEDIUM;
5265 using Teuchos::VERB_HIGH;
5266 using Teuchos::VERB_EXTREME;
5268 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5270 if (vl == VERB_NONE) {
5275 Teuchos::OSTab tab0 (out);
5277 RCP<const Comm<int> > comm = this->
getComm();
5278 const int myRank = comm->getRank();
5279 const int numProcs = comm->getSize();
5284 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
5294 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5296 Teuchos::OSTab tab1 (out);
5299 if (this->getObjectLabel () !=
"") {
5300 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5303 out <<
"Template parameters:" << endl;
5304 Teuchos::OSTab tab2 (out);
5305 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5306 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5307 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5308 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
5311 out <<
"isFillComplete: true" << endl
5315 << endl <<
"Global max number of entries in a row: "
5319 out <<
"isFillComplete: false" << endl
5325 if (vl < VERB_MEDIUM) {
5331 out << endl <<
"Row Map:" << endl;
5335 out <<
"null" << endl;
5347 out <<
"Column Map: ";
5351 out <<
"null" << endl;
5355 out <<
"same as row Map" << endl;
5366 out <<
"Domain Map: ";
5370 out <<
"null" << endl;
5374 out <<
"same as row Map" << endl;
5378 out <<
"same as column Map" << endl;
5389 out <<
"Range Map: ";
5393 out <<
"null" << endl;
5397 out <<
"same as domain Map" << endl;
5401 out <<
"same as row Map" << endl;
5411 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5412 if (myRank == curRank) {
5413 out <<
"Process rank: " << curRank << endl;
5414 Teuchos::OSTab tab2 (out);
5415 if (! staticGraph_->indicesAreAllocated ()) {
5416 out <<
"Graph indices not allocated" << endl;
5419 out <<
"Number of allocated entries: "
5420 << staticGraph_->getLocalAllocationSize () << endl;
5432 if (vl < VERB_HIGH) {
5437 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5438 if (myRank == curRank) {
5439 out << std::setw(width) <<
"Proc Rank"
5440 << std::setw(width) <<
"Global Row"
5441 << std::setw(width) <<
"Num Entries";
5442 if (vl == VERB_EXTREME) {
5443 out << std::setw(width) <<
"(Index,Value)";
5448 GlobalOrdinal gid =
getRowMap()->getGlobalElement(r);
5449 out << std::setw(width) << myRank
5450 << std::setw(width) << gid
5451 << std::setw(width) << nE;
5452 if (vl == VERB_EXTREME) {
5454 global_inds_host_view_type rowinds;
5455 values_host_view_type rowvals;
5457 for (
size_t j = 0; j < nE; ++j) {
5458 out <<
" (" << rowinds[j]
5459 <<
", " << rowvals[j]
5464 local_inds_host_view_type rowinds;
5465 values_host_view_type rowvals;
5467 for (
size_t j=0; j < nE; ++j) {
5468 out <<
" (" <<
getColMap()->getGlobalElement(rowinds[j])
5469 <<
", " << rowvals[j]
5485 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5499 return (srcRowMat !=
nullptr);
5502 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5506 const typename crs_graph_type::padding_type& padding,
5513 using row_ptrs_type =
5514 typename local_graph_device_type::row_map_type::non_const_type;
5515 using range_policy =
5516 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5517 const char tfecfFuncName[] =
"applyCrsPadding";
5518 const char suffix[] =
5519 ". Please report this bug to the Tpetra developers.";
5520 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5522 std::unique_ptr<std::string> prefix;
5524 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5525 std::ostringstream os;
5526 os << *prefix <<
"padding: ";
5529 std::cerr << os.str();
5531 const int myRank = ! verbose ? -1 : [&] () {
5532 auto map = this->getMap();
5533 if (map.is_null()) {
5536 auto comm = map->getComm();
5537 if (comm.is_null()) {
5540 return comm->getRank();
5544 if (! myGraph_->indicesAreAllocated()) {
5546 std::ostringstream os;
5547 os << *prefix <<
"Call allocateIndices" << endl;
5548 std::cerr << os.str();
5550 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5562 std::ostringstream os;
5563 os << *prefix <<
"Allocate row_ptrs_beg: "
5564 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5565 std::cerr << os.str();
5567 using Kokkos::view_alloc;
5568 using Kokkos::WithoutInitializing;
5569 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5570 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5572 Kokkos::deep_copy(
execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
5574 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5575 size_t(row_ptr_beg.extent(0) - 1);
5577 std::ostringstream os;
5578 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5579 std::cerr << os.str();
5581 row_ptrs_type row_ptr_end(
5582 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5584 row_ptrs_type num_row_entries_d;
5586 const bool refill_num_row_entries =
5587 myGraph_->k_numRowEntries_.extent(0) != 0;
5589 if (refill_num_row_entries) {
5592 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5593 myGraph_->k_numRowEntries_);
5594 Kokkos::parallel_for
5595 (
"Fill end row pointers", range_policy(0, N),
5596 KOKKOS_LAMBDA (
const size_t i) {
5597 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
5604 Kokkos::parallel_for
5605 (
"Fill end row pointers", range_policy(0, N),
5606 KOKKOS_LAMBDA (
const size_t i) {
5607 row_ptr_end(i) = row_ptr_beg(i+1);
5611 if (myGraph_->isGloballyIndexed()) {
5613 myGraph_->gblInds_wdv,
5614 valuesUnpacked_wdv, padding, myRank, verbose);
5615 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5616 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5617 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5618 (newValuesLen != newColIndsLen, std::logic_error,
5619 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5620 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5625 myGraph_->lclIndsUnpacked_wdv,
5626 valuesUnpacked_wdv, padding, myRank, verbose);
5627 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5628 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5629 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5630 (newValuesLen != newColIndsLen, std::logic_error,
5631 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5632 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
5636 if (refill_num_row_entries) {
5637 Kokkos::parallel_for
5638 (
"Fill num entries", range_policy(0, N),
5639 KOKKOS_LAMBDA (
const size_t i) {
5640 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5642 Kokkos::deep_copy(myGraph_->k_numRowEntries_, num_row_entries_d);
5646 std::ostringstream os;
5647 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5648 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5649 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5650 std::cerr << os.str();
5651 TEUCHOS_ASSERT( myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5652 row_ptr_beg.extent(0) );
5654 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
5657 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5662 const size_t numSameIDs,
5663 const LocalOrdinal permuteToLIDs[],
5664 const LocalOrdinal permuteFromLIDs[],
5665 const size_t numPermutes)
5668 using Teuchos::Array;
5669 using Teuchos::ArrayView;
5671 using LO = LocalOrdinal;
5672 using GO = GlobalOrdinal;
5673 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5674 const char suffix[] =
5675 " Please report this bug to the Tpetra developers.";
5676 ProfilingRegion regionCAP
5677 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5681 std::unique_ptr<std::string> prefix;
5683 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5684 std::ostringstream os;
5685 os << *prefix <<
"Start" << endl;
5687 const char*
const prefix_raw =
5688 verbose ? prefix.get()->c_str() :
nullptr;
5690 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5695 const map_type& srcRowMap = * (srcMat.getRowMap ());
5696 nonconst_global_inds_host_view_type rowInds;
5697 nonconst_values_host_view_type rowVals;
5698 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5699 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5703 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5704 const GO targetGID = sourceGID;
5706 ArrayView<const GO>rowIndsConstView;
5707 ArrayView<const Scalar> rowValsConstView;
5709 if (sourceIsLocallyIndexed) {
5710 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5711 if (rowLength >
static_cast<size_t> (rowInds.size())) {
5712 Kokkos::resize(rowInds,rowLength);
5713 Kokkos::resize(rowVals,rowLength);
5717 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5718 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5723 size_t checkRowLength = 0;
5724 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5725 rowValsView, checkRowLength);
5727 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5728 (rowLength != checkRowLength, std::logic_error,
"For "
5729 "global row index " << sourceGID <<
", the source "
5730 "matrix's getNumEntriesInGlobalRow returns a row length "
5731 "of " << rowLength <<
", but getGlobalRowCopy reports "
5732 "a row length of " << checkRowLength <<
"." << suffix);
5739 rowIndsConstView = Teuchos::ArrayView<const GO> (
5740 rowIndsView.data(), rowIndsView.extent(0),
5741 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5742 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5743 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5744 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5749 global_inds_host_view_type rowIndsView;
5750 values_host_view_type rowValsView;
5751 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5756 rowIndsConstView = Teuchos::ArrayView<const GO> (
5757 rowIndsView.data(), rowIndsView.extent(0),
5758 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5759 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5760 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5761 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5769 combineGlobalValues(targetGID, rowIndsConstView,
5771 prefix_raw, debug, verbose);
5775 std::ostringstream os;
5776 os << *prefix <<
"Do permutes" << endl;
5779 const map_type& tgtRowMap = * (this->getRowMap ());
5780 for (
size_t p = 0; p < numPermutes; ++p) {
5781 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5782 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5784 ArrayView<const GO> rowIndsConstView;
5785 ArrayView<const Scalar> rowValsConstView;
5787 if (sourceIsLocallyIndexed) {
5788 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5789 if (rowLength >
static_cast<size_t> (rowInds.size ())) {
5790 Kokkos::resize(rowInds,rowLength);
5791 Kokkos::resize(rowVals,rowLength);
5795 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5796 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5801 size_t checkRowLength = 0;
5802 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5803 rowValsView, checkRowLength);
5805 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5806 (rowLength != checkRowLength, std::logic_error,
"For "
5807 "source matrix global row index " << sourceGID <<
", "
5808 "getNumEntriesInGlobalRow returns a row length of " <<
5809 rowLength <<
", but getGlobalRowCopy a row length of "
5810 << checkRowLength <<
"." << suffix);
5817 rowIndsConstView = Teuchos::ArrayView<const GO> (
5818 rowIndsView.data(), rowIndsView.extent(0),
5819 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5820 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5821 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5822 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5827 global_inds_host_view_type rowIndsView;
5828 values_host_view_type rowValsView;
5829 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5834 rowIndsConstView = Teuchos::ArrayView<const GO> (
5835 rowIndsView.data(), rowIndsView.extent(0),
5836 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5837 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5838 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5839 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5844 combineGlobalValues(targetGID, rowIndsConstView,
5846 prefix_raw, debug, verbose);
5850 std::ostringstream os;
5851 os << *prefix <<
"Done" << endl;
5855 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5860 const size_t numSameIDs,
5861 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5862 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5863 const size_t numPermutes)
5866 using Teuchos::Array;
5867 using Teuchos::ArrayView;
5869 using LO = LocalOrdinal;
5870 using GO = GlobalOrdinal;
5871 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5872 const char suffix[] =
5873 " Please report this bug to the Tpetra developers.";
5874 ProfilingRegion regionCAP
5875 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5879 std::unique_ptr<std::string> prefix;
5881 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5882 std::ostringstream os;
5883 os << *prefix <<
"Start" << endl;
5885 const char*
const prefix_raw =
5886 verbose ? prefix.get()->c_str() :
nullptr;
5890 const row_graph_type& srcGraph = *(srcMat.getGraph());
5892 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5893 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5894 applyCrsPadding(*padding, verbose);
5896 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5901 const map_type& srcRowMap = * (srcMat.getRowMap ());
5902 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5903 using gids_type = nonconst_global_inds_host_view_type;
5904 using vals_type = nonconst_values_host_view_type;
5907 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5911 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5912 const GO targetGID = sourceGID;
5914 ArrayView<const GO> rowIndsConstView;
5915 ArrayView<const Scalar> rowValsConstView;
5917 if (sourceIsLocallyIndexed) {
5919 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5920 if (rowLength >
static_cast<size_t> (rowInds.extent(0))) {
5921 Kokkos::resize(rowInds,rowLength);
5922 Kokkos::resize(rowVals,rowLength);
5926 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5927 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5932 size_t checkRowLength = 0;
5933 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
5936 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5937 (rowLength != checkRowLength, std::logic_error,
": For "
5938 "global row index " << sourceGID <<
", the source "
5939 "matrix's getNumEntriesInGlobalRow returns a row length "
5940 "of " << rowLength <<
", but getGlobalRowCopy reports "
5941 "a row length of " << checkRowLength <<
"." << suffix);
5943 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5944 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
5947 global_inds_host_view_type rowIndsView;
5948 values_host_view_type rowValsView;
5949 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5955 rowIndsConstView = Teuchos::ArrayView<const GO> (
5956 rowIndsView.data(), rowIndsView.extent(0),
5957 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5958 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5959 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5960 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5966 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5967 rowValsConstView, prefix_raw, debug, verbose);
5971 std::ostringstream os;
5972 os << *prefix <<
"Do permutes" << endl;
5974 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5975 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5977 const map_type& tgtRowMap = * (this->getRowMap ());
5978 for (
size_t p = 0; p < numPermutes; ++p) {
5979 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5980 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5982 ArrayView<const GO> rowIndsConstView;
5983 ArrayView<const Scalar> rowValsConstView;
5985 if (sourceIsLocallyIndexed) {
5986 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5987 if (rowLength >
static_cast<size_t> (rowInds.extent(0))) {
5988 Kokkos::resize(rowInds,rowLength);
5989 Kokkos::resize(rowVals,rowLength);
5993 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5994 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5999 size_t checkRowLength = 0;
6000 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6001 rowValsView, checkRowLength);
6003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6004 (rowLength != checkRowLength, std::logic_error,
"For "
6005 "source matrix global row index " << sourceGID <<
", "
6006 "getNumEntriesInGlobalRow returns a row length of " <<
6007 rowLength <<
", but getGlobalRowCopy a row length of "
6008 << checkRowLength <<
"." << suffix);
6010 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6011 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
6014 global_inds_host_view_type rowIndsView;
6015 values_host_view_type rowValsView;
6016 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6022 rowIndsConstView = Teuchos::ArrayView<const GO> (
6023 rowIndsView.data(), rowIndsView.extent(0),
6024 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6025 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6026 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6027 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6033 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6034 rowValsConstView, prefix_raw, debug, verbose);
6038 std::ostringstream os;
6039 os << *prefix <<
"Done" << endl;
6043 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6048 const size_t numSameIDs,
6049 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6050 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6059 const char tfecfFuncName[] =
"copyAndPermute: ";
6060 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6062 const bool verbose = Behavior::verbose(
"CrsMatrix");
6063 std::unique_ptr<std::string> prefix;
6065 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6066 std::ostringstream os;
6067 os << *prefix << endl
6068 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6069 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6072 << dualViewStatusToString (permuteToLIDs,
"permuteToLIDs")
6075 << dualViewStatusToString (permuteFromLIDs,
"permuteFromLIDs")
6078 <<
"isStaticGraph: " << (
isStaticGraph() ?
"true" :
"false")
6080 std::cerr << os.str ();
6083 const auto numPermute = permuteToLIDs.extent (0);
6084 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6085 (numPermute != permuteFromLIDs.extent (0),
6086 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6087 << numPermute <<
"!= permuteFromLIDs.extent(0) = "
6088 << permuteFromLIDs.extent (0) <<
".");
6093 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
6095 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6096 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6097 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6098 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6100 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6101 permuteToLIDs_h.data(),
6102 permuteFromLIDs_h.data(),
6106 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6107 permuteFromLIDs, numPermute);
6111 std::ostringstream os;
6112 os << *prefix <<
"Done" << endl;
6113 std::cerr << os.str();
6117 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6122 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6123 Kokkos::DualView<char*, buffer_device_type>& exports,
6124 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6125 size_t& constantNumPackets)
6130 using Teuchos::outArg;
6131 using Teuchos::REDUCE_MAX;
6132 using Teuchos::reduceAll;
6134 typedef LocalOrdinal LO;
6135 typedef GlobalOrdinal GO;
6136 const char tfecfFuncName[] =
"packAndPrepare: ";
6137 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6139 const bool debug = Behavior::debug(
"CrsMatrix");
6140 const bool verbose = Behavior::verbose(
"CrsMatrix");
6143 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6144 if (pComm.is_null ()) {
6147 const Teuchos::Comm<int>& comm = *pComm;
6148 const int myRank = comm.getSize ();
6150 std::unique_ptr<std::string> prefix;
6152 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6153 std::ostringstream os;
6154 os << *prefix <<
"Start" << endl
6156 << dualViewStatusToString (exportLIDs,
"exportLIDs")
6159 << dualViewStatusToString (exports,
"exports")
6162 << dualViewStatusToString (numPacketsPerLID,
"numPacketsPerLID")
6164 std::cerr << os.str ();
6187 std::ostringstream msg;
6191 const crs_matrix_type* srcCrsMat =
6192 dynamic_cast<const crs_matrix_type*
> (&source);
6193 if (srcCrsMat !=
nullptr) {
6195 std::ostringstream os;
6196 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6197 "calling packNew" << endl;
6198 std::cerr << os.str ();
6201 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6202 constantNumPackets);
6204 catch (std::exception& e) {
6206 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
6210 using Kokkos::HostSpace;
6211 using Kokkos::subview;
6212 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6213 using range_type = Kokkos::pair<size_t, size_t>;
6216 std::ostringstream os;
6217 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6219 std::cerr << os.str ();
6222 const row_matrix_type* srcRowMat =
6223 dynamic_cast<const row_matrix_type*
> (&source);
6224 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6225 (srcRowMat ==
nullptr, std::invalid_argument,
6226 "The source object of the Import or Export operation is neither a "
6227 "CrsMatrix (with the same template parameters as the target object), "
6228 "nor a RowMatrix (with the same first four template parameters as the "
6239 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6240 auto exportLIDs_h = exportLIDs.view_host ();
6241 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6242 exportLIDs_h.size ());
6246 Teuchos::Array<char> exports_a;
6252 numPacketsPerLID.clear_sync_state ();
6253 numPacketsPerLID.modify_host ();
6254 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6255 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6256 numPacketsPerLID_h.size ());
6261 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6262 constantNumPackets);
6264 catch (std::exception& e) {
6266 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
6270 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6271 if (
static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6272 const std::string oldLabel = exports.view_device().label ();
6273 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6274 exports = exports_type (newLabel, newAllocSize);
6279 exports.modify_host();
6281 auto exports_h = exports.view_host ();
6282 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
6286 typedef typename exports_type::t_host::execution_space HES;
6287 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6288 Kokkos::View<const char*, host_device_type>
6289 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
6291 Kokkos::deep_copy (exports_h_sub, exports_a_kv);
6296 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6299 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6300 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6301 "one or more participating processes.");
6305 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6306 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6307 "or more participating processes. Here is this process' error "
6308 "message: " << msg.str ());
6312 std::ostringstream os;
6313 os << *prefix <<
"packAndPrepare: Done!" << endl
6323 std::cerr << os.str ();
6327 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6331 const size_t offset,
6332 const size_t numEnt,
6333 const GlobalOrdinal gidsIn[],
6335 const size_t numBytesPerValue)
const
6338 using Kokkos::subview;
6339 using Tpetra::Details::PackTraits;
6340 typedef LocalOrdinal LO;
6341 typedef GlobalOrdinal GO;
6350 const LO numEntLO =
static_cast<size_t> (numEnt);
6352 const size_t numEntBeg = offset;
6353 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
6354 const size_t gidsBeg = numEntBeg + numEntLen;
6355 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6356 const size_t valsBeg = gidsBeg + gidsLen;
6357 const size_t valsLen = numEnt * numBytesPerValue;
6359 char*
const numEntOut = exports + numEntBeg;
6360 char*
const gidsOut = exports + gidsBeg;
6361 char*
const valsOut = exports + valsBeg;
6363 size_t numBytesOut = 0;
6365 numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
6368 Kokkos::pair<int, size_t> p;
6369 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6370 errorCode += p.first;
6371 numBytesOut += p.second;
6373 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6374 errorCode += p.first;
6375 numBytesOut += p.second;
6378 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6379 TEUCHOS_TEST_FOR_EXCEPTION
6380 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6381 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6382 << expectedNumBytes <<
".");
6383 TEUCHOS_TEST_FOR_EXCEPTION
6384 (errorCode != 0, std::runtime_error,
"packRow: "
6385 "PackTraits::packArray returned a nonzero error code");
6390 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6395 const char imports[],
6396 const size_t offset,
6397 const size_t numBytes,
6398 const size_t numEnt,
6399 const size_t numBytesPerValue)
6402 using Kokkos::subview;
6403 using Tpetra::Details::PackTraits;
6404 typedef LocalOrdinal LO;
6405 typedef GlobalOrdinal GO;
6409 "Tpetra::CrsMatrix::unpackRow",
6413 if (numBytes == 0) {
6416 const int myRank = this->getMap ()->getComm ()->getRank ();
6417 TEUCHOS_TEST_FOR_EXCEPTION
6418 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6419 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6420 "number of entries to unpack (as reported by numPacketsPerLID) "
6421 "for this row numEnt=" << numEnt <<
" != 0.");
6426 if (numEnt == 0 && numBytes != 0) {
6427 const int myRank = this->getMap ()->getComm ()->getRank ();
6428 TEUCHOS_TEST_FOR_EXCEPTION
6429 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6430 "unpackRow: The number of entries to unpack (as reported by "
6431 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6432 "numBytes=" << numBytes <<
" != 0.");
6438 const size_t numEntBeg = offset;
6439 const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
6440 const size_t gidsBeg = numEntBeg + numEntLen;
6441 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6442 const size_t valsBeg = gidsBeg + gidsLen;
6443 const size_t valsLen = numEnt * numBytesPerValue;
6445 const char*
const numEntIn = imports + numEntBeg;
6446 const char*
const gidsIn = imports + gidsBeg;
6447 const char*
const valsIn = imports + valsBeg;
6449 size_t numBytesOut = 0;
6452 numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
6453 if (
static_cast<size_t> (numEntOut) != numEnt ||
6454 numEntOut ==
static_cast<LO
> (0)) {
6455 const int myRank = this->getMap ()->getComm ()->getRank ();
6456 std::ostringstream os;
6457 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6458 bool firstErrorCondition =
false;
6459 if (
static_cast<size_t> (numEntOut) != numEnt) {
6460 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6461 <<
" does not equal number of entries unpacked from imports "
6462 "buffer numEntOut=" << numEntOut <<
".";
6463 firstErrorCondition =
true;
6465 if (numEntOut ==
static_cast<LO
> (0)) {
6466 if (firstErrorCondition) {
6469 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6470 "but number of bytes to unpack for this row numBytes=" << numBytes
6471 <<
" != 0. This should never happen, since packRow should only "
6472 "ever pack rows with a nonzero number of entries. In this case, "
6473 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6476 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
6480 Kokkos::pair<int, size_t> p;
6481 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6482 errorCode += p.first;
6483 numBytesOut += p.second;
6485 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6486 errorCode += p.first;
6487 numBytesOut += p.second;
6490 TEUCHOS_TEST_FOR_EXCEPTION
6491 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6492 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6494 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6495 TEUCHOS_TEST_FOR_EXCEPTION
6496 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6497 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6498 << expectedNumBytes <<
".");
6500 TEUCHOS_TEST_FOR_EXCEPTION
6501 (errorCode != 0, std::runtime_error,
"unpackRow: "
6502 "PackTraits::unpackArray returned a nonzero error code");
6507 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6511 size_t& totalNumEntries,
6512 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6518 typedef LocalOrdinal LO;
6519 typedef GlobalOrdinal GO;
6525 const bool verbose = Behavior::verbose(
"CrsMatrix");
6526 std::unique_ptr<std::string> prefix;
6528 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6529 std::ostringstream os;
6530 os << *prefix <<
"Before:"
6538 std::cerr << os.str ();
6543 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
6545 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6546 auto exportLIDs_h = exportLIDs.view_host ();
6549 totalNumEntries = 0;
6550 for (LO i = 0; i < numExportLIDs; ++i) {
6551 const LO lclRow = exportLIDs_h[i];
6552 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
6555 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6558 totalNumEntries += curNumEntries;
6569 const size_t allocSize =
6570 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6571 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
6572 if (
static_cast<size_t> (exports.extent (0)) < allocSize) {
6573 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6575 const std::string oldLabel = exports.view_device().label ();
6576 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6577 exports = exports_type (newLabel, allocSize);
6581 std::ostringstream os;
6582 os << *prefix <<
"After:"
6590 std::cerr << os.str ();
6594 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6597 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6598 Kokkos::DualView<char*, buffer_device_type>& exports,
6599 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6600 size_t& constantNumPackets)
const
6606 packCrsMatrixNew (*
this, exports, numPacketsPerLID, exportLIDs,
6607 constantNumPackets);
6610 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6611 constantNumPackets);
6615 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6618 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6619 Kokkos::DualView<char*, buffer_device_type>& exports,
6620 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6621 size_t& constantNumPackets)
const
6629 using LO = LocalOrdinal;
6630 using GO = GlobalOrdinal;
6632 const char tfecfFuncName[] =
"packNonStaticNew: ";
6634 const bool verbose = Behavior::verbose(
"CrsMatrix");
6635 std::unique_ptr<std::string> prefix;
6637 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6638 std::ostringstream os;
6639 os << *prefix <<
"Start" << endl;
6640 std::cerr << os.str ();
6643 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6644 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6645 (numExportLIDs !=
static_cast<size_t> (numPacketsPerLID.extent (0)),
6646 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6647 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
6653 constantNumPackets = 0;
6658 size_t totalNumEntries = 0;
6659 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6660 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
6663 exports.clear_sync_state();
6664 exports.modify_host();
6665 auto exports_h = exports.view_host ();
6667 std::ostringstream os;
6668 os << *prefix <<
"After marking exports as modified on host, "
6669 << dualViewStatusToString (exports,
"exports") << endl;
6670 std::cerr << os.str ();
6674 auto exportLIDs_h = exportLIDs.view_host ();
6677 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6678 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6679 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6684 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
6688 typename global_inds_host_view_type::non_const_type gidsIn_k;
6689 if (this->isLocallyIndexed()) {
6691 typename global_inds_host_view_type::non_const_type(
"packGids",
6696 for (
size_t i = 0; i < numExportLIDs; ++i) {
6697 const LO lclRow = exportLIDs_h[i];
6699 size_t numBytes = 0;
6700 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
6707 numPacketsPerLID_h[i] = 0;
6711 if (this->isLocallyIndexed ()) {
6712 typename global_inds_host_view_type::non_const_type gidsIn;
6713 values_host_view_type valsIn;
6717 local_inds_host_view_type lidsIn;
6718 this->getLocalRowView (lclRow, lidsIn, valsIn);
6719 const map_type& colMap = * (this->getColMap ());
6720 for (
size_t k = 0; k < numEnt; ++k) {
6721 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6723 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
6725 const size_t numBytesPerValue =
6726 PackTraits<ST>::packValueCount (valsIn[0]);
6727 numBytes = this->
packRow (exports_h.data (), offset, numEnt,
6728 gidsIn.data (), valsIn.data (),
6731 else if (this->isGloballyIndexed ()) {
6732 global_inds_host_view_type gidsIn;
6733 values_host_view_type valsIn;
6739 const map_type& rowMap = * (this->getRowMap ());
6740 const GO gblRow = rowMap.getGlobalElement (lclRow);
6741 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6743 const size_t numBytesPerValue =
6744 PackTraits<ST>::packValueCount (valsIn[0]);
6745 numBytes = this->
packRow (exports_h.data (), offset, numEnt,
6746 gidsIn.data (), valsIn.data (),
6753 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6754 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6755 "First invalid offset into 'exports' pack buffer at index i = " << i
6756 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6757 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
6762 numPacketsPerLID_h[i] = numBytes;
6767 std::ostringstream os;
6768 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6775 std::cerr << os.str ();
6779 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6783 const LocalOrdinal numEnt,
6785 const GlobalOrdinal cols[],
6787 const char*
const prefix,
6791 using GO = GlobalOrdinal;
6795 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
6796 Teuchos::ArrayView<const GO> cols_av
6797 (numEnt == 0 ?
nullptr : cols, numEnt);
6798 Teuchos::ArrayView<const Scalar> vals_av
6799 (numEnt == 0 ?
nullptr :
reinterpret_cast<const Scalar*
> (vals), numEnt);
6804 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6805 prefix, debug, verbose);
6809 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6813 const GlobalOrdinal globalRowIndex,
6814 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6815 const Teuchos::ArrayView<const Scalar>& values,
6817 const char*
const prefix,
6821 const char tfecfFuncName[] =
"combineGlobalValues: ";
6823 if (isStaticGraph ()) {
6827 if (combineMode ==
ADD) {
6828 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6830 else if (combineMode ==
REPLACE) {
6831 replaceGlobalValues (globalRowIndex, columnIndices, values);
6833 else if (combineMode ==
ABSMAX) {
6834 using ::Tpetra::Details::AbsMax;
6836 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6840 else if (combineMode ==
INSERT) {
6841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6842 (isStaticGraph() && combineMode ==
INSERT,
6843 std::invalid_argument,
"INSERT combine mode is forbidden "
6844 "if the matrix has a static (const) graph (i.e., was "
6845 "constructed with the CrsMatrix constructor that takes a "
6846 "const CrsGraph pointer).");
6849 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6850 (
true, std::logic_error,
"Invalid combine mode; should "
6852 "Please report this bug to the Tpetra developers.");
6856 if (combineMode ==
ADD || combineMode ==
INSERT) {
6863 insertGlobalValuesFilteredChecked(globalRowIndex,
6864 columnIndices, values, prefix, debug, verbose);
6875 else if (combineMode ==
ABSMAX) {
6876 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6877 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6878 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6881 else if (combineMode ==
REPLACE) {
6882 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6883 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6884 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6888 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6889 true, std::logic_error,
"Should never get here! Please report this "
6890 "bug to the Tpetra developers.");
6895 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6899 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6900 Kokkos::DualView<char*, buffer_device_type> imports,
6901 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6902 const size_t constantNumPackets,
6909 const char tfecfFuncName[] =
"unpackAndCombine: ";
6910 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6912 const bool debug = Behavior::debug(
"CrsMatrix");
6913 const bool verbose = Behavior::verbose(
"CrsMatrix");
6914 constexpr int numValidModes = 5;
6917 const char* validModeNames[numValidModes] =
6918 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6920 std::unique_ptr<std::string> prefix;
6922 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6923 std::ostringstream os;
6924 os << *prefix <<
"Start:" << endl
6926 << dualViewStatusToString (importLIDs,
"importLIDs")
6929 << dualViewStatusToString (imports,
"imports")
6932 << dualViewStatusToString (numPacketsPerLID,
"numPacketsPerLID")
6934 << *prefix <<
" constantNumPackets: " << constantNumPackets
6938 std::cerr << os.str ();
6942 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6943 validModes+numValidModes) {
6944 std::ostringstream os;
6945 os <<
"Invalid combine mode. Valid modes are {";
6946 for (
int k = 0; k < numValidModes; ++k) {
6947 os << validModeNames[k];
6948 if (k < numValidModes - 1) {
6953 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6954 (
true, std::invalid_argument, os.str ());
6956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6957 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6958 std::invalid_argument,
"importLIDs.extent(0)="
6959 << importLIDs.extent(0)
6960 <<
" != numPacketsPerLID.extent(0)="
6961 << numPacketsPerLID.extent(0) <<
".");
6964 if (combineMode ==
ZERO) {
6969 using Teuchos::reduceAll;
6970 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
6973 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6974 constantNumPackets, combineMode,
6976 }
catch (std::exception& e) {
6981 const Teuchos::Comm<int>& comm = * (this->
getComm ());
6982 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6983 lclBad, Teuchos::outArg (gblBad));
6989 std::ostringstream os;
6990 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
6991 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
6993 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6994 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
6995 "threw an exception on one or more participating processes: "
6996 << endl << msg->str ());
7000 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7001 constantNumPackets, combineMode,
7006 std::ostringstream os;
7007 os << *prefix <<
"Done!" << endl
7009 << dualViewStatusToString (importLIDs,
"importLIDs")
7012 << dualViewStatusToString (imports,
"imports")
7015 << dualViewStatusToString (numPacketsPerLID,
"numPacketsPerLID")
7017 std::cerr << os.str ();
7021 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7027 Kokkos::DualView<char*, buffer_device_type> imports,
7028 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7029 const size_t constantNumPackets,
7034 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7038 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7039 std::unique_ptr<std::string> prefix;
7041 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7042 std::ostringstream os;
7043 os << *prefix <<
"isStaticGraph(): "
7044 << (isStaticGraph() ?
"true" :
"false")
7045 <<
", importLIDs.extent(0): "
7046 << importLIDs.extent(0)
7047 <<
", imports.extent(0): "
7048 << imports.extent(0)
7049 <<
", numPacketsPerLID.extent(0): "
7050 << numPacketsPerLID.extent(0)
7052 std::cerr << os.str();
7055 if (isStaticGraph ()) {
7056 using Details::unpackCrsMatrixAndCombineNew;
7057 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7058 importLIDs, constantNumPackets,
7063 using padding_type =
typename crs_graph_type::padding_type;
7064 std::unique_ptr<padding_type> padding;
7066 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7067 importLIDs, imports, numPacketsPerLID, verbose);
7069 catch (std::exception& e) {
7070 const auto rowMap = getRowMap();
7071 const auto comm = rowMap.is_null() ? Teuchos::null :
7073 const int myRank = comm.is_null() ? -1 : comm->getRank();
7074 TEUCHOS_TEST_FOR_EXCEPTION
7075 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7076 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7077 "threw an exception: " << e.what());
7080 std::ostringstream os;
7081 os << *prefix <<
"Call applyCrsPadding" << endl;
7082 std::cerr << os.str();
7084 applyCrsPadding(*padding, verbose);
7087 std::ostringstream os;
7088 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7089 std::cerr << os.str();
7091 unpackAndCombineImplNonStatic(importLIDs, imports,
7098 std::ostringstream os;
7099 os << *prefix <<
"Done" << endl;
7100 std::cerr << os.str();
7104 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7110 Kokkos::DualView<char*, buffer_device_type> imports,
7111 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7112 const size_t constantNumPackets,
7116 using Kokkos::subview;
7117 using Kokkos::MemoryUnmanaged;
7124 using LO = LocalOrdinal;
7125 using GO = GlobalOrdinal;
7127 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7129 typename View<int*, device_type>::HostMirror::execution_space;
7130 using pair_type = std::pair<typename View<int*, HES>::size_type,
7131 typename View<int*, HES>::size_type>;
7132 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7133 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7134 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7136 const bool debug = Behavior::debug(
"CrsMatrix");
7137 const bool verbose = Behavior::verbose(
"CrsMatrix");
7138 std::unique_ptr<std::string> prefix;
7140 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7141 std::ostringstream os;
7142 os << *prefix << endl;
7143 std::cerr << os.str ();
7145 const char*
const prefix_raw =
7146 verbose ? prefix.get()->c_str() :
nullptr;
7148 const size_type numImportLIDs = importLIDs.extent (0);
7149 if (combineMode ==
ZERO || numImportLIDs == 0) {
7154 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
7159 if (imports.need_sync_host()) {
7160 imports.sync_host ();
7162 auto imports_h = imports.view_host();
7165 if (numPacketsPerLID.need_sync_host()) {
7166 numPacketsPerLID.sync_host ();
7168 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7170 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7171 auto importLIDs_h = importLIDs.view_host();
7173 size_t numBytesPerValue;
7184 numBytesPerValue = PackTraits<ST>::packValueCount (val);
7189 size_t maxRowNumEnt = 0;
7190 for (size_type i = 0; i < numImportLIDs; ++i) {
7191 const size_t numBytes = numPacketsPerLID_h[i];
7192 if (numBytes == 0) {
7197 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7198 (offset + numBytes >
size_t(imports_h.extent (0)),
7199 std::logic_error,
": At local row index importLIDs_h[i="
7200 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7201 <<
") + numBytes (=" << numBytes <<
") > "
7202 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7207 const size_t theNumBytes =
7208 PackTraits<LO>::packValueCount (numEntLO);
7209 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7210 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7211 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7213 const char*
const inBuf = imports_h.data () + offset;
7214 const size_t actualNumBytes =
7215 PackTraits<LO>::unpackValue (numEntLO, inBuf);
7218 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7219 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7220 <<
", actualNumBytes=" << actualNumBytes
7221 <<
" > numBytes=" << numBytes <<
".");
7222 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7223 (numEntLO == 0, std::logic_error,
": At local row index "
7224 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7225 "the number of entries read from the packed data is "
7226 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7230 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
7238 View<GO*, HES> gblColInds;
7239 View<LO*, HES> lclColInds;
7240 View<ST*, HES> vals;
7253 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7254 gid, maxRowNumEnt,
"gids");
7255 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7256 lid, maxRowNumEnt,
"lids");
7257 vals = ScalarViewTraits<ST, HES>::allocateArray(
7258 val, maxRowNumEnt,
"vals");
7262 for (size_type i = 0; i < numImportLIDs; ++i) {
7263 const size_t numBytes = numPacketsPerLID_h[i];
7264 if (numBytes == 0) {
7268 const char*
const inBuf = imports_h.data () + offset;
7269 (void) PackTraits<LO>::unpackValue (numEntLO, inBuf);
7271 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7272 const LO lclRow = importLIDs_h[i];
7274 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7275 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7277 const size_t numBytesOut =
7278 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7279 offset, numBytes, numEnt, numBytesPerValue);
7280 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7281 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7282 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7283 << numBytesOut <<
".");
7285 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7286 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7287 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7288 combineMode, prefix_raw, debug, verbose);
7294 std::ostringstream os;
7295 os << *prefix <<
"Done" << endl;
7296 std::cerr << os.str();
7300 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7301 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7304 const bool force)
const
7306 using Teuchos::null;
7310 TEUCHOS_TEST_FOR_EXCEPTION(
7311 ! this->
hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7312 "MapMultiVector: You may only call this method if the matrix has a "
7313 "column Map. If the matrix does not yet have a column Map, you should "
7314 "first call fillComplete (with domain and range Map if necessary).");
7318 TEUCHOS_TEST_FOR_EXCEPTION(
7320 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7321 "this matrix's graph is fill complete.");
7324 RCP<const import_type> importer = this->
getGraph ()->getImporter ();
7325 RCP<const map_type> colMap = this->
getColMap ();
7338 if (! importer.is_null () || force) {
7340 X_colMap = rcp (
new MV (colMap, numVecs));
7357 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7358 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7361 const bool force)
const
7363 using Teuchos::null;
7369 TEUCHOS_TEST_FOR_EXCEPTION(
7371 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7372 "matrix's graph is fill complete.");
7375 RCP<const export_type> exporter = this->
getGraph ()->getExporter ();
7379 RCP<const map_type> rowMap = this->
getRowMap ();
7391 if (! exporter.is_null () || force) {
7393 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7403 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7408 TEUCHOS_TEST_FOR_EXCEPTION(
7409 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7410 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7411 "was created with a constant graph (that is, when it was created using "
7412 "the version of its constructor that takes an RCP<const CrsGraph>). "
7413 "This is because the matrix is not allowed to modify the graph in that "
7414 "case, but removing empty processes requires modifying the graph.");
7415 myGraph_->removeEmptyProcessesInPlace (newMap);
7423 staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
7426 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7427 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7429 add (
const Scalar& alpha,
7432 const Teuchos::RCP<const map_type>& domainMap,
7433 const Teuchos::RCP<const map_type>& rangeMap,
7434 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7436 using Teuchos::Array;
7437 using Teuchos::ArrayView;
7438 using Teuchos::ParameterList;
7441 using Teuchos::rcp_implicit_cast;
7442 using Teuchos::sublist;
7446 using crs_matrix_type =
7447 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
7448 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7452 std::unique_ptr<std::string> prefix;
7454 prefix = this->createPrefix(
"CrsMatrix",
"add");
7455 std::ostringstream os;
7456 os << *prefix <<
"Start" << endl;
7457 std::cerr << os.str ();
7460 const crs_matrix_type& B = *
this;
7461 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7462 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7469 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7470 RCP<const map_type> B_domainMap = B.getDomainMap ();
7471 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7473 RCP<const map_type> theDomainMap = domainMap;
7474 RCP<const map_type> theRangeMap = rangeMap;
7476 if (domainMap.is_null ()) {
7477 if (B_domainMap.is_null ()) {
7478 TEUCHOS_TEST_FOR_EXCEPTION(
7479 A_domainMap.is_null (), std::invalid_argument,
7480 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7481 "then you must supply a nonnull domain Map to this method.");
7482 theDomainMap = A_domainMap;
7484 theDomainMap = B_domainMap;
7487 if (rangeMap.is_null ()) {
7488 if (B_rangeMap.is_null ()) {
7489 TEUCHOS_TEST_FOR_EXCEPTION(
7490 A_rangeMap.is_null (), std::invalid_argument,
7491 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7492 "then you must supply a nonnull range Map to this method.");
7493 theRangeMap = A_rangeMap;
7495 theRangeMap = B_rangeMap;
7503 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7504 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7505 TEUCHOS_TEST_FOR_EXCEPTION
7506 (! B_domainMap->isSameAs(*A_domainMap),
7507 std::invalid_argument,
7508 errPfx <<
"The input RowMatrix A must have a domain Map "
7509 "which is the same as (isSameAs) this RowMatrix's "
7511 TEUCHOS_TEST_FOR_EXCEPTION
7512 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7513 errPfx <<
"The input RowMatrix A must have a range Map "
7514 "which is the same as (isSameAs) this RowMatrix's range "
7516 TEUCHOS_TEST_FOR_EXCEPTION
7517 (! domainMap.is_null() &&
7518 ! domainMap->isSameAs(*B_domainMap),
7519 std::invalid_argument,
7520 errPfx <<
"The input domain Map must be the same as "
7521 "(isSameAs) this RowMatrix's domain Map.");
7522 TEUCHOS_TEST_FOR_EXCEPTION
7523 (! rangeMap.is_null() &&
7524 ! rangeMap->isSameAs(*B_rangeMap),
7525 std::invalid_argument,
7526 errPfx <<
"The input range Map must be the same as "
7527 "(isSameAs) this RowMatrix's range Map.");
7530 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7531 TEUCHOS_TEST_FOR_EXCEPTION
7532 (! domainMap.is_null() &&
7533 ! domainMap->isSameAs(*B_domainMap),
7534 std::invalid_argument,
7535 errPfx <<
"The input domain Map must be the same as "
7536 "(isSameAs) this RowMatrix's domain Map.");
7537 TEUCHOS_TEST_FOR_EXCEPTION
7538 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7539 std::invalid_argument,
7540 errPfx <<
"The input range Map must be the same as "
7541 "(isSameAs) this RowMatrix's range Map.");
7544 TEUCHOS_TEST_FOR_EXCEPTION
7545 (domainMap.is_null() || rangeMap.is_null(),
7546 std::invalid_argument, errPfx <<
"If neither A nor B "
7547 "have a domain and range Map, then you must supply a "
7548 "nonnull domain and range Map to this method.");
7555 bool callFillComplete =
true;
7556 RCP<ParameterList> constructorSublist;
7557 RCP<ParameterList> fillCompleteSublist;
7558 if (! params.is_null()) {
7560 params->get(
"Call fillComplete", callFillComplete);
7561 constructorSublist = sublist(params,
"Constructor parameters");
7562 fillCompleteSublist = sublist(params,
"fillComplete parameters");
7565 RCP<const map_type> A_rowMap = A.
getRowMap ();
7566 RCP<const map_type> B_rowMap = B.getRowMap ();
7567 RCP<const map_type> C_rowMap = B_rowMap;
7568 RCP<crs_matrix_type> C;
7574 if (A_rowMap->isSameAs (*B_rowMap)) {
7575 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7576 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7579 if (alpha !=
ZERO) {
7580 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7582 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7587 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7588 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7589 C_maxNumEntriesPerRow[localRow] += B_numEntries;
7593 if (constructorSublist.is_null ()) {
7594 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7596 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7597 constructorSublist));
7608 TEUCHOS_TEST_FOR_EXCEPTION
7609 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7610 "be the same for statically allocated matrices, to ensure "
7611 "that there is sufficient space to do the addition.");
7614 TEUCHOS_TEST_FOR_EXCEPTION
7615 (C.is_null (), std::logic_error,
7616 errPfx <<
"C should not be null at this point. "
7617 "Please report this bug to the Tpetra developers.");
7620 std::ostringstream os;
7621 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7622 std::cerr << os.str ();
7624 using gids_type = nonconst_global_inds_host_view_type;
7625 using vals_type = nonconst_values_host_view_type;
7629 if (alpha !=
ZERO) {
7630 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7631 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7633 const GO globalRow = A_rowMap->getGlobalElement (localRow);
7634 if (A_numEntries >
static_cast<size_t> (ind.size ())) {
7635 Kokkos::resize(ind,A_numEntries);
7636 Kokkos::resize(val,A_numEntries);
7638 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7639 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7643 for (
size_t k = 0; k < A_numEntries; ++k) {
7644 valView[k] *= alpha;
7647 C->insertGlobalValues (globalRow, A_numEntries,
7648 reinterpret_cast<Scalar *
>(valView.data()),
7654 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7655 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7656 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7657 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7658 if (B_numEntries >
static_cast<size_t> (ind.size ())) {
7659 Kokkos::resize(ind,B_numEntries);
7660 Kokkos::resize(val,B_numEntries);
7662 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7663 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7664 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7667 for (
size_t k = 0; k < B_numEntries; ++k) {
7671 C->insertGlobalValues (globalRow, B_numEntries,
7672 reinterpret_cast<Scalar *
>(valView.data()),
7677 if (callFillComplete) {
7679 std::ostringstream os;
7680 os << *prefix <<
"Call fillComplete on C" << endl;
7681 std::cerr << os.str ();
7683 if (fillCompleteSublist.is_null ()) {
7684 C->fillComplete (theDomainMap, theRangeMap);
7686 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7690 std::ostringstream os;
7691 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7692 std::cerr << os.str ();
7696 std::ostringstream os;
7697 os << *prefix <<
"Done" << endl;
7698 std::cerr << os.str ();
7700 return rcp_implicit_cast<row_matrix_type> (C);
7705 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7709 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7710 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7711 const Teuchos::RCP<const map_type>& domainMap,
7712 const Teuchos::RCP<const map_type>& rangeMap,
7713 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7720 using Teuchos::ArrayRCP;
7721 using Teuchos::ArrayView;
7722 using Teuchos::Comm;
7723 using Teuchos::ParameterList;
7726 typedef LocalOrdinal LO;
7727 typedef GlobalOrdinal GO;
7733 const bool debug = Behavior::debug(
"CrsMatrix");
7734 const bool verbose = Behavior::verbose(
"CrsMatrix");
7735 int MyPID = getComm ()->getRank ();
7737 std::unique_ptr<std::string> verbosePrefix;
7740 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7741 std::ostringstream os;
7742 os <<
"Start" << endl;
7743 std::cerr << os.str();
7750 bool reverseMode =
false;
7751 bool restrictComm =
false;
7753 int mm_optimization_core_count =
7754 Behavior::TAFC_OptimizationCoreCount();
7755 RCP<ParameterList> matrixparams;
7756 bool overrideAllreduce =
false;
7757 bool useKokkosPath =
false;
7758 if (! params.is_null ()) {
7759 matrixparams = sublist (params,
"CrsMatrix");
7760 reverseMode = params->get (
"Reverse Mode", reverseMode);
7761 useKokkosPath = params->get (
"TAFC: use kokkos path", useKokkosPath);
7762 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7763 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7764 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7765 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7767 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7768 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7769 if(reverseMode) isMM =
false;
7773 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7775 int reduced_mismatch = 0;
7776 if (isMM && !overrideAllreduce) {
7779 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7780 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7781 rowTransfer.getRemoteLIDs ().size() == 0);
7782 mismatch = (source_vals != target_vals) ? 1 : 0;
7784 ::Tpetra::Details::iallreduce (mismatch, reduced_mismatch,
7785 Teuchos::REDUCE_MAX, * (getComm ()));
7788#ifdef HAVE_TPETRA_MMM_TIMINGS
7789 using Teuchos::TimeMonitor;
7791 if(!params.is_null())
7792 label = params->get(
"Timer Label",label);
7793 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7796 std::ostringstream os;
7797 if(isMM) os<<
":MMOpt";
7798 else os<<
":MMLegacy";
7802 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7812 TEUCHOS_TEST_FOR_EXCEPTION(
7813 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7814 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7815 "argument must be either an Import or an Export, and its template "
7816 "parameters must match the corresponding template parameters of the "
7824 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
7825 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
7827 if(! domainTransfer.is_null()) {
7828 TEUCHOS_TEST_FOR_EXCEPTION(
7829 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7830 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7831 "argument must be either an Import or an Export, and its template "
7832 "parameters must match the corresponding template parameters of the "
7835 TEUCHOS_TEST_FOR_EXCEPTION(
7836 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7837 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7838 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7839 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7840 "arguments must be of the same type (either Import or Export).");
7842 TEUCHOS_TEST_FOR_EXCEPTION(
7843 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7844 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7845 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7846 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7847 "arguments must be of the same type (either Import or Export).");
7853 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7857 RCP<const map_type> MyRowMap = reverseMode ?
7858 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7859 RCP<const map_type> MyColMap;
7860 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7861 domainMap : getDomainMap ();
7862 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7863 rangeMap : getRangeMap ();
7864 RCP<const map_type> BaseRowMap = MyRowMap;
7865 RCP<const map_type> BaseDomainMap = MyDomainMap;
7873 if (! destMat.is_null ()) {
7884 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7885 ! destMat->getGraph ()->isGloballyIndexed ();
7886 TEUCHOS_TEST_FOR_EXCEPTION(
7887 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7888 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7889 "to be nonnull, if its graph is empty (neither locally nor globally "
7898 TEUCHOS_TEST_FOR_EXCEPTION(
7899 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7900 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7901 "input argument 'destMat' is not the same as the (row) Map specified "
7902 "by the input argument 'rowTransfer'.");
7903 TEUCHOS_TEST_FOR_EXCEPTION(
7904 ! destMat->checkSizes (*
this), std::invalid_argument,
7905 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7906 "destination matrix, but checkSizes() indicates that it is not a legal "
7907 "legal target for redistribution from the source matrix (*this). This "
7908 "may mean that they do not have the same dimensions.");
7922 TEUCHOS_TEST_FOR_EXCEPTION(
7923 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7924 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7925 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7926 TEUCHOS_TEST_FOR_EXCEPTION(
7927 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7928 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7929 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7932 TEUCHOS_TEST_FOR_EXCEPTION(
7933 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7934 std::invalid_argument,
7935 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7936 "argument must be the same as the rebalanced domain map 'domainMap'");
7938 TEUCHOS_TEST_FOR_EXCEPTION(
7939 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7940 std::invalid_argument,
7941 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7942 "argument must be the same as the rebalanced domain map 'domainMap'");
7955 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7956 ArrayView<const LO> ExportLIDs = reverseMode ?
7957 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7958 auto RemoteLIDs = reverseMode ?
7959 rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7960 auto PermuteToLIDs = reverseMode ?
7961 rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7962 auto PermuteFromLIDs = reverseMode ?
7963 rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7964 Distributor& Distor = rowTransfer.getDistributor ();
7967 Teuchos::Array<int> SourcePids;
7970 RCP<const map_type> ReducedRowMap, ReducedColMap,
7971 ReducedDomainMap, ReducedRangeMap;
7972 RCP<const Comm<int> > ReducedComm;
7976 if (destMat.is_null ()) {
7977 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7984#ifdef HAVE_TPETRA_MMM_TIMINGS
7985 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7987 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7988 ReducedComm = ReducedRowMap.is_null () ?
7990 ReducedRowMap->getComm ();
7991 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7993 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7995 MyDomainMap->replaceCommWithSubset (ReducedComm);
7996 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7998 MyRangeMap->replaceCommWithSubset (ReducedComm);
8001 MyRowMap = ReducedRowMap;
8002 MyDomainMap = ReducedDomainMap;
8003 MyRangeMap = ReducedRangeMap;
8006 if (! ReducedComm.is_null ()) {
8007 MyPID = ReducedComm->getRank ();
8014 ReducedComm = MyRowMap->getComm ();
8023 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8026 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8028 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8029#ifdef HAVE_TPETRA_MMM_TIMINGS
8030 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8040 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8043#ifdef HAVE_TPETRA_MMM_TIMINGS
8044 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8046 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8047 IntVectorType SourceCol_pids(getColMap());
8049 SourceDomain_pids.putScalar(MyPID);
8051 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8052 SourcePids.resize (getColMap ()->getLocalNumElements ());
8053 SourceCol_pids.get1dCopy (SourcePids ());
8055 else if (MyImporter.is_null ()) {
8057#ifdef HAVE_TPETRA_MMM_TIMINGS
8058 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8060 SourcePids.resize (getColMap ()->getLocalNumElements ());
8061 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8063 else if ( ! MyImporter.is_null () &&
8064 ! domainTransfer.is_null () ) {
8069#ifdef HAVE_TPETRA_MMM_TIMINGS
8070 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8074 IntVectorType TargetDomain_pids (domainMap);
8075 TargetDomain_pids.putScalar (MyPID);
8078 IntVectorType SourceDomain_pids (getDomainMap ());
8081 IntVectorType SourceCol_pids (getColMap ());
8083 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8084 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8086 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8087 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8089 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8090 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8092 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8093 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8096 TEUCHOS_TEST_FOR_EXCEPTION(
8097 true, std::logic_error,
"Tpetra::CrsMatrix::"
8098 "transferAndFillComplete: Should never get here! "
8099 "Please report this bug to a Tpetra developer.");
8101 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8102 SourcePids.resize (getColMap ()->getLocalNumElements ());
8103 SourceCol_pids.get1dCopy (SourcePids ());
8105 else if ( ! MyImporter.is_null () &&
8106 BaseDomainMap->isSameAs (*BaseRowMap) &&
8107 getDomainMap ()->isSameAs (*getRowMap ())) {
8109#ifdef HAVE_TPETRA_MMM_TIMINGS
8110 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8113 IntVectorType TargetRow_pids (domainMap);
8114 IntVectorType SourceRow_pids (getRowMap ());
8115 IntVectorType SourceCol_pids (getColMap ());
8117 TargetRow_pids.putScalar (MyPID);
8118 if (! reverseMode && xferAsImport !=
nullptr) {
8119 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8121 else if (reverseMode && xferAsExport !=
nullptr) {
8122 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8124 else if (! reverseMode && xferAsExport !=
nullptr) {
8125 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8127 else if (reverseMode && xferAsImport !=
nullptr) {
8128 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8131 TEUCHOS_TEST_FOR_EXCEPTION(
8132 true, std::logic_error,
"Tpetra::CrsMatrix::"
8133 "transferAndFillComplete: Should never get here! "
8134 "Please report this bug to a Tpetra developer.");
8137 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8138 SourcePids.resize (getColMap ()->getLocalNumElements ());
8139 SourceCol_pids.get1dCopy (SourcePids ());
8142 TEUCHOS_TEST_FOR_EXCEPTION(
8143 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8144 "transferAndFillComplete: This method only allows either domainMap == "
8145 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8146 "getDomainMap () == getRowMap ()).");
8150 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8152#ifdef HAVE_TPETRA_MMM_TIMINGS
8153 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8155 if (constantNumPackets == 0) {
8156 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8157 RemoteLIDs.view_host().size ());
8164 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
8165 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8171#ifdef HAVE_TPETRA_MMM_TIMINGS
8172 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8175 using Teuchos::outArg;
8176 using Teuchos::REDUCE_MAX;
8177 using Teuchos::reduceAll;
8180 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8181 const int myRank = comm->getRank ();
8183 std::ostringstream errStrm;
8187 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8190 destMat->numExportPacketsPerLID_.modify_host ();
8191 numExportPacketsPerLID =
8194 catch (std::exception& e) {
8195 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8196 << e.what () << std::endl;
8200 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8201 "an exception not a subclass of std::exception" << std::endl;
8205 if (! comm.is_null ()) {
8206 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8210 TEUCHOS_TEST_FOR_EXCEPTION(
8211 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8212 "exception on at least one process.");
8216 std::ostringstream os;
8217 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8219 std::cerr << os.str ();
8224 numExportPacketsPerLID,
8227 constantNumPackets);
8229 catch (std::exception& e) {
8230 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8231 << e.what () << std::endl;
8235 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8236 "an exception not a subclass of std::exception" << std::endl;
8241 std::ostringstream os;
8242 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8244 std::cerr << os.str ();
8247 if (! comm.is_null ()) {
8248 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8252 TEUCHOS_TEST_FOR_EXCEPTION(
8253 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8254 "exception on at least one process.");
8259 destMat->numExportPacketsPerLID_.modify_host ();
8260 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8263 std::ostringstream os;
8264 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8266 std::cerr << os.str ();
8270 numExportPacketsPerLID,
8273 constantNumPackets);
8275 std::ostringstream os;
8276 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8278 std::cerr << os.str ();
8285#ifdef HAVE_TPETRA_MMM_TIMINGS
8286 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8288 if (! communication_needed) {
8290 std::ostringstream os;
8291 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8292 std::cerr << os.str ();
8297 if (constantNumPackets == 0) {
8299 std::ostringstream os;
8300 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8302 std::cerr << os.str ();
8307 destMat->numExportPacketsPerLID_.sync_host ();
8308 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8310 destMat->numImportPacketsPerLID_.sync_host ();
8311 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8315 std::ostringstream os;
8316 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8318 std::cerr << os.str ();
8320 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8321 destMat->numImportPacketsPerLID_.view_host());
8323 std::ostringstream os;
8324 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8326 std::cerr << os.str ();
8329 size_t totalImportPackets = 0;
8330 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8331 totalImportPackets += numImportPacketsPerLID[i];
8336 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8337 verbosePrefix.get ());
8338 destMat->imports_.modify_host ();
8339 auto hostImports = destMat->imports_.view_host();
8342 destMat->exports_.sync_host ();
8343 auto hostExports = destMat->exports_.view_host();
8345 std::ostringstream os;
8346 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8348 std::cerr << os.str ();
8350 Distor.doReversePostsAndWaits (hostExports,
8351 numExportPacketsPerLID,
8353 numImportPacketsPerLID);
8355 std::ostringstream os;
8356 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8358 std::cerr << os.str ();
8363 std::ostringstream os;
8364 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8366 std::cerr << os.str ();
8368 destMat->imports_.modify_host ();
8369 auto hostImports = destMat->imports_.view_host();
8372 destMat->exports_.sync_host ();
8373 auto hostExports = destMat->exports_.view_host();
8375 std::ostringstream os;
8376 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8378 std::cerr << os.str ();
8380 Distor.doReversePostsAndWaits (hostExports,
8384 std::ostringstream os;
8385 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8387 std::cerr << os.str ();
8392 if (constantNumPackets == 0) {
8394 std::ostringstream os;
8395 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8397 std::cerr << os.str ();
8402 destMat->numExportPacketsPerLID_.sync_host ();
8403 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8405 destMat->numImportPacketsPerLID_.sync_host ();
8406 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8409 std::ostringstream os;
8410 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8412 std::cerr << os.str ();
8414 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8415 destMat->numImportPacketsPerLID_.view_host());
8417 std::ostringstream os;
8418 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8420 std::cerr << os.str ();
8423 size_t totalImportPackets = 0;
8424 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8425 totalImportPackets += numImportPacketsPerLID[i];
8430 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8431 verbosePrefix.get ());
8432 destMat->imports_.modify_host ();
8433 auto hostImports = destMat->imports_.view_host();
8436 destMat->exports_.sync_host ();
8437 auto hostExports = destMat->exports_.view_host();
8439 std::ostringstream os;
8440 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8442 std::cerr << os.str ();
8444 Distor.doPostsAndWaits (hostExports,
8445 numExportPacketsPerLID,
8447 numImportPacketsPerLID);
8449 std::ostringstream os;
8450 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8452 std::cerr << os.str ();
8457 std::ostringstream os;
8458 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8460 std::cerr << os.str ();
8462 destMat->imports_.modify_host ();
8463 auto hostImports = destMat->imports_.view_host();
8466 destMat->exports_.sync_host ();
8467 auto hostExports = destMat->exports_.view_host();
8469 std::ostringstream os;
8470 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8472 std::cerr << os.str ();
8474 Distor.doPostsAndWaits (hostExports,
8478 std::ostringstream os;
8479 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8481 std::cerr << os.str ();
8492 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8494 Teuchos::Array<int> RemotePids;
8496 Teuchos::Array<int> TargetPids;
8502 destMat->numImportPacketsPerLID_.modify_host();
8504# ifdef HAVE_TPETRA_MMM_TIMINGS
8505 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8507 ArrayRCP<size_t> CSR_rowptr;
8508 ArrayRCP<GO> CSR_colind_GID;
8509 ArrayRCP<LO> CSR_colind_LID;
8510 ArrayRCP<Scalar> CSR_vals;
8512 destMat->imports_.sync_device ();
8513 destMat->numImportPacketsPerLID_.sync_device ();
8515 size_t N = BaseRowMap->getLocalNumElements ();
8517 auto RemoteLIDs_d = RemoteLIDs.view_device();
8518 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8519 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8524 destMat->imports_.view_device(),
8525 destMat->numImportPacketsPerLID_.view_device(),
8539 if (
typeid (LO) ==
typeid (GO)) {
8540 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8543 CSR_colind_LID.resize (CSR_colind_GID.size());
8545 CSR_colind_LID.resize (CSR_colind_GID.size());
8550 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8552 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8554#ifdef HAVE_TPETRA_MMM_TIMINGS
8555 tmCopySPRdata = Teuchos::null;
8564 std::ostringstream os;
8565 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8567 std::cerr << os.str ();
8570#ifdef HAVE_TPETRA_MMM_TIMINGS
8571 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8573 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr (),
8583 std::ostringstream os;
8584 os << *verbosePrefix <<
"restrictComm="
8585 << (restrictComm ?
"true" :
"false") << std::endl;
8586 std::cerr << os.str ();
8593#ifdef HAVE_TPETRA_MMM_TIMINGS
8594 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8597 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8599 MyColMap->replaceCommWithSubset (ReducedComm);
8600 MyColMap = ReducedColMap;
8605 std::ostringstream os;
8606 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8607 std::cerr << os.str ();
8609 destMat->replaceColMap (MyColMap);
8616 if (ReducedComm.is_null ()) {
8618 std::ostringstream os;
8619 os << *verbosePrefix <<
"I am no longer in the communicator; "
8620 "returning" << std::endl;
8621 std::cerr << os.str ();
8630 if ((! reverseMode && xferAsImport !=
nullptr) ||
8631 (reverseMode && xferAsExport !=
nullptr)) {
8633 std::ostringstream os;
8634 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8635 std::cerr << os.str ();
8637#ifdef HAVE_TPETRA_MMM_TIMINGS
8638 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8644 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8645 (reverseMode && xferAsImport !=
nullptr)) {
8647 std::ostringstream os;
8648 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8650 std::cerr << os.str();
8652#ifdef HAVE_TPETRA_MMM_TIMINGS
8653 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8658 if (CSR_rowptr[N] !=
static_cast<size_t>(CSR_vals.size())) {
8659 CSR_colind_LID.resize (CSR_rowptr[N]);
8660 CSR_vals.resize (CSR_rowptr[N]);
8664 TEUCHOS_TEST_FOR_EXCEPTION(
8665 true, std::logic_error,
"Tpetra::CrsMatrix::"
8666 "transferAndFillComplete: Should never get here! "
8667 "Please report this bug to a Tpetra developer.");
8674 std::ostringstream os;
8675 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8676 std::cerr << os.str ();
8685#ifdef HAVE_TPETRA_MMM_TIMINGS
8686 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8688 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8700 destMat->numImportPacketsPerLID_.modify_host();
8702# ifdef HAVE_TPETRA_MMM_TIMINGS
8703 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8705 ArrayRCP<size_t> CSR_rowptr;
8706 ArrayRCP<GO> CSR_colind_GID;
8707 ArrayRCP<LO> CSR_colind_LID;
8708 ArrayRCP<Scalar> CSR_vals;
8710 destMat->imports_.sync_device ();
8711 destMat->numImportPacketsPerLID_.sync_device ();
8713 size_t N = BaseRowMap->getLocalNumElements ();
8715 auto RemoteLIDs_d = RemoteLIDs.view_device();
8716 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8717 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8719 Kokkos::View<size_t*,device_type> CSR_rowptr_d;
8720 Kokkos::View<GO*,device_type> CSR_colind_GID_d;
8721 Kokkos::View<LO*,device_type> CSR_colind_LID_d;
8722 Kokkos::View<impl_scalar_type*,device_type> CSR_vals_d;
8723 Kokkos::View<int*,device_type> TargetPids_d;
8728 destMat->imports_.view_device(),
8729 destMat->numImportPacketsPerLID_.view_device(),
8741 Kokkos::resize (CSR_colind_LID_d, CSR_colind_GID_d.size());
8743#ifdef HAVE_TPETRA_MMM_TIMINGS
8744 tmCopySPRdata = Teuchos::null;
8753 std::ostringstream os;
8754 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8756 std::cerr << os.str ();
8759#ifdef HAVE_TPETRA_MMM_TIMINGS
8760 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8772 std::ostringstream os;
8773 os << *verbosePrefix <<
"restrictComm="
8774 << (restrictComm ?
"true" :
"false") << std::endl;
8775 std::cerr << os.str ();
8782#ifdef HAVE_TPETRA_MMM_TIMINGS
8783 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8786 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8788 MyColMap->replaceCommWithSubset (ReducedComm);
8789 MyColMap = ReducedColMap;
8794 std::ostringstream os;
8795 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8796 std::cerr << os.str ();
8798 destMat->replaceColMap (MyColMap);
8805 if (ReducedComm.is_null ()) {
8807 std::ostringstream os;
8808 os << *verbosePrefix <<
"I am no longer in the communicator; "
8809 "returning" << std::endl;
8810 std::cerr << os.str ();
8820 if ((! reverseMode && xferAsImport !=
nullptr) ||
8821 (reverseMode && xferAsExport !=
nullptr)) {
8823 std::ostringstream os;
8824 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8825 std::cerr << os.str ();
8827#ifdef HAVE_TPETRA_MMM_TIMINGS
8828 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8834 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8835 (reverseMode && xferAsImport !=
nullptr)) {
8837 std::ostringstream os;
8838 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8840 std::cerr << os.str();
8842#ifdef HAVE_TPETRA_MMM_TIMINGS
8843 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8850 TEUCHOS_TEST_FOR_EXCEPTION(
8851 true, std::logic_error,
"Tpetra::CrsMatrix::"
8852 "transferAndFillComplete: Should never get here! "
8853 "Please report this bug to a Tpetra developer.");
8861 std::ostringstream os;
8862 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8863 std::cerr << os.str ();
8867#ifdef HAVE_TPETRA_MMM_TIMINGS
8868 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8870 destMat->setAllValues (CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8878#ifdef HAVE_TPETRA_MMM_TIMINGS
8879 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8882 Teuchos::ParameterList esfc_params;
8884 RCP<import_type> MyImport;
8887 if (iallreduceRequest.get () !=
nullptr) {
8889 std::ostringstream os;
8890 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8892 std::cerr << os.str ();
8894 iallreduceRequest->wait ();
8895 if (reduced_mismatch != 0) {
8901#ifdef HAVE_TPETRA_MMM_TIMINGS
8902 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8907 std::ostringstream os;
8908 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8909 std::cerr << os.str ();
8912 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8913 Teuchos::ArrayRCP<int> type3PIDs;
8914 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8915 auto colind = getCrsGraph()->getLocalIndicesHost();
8918 std::ostringstream os;
8919 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8920 std::cerr << os.str ();
8924#ifdef HAVE_TPETRA_MMM_TIMINGS
8925 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8927 Import_Util::reverseNeighborDiscovery(*
this,
8939 std::ostringstream os;
8940 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8941 std::cerr << os.str ();
8944 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8945 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8947 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8948 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8950 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8952 std::vector<bool> IsOwned(numCols,
true);
8953 std::vector<int> SentTo(numCols,-1);
8954 if (! MyImporter.is_null ()) {
8955 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8956 IsOwned[rlid]=
false;
8960 std::vector<std::pair<int,GO> > usrtg;
8961 usrtg.reserve(TEPID2.size());
8964 const auto& colMap = * (this->getColMap ());
8966 const LO row = TELID2[i];
8967 const int pid = TEPID2[i];
8968 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8969 const int col = colind[j];
8970 if (IsOwned[col] && SentTo[col] != pid) {
8972 GO gid = colMap.getGlobalElement (col);
8973 usrtg.push_back (std::pair<int,GO> (pid, gid));
8980 std::sort(usrtg.begin(),usrtg.end());
8981 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8983 usrtg.erase(eopg,usrtg.end());
8986 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8987 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
8990 for(
auto && p : usrtg) {
8991 EPID2[pos]= p.first;
8992 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
8996 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8997 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
8998 GO InfGID = std::numeric_limits<GO>::max();
8999 int InfPID = INT_MAX;
9003#define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
9004 int i1=0, i2=0, i3=0;
9005 int Len1 = EPID1.size();
9006 int Len2 = EPID2.size();
9007 int Len3 = EPID3.size();
9009 int MyLen=Len1+Len2+Len3;
9010 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
9011 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
9014 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9015 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9016 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9017 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9019 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9020 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9021 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9023 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9024 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9028 bool added_entry=
false;
9030 if(PID1 == MIN_PID && GID1 == MIN_GID){
9031 userExportLIDs[iloc]=ELID1[i1];
9032 userExportPIDs[iloc]=EPID1[i1];
9037 if(PID2 == MIN_PID && GID2 == MIN_GID){
9039 userExportLIDs[iloc]=ELID2[i2];
9040 userExportPIDs[iloc]=EPID2[i2];
9046 if(PID3 == MIN_PID && GID3 == MIN_GID){
9048 userExportLIDs[iloc]=ELID3[i3];
9049 userExportPIDs[iloc]=EPID3[i3];
9057 std::ostringstream os;
9058 os << *verbosePrefix <<
"Create Import" << std::endl;
9059 std::cerr << os.str ();
9062#ifdef HAVE_TPETRA_MMM_TIMINGS
9063 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9065 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9067 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9071 userExportLIDs.view(0,iloc).getConst(),
9072 userExportPIDs.view(0,iloc).getConst(),
9077 std::ostringstream os;
9078 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9079 std::cerr << os.str ();
9083#ifdef HAVE_TPETRA_MMM_TIMINGS
9084 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9085 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9087 if(!params.is_null())
9088 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9089 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9095#ifdef HAVE_TPETRA_MMM_TIMINGS
9096 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9099 std::ostringstream os;
9100 os << *verbosePrefix <<
"Create Import" << std::endl;
9101 std::cerr << os.str ();
9104#ifdef HAVE_TPETRA_MMM_TIMINGS
9105 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9107 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9108 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9109 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9110 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9113 std::ostringstream os;
9114 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9115 std::cerr << os.str ();
9118#ifdef HAVE_TPETRA_MMM_TIMINGS
9119 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9120 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9122 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9125 if (!params.is_null ()) {
9126 esfc_params.set (
"compute global constants",
9127 params->get (
"compute global constants",
true));
9129 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9130 MyImport, Teuchos::null,
9131 rcp (
new Teuchos::ParameterList (esfc_params)));
9134#ifdef HAVE_TPETRA_MMM_TIMINGS
9135 tmIESFC = Teuchos::null;
9139 std::ostringstream os;
9140 os << *verbosePrefix <<
"Done" << endl;
9141 std::cerr << os.str ();
9146 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9149 importAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
9151 const Teuchos::RCP<const map_type>& domainMap,
9152 const Teuchos::RCP<const map_type>& rangeMap,
9153 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9155 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
9158 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9161 importAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
9164 const Teuchos::RCP<const map_type>& domainMap,
9165 const Teuchos::RCP<const map_type>& rangeMap,
9166 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9168 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
9171 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9174 exportAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
9176 const Teuchos::RCP<const map_type>& domainMap,
9177 const Teuchos::RCP<const map_type>& rangeMap,
9178 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9180 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
9183 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9186 exportAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
9189 const Teuchos::RCP<const map_type>& domainMap,
9190 const Teuchos::RCP<const map_type>& rangeMap,
9191 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9193 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
9204#define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9206 template class CrsMatrix< SCALAR , LO , GO , NODE >;
9208#define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9210 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9211 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
9213#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9215 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9216 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9217 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9218 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9219 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9220 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9221 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9222 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9223 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9224 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9225 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9226 const Teuchos::RCP<Teuchos::ParameterList>& params);
9228#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9230 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9231 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9232 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9233 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9234 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9235 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9236 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9237 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9238 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9239 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9240 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9241 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9242 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9243 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9244 const Teuchos::RCP<Teuchos::ParameterList>& params);
9247#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9249 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9250 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9251 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9252 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9253 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9254 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9255 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9256 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9257 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9258 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9259 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9260 const Teuchos::RCP<Teuchos::ParameterList>& params);
9262#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9264 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9265 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9266 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9267 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9268 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9269 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9270 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9271 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9272 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9273 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9274 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9275 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9276 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9277 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9278 const Teuchos::RCP<Teuchos::ParameterList>& params);
9281#define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9282 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9283 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9284 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9285 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9286 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)