From 402baf6ea024762074d8e4bff286376486d2ca18 Mon Sep 17 00:00:00 2001 From: uliegecsm Date: Tue, 25 Jun 2024 14:52:44 +0000 Subject: [PATCH 01/14] tpetra multi vector space instance update --- packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp | 11 ++++++++++- packages/tpetra/core/src/Tpetra_MultiVector_def.hpp | 9 +++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index 0b7a6284d53d..86bf16ba0205 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -1722,10 +1722,19 @@ namespace Tpetra { /// NaN entries. It is legal for the input A to alias this /// MultiVector. void - update (const Scalar& alpha, + update (const execution_space& exec, + const Scalar& alpha, const MultiVector& A, const Scalar& beta); + //! @overload + void + update (const Scalar& alpha, + const MultiVector& A, + const Scalar& beta) { + this->update(execution_space{}, alpha, A, beta); + } + /// \brief Update: this = gamma*this + alpha*A + beta*B. /// /// Update this MultiVector with scaled values of A and B. If diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index a4bd3dff67fe..1847a8e76cd1 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -3125,7 +3125,8 @@ void MultiVector::copyAndPermute( template void MultiVector:: - update (const Scalar& alpha, + update (const execution_space& exec, + const Scalar& alpha, const MultiVector& A, const Scalar& beta) { @@ -3154,14 +3155,14 @@ void MultiVector::copyAndPermute( const std::pair rowRng (0, lclNumRows); const std::pair colRng (0, numVecs); - auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite); + auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite); // what about syncing with the 'exec' ? auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ()); auto X_lcl_orig = A.getLocalViewDevice(Access::ReadOnly); auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ()); // The device memory of *this is about to be modified if (isConstantStride () && A.isConstantStride ()) { - KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl); + KokkosBlas::axpby (exec, theAlpha, X_lcl, theBeta, Y_lcl); } else { // Make sure that Kokkos only uses the local length for add. @@ -3171,7 +3172,7 @@ void MultiVector::copyAndPermute( auto Y_k = subview (Y_lcl, ALL (), Y_col); auto X_k = subview (X_lcl, ALL (), X_col); - KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k); + KokkosBlas::axpby (exec, theAlpha, X_k, theBeta, Y_k); } } } From ec4065366f19a74a5e41c6f0f163e8132e27d973 Mon Sep 17 00:00:00 2001 From: uliegecsm Date: Tue, 25 Jun 2024 14:55:43 +0000 Subject: [PATCH 02/14] wrapped dual view get with exec space --- .../core/src/Tpetra_Details_WrappedDualView.hpp | 11 ++++++----- packages/tpetra/core/src/Tpetra_MultiVector_def.hpp | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp index c050122f377b..37238741a695 100644 --- a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp @@ -109,11 +109,11 @@ template enableIfConstData sync_host(DualViewType dualView) { } -template +template enableIfNonConstData -sync_device(DualViewType dualView) { +sync_device(const ExecSpace& exec, DualViewType dualView) { // This will sync, but only if needed - dualView.sync_device(); + dualView.sync_device(exec); } template @@ -308,15 +308,16 @@ class WrappedDualView { return dualView.view_host(); } + template typename t_dev::const_type - getDeviceView(Access::ReadOnlyStruct + getDeviceView(const ExecSpace& exec, Access::ReadOnlyStruct DEBUG_UVM_REMOVAL_ARGUMENT ) const { DEBUG_UVM_REMOVAL_PRINT_CALLER("getDeviceViewReadOnly"); if(needsSyncPath()) { throwIfHostViewAlive(); - impl::sync_device(originalDualView); + impl::sync_device(exec, originalDualView); } return dualView.view_device(); } diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 1847a8e76cd1..3b1ba67c5b83 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -3155,9 +3155,9 @@ void MultiVector::copyAndPermute( const std::pair rowRng (0, lclNumRows); const std::pair colRng (0, numVecs); - auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite); // what about syncing with the 'exec' ? + auto Y_lcl_orig = this->getLocalViewDevice(exec, Access::ReadWrite); // what about syncing with the 'exec' ? auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ()); - auto X_lcl_orig = A.getLocalViewDevice(Access::ReadOnly); + auto X_lcl_orig = A.getLocalViewDevice(exec, Access::ReadOnly); auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ()); // The device memory of *this is about to be modified From 348331987935433c62dc05850f3a9af5b37f1418 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 25 Jun 2024 09:08:00 -0600 Subject: [PATCH 03/14] Tpetra: WDV mods --- .../src/Tpetra_Details_WrappedDualView.hpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp index 37238741a695..42e89e04a284 100644 --- a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp @@ -120,6 +120,13 @@ template enableIfConstData sync_device(DualViewType dualView) { } +template +enableIfConstData +sync_device(const ExecSpace& exec, DualViewType dualView) { } + +} + + }// end namespace Impl /// \brief Whether WrappedDualView reference count checking is enabled. Initially true. @@ -308,6 +315,19 @@ class WrappedDualView { return dualView.view_host(); } + typename t_dev::const_type + getDeviceView(Access::ReadOnlyStruct + DEBUG_UVM_REMOVAL_ARGUMENT + ) const + { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getDeviceViewReadOnly"); + if(needsSyncPath()) { + throwIfHostViewAlive(); + impl::sync_device(originalDualView); + } + return dualView.view_device(); + } + template typename t_dev::const_type getDeviceView(const ExecSpace& exec, Access::ReadOnlyStruct From 524c381811be2b256959569c34ec974089585a62 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 25 Jun 2024 09:17:34 -0600 Subject: [PATCH 04/14] Tpetra: WDV mods --- .../src/Tpetra_Details_WrappedDualView.hpp | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp index 42e89e04a284..284375066308 100644 --- a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp @@ -358,6 +358,24 @@ class WrappedDualView { return dualView.view_device(); } + template + t_dev + getDeviceView(const ExecSpace& exec, Access::ReadWriteStruct + DEBUG_UVM_REMOVAL_ARGUMENT + ) + { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getDeviceViewReadWrite"); + static_assert(dualViewHasNonConstData, + "ReadWrite views are not available for DualView with const data"); + if(needsSyncPath()) { + throwIfHostViewAlive(); + impl::sync_device(exec,originalDualView); + originalDualView.modify_device(); + } + return dualView.view_device(); + } + + t_dev getDeviceView(Access::OverwriteAllStruct DEBUG_UVM_REMOVAL_ARGUMENT @@ -378,6 +396,22 @@ class WrappedDualView { return dualView.view_device(); } + + template + t_dev + getDeviceView(const ExecSpace& exec,Access::OverwriteAllStruct + DEBUG_UVM_REMOVAL_ARGUMENT + ) + { + // Since we're never syncing in this case, the execution_space is meaningless here +#ifdef DEBUG_UVM_REMOVAL + return getDeviceView(Access::OverwriteAllStruct,callerstr,filestr,linnum); +#else + return getDeviceView(Access::OverwriteAllStruct); +#endif + } + + template typename std::remove_reference().template view())>::type::const_type getView (Access::ReadOnlyStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { @@ -402,7 +436,31 @@ class WrappedDualView { return dualView.template view(); } + template + typename std::remove_reference().template view())>::type::const_type + getView (const ExecSpace & exec, Access::ReadOnlyStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { + using ReturnViewType = typename std::remove_reference().template view())>::type::const_type; + using ReturnDeviceType = typename ReturnViewType::device_type; + constexpr bool returnDevice = std::is_same::value; + if(returnDevice) { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getViewReadOnly"); + if(needsSyncPath()) { + throwIfHostViewAlive(); + impl::sync_device(exec,originalDualView); + } + } + else { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getViewReadOnly"); + if(needsSyncPath()) { + throwIfDeviceViewAlive(); + impl::sync_host(exec,originalDualView); + } + } + + return dualView.template view(); + } + template typename std::remove_reference().template view())>::type getView (Access::ReadWriteStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { @@ -435,6 +493,39 @@ class WrappedDualView { } + template + typename std::remove_reference().template view())>::type + getView (const ExecSpace & exec,Access::ReadWriteStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { + using ReturnViewType = typename std::remove_reference().template view())>::type; + using ReturnDeviceType = typename ReturnViewType::device_type; + constexpr bool returnDevice = std::is_same::value; + + if(returnDevice) { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getViewReadWrite"); + static_assert(dualViewHasNonConstData, + "ReadWrite views are not available for DualView with const data"); + if(needsSyncPath()) { + throwIfHostViewAlive(); + impl::sync_device(exec,originalDualView); + originalDualView.modify_device(); + } + } + else { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getViewReadWrite"); + static_assert(dualViewHasNonConstData, + "ReadWrite views are not available for DualView with const data"); + if(needsSyncPath()) { + throwIfDeviceViewAlive(); + impl::sync_host(exec,originalDualView); + originalDualView.modify_host(); + } + } + + return dualView.template view(); + } + + + template typename std::remove_reference().template view())>::type getView (Access::OverwriteAllStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { @@ -471,6 +562,21 @@ class WrappedDualView { } + template + typename std::remove_reference().template view())>::type + getView (const ExecSpace & exec, Access::OverwriteAllStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { + using ReturnViewType = typename std::remove_reference().template view())>::type; + using ReturnDeviceType = typename ReturnViewType::device_type; + // Since nothing syncs here, the ExecSpace is meaningless +#ifdef DEBUG_UVM_REMOVAL + return getView(Access::OverwriteAllStruct,callerstr,filestr,linnum); +#else + return getView(Access::OverwriteAllStruct); +#endif + + } + + typename t_host::const_type getHostSubview(int offset, int numEntries, Access::ReadOnlyStruct DEBUG_UVM_REMOVAL_ARGUMENT From 0bd0a31f66c2367f980216257c2b93fe87fbf2bd Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 25 Jun 2024 09:27:08 -0600 Subject: [PATCH 05/14] Tpetra: WDV mods --- .../src/Tpetra_Details_WrappedDualView.hpp | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp index 284375066308..9bc7bf4068f0 100644 --- a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp @@ -98,6 +98,8 @@ using enableIfConstData = std::enable_if_t::value>; template using enableIfNonConstData = std::enable_if_t::value>; +/* sync_host functions */ + template enableIfNonConstData sync_host(DualViewType dualView) { @@ -109,6 +111,16 @@ template enableIfConstData sync_host(DualViewType dualView) { } + +/* sync_device functions */ + +template +enableIfNonConstData +sync_device(DualViewType dualView) { + // This will sync, but only if needed + dualView.sync_device(); +} + template enableIfNonConstData sync_device(const ExecSpace& exec, DualViewType dualView) { @@ -630,6 +642,21 @@ class WrappedDualView { return getSubview(dualView.view_device(), offset, numEntries); } + template + typename t_dev::const_type + getDeviceSubview(const ExecSpace & exec, int offset, int numEntries, Access::ReadOnlyStruct + DEBUG_UVM_REMOVAL_ARGUMENT + ) const + { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getDeviceSubviewReadOnly"); + if(needsSyncPath()) { + throwIfHostViewAlive(); + impl::sync_device(exec,originalDualView); + } + return getSubview(dualView.view_device(), offset, numEntries); + } + + t_dev getDeviceSubview(int offset, int numEntries, Access::ReadWriteStruct DEBUG_UVM_REMOVAL_ARGUMENT @@ -646,6 +673,23 @@ class WrappedDualView { return getSubview(dualView.view_device(), offset, numEntries); } + template + t_dev + getDeviceSubview(const ExecSpace & exec, int offset, int numEntries, Access::ReadWriteStruct + DEBUG_UVM_REMOVAL_ARGUMENT + ) + { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getDeviceSubviewReadWrite"); + static_assert(dualViewHasNonConstData, + "ReadWrite views are not available for DualView with const data"); + if(needsSyncPath()) { + throwIfHostViewAlive(); + impl::sync_device(exec, originalDualView); + originalDualView.modify_device(); + } + return getSubview(dualView.view_device(), offset, numEntries); + } + t_dev getDeviceSubview(int offset, int numEntries, Access::OverwriteAllStruct DEBUG_UVM_REMOVAL_ARGUMENT @@ -657,6 +701,18 @@ class WrappedDualView { return getDeviceSubview(offset, numEntries, Access::ReadWrite); } + template + t_dev + getDeviceSubview(const ExecSpace& exec, int offset, int numEntries, Access::OverwriteAllStruct + DEBUG_UVM_REMOVAL_ARGUMENT + ) + { + DEBUG_UVM_REMOVAL_PRINT_CALLER("getDeviceSubviewOverwriteAll"); + static_assert(dualViewHasNonConstData, + "OverwriteAll views are not available for DualView with const data"); + return getDeviceSubview(exec,offset, numEntries, Access::ReadWrite); + } + // Debugging functions to get copies of the view state typename t_host::HostMirror getHostCopy() const { From 4b697cdd679118ea3c214a230d126f67a033c22d Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 25 Jun 2024 09:36:21 -0600 Subject: [PATCH 06/14] Tpetra: WDV mods --- .../tpetra/core/src/Tpetra_Details_WrappedDualView.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp index 9bc7bf4068f0..b0a5495ce113 100644 --- a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp @@ -111,6 +111,16 @@ template enableIfConstData sync_host(DualViewType dualView) { } +template +enableIfNonConstData +sync_host(const ExecSpace& exec, DualViewType dualView) { + // This will sync, but only if needed + dualView.sync_host(); +} + +template +enableIfConstData +sync_host(DualViewType dualView) { } /* sync_device functions */ From c7f746c49dbba9f12e008d2c68a92c06e3cdd738 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 25 Jun 2024 10:04:56 -0600 Subject: [PATCH 07/14] Tpetra: WDV mods --- .../src/Tpetra_Details_WrappedDualView.hpp | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp index b0a5495ce113..b822a151a2f8 100644 --- a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp @@ -107,10 +107,6 @@ sync_host(DualViewType dualView) { dualView.sync_host(); } -template -enableIfConstData -sync_host(DualViewType dualView) { } - template enableIfNonConstData sync_host(const ExecSpace& exec, DualViewType dualView) { @@ -122,6 +118,10 @@ template enableIfConstData sync_host(DualViewType dualView) { } +template +enableIfConstData +sync_host(DualViewType dualView) { } + /* sync_device functions */ template @@ -146,8 +146,6 @@ template enableIfConstData sync_device(const ExecSpace& exec, DualViewType dualView) { } -} - }// end namespace Impl @@ -421,15 +419,15 @@ class WrappedDualView { template t_dev - getDeviceView(const ExecSpace& exec,Access::OverwriteAllStruct + getDeviceView(const ExecSpace& exec,Access::OverwriteAllStruct s DEBUG_UVM_REMOVAL_ARGUMENT ) { // Since we're never syncing in this case, the execution_space is meaningless here #ifdef DEBUG_UVM_REMOVAL - return getDeviceView(Access::OverwriteAllStruct,callerstr,filestr,linnum); + return getDeviceView(s,callerstr,filestr,linnum); #else - return getDeviceView(Access::OverwriteAllStruct); + return getDeviceView(s); #endif } @@ -591,9 +589,9 @@ class WrappedDualView { using ReturnDeviceType = typename ReturnViewType::device_type; // Since nothing syncs here, the ExecSpace is meaningless #ifdef DEBUG_UVM_REMOVAL - return getView(Access::OverwriteAllStruct,callerstr,filestr,linnum); + return getView(s,callerstr,filestr,linnum); #else - return getView(Access::OverwriteAllStruct); + return getView(s); #endif } From ada41744823b89bb466977d4c906962e67bf24bc Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 25 Jun 2024 10:45:50 -0600 Subject: [PATCH 08/14] Tpetra: WDV mods; compiling --- .../core/src/Tpetra_MultiVector_decl.hpp | 18 +++++++++++++ .../core/src/Tpetra_MultiVector_def.hpp | 25 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index 86bf16ba0205..672fa0844fe9 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -1466,14 +1466,32 @@ namespace Tpetra { /// This requires that there are no live host-space views. typename dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const; + /// \brief Return a read-only, up-to-date view of this MultiVector's local data on device. + /// This requires that there are no live host-space views. + /// WARNING: This function will only synchronize the provided execution_space instance, which if not used correctly + /// can lead to errors. + typename dual_view_type::t_dev::const_type getLocalViewDevice(const execution_space & exec, Access::ReadOnlyStruct) const; + /// \brief Return a mutable, up-to-date view of this MultiVector's local data on device. /// This requires that there are no live host-space views. typename dual_view_type::t_dev getLocalViewDevice(Access::ReadWriteStruct); + /// \brief Return a mutable, up-to-date view of this MultiVector's local data on device. + /// This requires that there are no live host-space views. + /// WARNING: This function will only synchronize the provided execution_space instance, which if not used correctly + /// can lead to errors. + typename dual_view_type::t_dev getLocalViewDevice(const execution_space & exec,Access::ReadWriteStruct); + /// \brief Return a mutable view of this MultiVector's local data on device, assuming all existing data will be overwritten. /// This requires that there are no live host-space views. typename dual_view_type::t_dev getLocalViewDevice(Access::OverwriteAllStruct); + /// \brief Return a mutable view of this MultiVector's local data on device, assuming all existing data will be overwritten. + /// This requires that there are no live host-space views. + /// WARNING: This function will only synchronize the provided execution_space instance, which if not used correctly + /// can lead to errors. + typename dual_view_type::t_dev getLocalViewDevice(const execution_space & exec, Access::OverwriteAllStruct); + /// \brief Return the wrapped dual view holding this MultiVector's local data. /// /// \warning This method is ONLY for use by experts. We highly recommend accessing the local data diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 3b1ba67c5b83..9f439e296209 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -4007,6 +4007,14 @@ void MultiVector::copyAndPermute( return view_.getDeviceView(s); } + template + typename MultiVector::dual_view_type::t_dev::const_type + MultiVector:: + getLocalViewDevice(const execution_space &exec, Access::ReadOnlyStruct s) const + { + return view_.getDeviceView(exec,s); + } + template typename MultiVector::dual_view_type::t_dev MultiVector:: @@ -4015,6 +4023,14 @@ void MultiVector::copyAndPermute( return view_.getDeviceView(s); } + template + typename MultiVector::dual_view_type::t_dev + MultiVector:: + getLocalViewDevice(const execution_space &exec, Access::ReadWriteStruct s) + { + return view_.getDeviceView(exec,s); + } + template typename MultiVector::dual_view_type::t_dev MultiVector:: @@ -4023,6 +4039,15 @@ void MultiVector::copyAndPermute( return view_.getDeviceView(s); } + template + typename MultiVector::dual_view_type::t_dev + MultiVector:: + getLocalViewDevice(const execution_space &exec,Access::OverwriteAllStruct s) + { + return view_.getDeviceView(exec,s); + } + + template typename MultiVector::wrapped_dual_view_type MultiVector:: From 8035f34160f239fb0c6b3a3e2cd27b4594034707 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 25 Jun 2024 12:44:45 -0600 Subject: [PATCH 09/14] Tpetra: WDV moving to Option #1 --- .../src/Tpetra_Details_WrappedDualView.hpp | 50 ++++++++----------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp index b822a151a2f8..b4038d65d9ee 100644 --- a/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_WrappedDualView.hpp @@ -107,9 +107,9 @@ sync_host(DualViewType dualView) { dualView.sync_host(); } -template +template enableIfNonConstData -sync_host(const ExecSpace& exec, DualViewType dualView) { +sync_host(const typename DualViewType::t_host::execution_space& exec, DualViewType dualView) { // This will sync, but only if needed dualView.sync_host(); } @@ -118,9 +118,9 @@ template enableIfConstData sync_host(DualViewType dualView) { } -template +template enableIfConstData -sync_host(DualViewType dualView) { } +sync_host(const typename DualViewType::t_host::execution_space& exec, DualViewType dualView) { } /* sync_device functions */ @@ -128,23 +128,23 @@ template enableIfNonConstData sync_device(DualViewType dualView) { // This will sync, but only if needed - dualView.sync_device(); + dualView.sync_device(); } -template +template enableIfNonConstData -sync_device(const ExecSpace& exec, DualViewType dualView) { +sync_device(const typename DualViewType::t_dev::execution_space& exec, DualViewType dualView) { // This will sync, but only if needed - dualView.sync_device(exec); + dualView.sync_device(exec); } template enableIfConstData sync_device(DualViewType dualView) { } -template +template enableIfConstData -sync_device(const ExecSpace& exec, DualViewType dualView) { } +sync_device(const typename DualViewType::t_dev::execution_space& exec, DualViewType dualView) { } }// end namespace Impl @@ -348,9 +348,8 @@ class WrappedDualView { return dualView.view_device(); } - template typename t_dev::const_type - getDeviceView(const ExecSpace& exec, Access::ReadOnlyStruct + getDeviceView(const typename DualViewType::t_dev::execution_space& exec, Access::ReadOnlyStruct DEBUG_UVM_REMOVAL_ARGUMENT ) const { @@ -378,9 +377,8 @@ class WrappedDualView { return dualView.view_device(); } - template t_dev - getDeviceView(const ExecSpace& exec, Access::ReadWriteStruct + getDeviceView(const typename DualViewType::t_dev::execution_space& exec, Access::ReadWriteStruct DEBUG_UVM_REMOVAL_ARGUMENT ) { @@ -417,9 +415,8 @@ class WrappedDualView { } - template t_dev - getDeviceView(const ExecSpace& exec,Access::OverwriteAllStruct s + getDeviceView(const typename DualViewType::t_dev::execution_space& exec, Access::OverwriteAllStruct s DEBUG_UVM_REMOVAL_ARGUMENT ) { @@ -456,9 +453,9 @@ class WrappedDualView { return dualView.template view(); } - template + template typename std::remove_reference().template view())>::type::const_type - getView (const ExecSpace & exec, Access::ReadOnlyStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { + getView (const typename TargetDeviceType::execution_space & exec, Access::ReadOnlyStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { using ReturnViewType = typename std::remove_reference().template view())>::type::const_type; using ReturnDeviceType = typename ReturnViewType::device_type; constexpr bool returnDevice = std::is_same::value; @@ -513,9 +510,9 @@ class WrappedDualView { } - template + template typename std::remove_reference().template view())>::type - getView (const ExecSpace & exec,Access::ReadWriteStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { + getView (const typename TargetDeviceType::execution_space & exec,Access::ReadWriteStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { using ReturnViewType = typename std::remove_reference().template view())>::type; using ReturnDeviceType = typename ReturnViewType::device_type; constexpr bool returnDevice = std::is_same::value; @@ -582,9 +579,9 @@ class WrappedDualView { } - template + template typename std::remove_reference().template view())>::type - getView (const ExecSpace & exec, Access::OverwriteAllStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { + getView (const typename TargetDeviceType::execution_space & exec, Access::OverwriteAllStruct s DEBUG_UVM_REMOVAL_ARGUMENT) const { using ReturnViewType = typename std::remove_reference().template view())>::type; using ReturnDeviceType = typename ReturnViewType::device_type; // Since nothing syncs here, the ExecSpace is meaningless @@ -650,9 +647,8 @@ class WrappedDualView { return getSubview(dualView.view_device(), offset, numEntries); } - template typename t_dev::const_type - getDeviceSubview(const ExecSpace & exec, int offset, int numEntries, Access::ReadOnlyStruct + getDeviceSubview(const typename DualViewType::t_dev::execution_space& exec, int offset, int numEntries, Access::ReadOnlyStruct DEBUG_UVM_REMOVAL_ARGUMENT ) const { @@ -681,9 +677,8 @@ class WrappedDualView { return getSubview(dualView.view_device(), offset, numEntries); } - template t_dev - getDeviceSubview(const ExecSpace & exec, int offset, int numEntries, Access::ReadWriteStruct + getDeviceSubview(const typename DualViewType::t_dev::execution_space& exec, int offset, int numEntries, Access::ReadWriteStruct DEBUG_UVM_REMOVAL_ARGUMENT ) { @@ -709,9 +704,8 @@ class WrappedDualView { return getDeviceSubview(offset, numEntries, Access::ReadWrite); } - template t_dev - getDeviceSubview(const ExecSpace& exec, int offset, int numEntries, Access::OverwriteAllStruct + getDeviceSubview(const typename DualViewType::t_dev::execution_space& exec, int offset, int numEntries, Access::OverwriteAllStruct DEBUG_UVM_REMOVAL_ARGUMENT ) { From 7ceac93f59f2f1b3e8d605343ffa110dc3a32667 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 25 Jun 2024 12:56:58 -0600 Subject: [PATCH 10/14] Tpetra: MultiVector moving to Option #1 --- .../core/src/Tpetra_MultiVector_decl.hpp | 22 ++++--- .../core/src/Tpetra_MultiVector_def.hpp | 58 ++++++++++++++++++- 2 files changed, 72 insertions(+), 8 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index 672fa0844fe9..b184e02a5e06 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -1733,6 +1733,7 @@ namespace Tpetra { scale (const Scalar& alpha, const MultiVector& A); + /// \brief Update: this = beta*this + alpha*A. /// /// Update this MultiVector with scaled values of A. If beta is @@ -1740,19 +1741,26 @@ namespace Tpetra { /// NaN entries. It is legal for the input A to alias this /// MultiVector. void - update (const execution_space& exec, - const Scalar& alpha, + update (const Scalar& alpha, const MultiVector& A, const Scalar& beta); - //! @overload + /// \brief Update: this = beta*this + alpha*A. + /// + /// Update this MultiVector with scaled values of A. If beta is + /// zero, overwrite \c *this unconditionally, even if it contains + /// NaN entries. It is legal for the input A to alias this + /// MultiVector. + /// + /// WARNING: This will only synchronize the MultiVectors w.r.t. the used-provided execution space instance. + /// This can lead to incorrect behavior if other execution_space instances are attempting to modify the vectors. void - update (const Scalar& alpha, + update (const execution_space& exec, + const Scalar& alpha, const MultiVector& A, - const Scalar& beta) { - this->update(execution_space{}, alpha, A, beta); - } + const Scalar& beta); + /// \brief Update: this = gamma*this + alpha*A + beta*B. /// /// Update this MultiVector with scaled values of A and B. If diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 9f439e296209..bc57a1b22cad 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -3122,6 +3122,62 @@ void MultiVector::copyAndPermute( } } + template + void + MultiVector:: + update (const Scalar& alpha, + const MultiVector& A, + const Scalar& beta) + { + // NOTE: This is intentionally not implemented with a call to the 4-arg update() which takes an execution space + // instance, because that has different synchronization behavior. + const char tfecfFuncName[] = "update: "; + using Kokkos::subview; + using Kokkos::ALL; + + ::Tpetra::Details::ProfilingRegion region ("Tpetra::MV::update(alpha,A,beta)"); + + const size_t lclNumRows = getLocalLength (); + const size_t numVecs = getNumVectors (); + + if (::Tpetra::Details::Behavior::debug ()) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + lclNumRows != A.getLocalLength (), std::invalid_argument, + "this->getLocalLength() = " << lclNumRows << " != A.getLocalLength() = " + << A.getLocalLength () << "."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + numVecs != A.getNumVectors (), std::invalid_argument, + "this->getNumVectors() = " << numVecs << " != A.getNumVectors() = " + << A.getNumVectors () << "."); + } + + const impl_scalar_type theAlpha = static_cast (alpha); + const impl_scalar_type theBeta = static_cast (beta); + const std::pair rowRng (0, lclNumRows); + const std::pair colRng (0, numVecs); + + auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite); + auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ()); + auto X_lcl_orig = A.getLocalViewDevice(Access::ReadOnly); + auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ()); + + // The device memory of *this is about to be modified + if (isConstantStride () && A.isConstantStride ()) { + KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl); + } + else { + // Make sure that Kokkos only uses the local length for add. + for (size_t k = 0; k < numVecs; ++k) { + const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k]; + const size_t X_col = A.isConstantStride () ? k : A.whichVectors_[k]; + auto Y_k = subview (Y_lcl, ALL (), Y_col); + auto X_k = subview (X_lcl, ALL (), X_col); + + KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k); + } + } + } + template void MultiVector:: @@ -3155,7 +3211,7 @@ void MultiVector::copyAndPermute( const std::pair rowRng (0, lclNumRows); const std::pair colRng (0, numVecs); - auto Y_lcl_orig = this->getLocalViewDevice(exec, Access::ReadWrite); // what about syncing with the 'exec' ? + auto Y_lcl_orig = this->getLocalViewDevice(exec, Access::ReadWrite); auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ()); auto X_lcl_orig = A.getLocalViewDevice(exec, Access::ReadOnly); auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ()); From f231ac66ed065ca0873ef850f08087e4d7e22e7b Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Wed, 26 Jun 2024 15:02:47 -0600 Subject: [PATCH 11/14] Tpetra: Adding more execution_space support to MultiVector --- .../core/src/Tpetra_MultiVector_decl.hpp | 50 ++++- .../core/src/Tpetra_MultiVector_def.hpp | 211 +++++++++++++++--- 2 files changed, 223 insertions(+), 38 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index b184e02a5e06..399d51cb9c5e 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -594,7 +594,7 @@ namespace Tpetra { /// around with multiple memory spaces. MultiVector (const Teuchos::RCP& map, const typename dual_view_type::t_dev& d_view); - + /// \brief Expert mode constructor, that takes a Kokkos::DualView /// of the MultiVector's data and the "original" /// Kokkos::DualView of the data, and returns a MultiVector that @@ -1467,11 +1467,11 @@ namespace Tpetra { typename dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const; /// \brief Return a read-only, up-to-date view of this MultiVector's local data on device. - /// This requires that there are no live host-space views. + /// This requires that there are no live host-space views. /// WARNING: This function will only synchronize the provided execution_space instance, which if not used correctly /// can lead to errors. typename dual_view_type::t_dev::const_type getLocalViewDevice(const execution_space & exec, Access::ReadOnlyStruct) const; - + /// \brief Return a mutable, up-to-date view of this MultiVector's local data on device. /// This requires that there are no live host-space views. typename dual_view_type::t_dev getLocalViewDevice(Access::ReadWriteStruct); @@ -1479,9 +1479,9 @@ namespace Tpetra { /// \brief Return a mutable, up-to-date view of this MultiVector's local data on device. /// This requires that there are no live host-space views. /// WARNING: This function will only synchronize the provided execution_space instance, which if not used correctly - /// can lead to errors. + /// can lead to errors. typename dual_view_type::t_dev getLocalViewDevice(const execution_space & exec,Access::ReadWriteStruct); - + /// \brief Return a mutable view of this MultiVector's local data on device, assuming all existing data will be overwritten. /// This requires that there are no live host-space views. typename dual_view_type::t_dev getLocalViewDevice(Access::OverwriteAllStruct); @@ -1489,8 +1489,8 @@ namespace Tpetra { /// \brief Return a mutable view of this MultiVector's local data on device, assuming all existing data will be overwritten. /// This requires that there are no live host-space views. /// WARNING: This function will only synchronize the provided execution_space instance, which if not used correctly - /// can lead to errors. - typename dual_view_type::t_dev getLocalViewDevice(const execution_space & exec, Access::OverwriteAllStruct); + /// can lead to errors. + typename dual_view_type::t_dev getLocalViewDevice(const execution_space & exec, Access::OverwriteAllStruct); /// \brief Return the wrapped dual view holding this MultiVector's local data. /// @@ -1689,9 +1689,23 @@ namespace Tpetra { //! Put element-wise absolute values of input Multi-vector in target: A = abs(this) void abs (const MultiVector& A); + //\brief Put element-wise absolute values of input Multi-vector in target: A = abs(this) + /// + /// WARNING: This will only synchronize the MultiVectors w.r.t. the used-provided execution space instance. + /// This can lead to incorrect behavior if other execution_space instances are attempting to modify the vectors. + void abs (const execution_space& exec, const MultiVector& A); + //! Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,j). void reciprocal (const MultiVector& A); + //\brief Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,j). + /// + /// WARNING: This will only synchronize the MultiVectors w.r.t. the used-provided execution space instance. + /// This can lead to incorrect behavior if other execution_space instances are attempting to modify the vectors. + void reciprocal (const execution_space& exec, const MultiVector& A); + + + /// \brief Scale in place: this = alpha*this. /// /// Replace this MultiVector with alpha times this MultiVector. @@ -1733,7 +1747,7 @@ namespace Tpetra { scale (const Scalar& alpha, const MultiVector& A); - + /// \brief Update: this = beta*this + alpha*A. /// /// Update this MultiVector with scaled values of A. If beta is @@ -1760,7 +1774,7 @@ namespace Tpetra { const MultiVector& A, const Scalar& beta); - + /// \brief Update: this = gamma*this + alpha*A + beta*B. /// /// Update this MultiVector with scaled values of A and B. If @@ -1774,6 +1788,24 @@ namespace Tpetra { const MultiVector& B, const Scalar& gamma); + /// \brief Update: this = gamma*this + alpha*A + beta*B. + /// + /// Update this MultiVector with scaled values of A and B. If + /// gamma is zero, overwrite \c *this unconditionally, even if it + /// contains NaN entries. It is legal for the inputs A or B to + /// alias this MultiVector. + /// + /// WARNING: This will only synchronize the MultiVectors w.r.t. the used-provided execution space instance. + /// This can lead to incorrect behavior if other execution_space instances are attempting to modify the vectors. + void + update (const execution_space& exec, + const Scalar& alpha, + const MultiVector& A, + const Scalar& beta, + const MultiVector& B, + const Scalar& gamma); + + /// \brief Compute the one-norm of each vector (column), storing /// the result in a host view. /// diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index bc57a1b22cad..c6c8caa7cd1a 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -297,8 +297,8 @@ namespace { // (anonymous) // If you take a subview of a view with zero rows Kokkos::subview() // always returns a DualView with the same data pointers. This will break // pointer equality testing in between two subviews of the same 2D View if - // it has zero row extent. While the one (known) case where this was actually used - // has been fixed, that sort of check could very easily be reintroduced in the future, + // it has zero row extent. While the one (known) case where this was actually used + // has been fixed, that sort of check could very easily be reintroduced in the future, // hence I've added this if check here. // // This is not a bug in Kokkos::subview(), just some very subtle behavior which @@ -349,16 +349,16 @@ namespace { // (anonymous) template bool - runKernelOnHost ( - Kokkos::DualView imports + runKernelOnHost ( + Kokkos::DualView imports ) { if (! imports.need_sync_device ()) { return false; // most up-to-date on device } - else { // most up-to-date on host, + else { // most up-to-date on host, // but if large enough, worth running on device anyway - size_t localLengthThreshold = + size_t localLengthThreshold = Tpetra::Details::Behavior::multivectorKernelLocationThreshold(); return imports.extent(0) <= localLengthThreshold; } @@ -374,7 +374,7 @@ namespace { // (anonymous) } else { // most up-to-date on host // but if large enough, worth running on device anyway - size_t localLengthThreshold = + size_t localLengthThreshold = Tpetra::Details::Behavior::multivectorKernelLocationThreshold(); return X.getLocalLength () <= localLengthThreshold; } @@ -1219,19 +1219,19 @@ namespace Tpetra { auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol); auto src_j = Kokkos::subview (src_h, rows, srcCol); - if (CM == ADD_ASSIGN) { + if (CM == ADD_ASSIGN) { // Sum src_j into tgt_j - using range_t = + using range_t = Kokkos::RangePolicy; range_t rp(space, 0,numSameIDs); Tpetra::Details::AddAssignFunctor aaf(tgt_j, src_j); Kokkos::parallel_for(rp, aaf); } - else { + else { // Copy src_j into tgt_j // DEEP_COPY REVIEW - HOSTMIRROR-TO-HOSTMIRROR - Kokkos::deep_copy (space, tgt_j, src_j); + Kokkos::deep_copy (space, tgt_j, src_j); space.fence(); } } @@ -1247,19 +1247,19 @@ namespace Tpetra { auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol); auto src_j = Kokkos::subview (src_d, rows, srcCol); - if (CM == ADD_ASSIGN) { + if (CM == ADD_ASSIGN) { // Sum src_j into tgt_j - using range_t = + using range_t = Kokkos::RangePolicy; range_t rp(space, 0,numSameIDs); Tpetra::Details::AddAssignFunctor aaf(tgt_j, src_j); Kokkos::parallel_for(rp, aaf); } - else { + else { // Copy src_j into tgt_j // DEEP_COPY REVIEW - DEVICE-TO-DEVICE - Kokkos::deep_copy (space, tgt_j, src_j); + Kokkos::deep_copy (space, tgt_j, src_j); space.fence(); } } @@ -1618,7 +1618,7 @@ void MultiVector::copyAndPermute( // clears out the 'modified' flags. if (packOnHost) { // nde 06 Feb 2020: If 'exports' does not require resize - // when reallocDualViewIfNeeded is called, the modified flags + // when reallocDualViewIfNeeded is called, the modified flags // are not cleared out. This can result in host and device views // being out-of-sync, resuling in an error in exports.modify_* calls. // Clearing the sync flags prevents this possible case. @@ -1627,7 +1627,7 @@ void MultiVector::copyAndPermute( } else { // nde 06 Feb 2020: If 'exports' does not require resize - // when reallocDualViewIfNeeded is called, the modified flags + // when reallocDualViewIfNeeded is called, the modified flags // are not cleared out. This can result in host and device views // being out-of-sync, resuling in an error in exports.modify_* calls. // Clearing the sync flags prevents this possible case. @@ -1782,7 +1782,7 @@ void MultiVector::copyAndPermute( Kokkos::DualView& exports, Kokkos::DualView numExportPacketsPerLID, size_t& constantNumPackets) { - packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets, execution_space()); + packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets, execution_space()); } // clang-format off @@ -3083,6 +3083,46 @@ void MultiVector::copyAndPermute( } } + template + void + MultiVector:: + reciprocal (const execution_space& exec, const MultiVector& A) + { + const char tfecfFuncName[] = "reciprocal: "; + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + getLocalLength () != A.getLocalLength (), std::runtime_error, + "MultiVectors do not have the same local length. " + "this->getLocalLength() = " << getLocalLength () + << " != A.getLocalLength() = " << A.getLocalLength () << "."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + A.getNumVectors () != this->getNumVectors (), std::runtime_error, + ": MultiVectors do not have the same number of columns (vectors). " + "this->getNumVectors() = " << getNumVectors () + << " != A.getNumVectors() = " << A.getNumVectors () << "."); + + const size_t numVecs = getNumVectors (); + + auto this_view_dev = this->getLocalViewDevice(exec,Access::ReadWrite); + auto A_view_dev = A.getLocalViewDevice(exec,Access::ReadOnly); + + if (isConstantStride () && A.isConstantStride ()) { + KokkosBlas::reciprocal (exec,this_view_dev, A_view_dev); + } + else { + using Kokkos::ALL; + using Kokkos::subview; + for (size_t k = 0; k < numVecs; ++k) { + const size_t this_col = isConstantStride () ? k : whichVectors_[k]; + auto vector_k = subview (this_view_dev, ALL (), this_col); + const size_t A_col = isConstantStride () ? k : A.whichVectors_[k]; + auto vector_Ak = subview (A_view_dev, ALL (), A_col); + KokkosBlas::reciprocal (exec,vector_k, vector_Ak); + } + } + } + + template void MultiVector:: @@ -3121,6 +3161,45 @@ void MultiVector::copyAndPermute( } } } + template + void + MultiVector:: + abs (const execution_space& exec, const MultiVector& A) + { + const char tfecfFuncName[] = "abs"; + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + getLocalLength () != A.getLocalLength (), std::runtime_error, + ": MultiVectors do not have the same local length. " + "this->getLocalLength() = " << getLocalLength () + << " != A.getLocalLength() = " << A.getLocalLength () << "."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + A.getNumVectors () != this->getNumVectors (), std::runtime_error, + ": MultiVectors do not have the same number of columns (vectors). " + "this->getNumVectors() = " << getNumVectors () + << " != A.getNumVectors() = " << A.getNumVectors () << "."); + const size_t numVecs = getNumVectors (); + + auto this_view_dev = this->getLocalViewDevice(exec,Access::ReadWrite); + auto A_view_dev = A.getLocalViewDevice(exec,Access::ReadOnly); + + if (isConstantStride () && A.isConstantStride ()) { + KokkosBlas::abs (exec,this_view_dev, A_view_dev); + } + else { + using Kokkos::ALL; + using Kokkos::subview; + + for (size_t k=0; k < numVecs; ++k) { + const size_t this_col = isConstantStride () ? k : whichVectors_[k]; + auto vector_k = subview (this_view_dev, ALL (), this_col); + const size_t A_col = isConstantStride () ? k : A.whichVectors_[k]; + auto vector_Ak = subview (A_view_dev, ALL (), A_col); + KokkosBlas::abs (exec,vector_k, vector_Ak); + } + } + } + template void @@ -3177,7 +3256,7 @@ void MultiVector::copyAndPermute( } } } - + template void MultiVector:: @@ -3272,7 +3351,7 @@ void MultiVector::copyAndPermute( "The input MultiVector B has " << B.getNumVectors () << " column(s), " "but this MultiVector has " << numVecs << " column(s)."); } - + const impl_scalar_type theAlpha = static_cast (alpha); const impl_scalar_type theBeta = static_cast (beta); const impl_scalar_type theGamma = static_cast (gamma); @@ -3304,6 +3383,80 @@ void MultiVector::copyAndPermute( } } + + + template + void + MultiVector:: + update (const execution_space& exec, + const Scalar& alpha, + const MultiVector& A, + const Scalar& beta, + const MultiVector& B, + const Scalar& gamma) + { + using Kokkos::ALL; + using Kokkos::subview; + + const char tfecfFuncName[] = "update(alpha,A,beta,B,gamma): "; + + ::Tpetra::Details::ProfilingRegion region ("Tpetra::MV::update(alpha,A,beta,B,gamma)"); + + const size_t lclNumRows = this->getLocalLength (); + const size_t numVecs = getNumVectors (); + + if (::Tpetra::Details::Behavior::debug ()) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + lclNumRows != A.getLocalLength (), std::invalid_argument, + "The input MultiVector A has " << A.getLocalLength () << " local " + "row(s), but this MultiVector has " << lclNumRows << " local " + "row(s)."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + lclNumRows != B.getLocalLength (), std::invalid_argument, + "The input MultiVector B has " << B.getLocalLength () << " local " + "row(s), but this MultiVector has " << lclNumRows << " local " + "row(s)."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + A.getNumVectors () != numVecs, std::invalid_argument, + "The input MultiVector A has " << A.getNumVectors () << " column(s), " + "but this MultiVector has " << numVecs << " column(s)."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + B.getNumVectors () != numVecs, std::invalid_argument, + "The input MultiVector B has " << B.getNumVectors () << " column(s), " + "but this MultiVector has " << numVecs << " column(s)."); + } + + const impl_scalar_type theAlpha = static_cast (alpha); + const impl_scalar_type theBeta = static_cast (beta); + const impl_scalar_type theGamma = static_cast (gamma); + + const std::pair rowRng (0, lclNumRows); + const std::pair colRng (0, numVecs); + + // Prefer 'auto' over specifying the type explicitly. This avoids + // issues with a subview possibly having a different type than the + // original view. + auto C_lcl = subview (this->getLocalViewDevice(exec,Access::ReadWrite), rowRng, ALL ()); + auto A_lcl = subview (A.getLocalViewDevice(exec,Access::ReadOnly), rowRng, ALL ()); + auto B_lcl = subview (B.getLocalViewDevice(exec,Access::ReadOnly), rowRng, ALL ()); + + if (isConstantStride () && A.isConstantStride () && B.isConstantStride ()) { + KokkosBlas::update (exec, theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl); + } + else { + // Some input (or *this) is not constant stride, + // so perform the update one column at a time. + for (size_t k = 0; k < numVecs; ++k) { + const size_t this_col = isConstantStride () ? k : whichVectors_[k]; + const size_t A_col = A.isConstantStride () ? k : A.whichVectors_[k]; + const size_t B_col = B.isConstantStride () ? k : B.whichVectors_[k]; + KokkosBlas::update (exec, theAlpha, subview (A_lcl, rowRng, A_col), + theBeta, subview (B_lcl, rowRng, B_col), + theGamma, subview (C_lcl, rowRng, this_col)); + } + } + } + template Teuchos::ArrayRCP MultiVector:: @@ -3887,7 +4040,7 @@ void MultiVector::copyAndPermute( /// can change the local data and we do not know which one the user want as a copy throw std::runtime_error("Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside"); } - else { + else { const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count(); if (this->isConstantStride ()) { if (useHostView) { @@ -3904,7 +4057,7 @@ void MultiVector::copyAndPermute( for (size_t j = 0; j < numCols; ++j) { const size_t srcCol = this->whichVectors_[j]; auto dstColView = Kokkos::subview (A_view, rowRange, j); - + if (useHostView) { auto srcView_host = this->getLocalViewHost(Access::ReadOnly); auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol); @@ -4070,7 +4223,7 @@ void MultiVector::copyAndPermute( { return view_.getDeviceView(exec,s); } - + template typename MultiVector::dual_view_type::t_dev MultiVector:: @@ -4085,7 +4238,7 @@ void MultiVector::copyAndPermute( getLocalViewDevice(const execution_space &exec, Access::ReadWriteStruct s) { return view_.getDeviceView(exec,s); - } + } template typename MultiVector::dual_view_type::t_dev @@ -4103,9 +4256,9 @@ void MultiVector::copyAndPermute( return view_.getDeviceView(exec,s); } - + template - typename MultiVector::wrapped_dual_view_type + typename MultiVector::wrapped_dual_view_type MultiVector:: getWrappedDualView() const { return view_; @@ -4741,7 +4894,7 @@ void MultiVector::copyAndPermute( // so we can't use our regular accessor functins // NOTE: This is an occasion where we do *not* want the auto-sync stuff - // to trigger (since this function is conceptually const). Thus, we + // to trigger (since this function is conceptually const). Thus, we // get *copies* of the view's data instead. auto X_dev = view_.getDeviceCopy(); auto X_host = view_.getHostCopy(); @@ -4750,12 +4903,12 @@ void MultiVector::copyAndPermute( // One single allocation Details::print_vector(out,"unified",X_host); } - else { + else { Details::print_vector(out,"host",X_host); Details::print_vector(out,"dev",X_dev); } } - } + } out.flush (); // make sure the ostringstream got everything return outStringP->str (); } From 78e83fc461cebad6d0c4243f63e48c86a346793d Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Wed, 26 Jun 2024 15:28:13 -0600 Subject: [PATCH 12/14] Tpetra: Adding more execution_space support to MultiVector --- .../core/src/Tpetra_MultiVector_decl.hpp | 41 +++- .../core/src/Tpetra_MultiVector_def.hpp | 175 ++++++++++++++++++ 2 files changed, 214 insertions(+), 2 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index 399d51cb9c5e..81e50a9117df 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -1704,8 +1704,6 @@ namespace Tpetra { /// This can lead to incorrect behavior if other execution_space instances are attempting to modify the vectors. void reciprocal (const execution_space& exec, const MultiVector& A); - - /// \brief Scale in place: this = alpha*this. /// /// Replace this MultiVector with alpha times this MultiVector. @@ -1715,6 +1713,18 @@ namespace Tpetra { /// this method finishes. void scale (const Scalar& alpha); + /// \brief Scale in place: this = alpha*this. + /// + /// Replace this MultiVector with alpha times this MultiVector. + /// This method will always multiply, even if alpha is zero. That + /// means, for example, that if \c *this contains NaN entries + /// before calling this method, the NaN entries will remain after + /// this method finishes. + /// + /// WARNING: This will only synchronize the MultiVectors w.r.t. the used-provided execution space instance. + /// This can lead to incorrect behavior if other execution_space instances are attempting to modify the vectors. + void scale (const execution_space& exec, const Scalar& alpha); + /// \brief Scale each column in place: this[j] = alpha[j]*this[j]. /// /// Replace each column j of this MultiVector with @@ -1735,6 +1745,19 @@ namespace Tpetra { /// the NaN entries will remain after this method finishes. void scale (const Kokkos::View& alpha); + /// \brief Scale each column in place: this[j] = alpha[j]*this[j]. + /// + /// Replace each column j of this MultiVector with + /// alpha[j] times the current column j of this + /// MultiVector. This method will always multiply, even if all + /// the entries of alpha are zero. That means, for example, that + /// if \c *this contains NaN entries before calling this method, + /// the NaN entries will remain after this method finishes. + /// + /// WARNING: This will only synchronize the MultiVectors w.r.t. the used-provided execution space instance. + /// This can lead to incorrect behavior if other execution_space instances are attempting to modify the vectors. + void scale (const execution_space& exec, const Kokkos::View& alpha); + /// \brief Scale in place: this = alpha * A. /// /// Replace this MultiVector with scaled values of A. This method @@ -1747,6 +1770,20 @@ namespace Tpetra { scale (const Scalar& alpha, const MultiVector& A); + /// \brief Scale in place: this = alpha * A. + /// + /// Replace this MultiVector with scaled values of A. This method + /// will always multiply, even if alpha is zero. That means, for + /// example, that if \c *this contains NaN entries before calling + /// this method, the NaN entries will remain after this method + /// finishes. It is legal for the input A to alias this + /// MultiVector. + /// + /// WARNING: This will only synchronize the MultiVectors w.r.t. the used-provided execution space instance. + /// This can lead to incorrect behavior if other execution_space instances are attempting to modify the vectors. + void + scale (const execution_space& exec, const Scalar& alpha, + const MultiVector& A); /// \brief Update: this = beta*this + alpha*A. /// diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index c6c8caa7cd1a..5caf0429ef99 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -2898,6 +2898,60 @@ void MultiVector::copyAndPermute( } + template + void + MultiVector:: + scale (const execution_space& exec, const Scalar& alpha) + { + using Kokkos::ALL; + using IST = impl_scalar_type; + + const IST theAlpha = static_cast (alpha); + if (theAlpha == Kokkos::ArithTraits::one ()) { + return; // do nothing + } + const size_t lclNumRows = getLocalLength (); + const size_t numVecs = getNumVectors (); + const std::pair rowRng (0, lclNumRows); + const std::pair colRng (0, numVecs); + + // We can't substitute putScalar(0.0) for scale(0.0), because the + // former will overwrite NaNs present in the MultiVector. The + // semantics of this call require multiplying them by 0, which + // IEEE 754 requires to be NaN. + + // If we need sync to device, then host has the most recent version. + const bool useHostVersion = need_sync_device (); + if (useHostVersion) { + auto Y_lcl = Kokkos::subview (getLocalViewHost(Access::ReadWrite), rowRng, ALL ()); + if (isConstantStride ()) { + KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl); + } + else { + for (size_t k = 0; k < numVecs; ++k) { + const size_t Y_col = whichVectors_[k]; + auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col); + KokkosBlas::scal (Y_k, theAlpha, Y_k); + } + } + } + else { // work on device + auto Y_lcl = Kokkos::subview (getLocalViewDevice(exec, Access::ReadWrite), rowRng, ALL ()); + if (isConstantStride ()) { + KokkosBlas::scal (exec, Y_lcl, theAlpha, Y_lcl); + } + else { + for (size_t k = 0; k < numVecs; ++k) { + const size_t Y_col = isConstantStride () ? k : whichVectors_[k]; + auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col); + KokkosBlas::scal (exec, Y_k, theAlpha, Y_k); + } + } + } + } + + + template void MultiVector:: @@ -2995,6 +3049,81 @@ void MultiVector::copyAndPermute( } } + template + void + MultiVector:: + scale (const execution_space& exec, const Kokkos::View& alphas) + { + using Kokkos::ALL; + using Kokkos::subview; + + const size_t lclNumRows = this->getLocalLength (); + const size_t numVecs = this->getNumVectors (); + TEUCHOS_TEST_FOR_EXCEPTION( + static_cast (alphas.extent (0)) != numVecs, + std::invalid_argument, "Tpetra::MultiVector::scale(alphas): " + "alphas.extent(0) = " << alphas.extent (0) + << " != this->getNumVectors () = " << numVecs << "."); + const std::pair rowRng (0, lclNumRows); + const std::pair colRng (0, numVecs); + + // NOTE (mfh 08 Apr 2015) We prefer to let the compiler deduce the + // type of the return value of subview. This is because if we + // switch the array layout from LayoutLeft to LayoutRight + // (preferred for performance of block operations), the types + // below won't be valid. (A view of a column of a LayoutRight + // multivector has LayoutStride, not LayoutLeft.) + + // If we need sync to device, then host has the most recent version. + const bool useHostVersion = this->need_sync_device (); + if (useHostVersion) { + // Work in host memory. This means we need to create a host + // mirror of the input View of coefficients. + auto alphas_h = Kokkos::create_mirror_view (alphas); + // DEEP_COPY REVIEW - NOT TESTED + Kokkos::deep_copy (exec, alphas_h, alphas); + exec.fence(); + + auto Y_lcl = subview (this->getLocalViewHost(Access::ReadWrite), rowRng, ALL ()); + if (isConstantStride ()) { + KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl); + } + else { + for (size_t k = 0; k < numVecs; ++k) { + const size_t Y_col = this->isConstantStride () ? k : + this->whichVectors_[k]; + auto Y_k = subview (Y_lcl, ALL (), Y_col); + // We don't have to use the entire 1-D View here; we can use + // the version that takes a scalar coefficient. + KokkosBlas::scal (Y_k, alphas_h(k), Y_k); + } + } + } + else { // Work in device memory, using the input View 'alphas' directly. + auto Y_lcl = subview (this->getLocalViewDevice(exec, Access::ReadWrite), rowRng, ALL ()); + if (isConstantStride ()) { + KokkosBlas::scal (exec, Y_lcl, alphas, Y_lcl); + } + else { + // FIXME (mfh 15 Mar 2019) We need one coefficient at a time, + // as values on host, so copy them to host. Another approach + // would be to fix scal() so that it takes a 0-D View as the + // second argument. + auto alphas_h = Kokkos::create_mirror_view (alphas); + // DEEP_COPY REVIEW - NOT TESTED + Kokkos::deep_copy (exec, alphas_h, alphas); + + for (size_t k = 0; k < numVecs; ++k) { + const size_t Y_col = this->isConstantStride () ? k : + this->whichVectors_[k]; + auto Y_k = subview (Y_lcl, ALL (), Y_col); + KokkosBlas::scal (exec, Y_k, alphas_h(k), Y_k); + } + } + } + } + + template void MultiVector:: @@ -3042,6 +3171,52 @@ void MultiVector::copyAndPermute( } } + template + void + MultiVector:: + scale (const execution_space& exec, const Scalar& alpha, + const MultiVector& A) + { + using Kokkos::ALL; + using Kokkos::subview; + const char tfecfFuncName[] = "scale: "; + + const size_t lclNumRows = getLocalLength (); + const size_t numVecs = getNumVectors (); + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + lclNumRows != A.getLocalLength (), std::invalid_argument, + "this->getLocalLength() = " << lclNumRows << " != A.getLocalLength() = " + << A.getLocalLength () << "."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + numVecs != A.getNumVectors (), std::invalid_argument, + "this->getNumVectors() = " << numVecs << " != A.getNumVectors() = " + << A.getNumVectors () << "."); + + const impl_scalar_type theAlpha = static_cast (alpha); + const std::pair rowRng (0, lclNumRows); + const std::pair colRng (0, numVecs); + + auto Y_lcl_orig = this->getLocalViewDevice(exec, Access::ReadWrite); + auto X_lcl_orig = A.getLocalViewDevice(exec, Access::ReadOnly); + auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ()); + auto X_lcl = subview (X_lcl_orig, rowRng, ALL ()); + + if (isConstantStride () && A.isConstantStride ()) { + KokkosBlas::scal (exec, Y_lcl, theAlpha, X_lcl); + } + else { + // Make sure that Kokkos only uses the local length for add. + for (size_t k = 0; k < numVecs; ++k) { + const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k]; + const size_t X_col = A.isConstantStride () ? k : A.whichVectors_[k]; + auto Y_k = subview (Y_lcl, ALL (), Y_col); + auto X_k = subview (X_lcl, ALL (), X_col); + + KokkosBlas::scal (exec, Y_k, theAlpha, X_k); + } + } + } template From da0d101766aac1dd37e0043f88b9bb118d9face1 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Wed, 26 Jun 2024 15:43:40 -0600 Subject: [PATCH 13/14] Tpetra: Adding more execution_space support to MultiVector --- .../core/src/Tpetra_MultiVector_decl.hpp | 29 ++++++++++ .../core/src/Tpetra_MultiVector_def.hpp | 56 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index 81e50a9117df..94047cecd629 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -2174,6 +2174,35 @@ namespace Tpetra { const Vector& A, const MultiVector& B, Scalar scalarThis); + + /// \brief Multiply a Vector A elementwise by a MultiVector B. + /// + /// Compute this = scalarThis * this + scalarAB * B @ A + /// where @ denotes element-wise multiplication. In + /// pseudocode, if C denotes *this MultiVector: + /// \code + /// C(i,j) = scalarThis * C(i,j) + scalarAB * B(i,j) * A(i,1); + /// \endcode + /// for all rows i and columns j of C. + /// + /// B must have the same dimensions as *this, while A + /// must have the same number of rows but a single column. + /// + /// We do not require that A, B, and *this have + /// compatible Maps, as long as the number of rows in A, B, and + /// *this on each process is the same. For example, one + /// or more of these vectors might have a locally replicated Map, + /// or a Map with a local communicator (MPI_COMM_SELF). + /// This case may occur in block relaxation algorithms when + /// applying a diagonal scaling. + /// WARNING: This function will only synchronize the provided execution_space instance, which if not used correctly + /// can lead to errors. + void + elementWiseMultiply (const execution_space& exec, Scalar scalarAB, + const Vector& A, + const MultiVector& B, + Scalar scalarThis); + //@} //! @name Attribute access functions //@{ diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 5caf0429ef99..95a5a3cfc66b 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -4706,6 +4706,7 @@ void MultiVector::copyAndPermute( } } + template void MultiVector:: @@ -4758,6 +4759,61 @@ void MultiVector::copyAndPermute( } } + + template + void + MultiVector:: + elementWiseMultiply (const execution_space& exec, Scalar scalarAB, + const Vector& A, + const MultiVector& B, + Scalar scalarThis) + { + using Kokkos::ALL; + using Kokkos::subview; + const char tfecfFuncName[] = "elementWiseMultiply: "; + + const size_t lclNumRows = this->getLocalLength (); + const size_t numVecs = this->getNumVectors (); + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (lclNumRows != A.getLocalLength () || lclNumRows != B.getLocalLength (), + std::runtime_error, "MultiVectors do not have the same local length."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( + numVecs != B.getNumVectors (), std::runtime_error, "this->getNumVectors" + "() = " << numVecs << " != B.getNumVectors() = " << B.getNumVectors () + << "."); + + auto this_view = this->getLocalViewDevice(exec,Access::ReadWrite); + auto A_view = A.getLocalViewDevice(exec, Access::ReadOnly); + auto B_view = B.getLocalViewDevice(exec, Access::ReadOnly); + + if (isConstantStride () && B.isConstantStride ()) { + // A is just a Vector; it only has one column, so it always has + // constant stride. + // + // If both *this and B have constant stride, we can do an + // element-wise multiply on all columns at once. + KokkosBlas::mult (exec, + scalarThis, + this_view, + scalarAB, + subview (A_view, ALL (), 0), + B_view); + } + else { + for (size_t j = 0; j < numVecs; ++j) { + const size_t C_col = isConstantStride () ? j : whichVectors_[j]; + const size_t B_col = B.isConstantStride () ? j : B.whichVectors_[j]; + KokkosBlas::mult (exec, + scalarThis, + subview (this_view, ALL (), C_col), + scalarAB, + subview (A_view, ALL (), 0), + subview (B_view, ALL (), B_col)); + } + } + } + template void MultiVector:: From ba871b7cd994943b55ee8a938338c18a2c44c45d Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Thu, 27 Jun 2024 18:47:06 -0600 Subject: [PATCH 14/14] Update packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp Co-authored-by: Tomasetti Romin --- packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index 94047cecd629..80680103ed19 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -1468,8 +1468,7 @@ namespace Tpetra { /// \brief Return a read-only, up-to-date view of this MultiVector's local data on device. /// This requires that there are no live host-space views. - /// WARNING: This function will only synchronize the provided execution_space instance, which if not used correctly - /// can lead to errors. + /// @warning This function will only synchronize the provided @c execution_space instance, which if not used correctly can lead to errors. typename dual_view_type::t_dev::const_type getLocalViewDevice(const execution_space & exec, Access::ReadOnlyStruct) const; /// \brief Return a mutable, up-to-date view of this MultiVector's local data on device.