Skip to content

Commit

Permalink
Merge pull request #490 from LLNL/task/rhornung67/add-resource
Browse files (browse the repository at this point in the history)
Add resource to RAJA variants
  • Loading branch information
rhornung67 authored Oct 31, 2024
2 parents 9af20b3 + 81d9f43 commit ebe577b
Show file tree
Hide file tree
Showing 217 changed files with 947 additions and 406 deletions.
2 changes: 1 addition & 1 deletion src/algorithm/ATOMIC-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ void ATOMIC::runCudaVariantReplicateGlobal(VariantID vid)
startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::cuda_exec<block_size, true /*async*/>>(
RAJA::forall<RAJA::cuda_exec<block_size, true /*async*/>>( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ATOMIC_RAJA_BODY(RAJA::cuda_atomic, i, ATOMIC_VALUE);
});
Expand Down
2 changes: 1 addition & 1 deletion src/algorithm/ATOMIC-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ void ATOMIC::runHipVariantReplicateGlobal(VariantID vid)
startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::hip_exec<block_size, true /*async*/>>(
RAJA::forall<RAJA::hip_exec<block_size, true /*async*/>>( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ATOMIC_RAJA_BODY(RAJA::hip_atomic, i, ATOMIC_VALUE);
});
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/ATOMIC-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,12 @@ void ATOMIC::runOpenMPVariantReplicate(VariantID vid)

case RAJA_OpenMP : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
ATOMIC_RAJA_BODY(RAJA::omp_atomic, i, ATOMIC_VALUE);
});
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/ATOMIC-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ void ATOMIC::runOpenMPTargetVariantReplicate(VariantID vid)

} else if ( vid == RAJA_OpenMPTarget ) {

auto res{getOmpTargetResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
ATOMIC_RAJA_BODY(RAJA::omp_atomic, i, ATOMIC_VALUE);
});
Expand Down
5 changes: 4 additions & 1 deletion src/algorithm/ATOMIC-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,13 @@ void ATOMIC::runSeqVariantReplicate(VariantID vid)

case RAJA_Seq : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
ATOMIC_RAJA_BODY(RAJA::seq_atomic, i, ATOMIC_VALUE);
});
Expand Down
1 change: 1 addition & 0 deletions src/algorithm/ATOMIC.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class ATOMIC : public KernelBase
void setOpenMPTuningDefinitions(VariantID vid);
void setCudaTuningDefinitions(VariantID vid);
void setHipTuningDefinitions(VariantID vid);
void setOpenMPTargetTuningDefinitions(VariantID vid);

template < size_t replication >
void runSeqVariantReplicate(VariantID vid);
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/HISTOGRAM-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,14 @@ void HISTOGRAM::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_

case RAJA_OpenMP : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

HISTOGRAM_INIT_COUNTS_RAJA(RAJA::omp_multi_reduce);

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
HISTOGRAM_BODY;
});
Expand Down
5 changes: 4 additions & 1 deletion src/algorithm/HISTOGRAM-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,15 @@ void HISTOGRAM::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx

case RAJA_Seq : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

HISTOGRAM_INIT_COUNTS_RAJA(RAJA::seq_multi_reduce);

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
HISTOGRAM_BODY;
});
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/MEMCPY-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,12 @@ void MEMCPY::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx

case RAJA_OpenMP : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
MEMCPY_BODY;
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/MEMCPY-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ void MEMCPY::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu

} else if ( vid == RAJA_OpenMPTarget ) {

auto res{getOmpTargetResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
MEMCPY_BODY;
Expand Down
7 changes: 5 additions & 2 deletions src/algorithm/MEMCPY-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void MEMCPY::runSeqVariantLibrary(VariantID vid)
#if defined(RUN_RAJA_SEQ)
case RAJA_Seq : {

camp::resources::Host res = camp::resources::Host::get_default();
auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
Expand Down Expand Up @@ -113,10 +113,13 @@ void MEMCPY::runSeqVariantDefault(VariantID vid)

case RAJA_Seq : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
MEMCPY_BODY;
});
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/MEMSET-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,12 @@ void MEMSET::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx

case RAJA_OpenMP : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
MEMSET_BODY;
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/MEMSET-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ void MEMSET::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu

} else if ( vid == RAJA_OpenMPTarget ) {

auto res{getOmpTargetResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
MEMSET_BODY;
Expand Down
7 changes: 5 additions & 2 deletions src/algorithm/MEMSET-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void MEMSET::runSeqVariantLibrary(VariantID vid)
#if defined(RUN_RAJA_SEQ)
case RAJA_Seq : {

camp::resources::Host res = camp::resources::Host::get_default();
auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
Expand Down Expand Up @@ -113,11 +113,14 @@ void MEMSET::runSeqVariantDefault(VariantID vid)
}

case RAJA_Seq : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
MEMSET_BODY;
});
Expand Down
6 changes: 4 additions & 2 deletions src/algorithm/REDUCE_SUM-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,16 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx)

case RAJA_OpenMP : {

auto res{getHostResource()};

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_reduce, Real_type> sum(m_sum_init);

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
REDUCE_SUM_BODY;
Expand All @@ -101,7 +103,7 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx)

Real_type tsum = m_sum_init;

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/REDUCE_SUM-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,14 @@ void REDUCE_SUM::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_AR

} else if ( vid == RAJA_OpenMPTarget ) {

auto res{getOmpTargetResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

Real_type tsum = m_sum_init;

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
Expand Down
8 changes: 6 additions & 2 deletions src/algorithm/REDUCE_SUM-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,17 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx)

case RAJA_Seq : {

auto res{getHostResource()};

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::seq_reduce, Real_type> sum(m_sum_init);

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
REDUCE_SUM_BODY;
});
Expand All @@ -100,7 +103,8 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx)

Real_type tsum = m_sum_init;

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
REDUCE_SUM_BODY;
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/SCAN-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,12 @@ void SCAN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))

case RAJA_OpenMP : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::exclusive_scan<RAJA::omp_parallel_for_exec>(RAJA_SCAN_ARGS);
RAJA::exclusive_scan<RAJA::omp_parallel_for_exec>(res, RAJA_SCAN_ARGS);

}
stopTimer();
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/SCAN-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,12 @@ void SCAN::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))

case RAJA_Seq : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::exclusive_scan<RAJA::seq_exec>(RAJA_SCAN_ARGS);
RAJA::exclusive_scan<RAJA::seq_exec>(res, RAJA_SCAN_ARGS);

}
stopTimer();
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/SORT-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ void SORT::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))

case RAJA_OpenMP : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::sort<RAJA::omp_parallel_for_exec>(RAJA_SORT_ARGS);
RAJA::sort<RAJA::omp_parallel_for_exec>(res, RAJA_SORT_ARGS);

}
stopTimer();
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/SORT-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,12 @@ void SORT::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
#if defined(RUN_RAJA_SEQ)
case RAJA_Seq : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::sort<RAJA::seq_exec>(RAJA_SORT_ARGS);
RAJA::sort<RAJA::seq_exec>(res, RAJA_SORT_ARGS);

}
stopTimer();
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/SORTPAIRS-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ void SORTPAIRS::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_

case RAJA_OpenMP : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::sort_pairs<RAJA::omp_parallel_for_exec>(RAJA_SORTPAIRS_ARGS);
RAJA::sort_pairs<RAJA::omp_parallel_for_exec>(res, RAJA_SORTPAIRS_ARGS);

}
stopTimer();
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/SORTPAIRS-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,12 @@ void SORTPAIRS::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx
#if defined(RUN_RAJA_SEQ)
case RAJA_Seq : {

auto res{getHostResource()};

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::sort_pairs<RAJA::seq_exec>(RAJA_SORTPAIRS_ARGS);
RAJA::sort_pairs<RAJA::seq_exec>(res, RAJA_SORTPAIRS_ARGS);

}
stopTimer();
Expand Down
4 changes: 3 additions & 1 deletion src/apps/CONVECTION3DPA-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ void CONVECTION3DPA::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(

case RAJA_OpenMP: {

auto res{getHostResource()};

using launch_policy = RAJA::LaunchPolicy<RAJA::omp_launch_t>;

using outer_x = RAJA::LoopPolicy<RAJA::omp_for_exec>;
Expand All @@ -145,7 +147,7 @@ void CONVECTION3DPA::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

// Grid is empty as the host does not need a compute grid to be specified
RAJA::launch<launch_policy>(
RAJA::launch<launch_policy>( res,
RAJA::LaunchParams(),
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {

Expand Down
4 changes: 3 additions & 1 deletion src/apps/CONVECTION3DPA-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ void CONVECTION3DPA::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun
#if defined(RUN_RAJA_SEQ)
case RAJA_Seq: {

auto res{getHostResource()};

using launch_policy = RAJA::LaunchPolicy<RAJA::seq_launch_t>;

using outer_x = RAJA::LoopPolicy<RAJA::seq_exec>;
Expand All @@ -143,7 +145,7 @@ void CONVECTION3DPA::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

// Grid is empty as the host does not need a compute grid to be specified
RAJA::launch<launch_policy>(
RAJA::launch<launch_policy>( res,
RAJA::LaunchParams(),
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {

Expand Down
Loading

0 comments on commit ebe577b

Please sign in to comment.