Skip to content

Commit

Permalink
#2240: Fix runtime failure in allreduce perf test
Browse files (browse the repository at this point in the history)
  • Loading branch information
JacobDomagala committed Oct 10, 2024
1 parent 74bea76 commit 51ea74e
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 15 deletions.
5 changes: 3 additions & 2 deletions src/vt/collective/reduce/allreduce/rabenseifner.impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ template <
>
void Rabenseifner<DataT, Op, ObjT, finalHandler>::executeFinalHan() {
// Runs the final step of this reducer on the local node: logs a debug line,
// invokes `finalHandler` through `parent_proxy_[this_node_]` with `val_`, and
// then marks this instance as completed.
// NOTE(review): presumably `val_` holds the fully reduced result by the time
// this is called -- confirm against the callers.

// Disabled alternative that delivered `val_` via a theCB() send callback
// instead of a direct invoke:
// theCB()->makeSend<finalHandler>(parent_proxy_[this_node_]).sendTuple(std::make_tuple(val_));
vt_debug_print(terse, allreduce, "Rabenseifner executing final handler\n");
// Direct invoke of the handler on this node's element of the parent proxy.
parent_proxy_[this_node_].template invoke<finalHandler>(val_);
// Set only after the handler call above has been issued.
completed_ = true;
}
Expand Down Expand Up @@ -267,7 +268,7 @@ void Rabenseifner<DataT, Op, ObjT, finalHandler>::scatterReduceIter() {
auto dest = (vdest < nprocs_rem_) ? vdest * 2 : vdest + nprocs_rem_;
vt_debug_print(
terse, allreduce,
"Rabenseifner Part2 (step {}): Sending to Node {} starting with idx = {} and "
"Rabenseifner Part2 (Send step {}): To Node {} starting with idx = {} and "
"count "
"{} \n",
scatter_step_, dest, s_index_[scatter_step_],
Expand Down Expand Up @@ -310,7 +311,7 @@ void Rabenseifner<DataT, Op, ObjT, finalHandler>::scatterReduceIterHandler(

vt_debug_print(
terse, allreduce,
"Rabenseifner Part2 (step {}): scatter_mask_= {} nprocs_pof2_ = {}: "
"Rabenseifner Part2 (Recv step {}): scatter_mask_= {} nprocs_pof2_ = {}: "
"idx = {} from {}\n",
msg->step_, scatter_mask_, nprocs_pof2_, r_index_[msg->step_],
theContext()->getFromNodeCurrentTask()
Expand Down
20 changes: 7 additions & 13 deletions tests/perf/allreduce.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,11 @@
using namespace vt;
using namespace vt::tests::perf::common;

static constexpr std::array<size_t, 8> const payloadSizes = {
64, 128, 2048, 16384, 32768, 524288, 1048576, 2097152};
// static constexpr std::array<size_t, 8> const payloadSizes = {
// 64, 128, 2048, 16384, 32768, 524288, 1048576, 2097152};

static constexpr std::array<size_t, 1> const payloadSizes = {
2097152};

struct MyTest : PerfTestHarness {
MyTest() {
Expand Down Expand Up @@ -109,7 +112,6 @@ VT_PERF_TEST(MyTest, test_reduce) {
data.resize(payload_size, theContext()->getNode() + 1);

theCollective()->barrier();

StartTimer(grp_proxy[my_node_].get()->timer_names_.at(payload_size));
grp_proxy.allreduce<&NodeObj::handlerVec, collective::PlusOp>(data);
}
Expand All @@ -124,16 +126,12 @@ VT_PERF_TEST(MyTest, test_allreduce_rabenseifner) {
using Reducer = collective::reduce::allreduce::Rabenseifner<
DataT, collective::PlusOp, NodeObj, &NodeObj::handlerVec>;

auto grp_proxy = vt::theObjGroup()->makeCollective<Reducer>(
"allreduce_rabenseifner", proxy, num_nodes_, data);
grp_proxy[my_node_].get()->proxy_ = grp_proxy;

for (auto payload_size : payloadSizes) {
data.resize(payload_size, theContext()->getNode() + 1);

theCollective()->barrier();
StartTimer(proxy[my_node_].get()->timer_names_.at(payload_size));
grp_proxy[my_node_].template invoke<&Reducer::allreduce>();
proxy.allreduce_h<&NodeObj::handlerVec, collective::PlusOp>(data);
}
}

Expand All @@ -146,16 +144,12 @@ VT_PERF_TEST(MyTest, test_allreduce_recursive_doubling) {
using Reducer = collective::reduce::allreduce::RecursiveDoubling<
DataT, collective::PlusOp, NodeObj, &NodeObj::handlerVec>;

auto grp_proxy = vt::theObjGroup()->makeCollective<Reducer>(
"allreduce_recursive_doubling", proxy, num_nodes_, data);
grp_proxy[my_node_].get()->proxy_ = grp_proxy;

for (auto payload_size : payloadSizes) {
data.resize(payload_size, theContext()->getNode() + 1);

theCollective()->barrier();
StartTimer(proxy[my_node_].get()->timer_names_.at(payload_size));
grp_proxy[my_node_].template invoke<&Reducer::allreduce>();
proxy.allreduce_h<&NodeObj::handlerVec, collective::PlusOp>(data);
}
}

Expand Down

0 comments on commit 51ea74e

Please sign in to comment.