diff --git a/src/vt/collective/reduce/allreduce/rabenseifner.h b/src/vt/collective/reduce/allreduce/rabenseifner.h index e27b42de8c..8af07bff6a 100644 --- a/src/vt/collective/reduce/allreduce/rabenseifner.h +++ b/src/vt/collective/reduce/allreduce/rabenseifner.h @@ -86,11 +86,13 @@ struct AllreduceRbnMsg int32_t step_ = {}; }; -template +template < + typename DataT, template class Op, typename ObjT, + auto finalHandler> struct Rabenseifner { void initialize( const DataT& data, vt::objgroup::proxy::Proxy proxy, - uint32_t num_nodes) { + vt::objgroup::proxy::Proxy parentProxy, uint32_t num_nodes) { this_node_ = vt::theContext()->getNode(); is_even_ = this_node_ % 2 == 0; val_ = data; @@ -145,8 +147,8 @@ struct Rabenseifner { if constexpr (debug) { fmt::print( - "[{}] Initialize with size = {} num_steps {} \n", this_node_, - w_size_, num_steps_); + "[{}] Initialize with size = {} num_steps {} \n", this_node_, w_size_, + num_steps_); } } @@ -186,7 +188,7 @@ struct Rabenseifner { val_[(val_.size() / 2) + i] = msg->val_[i]; } - // partTwo(); + partTwo(); } void partTwo() { @@ -350,6 +352,7 @@ struct Rabenseifner { NodeType this_node_ = {}; bool is_even_ = false; vt::objgroup::proxy::Proxy proxy_ = {}; + vt::objgroup::proxy::Proxy parentProxy_ = {}; DataT val_ = {}; NodeType vrt_node_ = {}; bool is_part_of_adjustment_group_ = false; diff --git a/src/vt/collective/reduce/allreduce/distance_doubling.h b/src/vt/collective/reduce/allreduce/recursive_doubling.h similarity index 58% rename from src/vt/collective/reduce/allreduce/distance_doubling.h rename to src/vt/collective/reduce/allreduce/recursive_doubling.h index ddef0b28c3..e1ac7873d6 100644 --- a/src/vt/collective/reduce/allreduce/distance_doubling.h +++ b/src/vt/collective/reduce/allreduce/recursive_doubling.h @@ -87,22 +87,23 @@ struct AllreduceDblMsg int32_t step_ = {}; }; -template +template < + typename DataT, template class Op, typename ObjT, + auto finalHandler> struct DistanceDoubling { - void initialize( - const DataT& data, vt::objgroup::proxy::Proxy proxy, - vt::objgroup::proxy::Proxy parentProxy, - uint32_t num_nodes) { + template + DistanceDoubling(NodeType num_nodes, Args&&... args) + : val_(std::forward(args)...), + num_nodes_(num_nodes) { } + + void initialize() { this_node_ = vt::theContext()->getNode(); is_even_ = this_node_ % 2 == 0; - val_ = data; - proxy_ = proxy; - parentProxy_ = parentProxy; - num_steps_ = static_cast(log2(num_nodes)); + num_steps_ = static_cast(log2(num_nodes_)); messages.resize(num_steps_, nullptr); nprocs_pof2_ = 1 << num_steps_; - nprocs_rem_ = num_nodes - nprocs_pof2_; + nprocs_rem_ = num_nodes_ - nprocs_pof2_; is_part_of_adjustment_group_ = this_node_ < (2 * nprocs_rem_); if (is_part_of_adjustment_group_) { if (is_even_) { @@ -114,41 +115,76 @@ struct DistanceDoubling { vrt_node_ = this_node_ - nprocs_rem_; } - w_size_ = data.size(); - - expected_send_ = num_steps_; - expected_recv_ = num_steps_; - steps_sent_.resize(num_steps_, false); steps_recv_.resize(num_steps_, false); + steps_reduced_.resize(num_steps_, false); + + initialized_ = true; + } + + void allreduce( + vt::objgroup::proxy::Proxy proxy, + vt::objgroup::proxy::Proxy parentProxy) { + if (not initialized_) { + initialize(); + } + + proxy_ = proxy; + parent_proxy_ = parentProxy; + + if (nprocs_rem_) { + adjustForPowerOfTwo(); + } else { + reduceIter(); + } } - void partOne() { - if (not nprocs_rem_) { - // we're running on power of 2 number of nodes, proceed to second step - partTwo(); - } else if (is_part_of_adjustment_group_ and not is_even_) { - proxy_[this_node_ - 1].template send<&DistanceDoubling::partOneHandler>( - val_); + void adjustForPowerOfTwo() { + if (is_part_of_adjustment_group_ and not is_even_) { + if constexpr (isdebug) { + fmt::print( + "[{}] Part1: Sending to Node {} \n", this_node_, this_node_ - 1); + } + + proxy_[this_node_ - 1] + .template send<&DistanceDoubling::adjustForPowerOfTwoHandler>(val_); } } - void partOneHandler(AllreduceDblMsg* msg) { - Op(val_, msg->val_); - // for (int i = 0; i < msg->val_.size(); i++) { - // val_[i] += msg->val_[i]; - // } + void adjustForPowerOfTwoHandler(AllreduceDblMsg* msg) { + if constexpr (isdebug) { + std::string data(1024, 0x0); + for (auto val : msg->val_) { + data.append(fmt::format("{} ", val)); + } + fmt::print( + "[{}] Part1 Handler initialized_ = {}: Received data ({}) " + "from {}\n", + this_node_, initialized_, data, theContext()->getFromNodeCurrentTask()); + } + + Op()(val_, msg->val_); - partTwo(); + finished_adjustment_part_ = true; + + reduceIter(); } + bool done() { return step_ == num_steps_ and allMessagesReceived(); } bool isValid() { return (vrt_node_ != -1) and (step_ < num_steps_); } - bool isReady() { + bool allMessagesReceived() { return std::all_of( steps_recv_.cbegin(), steps_recv_.cbegin() + step_, [](const auto val) { return val; }); } - void partTwo() { - if (not isValid() or not isReady()) { + bool isReady() { + return (is_part_of_adjustment_group_ and finished_adjustment_part_) and + step_ == 0 or + allMessagesReceived(); + } + + void reduceIter() { + // Ensure we have received all necessary messages + if (not isReady()) { return; } @@ -158,91 +194,122 @@ struct DistanceDoubling { fmt::print( "[{}] Part2 Step {}: Sending to Node {} \n", this_node_, step_, dest); } - if (step_) { - for (int i = 0; i < val_.size(); ++i) { - val_[i] += messages.at(step_ - 1)->val_[i]; - } - } - proxy_[dest].template send<&DistanceDoubling::partTwoHandler>(val_, step_); + proxy_[dest].template send<&DistanceDoubling::reduceIterHandler>( + val_, step_); mask_ <<= 1; - num_send_++; - steps_sent_[step_] = true; step_++; - if (isReady()) { - partTwo(); + tryReduce(step_ - 1); + + if (done()) { + finalPart(); + } else if (isReady()) { + reduceIter(); } } - void partTwoHandler(AllreduceDblMsg* msg) { - messages.at(msg->step_) = promoteMsg(msg); + void tryReduce(int32_t step) { + if ( + (step < step_) and not steps_reduced_[step] and steps_recv_[step] and + std::all_of( + steps_reduced_.cbegin(), steps_reduced_.cbegin() + step, + [](const auto val) { return val; })) { + Op()(val_, messages.at(step)->val_); + steps_reduced_[step] = true; + } + } + void reduceIterHandler(AllreduceDblMsg* msg) { if constexpr (isdebug) { std::string data(1024, 0x0); for (auto val : msg->val_) { data.append(fmt::format("{} ", val)); } fmt::print( - "[{}] Part2 Step {} mask_= {} nprocs_pof2_ = {}: Received data ({}) " + "[{}] Part2 Step {} initialized_ = {} mask_= {} nprocs_pof2_ = {}: " + "Received data ({}) " "from {}\n", - this_node_, msg->step_, mask_, nprocs_pof2_, data, + this_node_, msg->step_, initialized_, mask_, nprocs_pof2_, data, theContext()->getFromNodeCurrentTask()); } - steps_recv_[msg->step_] = true; - num_recv_++; - if (mask_ < nprocs_pof2_) { - if (isReady()) { - partTwo(); + + // Special case when we receive step 2 message before step 1 is done on this node + if (not finished_adjustment_part_) { + if (not initialized_) { + initialize(); } + + messages.at(msg->step_) = promoteMsg(msg); + steps_recv_[msg->step_] = true; + + return; + } + + messages.at(msg->step_) = promoteMsg(msg); + steps_recv_[msg->step_] = true; + + tryReduce(msg->step_); + + if ((mask_ < nprocs_pof2_) and isReady()) { + reduceIter(); + + } else if (done()) { + finalPart(); } } - void partThree() { + void sendToExcludedNodes() { if (is_part_of_adjustment_group_ and is_even_) { if constexpr (isdebug) { fmt::print( - "[{}] Part4 : Sending to Node {} \n", this_node_, this_node_ + 1); + "[{}] Part3 : Sending to Node {} \n", this_node_, this_node_ + 1); } - proxy_[this_node_ + 1].template send<&DistanceDoubling::partThreeHandler>( - val_); + proxy_[this_node_ + 1] + .template send<&DistanceDoubling::sendToExcludedNodesHandler>(val_); } } - void partThreeHandler(AllreduceDblMsg* msg) { val_ = msg->val_; } + void sendToExcludedNodesHandler(AllreduceDblMsg* msg) { + val_ = msg->val_; + + parent_proxy_[this_node_].template invoke(val_); + } + void finalPart() { - if (vrt_node_ != -1) { - for (int i = 0; i < val_.size(); ++i) { - val_[i] += messages.at(step_ - 1)->val_[i]; - } + if (completed_) { + return; + } + + if (nprocs_rem_) { + sendToExcludedNodes(); } - parentProxy_[this_node_] .template invoke(val_); + parent_proxy_[this_node_].template invoke(val_); + completed_ = true; } NodeType this_node_ = {}; + uint32_t num_nodes_ = {}; bool is_even_ = false; vt::objgroup::proxy::Proxy proxy_ = {}; - vt::objgroup::proxy::Proxy parentProxy_ = {}; + vt::objgroup::proxy::Proxy parent_proxy_ = {}; DataT val_ = {}; NodeType vrt_node_ = {}; + bool initialized_ = false; bool is_part_of_adjustment_group_ = false; + bool finished_adjustment_part_ = false; int32_t num_steps_ = {}; int32_t nprocs_pof2_ = {}; int32_t nprocs_rem_ = {}; int32_t mask_ = 1; - bool startedPartThree_ = false; - size_t w_size_ = {}; int32_t step_ = 0; - int32_t num_send_ = 0; - int32_t expected_send_ = 0; - int32_t num_recv_ = 0; - int32_t expected_recv_ = 0; + bool completed_ = false; std::vector steps_recv_ = {}; - std::vector steps_sent_ = {}; + std::vector steps_reduced_ = {}; std::vector>> messages = {}; }; diff --git a/src/vt/objgroup/manager.impl.h b/src/vt/objgroup/manager.impl.h index f66dbc6250..5129f1114e 100644 --- a/src/vt/objgroup/manager.impl.h +++ b/src/vt/objgroup/manager.impl.h @@ -41,7 +41,6 @@ //@HEADER */ -#include #if !defined INCLUDED_VT_OBJGROUP_MANAGER_IMPL_H #define INCLUDED_VT_OBJGROUP_MANAGER_IMPL_H @@ -58,7 +57,6 @@ #include "vt/collective/collective_alg.h" #include "vt/messaging/active.h" #include "vt/elm/elm_id_bits.h" -#include "vt/collective/reduce/allreduce/distance_doubling.h" #include "vt/collective/reduce/allreduce/rabenseifner.h" #include "vt/messaging/message/smart_ptr.h" #include @@ -281,51 +279,42 @@ ObjGroupManager::allreduce_r(ProxyType proxy, const DataT& data) { } // using Reducer = collective::reduce::allreduce::Rabenseifner; - using Reducer = collective::reduce::allreduce::DistanceDoubling; - - return PendingSendType{[=] { - auto grp_proxy = - vt::theObjGroup()->makeCollective("allreduce_rabenseifner"); - if constexpr (std::is_same_v< - Reducer, - collective::reduce::allreduce::DistanceDoubling>) { - grp_proxy[this_node].template invoke<&Reducer::initialize>( - data, grp_proxy, num_nodes); - - grp_proxy[this_node].template invoke<&Reducer::partOne>(); - - // vt::runInEpochCollective( - // [=] { grp_proxy[this_node].template invoke<&Reducer::partTwo>(); }); - - // grp_proxy[this_node].template invoke<&Reducer::finalPart>(); - - // if (grp_proxy.get()->nprocs_rem_) { - // vt::runInEpochCollective( - // [=] { grp_proxy[this_node].template invoke<&Reducer::partThree>(); }); - // } - } else if constexpr (std::is_same_v< - Reducer, - collective::reduce::allreduce::Rabenseifner< - DataT>>) { - grp_proxy[this_node].template invoke<&Reducer::initialize>( - data, grp_proxy, num_nodes); - - if (grp_proxy.get()->nprocs_rem_) { - vt::runInEpochCollective( - [=] { grp_proxy[this_node].template invoke<&Reducer::partOne>(); }); - } - - vt::runInEpochCollective( - [=] { grp_proxy[this_node].template invoke<&Reducer::partTwo>(); }); - - vt::runInEpochCollective( - [=] { grp_proxy[this_node].template invoke<&Reducer::partThree>(); }); - - if (grp_proxy.get()->nprocs_rem_) { - vt::runInEpochCollective( - [=] { grp_proxy[this_node].template invoke<&Reducer::partFour>(); }); - } - } + // using Reducer = collective::reduce::allreduce::DistanceDoubling; + + return PendingSendType{theTerm()->getEpoch(), [=] { + // auto grp_proxy = + // vt::theObjGroup()->makeCollective("allreduce_rabenseifner"); + // if constexpr (std::is_same_v< + // Reducer, + // collective::reduce::allreduce::DistanceDoubling>) { + // grp_proxy[this_node].template invoke<&Reducer::initialize>( + // data, grp_proxy, proxy, num_nodes); + + // grp_proxy[this_node].template invoke<&Reducer::partOne>(); + + // } else if constexpr (std::is_same_v< + // Reducer, + // collective::reduce::allreduce::Rabenseifner< + // DataT, Op, ObjT, f>>) { + // grp_proxy[this_node].template invoke<&Reducer::initialize>( + // data, grp_proxy, num_nodes); + + // if (grp_proxy.get()->nprocs_rem_) { + // vt::runInEpochCollective( + // [=] { grp_proxy[this_node].template invoke<&Reducer::partOne>(); }); + // } + + // vt::runInEpochCollective( + // [=] { grp_proxy[this_node].template invoke<&Reducer::partTwo>(); }); + + // vt::runInEpochCollective( + // [=] { grp_proxy[this_node].template invoke<&Reducer::partThree>(); }); + + // if (grp_proxy.get()->nprocs_rem_) { + // vt::runInEpochCollective( + // [=] { grp_proxy[this_node].template invoke<&Reducer::partFour>(); }); + // } + // } }}; } diff --git a/tests/perf/allreduce.cc b/tests/perf/allreduce.cc index 0d97bbe698..645f83136d 100644 --- a/tests/perf/allreduce.cc +++ b/tests/perf/allreduce.cc @@ -41,11 +41,16 @@ //@HEADER */ #include "common/test_harness.h" +#include "vt/collective/collective_alg.h" +#include "vt/collective/reduce/operators/functors/plus_op.h" +#include "vt/configs/error/config_assert.h" #include "vt/context/context.h" #include #include #include #include +#include +#include #include @@ -53,58 +58,131 @@ using namespace vt; using namespace vt::tests::perf::common; static constexpr int num_iters = 1; +struct MyTest : PerfTestHarness { + void SetUp() override { + PerfTestHarness::SetUp(); + data.resize(1 << 4); + for (auto& val : data) { + val = theContext()->getNode() + 1; + } + } -struct MyTest : PerfTestHarness { }; + std::vector data; +}; struct NodeObj { explicit NodeObj(MyTest* test_obj) : test_obj_(test_obj) { } - void initialize() { proxy_ = vt::theObjGroup()->getProxy(this); + void initialize() { + proxy_ = vt::theObjGroup()->getProxy(this); + // data_["Node"] = theContext()->getNode(); } } - struct MyMsg : vt::Message {}; + struct MyMsg : vt::Message { }; - void reduceComplete(std::vector in) { - reduce_counter_++; - test_obj_->StopTimer(fmt::format("{} reduce", i)); - test_obj_->GetMemoryUsage(); - if (i < num_iters) { - i++; - auto this_node = theContext()->getNode(); - proxy_[this_node].send(); - } else if (theContext()->getNode() == 0) { - theTerm()->enableTD(); - } + void recursiveDoubling(std::vector in) { + // std::string printer(1024, 0x0); + // printer.append(fmt::format("\n[{}]: recursiveDoubling done! ", theContext()->getNode())); + + // for (int node = 0; node < theContext()->getNumNodes(); ++node) { + // if (node == theContext()->getNode()) { + + // for (auto val : in) { + // printer.append(fmt::format("{} ", val)); + // } + + // fmt::print("{}\n", printer); + + // theCollective()->barrier(); + // } + // } + + // fmt::print("\n"); + // const auto p = theContext()->getNumNodes(); + // const auto expected = (p * (p + 1)) / 2; + // for (auto val : in) { + // vtAssert(val == expected, "FAILURE!"); + // } + } + + void newReduceComplete(std::vector in) { + // fmt::print( + // "\n[{}]: allreduce_h done! (Size == {}) Results are ...\n", + // theContext()->getNode(), in.size()); + // const auto p = theContext()->getNumNodes(); + // const auto expected = (p * (p + 1)) / 2; + // for (auto val : in) { + // vtAssert(val == expected, "FAILURE!"); + // } + // for (int node = 0; node < theContext()->getNumNodes(); ++node) { + // if (node == theContext()->getNode()) { + // std::string printer(128, 0x0); + // for (auto val : in) { + // printer.append(fmt::format("{} ", val)); + // } + + // fmt::print("{}\n", printer); + + // theCollective()->barrier(); + // } + // } + + // fmt::print("\n"); } - void perfReduce(MyMsg* in_msg) { - test_obj_->StartTimer(fmt::format("{} reduce", i)); + void reduceComplete(std::vector in) { + // fmt::print( + // "[{}]: allreduce done! Results are ...\n", theContext()->getNode()); + // for (auto val : in) { + // fmt::print("{} ", val); + // } - proxy_.allreduce<&NodeObj::reduceComplete, collective::PlusOp>(data_); + // fmt::print("\n"); } private: MyTest* test_obj_ = nullptr; vt::objgroup::proxy::Proxy proxy_ = {}; - int reduce_counter_ = -1; - int i = 0; - std::vector data_ = {}; }; VT_PERF_TEST(MyTest, test_reduce) { - auto grp_proxy = vt::theObjGroup()->makeCollective( - "test_reduce", this - ); + auto grp_proxy = + vt::theObjGroup()->makeCollective("test_allreduce", this); - if (theContext()->getNode() == 0) { - theTerm()->disableTD(); - } + vt::runInEpochCollective([=] { + grp_proxy.allreduce<&NodeObj::reduceComplete, collective::PlusOp>(data); + }); +} - std::vector data(1024, theContext()->getNode()); - grp_proxy.allreduce<&NodeObj::reduceComplete, collective::PlusOp>(data); +VT_PERF_TEST(MyTest, test_allreduce_rabenseifner) { + auto proxy = + vt::theObjGroup()->makeCollective("test_allreduce_new", this); - if (theContext()->getNode() == 0) { - theTerm()->enableTD(); - } + using DataT = decltype(data); + using Reducer = collective::reduce::allreduce::Rabenseifner< + DataT, collective::PlusOp, NodeObj, &NodeObj::newReduceComplete>; + + auto grp_proxy = + vt::theObjGroup()->makeCollective("allreduce_rabenseifner"); + vt::runInEpochCollective([=] { + grp_proxy[my_node_].template invoke<&Reducer::initialize>( + data, grp_proxy, proxy, num_nodes_); + grp_proxy[my_node_].template invoke<&Reducer::partOne>(); + }); +} + +VT_PERF_TEST(MyTest, test_allreduce_recursive_doubling) { + auto proxy = + vt::theObjGroup()->makeCollective("test_allreduce_new_2", this); + + using DataT = decltype(data); + using Reducer = collective::reduce::allreduce::DistanceDoubling< + DataT, collective::PlusOp, NodeObj, &NodeObj::recursiveDoubling>; + + auto grp_proxy = vt::theObjGroup()->makeCollective( + "allreduce_recursive_doubling", num_nodes_, data); + vt::runInEpochCollective([=] { + grp_proxy[my_node_].template invoke<&Reducer::allreduce>(grp_proxy, proxy); + }); } VT_PERF_TEST_MAIN() diff --git a/tests/perf/reduce.cc b/tests/perf/reduce.cc index 6b0a6416b4..d650ef6c90 100644 --- a/tests/perf/reduce.cc +++ b/tests/perf/reduce.cc @@ -41,14 +41,9 @@ //@HEADER */ #include "common/test_harness.h" -#include "vt/collective/collective_alg.h" -#include "vt/configs/error/config_assert.h" -#include "vt/context/context.h" -#include #include #include #include -#include #include INCLUDE_FMT_CORE @@ -64,69 +59,48 @@ struct MyTest : PerfTestHarness { struct NodeObj { explicit NodeObj(MyTest* test_obj) : test_obj_(test_obj) { } - void initialize() { - proxy_ = vt::theObjGroup()->getProxy(this); - // data_["Node"] = theContext()->getNode(); } + void initialize() { proxy_ = vt::theObjGroup()->getProxy(this); } + + struct MyMsg : vt::Message {}; + + void reduceComplete() { + reduce_counter_++; + test_obj_->StopTimer(fmt::format("{} reduce", i)); + test_obj_->GetMemoryUsage(); + if (i < num_iters) { + i++; + auto this_node = theContext()->getNode(); + proxy_[this_node].send(); + } else if (theContext()->getNode() == 0) { + theTerm()->enableTD(); + } } - struct MyMsg : vt::Message { }; - void newReduceComplete(std::vector in) { - // fmt::print( - // "\n[{}]: allreduce_h done! (Size == {}) Results are ...\n", - // theContext()->getNode(), in.size()); - // const auto p = theContext()->getNumNodes(); - // const auto expected = (p * (p + 1)) / 2; - // for (auto val : in) { - // vtAssert(val == expected, "FAILURE!"); - // } - // for (int node = 0; node < theContext()->getNumNodes(); ++node) { - // if (node == theContext()->getNode()) { - // std::string printer(128, 0x0); - // for (auto val : in) { - // printer.append(fmt::format("{} ", val)); - // } - - // fmt::print("{}\n", printer); - - // theCollective()->barrier(); - // } - // } - - // fmt::print("\n"); - } - - void reduceComplete(std::vector in) { - // fmt::print( - // "[{}]: allreduce done! Results are ...\n", theContext()->getNode()); - // for (auto val : in) { - // fmt::print("{} ", val); - // } - - // fmt::print("\n"); + void perfReduce(MyMsg* in_msg) { + test_obj_->StartTimer(fmt::format("{} reduce", i)); + proxy_.allreduce<&NodeObj::reduceComplete>(); } private: MyTest* test_obj_ = nullptr; vt::objgroup::proxy::Proxy proxy_ = {}; + int reduce_counter_ = -1; + int i = 0; }; VT_PERF_TEST(MyTest, test_reduce) { - auto grp_proxy = - vt::theObjGroup()->makeCollective("test_allreduce", this); - - grp_proxy.allreduce<&NodeObj::reduceComplete, collective::PlusOp>(data); -} + auto grp_proxy = vt::theObjGroup()->makeCollective( + "test_reduce", this + ); -VT_PERF_TEST(MyTest, test_allreduce) { - auto grp_proxy = - vt::theObjGroup()->makeCollective("test_allreduce_new", this); + if (theContext()->getNode() == 0) { + theTerm()->disableTD(); + } - grp_proxy.allreduce_h<&NodeObj::newReduceComplete, collective::PlusOp>(data); -} + grp_proxy[my_node_].invoke<&NodeObj::initialize>(); -VT_PERF_TEST(MyTest, test_epoch_collective) { - vt::runInEpochCollective([] {}); - vt::runInEpochCollective([] {}); + using MsgType = typename NodeObj::MyMsg; + grp_proxy[my_node_].send(); } VT_PERF_TEST_MAIN() diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index a1195795e7..6a828b807b 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -609,3 +609,4 @@ VT_PERF_TEST(SendTest, test_collection_send) { } VT_PERF_TEST_MAIN() +