Skip to content

Commit

Permalink
[SYCLomatic] Refactor CUB migration rule and improve CUB APIs migrati…
Browse files Browse the repository at this point in the history
…on (#348)

Refactor the CUB migration rule.
Migrate cub::DeviceSegmentedReduce::Reduce/Sum/Max/Min with rewriter infrastructure.
Support migration of 4 CUB binary operator: cub::Sum/cub::Min/cub::Max/cub::Equality.
Improve help function dpct::device::segmented_reduce.

Signed-off-by: Wang, Yihan <[email protected]>
  • Loading branch information
yihanwg authored Nov 22, 2022
1 parent d0e2e49 commit 7390f65
Show file tree
Hide file tree
Showing 22 changed files with 550 additions and 343 deletions.
218 changes: 218 additions & 0 deletions clang/lib/DPCT/APINamesCUB.inc
Original file line number Diff line number Diff line change
Expand Up @@ -399,3 +399,221 @@ CONDITIONAL_FACTORY_ENTRY(
false, LITERAL("first"))),
LITERAL("1")),
false, "wait")))))))

// cub::DeviceSegmentedReduce::Reduce
CONDITIONAL_FACTORY_ENTRY(
CheckCubRedundantFunctionCall(),
REMOVE_API_FACTORY_ENTRY("cub::DeviceSegmentedReduce::Reduce"),
REMOVE_CUB_TEMP_STORAGE_FACTORY(FEATURE_REQUEST_FACTORY(
HelperFeatureEnum::DplExtrasDpcppExtensions_segmented_reduce,
HEADER_INSERT_FACTORY(
HeaderType::HT_DPL_Utils,
WARNING_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Reduce",
CONDITIONAL_FACTORY_ENTRY(
makeCheckAnd(CheckArgCountGreaterThan(9),
makeCheckNot(CheckArgIsDefaultCudaStream(9))),
CONDITIONAL_FACTORY_ENTRY(
checkArgCanMappingToSyclNativeBinaryOp(7),
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Reduce",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
STREAM(9), ARG(2), ARG(3), ARG(4), ARG(5),
ARG(6), ARG(7), ARG(8))),
WARNING_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Reduce",
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Reduce",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
STREAM(9), ARG(2), ARG(3), ARG(4), ARG(5),
ARG(6), LITERAL("dpct_placeholder"),
ARG(8))),
Diagnostics::UNSUPPORTED_BINARY_OPERATION)),
CONDITIONAL_FACTORY_ENTRY(
checkArgCanMappingToSyclNativeBinaryOp(7),
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Reduce",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
QUEUESTR, ARG(2), ARG(3), ARG(4), ARG(5),
ARG(6), ARG(7), ARG(8))),
WARNING_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Reduce",
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Reduce",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
QUEUESTR, ARG(2), ARG(3), ARG(4), ARG(5),
ARG(6), LITERAL("dpct_placeholder"),
ARG(8))),
Diagnostics::UNSUPPORTED_BINARY_OPERATION))),
Diagnostics::REDUCE_PERFORMANCE_TUNE)))))

// cub::DeviceSegmentedReduce::Sum
CONDITIONAL_FACTORY_ENTRY(
CheckCubRedundantFunctionCall(),
REMOVE_API_FACTORY_ENTRY("cub::DeviceSegmentedReduce::Sum"),
REMOVE_CUB_TEMP_STORAGE_FACTORY(FEATURE_REQUEST_FACTORY(
HelperFeatureEnum::DplExtrasDpcppExtensions_segmented_reduce,
HEADER_INSERT_FACTORY(
HeaderType::HT_DPL_Utils,
WARNING_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Sum",
CONDITIONAL_FACTORY_ENTRY(
makeCheckAnd(CheckArgCountGreaterThan(9),
makeCheckNot(CheckArgIsDefaultCudaStream(9))),
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Sum",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
STREAM(9), ARG(2), ARG(3), ARG(4), ARG(5), ARG(6),
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getClNamespace() + "plus",
LITERAL(""))),
ZERO_INITIALIZER(TYPENAME(STATIC_MEMBER_EXPR(
TEMPLATED_NAME("std::iterator_traits",
CALL("decltype", ARG(3))),
LITERAL("value_type")))))),
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Sum",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
QUEUESTR, ARG(2), ARG(3), ARG(4), ARG(5), ARG(6),
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getClNamespace() + "plus",
LITERAL(""))),
ZERO_INITIALIZER(TYPENAME(STATIC_MEMBER_EXPR(
TEMPLATED_NAME("std::iterator_traits",
CALL("decltype", ARG(3))),
LITERAL("value_type"))))))),
Diagnostics::REDUCE_PERFORMANCE_TUNE)))))

// cub::DeviceSegmentedReduce::Min
CONDITIONAL_FACTORY_ENTRY(
CheckCubRedundantFunctionCall(),
REMOVE_API_FACTORY_ENTRY("cub::DeviceSegmentedReduce::Min"),
REMOVE_CUB_TEMP_STORAGE_FACTORY(FEATURE_REQUEST_FACTORY(
HelperFeatureEnum::DplExtrasDpcppExtensions_segmented_reduce,
HEADER_INSERT_FACTORY(
HeaderType::HT_DPL_Utils,
HEADER_INSERT_FACTORY(
HeaderType::HT_STD_Numeric_Limits,
WARNING_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Min",
CONDITIONAL_FACTORY_ENTRY(
makeCheckAnd(
CheckArgCountGreaterThan(9),
makeCheckNot(CheckArgIsDefaultCudaStream(9))),
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Min",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
STREAM(9), ARG(2), ARG(3), ARG(4), ARG(5),
ARG(6),
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getClNamespace() + "minimum",
LITERAL(""))),
CALL(STATIC_MEMBER_EXPR(
TEMPLATED_NAME(
"std::numeric_limits",
TYPENAME(STATIC_MEMBER_EXPR(
TEMPLATED_NAME(
"std::iterator_traits",
CALL("decltype", ARG(3))),
LITERAL("value_type")))),
LITERAL("max"))))),
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Min",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
QUEUESTR, ARG(2), ARG(3), ARG(4), ARG(5),
ARG(6),
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getClNamespace() + "minimum",
LITERAL(""))),
CALL(STATIC_MEMBER_EXPR(
TEMPLATED_NAME(
"std::numeric_limits",
TYPENAME(STATIC_MEMBER_EXPR(
TEMPLATED_NAME(
"std::iterator_traits",
CALL("decltype", ARG(3))),
LITERAL("value_type")))),
LITERAL("max")))))),
Diagnostics::REDUCE_PERFORMANCE_TUNE))))))

// cub::DeviceSegmentedReduce::Max
CONDITIONAL_FACTORY_ENTRY(
CheckCubRedundantFunctionCall(),
REMOVE_API_FACTORY_ENTRY("cub::DeviceSegmentedReduce::Max"),
REMOVE_CUB_TEMP_STORAGE_FACTORY(FEATURE_REQUEST_FACTORY(
HelperFeatureEnum::DplExtrasDpcppExtensions_segmented_reduce,
HEADER_INSERT_FACTORY(
HeaderType::HT_DPL_Utils,
HEADER_INSERT_FACTORY(
HeaderType::HT_STD_Numeric_Limits,
WARNING_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Max",
CONDITIONAL_FACTORY_ENTRY(
makeCheckAnd(
CheckArgCountGreaterThan(9),
makeCheckNot(CheckArgIsDefaultCudaStream(9))),
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Max",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
STREAM(9), ARG(2), ARG(3), ARG(4), ARG(5),
ARG(6),
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getClNamespace() + "maximum",
LITERAL(""))),
CALL(STATIC_MEMBER_EXPR(
TEMPLATED_NAME(
"std::numeric_limits",
TYPENAME(STATIC_MEMBER_EXPR(
TEMPLATED_NAME(
"std::iterator_traits",
CALL("decltype", ARG(3))),
LITERAL("value_type")))),
LITERAL("lowest"))))),
CALL_FACTORY_ENTRY(
"cub::DeviceSegmentedReduce::Max",
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getDpctNamespace() +
"device::segmented_reduce",
LITERAL("128")),
QUEUESTR, ARG(2), ARG(3), ARG(4), ARG(5),
ARG(6),
CALL(TEMPLATED_CALLEE_WITH_ARGS(
MapNames::getClNamespace() + "maximum",
LITERAL(""))),
CALL(STATIC_MEMBER_EXPR(
TEMPLATED_NAME(
"std::numeric_limits",
TYPENAME(STATIC_MEMBER_EXPR(
TEMPLATED_NAME(
"std::iterator_traits",
CALL("decltype", ARG(3))),
LITERAL("value_type")))),
LITERAL("lowest")))))),
Diagnostics::REDUCE_PERFORMANCE_TUNE))))))
14 changes: 14 additions & 0 deletions clang/lib/DPCT/APINamesTemplateType.inc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@ TYPE_REWRITE_ENTRY("cub::ConstantInputIterator",
HEADER_INSERTION_FACTORY(HeaderType::HT_DPL_Utils,
TYPE_FACTORY(STR("dpct::constant_iterator"), TEMPLATE_ARG(0))))

TYPE_REWRITE_ENTRY("cub::Sum",
TYPE_FACTORY(STR(MapNames::getClNamespace() + "plus"),
STR("")))

TYPE_REWRITE_ENTRY("cub::Min",
TYPE_FACTORY(STR(MapNames::getClNamespace() + "minimum"),
STR("")))

TYPE_REWRITE_ENTRY("cub::Max",
TYPE_FACTORY(STR(MapNames::getClNamespace() + "maximum"),
STR("")))

TYPE_REWRITE_ENTRY("cub::Equality", TYPE_FACTORY(STR("std::equal_to"), STR("")))

FEATURE_REQUEST_FACTORY(
HelperFeatureEnum::Memory_usm_host_allocator_alias,
TYPE_REWRITE_ENTRY("thrust::system::cuda::experimental::pinned_allocator",
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/DPCT/ASTTraversal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15416,9 +15416,6 @@ void TemplateSpecializationTypeLocRule::registerMatcher(
ast_matchers::MatchFinder &MF) {
auto TargetTypeName = [&]() {
return hasAnyName("thrust::not_equal_to", "thrust::constant_iterator",
"cub::CountingInputIterator",
"cub::TransformInputIterator",
"cub::ConstantInputIterator",
"thrust::system::cuda::experimental::pinned_allocator");
};

Expand Down
Loading

0 comments on commit 7390f65

Please sign in to comment.